diff --git a/.github/workflows/append-release-cmake.yml b/.github/workflows/append-release-cmake.yml new file mode 100644 index 000000000000..91a4e26e1c29 --- /dev/null +++ b/.github/workflows/append-release-cmake.yml @@ -0,0 +1,80 @@ +name: Append Release CMake + +on: + workflow_dispatch: + inputs: + ref: + description: 'Ref to be used as release' + default: 'latest' + required: true + type: string + workflow_call: + inputs: + ref: + description: 'Ref to be used as release' + default: 'latest' + required: true + type: string + +jobs: + generate_cmake_files: + runs-on: ubuntu-latest + steps: + - name: Checkout TileDB + uses: actions/checkout@v3 + + - name: Make release and output directories + run: | + mkdir release output + + - name: Github release data + id: release_data + uses: KevinRohn/github-full-release-data@v2.0.4 + with: + # repository: 'TileDB-Inc/TileDB' + version: ${{ inputs.ref }} + asset-file: '*.zip,*.tar.gz' + asset-output: './release/' + + - name: Render template + run: | + PATTERN="tiledb-([^-]+)-([^-]+)(-noavx2)?-([^-]+).(tar.gz|zip)$" + RELLIST="output/releases.csv" + MODULE="output/DownloadPrebuiltTileDB.cmake" + cp cmake/inputs/DownloadPrebuiltTileDB.cmake $MODULE + echo "platform,url,sha256" > $RELLIST + + for FILE in $(ls release) + do + if [[ $FILE =~ $PATTERN ]] + then + OS=${BASH_REMATCH[1]^^} + ARCH=${BASH_REMATCH[2]^^} + NOAVX2=${BASH_REMATCH[3]^^} + PLATFORM=${OS}-${ARCH}${NOAVX2} + + URL="${{ github.server_url }}/${{ github.repository }}/releases/download/${{ inputs.ref }}/$FILE" + HASH=$(cat release/$FILE.sha256 | cut -d " " -f 1) + + echo "${PLATFORM},${URL},${HASH}" >> $RELLIST + fi + done + + SOURCE_FILE_NAME=$(ls release/tiledb-source-*.tar.gz) + URL_TILEDB_SOURCE="${{ github.server_url }}/${{ github.repository }}/releases/download/${{ inputs.ref }}/$(basename $SOURCE_FILE_NAME)" + HASH_TILEDB_SOURCE=$(cat $SOURCE_FILE_NAME.sha256 | cut -d " " -f 1) + + echo "source,${URL_TILEDB_SOURCE},${HASH_TILEDB_SOURCE}" >> $RELLIST + + 
HASH=$(sha256sum $RELLIST | cut -d " " -f 1) + echo $HASH > $RELLIST.sha256 + + cat $RELLIST + + - name: Upload template to release + uses: svenstaro/upload-release-action@v2 + with: + file: output/* + tag: ${{ steps.release_data.outputs.tag_name }} + overwrite: true + file_glob: true \ No newline at end of file diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index 1bcc9623790b..31c15addeda8 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -28,6 +28,8 @@ jobs: runs-on: ubuntu-22.04 timeout-minutes: 90 name: Build Docs + env: + VCPKG_BINARY_SOURCES: 'clear;x-gha,readwrite' steps: - uses: actions/checkout@v3 - name: 'Print env' @@ -41,6 +43,13 @@ jobs: printenv shell: bash + - name: Set environment variables for vcpkg binary caching + uses: actions/github-script@v6 + with: + script: | + core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); + core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); + - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/build-ubuntu20.04-backwards-compatibility.yml b/.github/workflows/build-ubuntu20.04-backwards-compatibility.yml index df0dbbab047c..d7bfa7a884eb 100644 --- a/.github/workflows/build-ubuntu20.04-backwards-compatibility.yml +++ b/.github/workflows/build-ubuntu20.04-backwards-compatibility.yml @@ -59,7 +59,7 @@ jobs: - ubuntu-20.04 # Note: v2_1_0 arrays were never created so its currently skipped # Note: This matrix is used to set the value of TILEDB_COMPATIBILITY_VERSION - tiledb_version: ["v1_4_0", "v1_5_0", "v1_6_0", "v1_7_0", "v2_0_0", "v2_2_0", "v2_2_3", "v2_3_0", "v2_4_0", "v2_5_0", "v2_6_0", "v2_7_0", "v2_8_3", "v2_9_1", "v2_10_0", "v2_11_0", "v2_12_3", "v2_13_2", "v2_14_0", "v2_15_0", "v2_16_3", "v2_17_5", "v2_18_3", "v2_19_1"] + tiledb_version: ["v1_4_0", "v1_5_0", "v1_6_0", "v1_7_0", "v2_0_0", "v2_2_0", "v2_2_3", "v2_3_0", "v2_4_0", "v2_5_0", "v2_6_0", 
"v2_7_0", "v2_8_3", "v2_9_1", "v2_10_0", "v2_11_0", "v2_12_3", "v2_13_2", "v2_14_0", "v2_15_0", "v2_16_3", "v2_17_5", "v2_18_3", "v2_19_1", "v2_20_1"] timeout-minutes: 30 name: ${{ matrix.tiledb_version }} steps: diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 2b898a1a333a..0fb5879d53ac 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -60,8 +60,22 @@ jobs: TILEDB_ARROW_TESTS: ${{ matrix.TILEDB_ARROW_TESTS }} TILEDB_WEBP: ${{ matrix.TILEDB_WEBP }} TILEDB_CMAKE_BUILD_TYPE: 'Release' + # On windows-2019 we are using the Visual Studio generator, which is multi-config and places the build artifacts in a subdirectory + CONFIG_PATH_FIXUP: ${{ matrix.os == 'windows-2019' && 'Release' || '' }} VCPKG_BINARY_SOURCES: 'clear;x-gha,readwrite' steps: + # By default Visual Studio chooses the earliest installed toolset version + # for the main build and vcpkg chooses the latest. Force it to use the + # latest (14.39 currently). 
+ - name: Setup MSVC toolset (VS 2022) + uses: TheMrMilchmann/setup-msvc-dev@v3 + if: matrix.os == 'windows-2022' + with: + arch: x64 + toolset: 14.39 + - name: Install Ninja (VS 2022) + uses: seanmiddleditch/gha-setup-ninja@v4 + if: matrix.os == 'windows-2022' - name: 'tiledb env prep' run: | $env:BUILD_BUILDDIRECTORY = $env:GITHUB_WORKSPACE.replace("TileDB\TileDB","tdbbd") # 't'ile'db' 'b'uild 'd'ir @@ -141,7 +155,7 @@ jobs: # allow double-checking path cmd /c "echo $PATH" - $bootstrapOptions = $env:TILEDB_BASE_BOOTSTRAP_OPTIONS + $bootstrapOptions = $env:TILEDB_BASE_BOOTSTRAP_OPTIONS + " -CMakeGenerator ${{ matrix.os == 'windows-2022' && 'Ninja' || '`"Visual Studio 16 2019`"' }}" if ($env:TILEDB_S3 -eq "ON") { $bootstrapOptions = "-EnableS3 " + $bootstrapOptions } @@ -239,9 +253,7 @@ jobs: } # CMake exits with non-0 status if there are any warnings during the build, so - cmake --build $env:BUILD_BUILDDIRECTORY\tiledb -j --target tiledb_unit --config $CMakeBuildType -- /verbosity:minimal - cmake --build $env:BUILD_BUILDDIRECTORY\tiledb -j --target tiledb_regression --config $CMakeBuildType -- /verbosity:minimal - cmake --build $env:BUILD_BUILDDIRECTORY\tiledb -j --target all_link_complete --config $CMakeBuildType -- /verbosity:minimal + cmake --build $env:BUILD_BUILDDIRECTORY\tiledb -j --target tiledb_unit unit_vfs tiledb_regression all_link_complete --config $CMakeBuildType if ($env:TILEDB_AZURE -eq "ON") { if($env.TILEDB_USE_CUSTOM_NODE_JS) { @@ -272,15 +284,23 @@ jobs: # Actually run tests - $cmds = "$env:BUILD_BUILDDIRECTORY\tiledb\test\$CMakeBuildType\tiledb_unit.exe -d=yes" + $cmds = "$env:BUILD_BUILDDIRECTORY\tiledb\test\$env:CONFIG_PATH_FIXUP\tiledb_unit.exe -d=yes" Write-Host "cmds: '$cmds'" Invoke-Expression $cmds if ($LastExitCode -ne 0) { Write-Host "Tests failed. 
tiledb_unit exit status: " $LastExitCocde $host.SetShouldExit($LastExitCode) } + + $cmds = "$env:BUILD_BUILDDIRECTORY\tiledb\tiledb\sm\filesystem\test\$env:CONFIG_PATH_FIXUP\unit_vfs -d=yes" + Write-Host "cmds: '$cmds'" + Invoke-Expression $cmds + if ($LastExitCode -ne 0) { + Write-Host "Tests failed. tiledb_vfs exit status: " $LastExitCode + $host.SetShouldExit($LastExitCode) + } - $cmds = "$env:BUILD_BUILDDIRECTORY\tiledb\test\ci\$CMakeBuildType\test_assert.exe -d=yes" + $cmds = "$env:BUILD_BUILDDIRECTORY\tiledb\test\ci\$env:CONFIG_PATH_FIXUP\test_assert.exe -d=yes" Invoke-Expression $cmds if ($LastExitCode -ne 0) { Write-Host "Tests failed. test_assert exit status: " $LastExitCocde @@ -302,7 +322,7 @@ jobs: $TestAppDir = (Join-Path $env:BUILD_BUILDDIRECTORY "tiledb\examples\c_api") $TestAppDataDir = (Join-Path $env:BUILD_BUILDDIRECTORY "tiledb\examples\c_api\test_app_data") - Get-ChildItem (Join-Path $env:BUILD_BUILDDIRECTORY "tiledb\examples\c_api\$CMakeBuildType") -Filter *.exe | + Get-ChildItem (Join-Path $env:BUILD_BUILDDIRECTORY "tiledb\examples\c_api\$env:CONFIG_PATH_FIXUP\") -Filter *.exe | Foreach-Object { try { Set-Location -path $TestAppDir @@ -335,7 +355,7 @@ jobs: $TestAppDir = (Join-Path $env:BUILD_BUILDDIRECTORY "tiledb\examples\cpp_api") $TestAppDataDir = (Join-Path $env:BUILD_BUILDDIRECTORY "tiledb\examples\cpp_api\test_app_data") - Get-ChildItem (Join-Path $env:BUILD_BUILDDIRECTORY "tiledb\examples\cpp_api\$CMakeBuildType") -Filter *.exe | + Get-ChildItem (Join-Path $env:BUILD_BUILDDIRECTORY "tiledb\examples\cpp_api\$env:CONFIG_PATH_FIXUP\") -Filter *.exe | Foreach-Object { try { Set-Location -path $TestAppDir @@ -380,12 +400,12 @@ jobs: cd build # Build zip artifact - cmake -A X64 -DCMAKE_PREFIX_PATH="$env:BUILD_BUILDDIRECTORY\dist;$env:BUILD_BUILDDIRECTORY\vcpkg_installed\x64-windows" .. 
+ cmake ${{ matrix.os != 'windows-2019' && '-G Ninja' || '' }} -DCMAKE_BUILD_TYPE="$CMakeBuildType" -DCMAKE_PREFIX_PATH="$env:BUILD_BUILDDIRECTORY\dist;$env:BUILD_BUILDDIRECTORY\vcpkg_installed\x64-windows" .. cmake --build . --config $CMakeBuildType -v - #.\$CMakeBuildType\ExampleExe.exe - $cmd = ".\$CMakeBuildType\ExampleExe.exe" + #.\$env:CONFIG_PATH_FIXUP\ExampleExe.exe + $cmd = ".\$env:CONFIG_PATH_FIXUP\ExampleExe.exe" Write-Host "cmd: '$cmd'" Invoke-Expression $cmd diff --git a/.github/workflows/ci-linux_mac.yml b/.github/workflows/ci-linux_mac.yml index 4be5997183ff..0b9dd95d6096 100644 --- a/.github/workflows/ci-linux_mac.yml +++ b/.github/workflows/ci-linux_mac.yml @@ -63,7 +63,7 @@ env: CC: ${{ inputs.matrix_compiler_cc }} CFLAGS: ${{ inputs.matrix_compiler_cflags }} CXXFLAGS: ${{ inputs.matrix_compiler_cxxflags }} - bootstrap_args: "--enable-ccache ${{ inputs.bootstrap_args }} ${{ inputs.asan && '--enable-sanitizer=address' || '' }}" + bootstrap_args: "--enable-ccache --vcpkg-base-triplet=x64-${{ startsWith(inputs.matrix_image, 'ubuntu-') && 'linux' || 'osx' }} ${{ inputs.bootstrap_args }} ${{ inputs.asan && '--enable-sanitizer=address' || '' }}" VCPKG_BINARY_SOURCES: 'clear;x-gha,readwrite' SCCACHE_GHA_ENABLED: "true" @@ -191,6 +191,7 @@ jobs: ./tiledb/test/regression/tiledb_regression -d yes ./tiledb/test/ci/test_assert -d yes ./tiledb/test/tiledb_unit -d yes | awk '/1: ::set-output/{sub(/.*1: /, ""); print; next} 1' + ./tiledb/tiledb/sm/filesystem/test/unit_vfs -d yes | awk '/1: ::set-output/{sub(/.*1: /, ""); print; next} 1' ################################################### # Stop helper processes, if applicable diff --git a/.github/workflows/ci-rest.yml b/.github/workflows/ci-rest.yml index 4c69656c1de5..0f5caab07213 100644 --- a/.github/workflows/ci-rest.yml +++ b/.github/workflows/ci-rest.yml @@ -1,8 +1,14 @@ name: REST CI on: - - workflow_call - - workflow_dispatch + workflow_call: + workflow_dispatch: + + push: + branches: + - dev + - 
release-* + - refs/tags/* jobs: rest-ci: @@ -10,11 +16,11 @@ jobs: steps: # For easy access to lookup dispatched CI job. - - name: Print URL for TileDB-REST-CI actions - run: echo https://github.com/TileDB-Inc/TileDB-REST-CI/actions + - name: Print URL for REST CI actions + run: echo https://github.com/TileDB-Inc/TileDB-Internal/actions # If this workflow fails on the remote repository, this CI job will also fail. - - name: Workflow dispatch to TileDB-REST-CI + - name: Workflow dispatch to REST CI id: trigger-step uses: aurelien-baudet/workflow-dispatch@v2 env: @@ -22,8 +28,8 @@ jobs: # Skip if no PAT is set (e.g. for PRs from forks). if: env.TILEDB_REST_CI_PAT != null with: - repo: TileDB-Inc/TileDB-REST-CI - # Trigger workflow on TileDB-REST-CI at this ref. + repo: TileDB-Inc/TileDB-Internal + # Trigger workflow on TileDB-Internal at this ref. ref: "main" workflow: full-ci.yml token: ${{ secrets.TILEDB_REST_CI_PAT }} diff --git a/.github/workflows/full-ci.yml b/.github/workflows/full-ci.yml index a525a9cf382a..7ef0d811ce2d 100644 --- a/.github/workflows/full-ci.yml +++ b/.github/workflows/full-ci.yml @@ -101,7 +101,7 @@ jobs: matrix_compiler_cc: 'gcc-10' matrix_compiler_cxx: 'g++-10' timeout: 120 - bootstrap_args: '--enable-serialization --vcpkg-base-triplet=x64-linux' + bootstrap_args: '--enable-serialization' asan: true ci10: @@ -138,10 +138,6 @@ jobs: ci_docker: uses: ./.github/workflows/build-dockerfile.yml - ci_rest: - uses: ./.github/workflows/ci-rest.yml - secrets: inherit - # dummy job for branch protection check full_ci_passed: needs: [ diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9c373fd607a8..8584e0d18709 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,16 +1,6 @@ name: Release on: - pull_request: - branches: - - '*' # must quote since "*" is a YAML reserved character; we want a string - paths-ignore: - - '.github/workflows/quarto-render.yml' - - '_quarto.yml' - - 
'quarto-materials/*' - - '**/.md' - - 'tiledb/doxygen/source/*' - - 'tiledb/sm/c_api/tiledb_version.h' push: branches: - dev @@ -19,7 +9,26 @@ on: - '*' jobs: - Build-Release: + Package-Source-Release: + strategy: + matrix: + os: [ubuntu-20.04] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout TileDB + uses: actions/checkout@v3 + - name: CMake Configure + run: cmake -S . -B build -DTILEDB_CMAKE_IDE=ON + - name: CPack Package Source + run: cd build && cpack --config CPackSourceConfig.cmake + - name: Upload Release Artifacts + uses: actions/upload-artifact@v3 + with: + name: release + path: | + build/tiledb-*.tar.gz* + + Package-Binary-Release: strategy: fail-fast: false matrix: @@ -58,7 +67,7 @@ jobs: uses: actions/checkout@v3 - name: 'Homebrew setup' run: brew install automake pkg-config - if: ${{ startsWith(matrix.os, 'macos-') == true }} + if: ${{ startsWith(matrix.os, 'macos-') == true }} - name: Export GitHub Actions cache variables uses: actions/github-script@v6 with: @@ -97,44 +106,27 @@ jobs: ${{ matrix.cmake_args }} shell: bash - name: Build TileDB - run: cmake --build build --config Release - - name: Install TileDB - run: cmake --build build --config Release --target install-tiledb - - name: Archive installed artifacts (non-Windows) - if: ${{ !startsWith(matrix.platform, 'windows') }} - run: | - tar -czf ${{ steps.get-values.outputs.archive_name }}.tar.gz -C dist . - - name: Archive installed artifacts (Windows) - if: startsWith(matrix.platform, 'windows') - run: | - Compress-Archive -Path dist\* -DestinationPath ${{ steps.get-values.outputs.archive_name }}.zip - shell: pwsh + run: cmake --build build --config Release --target package - name: Upload release artifacts uses: actions/upload-artifact@v3 with: - name: tiledb-dist - path: ${{ steps.get-values.outputs.archive_name }}.* - - name: Archive build directory - run: | - tar -czf build-${{ matrix.platform }}.tar.gz -C build . 
- - name: Upload build directory - uses: actions/upload-artifact@v3 - with: - name: tiledb-build - path: build-${{ matrix.platform }}.tar.gz + name: release + path: | + build/tiledb/tiledb-*.tar.gz* + build/tiledb/tiledb-*.zip* - name: "Print log files (failed build only)" run: | source $GITHUB_WORKSPACE/scripts/ci/print_logs.sh if: failure() # only run this job if the build step failed Test-Release-Artifacts: - needs: Build-Release + needs: Package-Binary-Release runs-on: ubuntu-latest steps: - name: Download release artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: - name: tiledb-dist + name: release path: dist - name: Test names of release artifacts run: | @@ -144,14 +136,16 @@ jobs: fi Publish-Release: - needs: Test-Release-Artifacts + needs: + - Test-Release-Artifacts + - Package-Source-Release if: startsWith(github.ref, 'refs/tags/') runs-on: ubuntu-latest steps: - name: Download release artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: - name: tiledb-dist + name: release path: dist - name: Publish release artifacts uses: actions/github-script@v6 @@ -179,3 +173,24 @@ jobs: data: fs.readFileSync(file) }); } + + Generate-Release-List: + needs: + - Publish-Release + uses: ./.github/workflows/append-release-cmake.yml + with: + ref: ${{ github.ref_name }} + + Create-Issue-On-Fail: + permissions: + issues: write + runs-on: ubuntu-latest + needs: Publish-Release + if: (failure() || cancelled()) && github.event_name != 'workflow_dispatch' + steps: + - name: Create Issue if Build Fails + uses: TileDB-Inc/github-actions/open-issue@main + with: + name: Release failed + label: bug + assignee: KiterLuc,teo-tsirpanis,davisp \ No newline at end of file diff --git a/.gitignore b/.gitignore index 051e2eb8e318..5c529f44c40e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.nova* .vscode* *.sw? 
build/* diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 000000000000..0cb52e929843 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,21 @@ +stages: + - test + +trigger_pipeline: + stage: test + rules: + - if: $CI_COMMIT_BRANCH =~ /^dev|^release-.*/ || $CI_COMMIT_TAG != "" # only/except rules are no longer actively developed. Please use `rules` instead. + - if: $CI_PIPELINE_SOURCE == "external_pull_request_event" + changes: + - "!.github/workflows/quarto-render.yml" + - "!_quarto.yml" + - "!quarto-materials/*" + - "!**/.md" + - "!tiledb/doxygen/source/*" + - "!tiledb/sm/c_api/tiledb_version.h" + + variables: + TILEDB_REF: ${CI_COMMIT_REF_NAME} + trigger: + project: tiledb-inc/tiledb-internal + strategy: depend diff --git a/.readthedocs.yml b/.readthedocs.yml index 459b768ef7f1..0c2c9b0fa841 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -12,6 +12,17 @@ build: os: "ubuntu-22.04" tools: python: "3.8" + apt_packages: + - autoconf + - autoconf-archive + - automake + - curl + - git + - libtool + - make + - pkg-config + - unzip + - zip python: install: diff --git a/CMakeLists.txt b/CMakeLists.txt index c84b2d66fe08..2d643c3ac7b9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,6 +109,9 @@ endif() set(CMAKE_C_VISIBILITY_PRESET hidden) set(CMAKE_CXX_VISIBILITY_PRESET hidden) +# Disable warnings from Boost +set(Boost_NO_WARN_NEW_VERSIONS ON) + ############################################################ # Superbuild setup ############################################################ @@ -255,6 +258,15 @@ if(TILEDB_SANITIZER) validate_sanitizer_options() endif() +include(DetectStdPmr) + +if(TILEDB_USE_CPP17_PMR) + message(STATUS "Building with cpp17::pmr") + add_definitions(-DUSE_CPP17_PMR) +else() + message(STATUS "Building with std::pmr") +endif() + ####################################################### # Header Files ####################################################### diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 
71d27bd3f953..627ab9af9e4b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -61,27 +61,6 @@ Formatting conventions: - comments are good, TileDB uses [doxygen](http://www.stack.nl/~dimitri/doxygen/manual/docblocks.html) for class doc strings. - format code using [clang-format](https://clang.llvm.org/docs/ClangFormat.html) -### Building with sanitizers - -TileDB can be built with [clang sanitizers](https://clang.llvm.org/docs/AddressSanitizer.html) enabled. To enable them, you have to bootstrap with the `--enable-sanitizer` flag, as well as the vcpkg base triplet corresponding to your platform. The following platforms support sanitizers: - -* `arm64-osx` -* `x64-linux` -* `x64-osx` -* `x64-windows` - -> [!NOTE] -> Currently only the `address` sanitizer is supported. - -```bash -cd TileDB && mkdir build-asan && cd build-asan -../bootstrap --enable-sanitizer=address --vcpkg-base-triplet=x64-linux -make && make check -``` - -> [!IMPORTANT] -> To avoid errors, building with sanitizers must be done in a separate build directory. - ### Pull Requests: - `dev` is the development branch, all PR’s should be rebased on top of the latest `dev` commit. diff --git a/HISTORY.md b/HISTORY.md index 91ea067476ee..bd326298fc93 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,13 @@ +# TileDB v2.20.1 Release Notes + +## Defects removed + +* Support reading V1 group details with explicit version in the name. [#4744](https://github.com/TileDB-Inc/TileDB/pull/4744) + +## Build System Changes + +* Reintroduce the `TILEDB_STATIC` option under a deprecation warning. [#4732](https://github.com/TileDB-Inc/TileDB/pull/4732) + # TileDB v2.20.0 Release Notes ## Breaking behavior @@ -18,7 +28,6 @@ * Fix consolidation plan to print relative paths in output. [#4604](https://github.com/TileDB-Inc/TileDB/pull/4604) * Fix traversal limit in array deserialization. [#4606](https://github.com/TileDB-Inc/TileDB/pull/4606) * Add function random_label to utilize PRNG for random string generation. 
[#4564](https://github.com/TileDB-Inc/TileDB/pull/4564) -* Remove uuid in favor of random_label. [#4589](https://github.com/TileDB-Inc/TileDB/pull/4589) * Improve large dense aggregate reads with tile metadata only. [#4657](https://github.com/TileDB-Inc/TileDB/pull/4657) ## Defects removed @@ -43,6 +52,19 @@ * Add packaging tests into linux and mac CI pipelines. [#4567](https://github.com/TileDB-Inc/TileDB/pull/4567) * Add vcpkg triplets for Address Sanitizer. [#4515](https://github.com/TileDB-Inc/TileDB/pull/4515) * Fix regression where release artifacts had 8-digit commit hashes. [#4599](https://github.com/TileDB-Inc/TileDB/pull/4599) +* Fix importing TileDB in CMake versions prior to 3.18. [#4671](https://github.com/TileDB-Inc/TileDB/pull/4671) + +# TileDB v2.19.2 Release Notes + +## Defects removed + +* Fix bug with new minio behavior. [#4725](https://github.com/TileDB-Inc/TileDB/pull/4725) +* Support reading V1 group details with explicit version in the name. [#4744](https://github.com/TileDB-Inc/TileDB/pull/4744) + +## Build System Changes + +* Fix regression where release artifacts had 8-digit commit hashes. [#4599](https://github.com/TileDB-Inc/TileDB/pull/4599) +* Fix linker errors when building with MSVC. [#4759](https://github.com/TileDB-Inc/TileDB/pull/4759) # TileDB v2.19.1 Release Notes diff --git a/bootstrap.ps1 b/bootstrap.ps1 index e9a7e7c3895e..66fa18b6ba30 100644 --- a/bootstrap.ps1 +++ b/bootstrap.ps1 @@ -27,6 +27,9 @@ Specify the linkage type to build TileDB with. Valid values are .PARAMETER RemoveDeprecations Build TileDB without any deprecated APIs. +.PARAMETER Architecture +Specify the architecture to configure for. + .PARAMETER CMakeGenerator Optionally specify the CMake generator string, e.g. "Visual Studio 15 2017". Check 'cmake --help' for a list of supported generators. 
@@ -128,6 +131,7 @@ Param( [string]$Dependency, [string]$Linkage = "shared", [switch]$RemoveDeprecations, + [string]$Architecture, [string]$CMakeGenerator, [switch]$EnableAssert, [switch]$EnableDebug, @@ -327,6 +331,11 @@ if (![string]::IsNullOrEmpty($Dependency)) { $DependencyDir = $Dependency } +$ArchFlag = "" +if ($PSBoundParameters.ContainsKey("Architecture")) { + $ArchFlag = "-A $Architecture" +} + # Set CMake generator type. $GeneratorFlag = "" if ($PSBoundParameters.ContainsKey("CMakeGenerator")) { @@ -348,7 +357,7 @@ if ($CMakeGenerator -eq $null) { # Run CMake. # We use Invoke-Expression so we can echo the command to the user. -$CommandString = "cmake -A X64 -DTILEDB_VCPKG=$UseVcpkg -DCMAKE_BUILD_TYPE=$BuildType -DCMAKE_INSTALL_PREFIX=""$InstallPrefix"" $VcpkgBaseTriplet -DCMAKE_PREFIX_PATH=""$DependencyDir"" -DMSVC_MP_FLAG=""/MP$BuildProcesses"" -DTILEDB_ASSERTIONS=$AssertionMode -DTILEDB_VERBOSE=$Verbosity -DTILEDB_AZURE=$UseAzure -DTILEDB_S3=$UseS3 -DTILEDB_GCS=$UseGcs -DTILEDB_SERIALIZATION=$UseSerialization -DTILEDB_WERROR=$Werror -DTILEDB_CPP_API=$CppApi -DTILEDB_TESTS=$Tests -DTILEDB_STATS=$Stats -DBUILD_SHARED_LIBS=$BuildSharedLibs -DTILEDB_FORCE_ALL_DEPS=$TileDBBuildDeps -DTILEDB_REMOVE_DEPRECATIONS=$_RemoveDeprecations -DTILEDB_TOOLS=$TileDBTools -DTILEDB_EXPERIMENTAL_FEATURES=$TileDBExperimentalFeatures -DTILEDB_WEBP=$BuildWebP -DTILEDB_CRC32=$BuildCrc32 -DTILEDB_ARROW_TESTS=$ArrowTests -DTILEDB_TESTS_ENABLE_REST=$RestTests -DTILEDB_TESTS_AWS_S3_CONFIG=$ConfigureS3 $GeneratorFlag ""$SourceDirectory""" +$CommandString = "cmake $ArchFlag -DTILEDB_VCPKG=$UseVcpkg -DCMAKE_BUILD_TYPE=$BuildType -DCMAKE_INSTALL_PREFIX=""$InstallPrefix"" $VcpkgBaseTriplet -DCMAKE_PREFIX_PATH=""$DependencyDir"" -DMSVC_MP_FLAG=""/MP$BuildProcesses"" -DTILEDB_ASSERTIONS=$AssertionMode -DTILEDB_VERBOSE=$Verbosity -DTILEDB_AZURE=$UseAzure -DTILEDB_S3=$UseS3 -DTILEDB_GCS=$UseGcs -DTILEDB_SERIALIZATION=$UseSerialization -DTILEDB_WERROR=$Werror -DTILEDB_CPP_API=$CppApi 
-DTILEDB_TESTS=$Tests -DTILEDB_STATS=$Stats -DBUILD_SHARED_LIBS=$BuildSharedLibs -DTILEDB_FORCE_ALL_DEPS=$TileDBBuildDeps -DTILEDB_REMOVE_DEPRECATIONS=$_RemoveDeprecations -DTILEDB_TOOLS=$TileDBTools -DTILEDB_EXPERIMENTAL_FEATURES=$TileDBExperimentalFeatures -DTILEDB_WEBP=$BuildWebP -DTILEDB_CRC32=$BuildCrc32 -DTILEDB_ARROW_TESTS=$ArrowTests -DTILEDB_TESTS_ENABLE_REST=$RestTests -DTILEDB_TESTS_AWS_S3_CONFIG=$ConfigureS3 $GeneratorFlag ""$SourceDirectory""" Write-Host $CommandString Write-Host Invoke-Expression "$CommandString" diff --git a/cmake/Modules/DetectStdPmr.cmake b/cmake/Modules/DetectStdPmr.cmake new file mode 100644 index 000000000000..3f96e9435490 --- /dev/null +++ b/cmake/Modules/DetectStdPmr.cmake @@ -0,0 +1,69 @@ +# +# DetectStdPmr.cmake +# +# +# The MIT License +# +# Copyright (c) 2024 TileDB, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# Detect whether polymorphic allocators are available on the system. 
+ +# Special case for macOS when the MACOSX_DEPLOYMENT_TARGET is set to anything +# less than 14. For some reason, std::pmr is still detectable, but the resulting +# binary dies with a dyld missing symbol error. + +if (DEFINED ENV{MACOSX_DEPLOYMENT_TARGET}) + string(COMPARE LESS "$ENV{MACOSX_DEPLOYMENT_TARGET}" "14" MACOS_BAD_PMR_SUPPORT) + if (MACOS_BAD_PMR_SUPPORT) + set(TILEDB_USE_CPP17_PMR ON) + message(STATUS "Using vendored cpp17::pmr for polymorphic allocators") + return() + endif() +endif() + +# Otherwise, if we're not building a targeted macOS version, we just detect +# whether std::pmr is available. +# +# However CMake makes this extra awesome because try_run appears to have +# changed in a backwards incompatible manner. We'll just version check for +# selecting which to run. + +if (CMAKE_VERSION VERSION_LESS "3.25") + try_run( + TILEDB_CAN_RUN_STD_PMR + TILEDB_CAN_COMPILE_STD_PMR + "${CMAKE_CURRENT_BINARY_DIR}" + "${CMAKE_SOURCE_DIR}/cmake/inputs/detect_std_pmr.cc" + ) +else() + try_run( + TILEDB_CAN_RUN_STD_PMR + TILEDB_CAN_COMPILE_STD_PMR + SOURCES "${CMAKE_SOURCE_DIR}/cmake/inputs/detect_std_pmr.cc" + ) +endif() + +if ("${TILEDB_CAN_COMPILE_STD_PMR}" AND "${TILEDB_CAN_RUN_STD_PMR}" EQUAL 0) + message(STATUS "Using std::pmr for polymorphic allocators") +else() + set(TILEDB_USE_CPP17_PMR ON) + message(STATUS "Using vendored cpp17::pmr for polymorphic allocators") +endif() diff --git a/cmake/Options/BuildOptions.cmake b/cmake/Options/BuildOptions.cmake index fd439db47006..c88c3f10ac0a 100644 --- a/cmake/Options/BuildOptions.cmake +++ b/cmake/Options/BuildOptions.cmake @@ -40,6 +40,15 @@ option(CMAKE_EXPORT_COMPILE_COMMANDS "cmake compile commands" ON) set(TILEDB_INSTALL_LIBDIR "" CACHE STRING "If non-empty, install TileDB library to this directory instead of CMAKE_INSTALL_LIBDIR.") +if (DEFINED TILEDB_STATIC) + message(DEPRECATION "TILEDB_STATIC is deprecated and will be removed in version 2.28, to be released in Q3 2024. Use BUILD_SHARED_LIBS INSTEAD. 
Building both static and shared libraries is no longer available.") + if (TILEDB_STATIC) + set(BUILD_SHARED_LIBS OFF) + else() + set(BUILD_SHARED_LIBS ON) + endif() +endif() + if (NOT TILEDB_VCPKG) message(DEPRECATION "Disabling TILEDB_VCPKG is deprecated and will be removed in a future version.") endif() diff --git a/cmake/Options/TileDBToolchain.cmake b/cmake/Options/TileDBToolchain.cmake index d80215128a8a..40befe0460ae 100644 --- a/cmake/Options/TileDBToolchain.cmake +++ b/cmake/Options/TileDBToolchain.cmake @@ -65,6 +65,21 @@ if(TILEDB_SANITIZER STREQUAL "address") set(VCPKG_TARGET_TRIPLET "${TILEDB_VCPKG_BASE_TRIPLET}-asan") endif() +get_cmake_property(is_multi_config GENERATOR_IS_MULTI_CONFIG) +# On Windows vcpkg always builds dependencies with symbols. +# https://github.com/microsoft/vcpkg/blob/master/scripts/toolchains/windows.cmake +if(NOT WIN32 AND NOT is_multi_config AND CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" AND NOT VCPKG_TARGET_TRIPLET) + if(TILEDB_SANITIZER STREQUAL "address") + message(FATAL_ERROR "Cannot enable both RelWithDebInfo and ASAN at the same time.") + endif() + if(TILEDB_VCPKG_BASE_TRIPLET) + message(STATUS "Overriding vcpkg triplet to ${TILEDB_VCPKG_BASE_TRIPLET}-relwithdebinfo") + set(VCPKG_TARGET_TRIPLET "${TILEDB_VCPKG_BASE_TRIPLET}-relwithdebinfo") + else() + message(WARNING "Dependencies will be built without symbols. 
You have to set either VCPKG_TARGET_TRIPLET or TILEDB_VCPKG_BASE_TRIPLET.") + endif() +endif() + set(VCPKG_INSTALL_OPTIONS "--no-print-usage") macro(tiledb_vcpkg_enable_if tiledb_feature vcpkg_feature) diff --git a/cmake/TileDB-Superbuild.cmake b/cmake/TileDB-Superbuild.cmake index 8e05bb83fd84..f963294790c8 100644 --- a/cmake/TileDB-Superbuild.cmake +++ b/cmake/TileDB-Superbuild.cmake @@ -188,4 +188,4 @@ add_custom_target(package DEPENDS tiledb COMMAND ${CMAKE_CPACK_COMMAND} --config CPackConfig.cmake -G "$,ZIP,TGZ>" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tiledb -) \ No newline at end of file +) diff --git a/cmake/inputs/CustomCPackOptions.cmake.in b/cmake/inputs/CustomCPackOptions.cmake.in index 60f201003463..71d8fbea0686 100644 --- a/cmake/inputs/CustomCPackOptions.cmake.in +++ b/cmake/inputs/CustomCPackOptions.cmake.in @@ -1,18 +1,7 @@ -set(CPACK_SOURCE_IGNORE_FILES ".*\\.git;.*build.*") - set(CPACK_PACKAGE_VENDOR "TileDB Inc.") set(CPACK_PACKAGE_VERSION "@TILEDB_VERSION@") set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF) -# Append NOAVX2 if needed -if(NOT ${COMPILER_SUPPORTS_AVX2}) - set(NOAVX2 "-noavx2") -endif() - -# Set output name -set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}${NOAVX2}-${CPACK_PACKAGE_VERSION}") -string(TOLOWER ${CPACK_PACKAGE_FILE_NAME} CPACK_PACKAGE_FILE_NAME ) - # Enable HASH -set(CPACK_PACKAGE_CHECKSUM "SHA256") \ No newline at end of file +set(CPACK_PACKAGE_CHECKSUM "SHA256") diff --git a/cmake/inputs/DownloadPrebuiltTileDB.cmake b/cmake/inputs/DownloadPrebuiltTileDB.cmake new file mode 100644 index 000000000000..608478abc343 --- /dev/null +++ b/cmake/inputs/DownloadPrebuiltTileDB.cmake @@ -0,0 +1,128 @@ +# +# FindTileDB_EP.cmake +# +# +# The MIT License +# +# Copyright (c) 2023 TileDB, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +include(FetchContent) + +function(fetch_tiledb_release_list VERSION EXPECTED_HASH) # exports URL_TILEDB_<PLATFORM> and HASH_TILEDB_<PLATFORM> to the caller's scope + # Local constants + set(UPSTREAM_URL "https://github.com/TileDB-Inc/TileDB/releases/download") + + if(NOT VERSION) + set(VERSION latest) + endif() + + if(EXPECTED_HASH) # test the variable itself; if(${EXPECTED_HASH}) would look up the hash text as a variable name + file(DOWNLOAD + ${UPSTREAM_URL}/${VERSION}/releases.csv + ${CMAKE_CURRENT_BINARY_DIR}/releases.csv # same path that file(STRINGS) reads below + SHOW_PROGRESS + EXPECTED_HASH ${EXPECTED_HASH} + ) + else() + message(WARNING "Downloading release list without SHA checksum!") + file(DOWNLOAD + ${UPSTREAM_URL}/${VERSION}/releases.csv + ${CMAKE_CURRENT_BINARY_DIR}/releases.csv + SHOW_PROGRESS + ) + endif() + + file(STRINGS + ${CMAKE_CURRENT_BINARY_DIR}/releases.csv + RELLIST + ) + + # Remove csv table headers + list(POP_FRONT RELLIST) + + foreach(LINE ${RELLIST}) + string(REPLACE "," ";" LINE ${LINE}) + list(LENGTH LINE LENGTH) + + list(GET LINE 0 PLATFORM) + list(GET LINE 1 URL) + list(GET LINE 2 SHA) + + set(RELEASE_VAR TILEDB_${PLATFORM}) + set(URL_${RELEASE_VAR} ${URL} PARENT_SCOPE) + set(HASH_${RELEASE_VAR} ${SHA} PARENT_SCOPE) + endforeach() +endfunction() + +function(detect_artifact_name OUT_VAR) # sets the variable named by OUT_VAR in the caller's scope + if (WIN32) # Windows + SET(${OUT_VAR} TILEDB_WINDOWS-X86_64 PARENT_SCOPE) + elseif(APPLE) # OSX + if (CMAKE_OSX_ARCHITECTURES) # a defined-but-empty value falls back to CMAKE_SYSTEM_PROCESSOR + set(ACTUAL_TARGET ${CMAKE_OSX_ARCHITECTURES}) + else() + set(ACTUAL_TARGET ${CMAKE_SYSTEM_PROCESSOR}) + endif() + + + if (ACTUAL_TARGET MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") + SET(${OUT_VAR} TILEDB_MACOS-X86_64 PARENT_SCOPE) + elseif (ACTUAL_TARGET STREQUAL arm64 OR ACTUAL_TARGET MATCHES "^aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") + SET(${OUT_VAR} TILEDB_MACOS-ARM64 PARENT_SCOPE) + endif() + else() # Linux + SET(${OUT_VAR} TILEDB_LINUX-X86_64 PARENT_SCOPE) + endif() +endfunction() + +function(fetch_prebuilt_tiledb) # sets TileDB_DIR in the caller's scope + # Arguments: optional single-value keywords RELLIST_HASH, VERSION and ARTIFACT_NAME + set(options) + set(oneValueArgs RELLIST_HASH VERSION ARTIFACT_NAME) + set(multiValueArgs) + cmake_parse_arguments( + FETCH_PREBUILT_TILEDB + "${options}" + "${oneValueArgs}" + "${multiValueArgs}" + ${ARGN} + ) + +
fetch_tiledb_release_list("${FETCH_PREBUILT_TILEDB_VERSION}" "${FETCH_PREBUILT_TILEDB_RELLIST_HASH}") # quoted so unset values are still passed as empty arguments + + if(NOT FETCH_PREBUILT_TILEDB_ARTIFACT_NAME) + detect_artifact_name(FETCH_PREBUILT_TILEDB_ARTIFACT_NAME) + endif() + + string(STRIP ${HASH_${FETCH_PREBUILT_TILEDB_ARTIFACT_NAME}} HASH_${FETCH_PREBUILT_TILEDB_ARTIFACT_NAME}) + FetchContent_Declare( + tiledb-prebuilt + URL ${URL_${FETCH_PREBUILT_TILEDB_ARTIFACT_NAME}} + URL_HASH SHA256=${HASH_${FETCH_PREBUILT_TILEDB_ARTIFACT_NAME}} + DOWNLOAD_EXTRACT_TIMESTAMP FALSE + ) + + FetchContent_MakeAvailable( + tiledb-prebuilt + ) + + set(TileDB_DIR "${tiledb-prebuilt_SOURCE_DIR}/lib/cmake/TileDB" PARENT_SCOPE) +endfunction() diff --git a/cmake/inputs/detect_std_pmr.cc b/cmake/inputs/detect_std_pmr.cc new file mode 100644 index 000000000000..0d6b0eb93dd6 --- /dev/null +++ b/cmake/inputs/detect_std_pmr.cc @@ -0,0 +1,7 @@ + +#include <memory_resource> + +int +main() { + auto resource = std::pmr::get_default_resource(); +} diff --git a/cmake/package.cmake b/cmake/package.cmake new file mode 100644 index 000000000000..08bfae9049c3 --- /dev/null +++ b/cmake/package.cmake @@ -0,0 +1,45 @@ +# Packaging configuration +configure_file ("${PROJECT_SOURCE_DIR}/cmake/inputs/CustomCPackOptions.cmake.in" + "${PROJECT_BINARY_DIR}/CustomCPackOptions.cmake" + @ONLY) +set (CPACK_PROJECT_CONFIG_FILE + "${PROJECT_BINARY_DIR}/CustomCPackOptions.cmake") + +# Not all options can be set in CustomCPackOptions.cmake +if (CMAKE_SYSTEM_NAME STREQUAL "Windows") + set(CPACK_SOURCE_GENERATOR "ZIP") + set(CPACK_GENERATOR "ZIP") +else() + set(CPACK_SOURCE_GENERATOR "TGZ") + set(CPACK_GENERATOR "TGZ") +endif() + +# Package file name variables can not be in config file as well + +# Append NOAVX2 if needed +if(NOT ${COMPILER_SUPPORTS_AVX2}) + set(CPACK_NOAVX2 "-noavx2") +endif() + +# Properly set system name and architecture +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(CPACK_SYSTEM_NAME "MacOS") + if(CMAKE_OSX_ARCHITECTURES STREQUAL "arm64") + set(CPACK_SYSTEM_PROCESSOR
${CMAKE_OSX_ARCHITECTURES}) + else() + set(CPACK_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}) + endif() +else() + set(CPACK_SYSTEM_NAME ${CMAKE_SYSTEM_NAME}) + set(CPACK_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR}) +endif() + +set(CPACK_SOURCE_IGNORE_FILES ".*\.git;.*build.*/.*") + +set(CPACK_SOURCE_PACKAGE_FILE_NAME "${PROJECT_NAME}-source-${TILEDB_VERSION}") +string(TOLOWER ${CPACK_SOURCE_PACKAGE_FILE_NAME} CPACK_SOURCE_PACKAGE_FILE_NAME) + +set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${CPACK_SYSTEM_NAME}-${CPACK_SYSTEM_PROCESSOR}${CPACK_NOAVX2}-${TILEDB_VERSION}") +string(TOLOWER ${CPACK_PACKAGE_FILE_NAME} CPACK_PACKAGE_FILE_NAME) + +include(CPack) diff --git a/doc/dev/BUILD.md b/doc/dev/BUILD.md index 4f0e0bc898c0..d0c8140d1454 100644 --- a/doc/dev/BUILD.md +++ b/doc/dev/BUILD.md @@ -12,6 +12,7 @@ title: Building TileDB from source * Apple Clang 14 * Git (required by vcpkg) * curl (required by vcpkg on non-Windows) +* autoconf (required for building libmagic on non-Windows) ## Downloading the source code @@ -66,6 +67,7 @@ The following are the most common configuration options: |macOS/Linux flag|Windows flag|CMake variable|Description| |----------------|------------|--------------|-----------| |`--prefix=PREFIX`|`-Prefix=PREFIX`|`CMAKE_INSTALL_PREFIX=`|Install files in tree rooted at `PREFIX` (defaults to `TileDB/dist`)| +|`--vcpkg-base-triplet=TRIPLET`|`-VcpkgBaseTriplet=TRIPLET`|`TILEDB_VCPKG_BASE_TRIPLET=TRIPLET`|Vcpkg base triplet, needed for features like ASAN and RelWithDebInfo| |`--linkage=shared/static`|`-Linkage=shared/static`|`BUILD_SHARED_LIBS=ON/OFF`|Linkage of the compiled TileDB library (defaults to `shared`) | |`--remove-deprecations`|`-RemoveDeprecations`|`TILEDB_REMOVE_DEPRECATIONS=ON`|Build TileDB without deprecated APIs| |`--enable-debug`|`-EnableDebug`|`CMAKE_BUILD_TYPE=Debug`|Enables debug build| @@ -84,6 +86,13 @@ The following are the most common configuration options: |`--disable-stats`|`-DisableStats`|`TILEDB_STATS=OFF`|Disables internal
TileDB statistics| |`--disable-tests`|`-DisableTests`|`TILEDB_TESTS=OFF`|Disables building the TileDB test suite| +The supported vcpkg base triplet values are: + +* `arm64-osx` +* `x64-linux` +* `x64-osx` +* `x64-windows` + > [!TIP] > You can see all TileDB-specific CMake variables in [BuildOptions.cmake](../../cmake/Options/BuildOptions.cmake). @@ -125,4 +134,29 @@ Vcpkg will not be automatically downloaded if: * The `TILEDB_DISABLE_AUTO_VCPKG` environment variable has been defined. * The build tree has been configured by directly calling CMake and the `CMAKE_TOOLCHAIN_FILE` variable has been set by the user. -In these cases no dependencies CMake will find the dependencies based on the rules of the [`find_package`](https://cmake.org/cmake/help/latest/command/find_package.html#command:find_package) command. The user is responsible for providing them. +In these cases CMake will find the dependencies based on the rules of the [`find_package`](https://cmake.org/cmake/help/latest/command/find_package.html) command. The user is responsible for providing the dependencies. + +### Building with sanitizers + +TileDB can be built with [clang sanitizers](https://clang.llvm.org/docs/AddressSanitizer.html) enabled. To enable them, you have to bootstrap with the `--enable-sanitizer` flag, as well as the [vcpkg base triplet](#configuration-options) corresponding to your platform: + +> [!NOTE] +> Currently only the `address` sanitizer is supported. + +```bash +cd TileDB && mkdir build-asan && cd build-asan +../bootstrap --enable-sanitizer=address --vcpkg-base-triplet=x64-linux +make && make check +``` + +> [!IMPORTANT] +> To avoid errors, building with sanitizers must be done in a separate build directory. + +### Building with optimizations and debug symbols + +TileDB supports configuring in `RelWithDebInfo` mode, which compiles code with optimizations while also emitting debug symbols. 
However, on non-Windows platforms the dependencies built by vcpkg are not built with debug symbols by default. To enable that, you have to do either of the following: + +* [Specify a vcpkg base triplet](#configuration-options). +* Configure by directly calling CMake and setting a vcpkg triplet with the `VCPKG_DEFAULT_TRIPLET` variable. In this case you are responsible for ensuring the appropriate options are passed to the triplet file. + +Configuring in `RelWithDebInfo` mode and enabling ASAN at the same time is not supported. diff --git a/ports/README.md b/ports/README.md index 7faf08053d33..09981c7650fd 100644 --- a/ports/README.md +++ b/ports/README.md @@ -30,7 +30,6 @@ After copying the port, add an entry to the table below. You should also contrib |Port|Reason| |----|------| -|`libmagic`|Updating to the upstream port deferred due to failures.| |`openssl`|Pinning to OpenSSL 1.1 until we can move to 3.0 in January 2024.| -|`pcre2`|To be removed alongside libmagic.| |`azure-storage-common-cpp`|Patching to disable default features on libxml2 (https://github.com/Azure/azure-sdk-for-cpp/pull/5221).| +|`libmagic`|Patching to add features explicitly enabling compression support.| diff --git a/ports/libmagic/0001-Use-pcre2.patch b/ports/libmagic/0001-Use-libtre.patch similarity index 98% rename from ports/libmagic/0001-Use-pcre2.patch rename to ports/libmagic/0001-Use-libtre.patch index 2bcf73cd2bc6..a1080d5a070b 100644 --- a/ports/libmagic/0001-Use-pcre2.patch +++ b/ports/libmagic/0001-Use-libtre.patch @@ -44,7 +44,7 @@ index c548e97..299ac0c 100644 #include #include /* For open and flags */ -#include -+#include ++#include #include #include #ifndef WIN32 diff --git a/ports/libmagic/0002-Change-zlib-lib-name-to-match-CMake-output.patch b/ports/libmagic/0002-Change-zlib-lib-name-to-match-CMake-output.patch new file mode 100644 index 000000000000..5e070cc07fe0 --- /dev/null +++ b/ports/libmagic/0002-Change-zlib-lib-name-to-match-CMake-output.patch @@ -0,0 +1,39 @@ +From
2fa43ece9ec7564e1fbb9867bb5852b834643aa4 Mon Sep 17 00:00:00 2001 +From: Long Nguyen +Date: Sat, 8 May 2021 19:36:11 +0700 +Subject: [PATCH 02/14] Change zlib lib name to match CMake output + +--- + configure.ac | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/configure.ac b/configure.ac +index b05c334..dd4063c 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -173,7 +173,7 @@ AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat getline ctime_r + + dnl Checks for libraries + if test "$enable_zlib" != "no"; then +- AC_CHECK_LIB(z, gzopen) ++ AC_SEARCH_LIBS(gzopen, [z zlib zlibd], have_zlib="yes", have_zlib="no") + fi + if test "$enable_bzlib" != "no"; then + AC_CHECK_LIB(bz2, BZ2_bzCompressInit) +@@ -193,11 +193,11 @@ AM_CONDITIONAL(IS_CROSS_COMPILE, test "$cross_compiling" = yes) + + dnl Final sanity checks + if test "$enable_zlib" = "yes"; then +- if test "$ac_cv_header_zlib_h$ac_cv_lib_z_gzopen" != "yesyes"; then ++ if test "$ac_cv_header_zlib_h$have_zlib" != "yesyes"; then + AC_MSG_ERROR([zlib support requested but not found]) + fi + fi +-if test "$ac_cv_header_zlib_h$ac_cv_lib_z_gzopen" = "yesyes"; then ++if test "$ac_cv_header_zlib_h$have_zlib" = "yesyes"; then + AC_DEFINE([ZLIBSUPPORT], 1, [Enable zlib compression support]) + fi + if test "$enable_bzlib" = "yes"; then +-- +2.29.2.windows.2 + diff --git a/ports/libmagic/0003-Fix-WIN32-macro-checks.patch b/ports/libmagic/0003-Fix-WIN32-macro-checks.patch index 44ab2ddea2af..e90beb4d4097 100644 --- a/ports/libmagic/0003-Fix-WIN32-macro-checks.patch +++ b/ports/libmagic/0003-Fix-WIN32-macro-checks.patch @@ -12,7 +12,7 @@ index 299ac0c..2c365a6 100644 --- a/src/file.h +++ b/src/file.h @@ -82,7 +82,7 @@ - #include + #include #include #include -#ifndef WIN32 @@ -31,13 +31,13 @@ index 299ac0c..2c365a6 100644 #define PATHSEP ':' @@ -103,7 +103,7 @@ - #define private static + #define file_private static -#if HAVE_VISIBILITY &&
!defined(_WIN32) - #define public __attribute__ ((__visibility__("default"))) - #ifndef protected - #define protected __attribute__ ((__visibility__("hidden"))) + #define file_public __attribute__ ((__visibility__("default"))) + #ifndef file_protected + #define file_protected __attribute__ ((__visibility__("hidden"))) -- 2.29.2.windows.2 diff --git a/ports/libmagic/0005-Include-dirent.h-for-S_ISREG-and-S_ISDIR.patch b/ports/libmagic/0005-Include-dirent.h-for-S_ISREG-and-S_ISDIR.patch index 76cbc1222c8b..afcb658d8890 100644 --- a/ports/libmagic/0005-Include-dirent.h-for-S_ISREG-and-S_ISDIR.patch +++ b/ports/libmagic/0005-Include-dirent.h-for-S_ISREG-and-S_ISDIR.patch @@ -11,14 +11,15 @@ diff --git a/src/file.h b/src/file.h index 0332506..4aa9f60 100644 --- a/src/file.h +++ b/src/file.h -@@ -88,6 +88,7 @@ +@@ -88,7 +88,8 @@ /* Do this here and now, because struct stat gets re-defined on solaris */ #include #include +#include - - #define ENABLE_CONDITIONALS - + #include + #if defined(HAVE_XLOCALE_H) + #include + #endif -- 2.29.2.windows.2 diff --git a/ports/libmagic/0006-Remove-Wrap-POSIX-headers.patch b/ports/libmagic/0006-Remove-Wrap-POSIX-headers.patch index 56efac248157..169561d24372 100644 --- a/ports/libmagic/0006-Remove-Wrap-POSIX-headers.patch +++ b/ports/libmagic/0006-Remove-Wrap-POSIX-headers.patch @@ -161,7 +161,7 @@ index 3ab52d1..fc48d84 100644 --- a/src/pread.c +++ b/src/pread.c @@ -3,7 +3,9 @@ - FILE_RCSID("@(#)$File: pread.c,v 1.2 2013/04/02 16:23:07 christos Exp $") + FILE_RCSID("@(#)$File: pread.c,v 1.5 2022/09/24 20:30:13 christos Exp $") #endif /* lint */ #include +#ifdef HAVE_UNISTD_H diff --git a/ports/libmagic/0009-No-fcntl-in-magic.c.patch b/ports/libmagic/0009-No-fcntl-in-magic.c.patch deleted file mode 100644 index 47bf16165ef8..000000000000 --- a/ports/libmagic/0009-No-fcntl-in-magic.c.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 6d10bef865b69764f6e0ddd2b0f6a84e484cdb37 Mon Sep 17 00:00:00 2001 -From: Long Nguyen -Date: Sun, 9 May 2021 
13:25:14 +0700 -Subject: [PATCH 09/14] No fcntl in magic.c - ---- - src/magic.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/magic.c b/src/magic.c -index e9aeafa..382bd96 100644 ---- a/src/magic.c -+++ b/src/magic.c -@@ -462,7 +462,7 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd) - rv = 0; - goto done; - } --#if O_CLOEXEC == 0 -+#if O_CLOEXEC == 0 && !defined(_WIN32) - (void)fcntl(fd, F_SETFD, FD_CLOEXEC); - #endif - } --- -2.29.2.windows.2 - diff --git a/ports/libmagic/0010-Properly-check-for-the-presence-of-bitmasks.patch b/ports/libmagic/0010-Properly-check-for-the-presence-of-bitmasks.patch index f0d8738e465e..e241f5e49c09 100644 --- a/ports/libmagic/0010-Properly-check-for-the-presence-of-bitmasks.patch +++ b/ports/libmagic/0010-Properly-check-for-the-presence-of-bitmasks.patch @@ -17,7 +17,7 @@ index 5204f20..7244841 100644 #undef HAVE_MAJOR -#ifdef S_IFLNK +#if S_IFLNK != 0 - private int + file_private int bad_link(struct magic_set *ms, int err, char *buf) { @@ -108,7 +108,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) diff --git a/ports/libmagic/0011-Remove-pipe-related-functions-in-funcs.c.patch b/ports/libmagic/0011-Remove-pipe-related-functions-in-funcs.c.patch index ae2720ca3617..13c24faaca0b 100644 --- a/ports/libmagic/0011-Remove-pipe-related-functions-in-funcs.c.patch +++ b/ports/libmagic/0011-Remove-pipe-related-functions-in-funcs.c.patch @@ -11,21 +11,19 @@ diff --git a/src/funcs.c b/src/funcs.c index b926625..b585486 100644 --- a/src/funcs.c +++ b/src/funcs.c -@@ -809,6 +809,7 @@ file_print_guid(char *str, size_t len, const uint64_t *guid) - g->data4[6], g->data4[7]); +@@ -888,5 +888,6 @@ } +#ifndef _WIN32 - protected int + file_protected int file_pipe_closexec(int *fds) { -@@ -827,6 +828,7 @@ protected int - file_clear_closexec(int fd) { - return fcntl(fd, F_SETFD, 0); +@@ -914,5 +915,6 @@ + #endif } +#endif - protected char * + file_protected char * file_strtrim(char *str) 
-- 2.29.2.windows.2 diff --git a/ports/libmagic/0012-Convert-MSYS2-paths-to-Windows-paths.patch b/ports/libmagic/0012-Convert-MSYS2-paths-to-Windows-paths.patch new file mode 100644 index 000000000000..728fe77539f2 --- /dev/null +++ b/ports/libmagic/0012-Convert-MSYS2-paths-to-Windows-paths.patch @@ -0,0 +1,24 @@ +From f0139ced57c1579450f3d09b6e3ae0159aae031b Mon Sep 17 00:00:00 2001 +From: Long Nguyen +Date: Mon, 10 May 2021 08:43:28 +0700 +Subject: [PATCH 12/14] Convert MSYS2 paths to Windows paths + +--- + src/Makefile.am | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/Makefile.am b/src/Makefile.am +index 34781b9..26f853f 100644 +--- a/src/Makefile.am ++++ b/src/Makefile.am +@@ -1,4 +1,7 @@ + MAGIC = $(pkgdatadir)/magic ++if MINGW ++MAGIC != cygpath -m -a -l '$(MAGIC)' ++endif + lib_LTLIBRARIES = libmagic.la + nodist_include_HEADERS = magic.h + +-- +2.29.2.windows.2 + diff --git a/ports/libmagic/0013-Check-for-backslash-in-argv-0-on-Windows.patch b/ports/libmagic/0013-Check-for-backslash-in-argv-0-on-Windows.patch new file mode 100644 index 000000000000..77777fc61e8e --- /dev/null +++ b/ports/libmagic/0013-Check-for-backslash-in-argv-0-on-Windows.patch @@ -0,0 +1,42 @@ +From 3f10c7d31627b64b068b84ba72e706991f672560 Mon Sep 17 00:00:00 2001 +From: Long Nguyen +Date: Fri, 14 May 2021 08:14:05 +0700 +Subject: [PATCH 13/14] Check for backslash in argv[0] on Windows + +--- + magic/Makefile.am | 2 +- + src/file.c | 5 +++++ + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/magic/Makefile.am b/magic/Makefile.am +index 0eb5865..170bbab 100644 +--- a/magic/Makefile.am ++++ b/magic/Makefile.am +@@ -353,7 +353,7 @@ ${MAGIC}: $(EXTRA_DIST) $(FILE_COMPILE_DEP) + @(if expr "${FILE_COMPILE}" : '.*/.*' > /dev/null; then \ + echo "Using ${FILE_COMPILE} to generate ${MAGIC}" > /dev/null; \ + else \ +- v=$$(${FILE_COMPILE} --version | sed -e s/file-// -e q); \ ++ v=$$(${FILE_COMPILE} --version | sed -e s/file${EXEEXT}-// -e q); \ + if [ "$$v" != 
"${PACKAGE_VERSION}" ]; then \ + echo "Cannot use the installed version of file ($$v) to"; \ + echo "cross-compile file ${PACKAGE_VERSION}"; \ +diff --git a/src/file.c b/src/file.c +index 2889f8a..12a604b 100644 +--- a/src/file.c ++++ b/src/file.c +@@ -200,6 +200,11 @@ main(int argc, char *argv[]) + _wildcard(&argc, &argv); + #endif + ++#ifdef _WIN32 ++ if ((progname = strrchr(argv[0], '\\')) != NULL) ++ progname++; ++ else ++#endif + if ((progname = strrchr(argv[0], '/')) != NULL) + progname++; + else +-- +2.29.2.windows.2 + diff --git a/ports/libmagic/0014-Define-POSIX-macros-if-missing.patch b/ports/libmagic/0014-Define-POSIX-macros-if-missing.patch deleted file mode 100644 index 09ac7a11926e..000000000000 --- a/ports/libmagic/0014-Define-POSIX-macros-if-missing.patch +++ /dev/null @@ -1,38 +0,0 @@ -From fa0e11f36bb0e322250e1e488ced9f2bf166874f Mon Sep 17 00:00:00 2001 -From: Long Nguyen -Date: Fri, 14 May 2021 18:11:39 +0700 -Subject: [PATCH 14/14] Define POSIX macros if missing - ---- - src/file.h | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/src/file.h b/src/file.h -index ccfe0da..98cd37b 100644 ---- a/src/file.h -+++ b/src/file.h -@@ -100,6 +100,21 @@ - #include - #include - -+#if !defined(S_IFBLK) -+#define S_IFBLK 0 -+#define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK) -+#endif -+ -+#if !defined(S_IFLNK) -+#define S_IFLNK 0 -+#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) -+#endif -+ -+#if !defined(S_IFSOCK) -+#define S_IFSOCK 0 -+#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK) -+#endif -+ - #define ENABLE_CONDITIONALS - - #ifndef MAGIC --- -2.29.2.windows.2 - diff --git a/ports/libmagic/0015-MSYS2-Remove-ioctl-call.patch b/ports/libmagic/0015-MSYS2-Remove-ioctl-call.patch index da8ef79df29a..13fe3cf0c550 100644 --- a/ports/libmagic/0015-MSYS2-Remove-ioctl-call.patch +++ b/ports/libmagic/0015-MSYS2-Remove-ioctl-call.patch @@ -7,20 +7,20 @@ diff --git a/src/compress.c b/src/compress.c index 33ce2bc..f172eda 100644 --- 
a/src/compress.c +++ b/src/compress.c -@@ -378,7 +378,7 @@ +@@ -407,7 +407,7 @@ sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__))) { ssize_t rv; --#ifdef FIONREAD +-#if defined(FIONREAD) && !defined(__MINGW32__) +#if defined(FIONREAD) && !defined(__MINGW32__) && !defined(WIN32) int t = 0; #endif size_t rn = n; -@@ -386,7 +386,7 @@ +@@ -418,7 +418,7 @@ if (fd == STDIN_FILENO) goto nocheck; --#ifdef FIONREAD +-#if defined(FIONREAD) && !defined(__MINGW32__) +#if defined(FIONREAD) && !defined(__MINGW32__) && !defined(WIN32) if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) { #ifdef FD_ZERO diff --git a/ports/libmagic/0016-Fix-file_famagic-function.patch b/ports/libmagic/0016-Fix-file_famagic-function.patch new file mode 100644 index 000000000000..5eaba925a589 --- /dev/null +++ b/ports/libmagic/0016-Fix-file_famagic-function.patch @@ -0,0 +1,40 @@ +diff --git a/src/fsmagic.c b/src/fsmagic.c +index 7244841..2c553c1 100644 +--- a/src/fsmagic.c ++++ b/src/fsmagic.c +@@ -66,7 +66,7 @@ # define major(dev) (((dev) >> 8) & 0xff) + # define minor(dev) ((dev) & 0xff) + #endif + #undef HAVE_MAJOR +-#if S_IFLNK != 0 ++#if S_IFLNK != 0 && ! defined(_WIN32) + file_private int + bad_link(struct magic_set *ms, int err, char *buf) + { +@@ -108,7 +108,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) + int ret, did = 0; + int mime = ms->flags & MAGIC_MIME; + int silent = ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION); +-#if S_IFLNK != 0 ++#if S_IFLNK != 0 && ! defined(_WIN32) + char buf[BUFSIZ+4]; + ssize_t nch; + struct stat tstatbuf; +@@ -122,7 +122,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) + * Fstat is cheaper but fails for files you don't have read perms on. + * On 4.2BSD and similar systems, use lstat() to identify symlinks. + */ +-#if S_IFLNK != 0 ++#if S_IFLNK != 0 && ! 
defined(_WIN32) + if ((ms->flags & MAGIC_SYMLINK) == 0) + ret = lstat(fn, sb); + else +@@ -290,7 +290,7 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) + return -1; + break; + #endif +-#if S_IFLNK != 0 ++#if S_IFLNK != 0 && ! defined(_WIN32) + case S_IFLNK: + if ((nch = readlink(fn, buf, BUFSIZ-1)) <= 0) { + if (ms->flags & MAGIC_ERROR) { diff --git a/ports/libmagic/CMakeLists.txt b/ports/libmagic/CMakeLists.txt deleted file mode 100644 index a6248bdf871a..000000000000 --- a/ports/libmagic/CMakeLists.txt +++ /dev/null @@ -1,170 +0,0 @@ -cmake_minimum_required(VERSION 3.12) - -file(READ ${CMAKE_CURRENT_SOURCE_DIR}/configure.ac CONFIGURE_AC_CONTENT) -string(REGEX MATCH "AC_INIT\\(\\[file\\],\\[([0-9]+\\.[0-9]+)\\]" _ ${CONFIGURE_AC_CONTENT}) -unset(CONFIGURE_AC_CONTENT) - -project(file VERSION ${CMAKE_MATCH_1}) - -option(FILE_TESTS "Enable file tests" OFF) - -# Get library directory for multiarch linux distros -include(GNUInstallDirs) - -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules") -set(targets) -find_package(PCRE2 CONFIG REQUIRED COMPONENTS 8BIT) -if(WIN32 AND NOT MINGW) - find_package(unofficial-getopt-win32 CONFIG REQUIRED) -endif() - -set(LIBMAGIC_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") -set(LIBMAGIC_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}/tests") - -set(WIN_COMPAT_SOURCES - ${LIBMAGIC_SOURCE_DIR}/asctime_r.c - ${LIBMAGIC_SOURCE_DIR}/asprintf.c - ${LIBMAGIC_SOURCE_DIR}/ctime_r.c - ${LIBMAGIC_SOURCE_DIR}/dprintf.c - ${LIBMAGIC_SOURCE_DIR}/getline.c - ${LIBMAGIC_SOURCE_DIR}/gmtime_r.c - ${LIBMAGIC_SOURCE_DIR}/localtime_r.c - ${LIBMAGIC_SOURCE_DIR}/pread.c - ${LIBMAGIC_SOURCE_DIR}/strcasestr.c - # ${LIBMAGIC_SOURCE_DIR}/strlcat.c - # ${LIBMAGIC_SOURCE_DIR}/strlcpy.c - ${LIBMAGIC_SOURCE_DIR}/vasprintf.c - ${LIBMAGIC_SOURCE_DIR}/fmtcheck.c - ${LIBMAGIC_SOURCE_DIR}/magic.def -) - -if(WIN32) - set(LIBMAGIC_SOURCE_FILES ${WIN_COMPAT_SOURCES}) -else() - set(LIBMAGIC_SOURCE_FILES) -endif() - -list(APPEND 
LIBMAGIC_SOURCE_FILES - ${LIBMAGIC_SOURCE_DIR}/apprentice.c - ${LIBMAGIC_SOURCE_DIR}/apptype.c - ${LIBMAGIC_SOURCE_DIR}/ascmagic.c - ${LIBMAGIC_SOURCE_DIR}/buffer.c - ${LIBMAGIC_SOURCE_DIR}/cdf.c - ${LIBMAGIC_SOURCE_DIR}/cdf_time.c - ${LIBMAGIC_SOURCE_DIR}/compress.c - ${LIBMAGIC_SOURCE_DIR}/der.c - ${LIBMAGIC_SOURCE_DIR}/encoding.c - ${LIBMAGIC_SOURCE_DIR}/fmtcheck.c - ${LIBMAGIC_SOURCE_DIR}/fsmagic.c - ${LIBMAGIC_SOURCE_DIR}/funcs.c - ${LIBMAGIC_SOURCE_DIR}/is_json.c - ${LIBMAGIC_SOURCE_DIR}/is_tar.c - ${LIBMAGIC_SOURCE_DIR}/magic.c - ${LIBMAGIC_SOURCE_DIR}/print.c - ${LIBMAGIC_SOURCE_DIR}/readcdf.c - ${LIBMAGIC_SOURCE_DIR}/readelf.c - ${LIBMAGIC_SOURCE_DIR}/softmagic.c - # ${LIBMAGIC_SOURCE_DIR}/strlcat.c - # ${LIBMAGIC_SOURCE_DIR}/strlcpy.c - ${LIBMAGIC_SOURCE_DIR}/is_csv.c -) -if(NOT APPLE) -list(APPEND LIBMAGIC_SOURCE_FILES - ${LIBMAGIC_SOURCE_DIR}/strlcat.c - ${LIBMAGIC_SOURCE_DIR}/strlcpy.c -) -endif() - -# replace the version in the magic.h.in and write it to magic.h -FILE(READ ${LIBMAGIC_SOURCE_DIR}/magic.h.in MAGIC_H_CONTENT) -STRING(REPLACE "." 
"" FILE_VERSION_WITHOUT_DOT "${CMAKE_PROJECT_VERSION}") -STRING(REPLACE "X.YY" ${FILE_VERSION_WITHOUT_DOT} MAGIC_H_CONTENT_NEW "${MAGIC_H_CONTENT}") -FILE(WRITE ${LIBMAGIC_SOURCE_DIR}/magic.h "${MAGIC_H_CONTENT_NEW}") - -add_compile_definitions(HAVE_CONFIG_H VERSION="${CMAKE_PROJECT_VERSION}") -if(WIN32) - add_compile_definitions(WIN32_LEAN_AND_MEAN WIN32) -endif() - -add_library(libmagic ${LIBMAGIC_SOURCE_FILES}) -set(targets ${targets} libmagic) - -target_link_libraries(libmagic PRIVATE PCRE2::POSIX) - -target_include_directories(libmagic - PUBLIC - "$" - "$" -) - -# 'file' CLI -add_executable(file ${LIBMAGIC_SOURCE_DIR}/file.c) -set(targets ${targets} file) -target_link_libraries(file PRIVATE PCRE2::POSIX libmagic) -if (WIN32) - if (NOT MINGW) - target_link_libraries(file PRIVATE unofficial::getopt-win32::getopt) - endif() - target_link_libraries(file PRIVATE shlwapi) -endif() - -if(MSVC) - target_include_directories(file PRIVATE getopt) -endif() - -# Following is the compilation of the magic file -file(GLOB MAGIC_FRAGMENTS magic/Magdir/*) - -# Prepare a temporary file to combine the magic fragments: -set(MAGIC_FRAGMENTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/magic) -file(WRITE ${MAGIC_FRAGMENTS_FILE} "") - -# Call the "cat" function for each input file -foreach(MAGIC_FRAGMENT ${MAGIC_FRAGMENTS}) - file(APPEND ${MAGIC_FRAGMENTS_FILE} "############## ${MAGIC_FRAGMENT} ##############\n") - file(READ ${MAGIC_FRAGMENT} MAGIC_FRAGMENT_CONTENTS) - file(APPEND ${MAGIC_FRAGMENTS_FILE} "${MAGIC_FRAGMENT_CONTENTS}\n") - unset(MAGIC_FRAGMENT_CONTENTS) -endforeach() - -add_custom_command(OUTPUT magic.mgc - COMMAND file -C -m magic - COMMENT "Compiling magic file" -) - -add_custom_target(magic_mgc ALL DEPENDS magic.mgc) - -# Include module with function 'configure_package_config_file' -include(CMakePackageConfigHelpers) - -set(PORT_NAME unofficial-libmagic) -set(TARGETS_EXPORT_NAME ${PORT_NAME}-targets) - -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/magic.mgc DESTINATION 
${CMAKE_INSTALL_DATAROOTDIR}/${PORT_NAME}) - -install(FILES ${LIBMAGIC_SOURCE_DIR}/magic.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - -install(TARGETS ${targets} - EXPORT ${TARGETS_EXPORT_NAME} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) -install(EXPORT ${TARGETS_EXPORT_NAME} - FILE ${TARGETS_EXPORT_NAME}.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PORT_NAME} - NAMESPACE unofficial::libmagic:: - ) -configure_package_config_file( - ${PORT_NAME}-config.cmake.in - ${PORT_NAME}-config.cmake - INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PORT_NAME}) -write_basic_package_version_file( - ${PORT_NAME}-config-version.cmake - VERSION ${CMAKE_PROJECT_VERSION} - COMPATIBILITY SameMajorVersion) -install(FILES - ${CMAKE_BINARY_DIR}/${PORT_NAME}-config.cmake - ${CMAKE_BINARY_DIR}/${PORT_NAME}-config-version.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PORT_NAME}) diff --git a/ports/libmagic/config.h b/ports/libmagic/config.h deleted file mode 100644 index 3a6e492f7c21..000000000000 --- a/ports/libmagic/config.h +++ /dev/null @@ -1,401 +0,0 @@ -/* Define in built-in ELF support is used */ -#define BUILTIN_ELF 1 - -/* Define for ELF core file support */ -#define ELFCORE 1 - -/* Define to 1 if you have the `asctime_r' function. */ -#undef HAVE_ASCTIME_R - -/* Define to 1 if you have the `asprintf' function. */ -#undef HAVE_ASPRINTF - -/* Define to 1 if you have the `ctime_r' function. */ -#undef HAVE_CTIME_R - -/* HAVE_DAYLIGHT */ -#define HAVE_DAYLIGHT 1 - -/* Define to 1 if you have the declaration of `daylight', and to 0 if you - don't. */ -#undef HAVE_DECL_DAYLIGHT - -/* Define to 1 if you have the declaration of `tzname', and to 0 if you don't. - */ -#undef HAVE_DECL_TZNAME - -/* Define to 1 if you have the header file. */ -#undef HAVE_DLFCN_H - -/* Define to 1 if you have the `dprintf' function. 
*/ -#undef HAVE_DPRINTF - -/* Define to 1 if you have the header file. */ -#undef HAVE_ERR_H - -/* Define to 1 if you have the header file. */ -#ifdef WIN32 -#undef HAVE_DIRENT_H -#else -// TBD: will all non-win32 xplatforms we want have this? -#define HAVE_DIRENT_H 1 -#endif - -/* Define to 1 if you have the header file. */ -#define HAVE_FCNTL_H 1 - -/* Define to 1 if you have the `fmtcheck' function. */ -/*#undef HAVE_FMTCHECK*/ - -/* Define to 1 if you have the `fork' function. */ -#undef HAVE_FORK - -/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */ -#undef HAVE_FSEEKO - -/* Define to 1 if you have the `getline' function. */ -#undef HAVE_GETLINE - -/* Define to 1 if you have the header file. */ -#ifdef _WIN32 -#define HAVE_GETOPT_H 1 -#endif - -/* Define to 1 if you have the `getopt_long' function. */ -#undef HAVE_GETOPT_LONG - -/* Define to 1 if you have the `getpagesize' function. */ -#undef HAVE_GETPAGESIZE - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the `gnurx' library (-lgnurx). */ -#undef HAVE_LIBGNURX - -/* Define to 1 if you have the `z' library (-lz). */ -/* #undef HAVE_LIBZ */ - -/* Define to 1 if you have the header file. */ -#define HAVE_LIMITS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LOCALE_H 1 - -/* Define to 1 if mbrtowc and mbstate_t are properly declared. */ -#define HAVE_MBRTOWC 1 - -/* Define to 1 if declares mbstate_t. */ -#define HAVE_MBSTATE_T 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `mkostemp' function. */ -#undef HAVE_MKOSTEMP - -/* Define to 1 if you have the `mkstemp' function. */ -#ifdef _WIN32 -#undef HAVE_MKSTEMP -#else -#define HAVE_MKSTEMP 1 -#endif - -/* Define to 1 if you have a working `mmap' system call. */ -#undef HAVE_MMAP - -/* Define to 1 if you have the `pread' function. */ -#undef HAVE_PREAD - -/* Define to 1 if you have the header file. 
*/ -#define HAVE_STDDEF_H 1 - -/* Define to 1 if the system has the type `pid_t'. */ -#undef HAVE_PID_T - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `strcasestr' function. */ -#if defined(_WIN32) && !defined(__MINGW32__) -#define HAVE_STRCASESTR 1 -#else -#undef HAVE_STRCASESTR -#endif - -/* Define to 1 if you have the `strerror' function. */ -#define HAVE_STRERROR 1 - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the `strlcat' function. */ -#undef HAVE_STRLCAT - -/* Define to 1 if you have the `strlcpy' function. */ -#undef HAVE_STRLCPY - -/* Define to 1 if you have the `strndup' function. */ -#undef HAVE_STRNDUP - -/* Define to 1 if you have the `strtof' function. */ -#undef HAVE_STRTOF - -/* Define to 1 if you have the `strtoul' function. */ -#define HAVE_STRTOUL 1 - -/* HAVE_STRUCT_OPTION */ -#define HAVE_STRUCT_OPTION 1 - -/* Define to 1 if `st_rdev' is a member of `struct stat'. */ -#undef HAVE_STRUCT_STAT_ST_RDEV - -/* Define to 1 if `tm_gmtoff' is a member of `struct tm'. */ -#undef HAVE_STRUCT_TM_TM_GMTOFF - -/* Define to 1 if `tm_zone' is a member of `struct tm'. */ -#undef HAVE_STRUCT_TM_TM_ZONE - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_MMAN_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_PARAM_H - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TIME_H - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_UTIME_H - -/* Define to 1 if you have that is POSIX.1 compatible. 
*/ -#undef HAVE_SYS_WAIT_H - -/* HAVE_TM_ISDST */ -#undef HAVE_TM_ISDST - -/* HAVE_TM_ZONE */ -#undef HAVE_TM_ZONE - -/* HAVE_TZNAME */ -#undef HAVE_TZNAME - -/* Define to 1 if the system has the type `int32_t'. */ -#define HAVE_INT32_T 1 - -/* Define to 1 if the system has the type `int64_t'. */ -#define HAVE_INT64_T 1 - -/* Define to 1 if the system has the type `uint16_t'. */ -#define HAVE_UINT16_T 1 - -/* Define to 1 if the system has the type `uint32_t'. */ -#define HAVE_UINT32_T 1 - -/* Define to 1 if the system has the type `uint64_t'. */ -#define HAVE_UINT64_T 1 - -/* Define to 1 if the system has the type `uint8_t'. */ -#define HAVE_UINT8_T 1 - -/* Define to 1 if you have the header file. */ -/* turns out, v5.39 file/src/buffer.c does -not- subject inclusion to this define */ -#ifndef _WIN32 -#define HAVE_UNISTD_H 1 -#endif - -/* Define to 1 if you have the `utime' function. */ -#undef HAVE_UTIME - -/* Define to 1 if you have the `utimes' function. */ -#undef HAVE_UTIMES - -/* Define to 1 if you have the header file. */ -#undef HAVE_UTIME_H - -/* Define to 1 if you have the `vasprintf' function. */ -#undef HAVE_VASPRINTF - -/* Define to 1 if you have the `vfork' function. */ -#undef HAVE_VFORK - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_VFORK_H */ - -/* Define to 1 or 0, depending whether the compiler supports simple visibility - declarations. */ -#undef HAVE_VISIBILITY - -/* Define to 1 if you have the header file. */ -#define HAVE_WCHAR_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_WCTYPE_H 1 - -/* Define to 1 if you have the `wcwidth' function. */ -#undef HAVE_WCWIDTH - -/* Define to 1 if `fork' works. */ -#undef HAVE_WORKING_FORK - -/* Define to 1 if `vfork' works. */ -#undef HAVE_WORKING_VFORK - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_ZLIB_H */ - -/* Define to the sub-directory in which libtool stores uninstalled libraries. 
- */ -#undef LT_OBJDIR - -/* Define to 1 if `major', `minor', and `makedev' are declared in . - */ -#undef MAJOR_IN_MKDEV - -/* Define to 1 if `major', `minor', and `makedev' are declared in - . */ -#undef MAJOR_IN_SYSMACROS - -/* Name of package */ -#undef PACKAGE - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the home page for this package. */ -#undef PACKAGE_URL - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* The size of `long long', as computed by sizeof. */ -#undef SIZEOF_LONG_LONG - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* Define to 1 if your declares `struct tm'. */ -#undef TM_IN_SYS_TIME - -/* Enable extensions on AIX 3, Interix. */ -#ifndef _ALL_SOURCE -# undef _ALL_SOURCE -#endif -/* Enable GNU extensions on systems that have them. */ -#ifndef _GNU_SOURCE -# undef _GNU_SOURCE -#endif -/* Enable threading extensions on Solaris. */ -#ifndef _POSIX_PTHREAD_SEMANTICS -# undef _POSIX_PTHREAD_SEMANTICS -#endif -/* Enable extensions on HP NonStop. */ -#ifndef _TANDEM_SOURCE -# undef _TANDEM_SOURCE -#endif -/* Enable general extensions on Solaris. */ -#ifndef __EXTENSIONS__ -# undef __EXTENSIONS__ -#endif - - -/* Number of bits in a file offset, on hosts where this is settable. */ -#undef _FILE_OFFSET_BITS - -/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ -#undef _LARGEFILE_SOURCE - -/* Define for large files, on AIX-style hosts. */ -#undef _LARGE_FILES - -/* Define to 1 if on MINIX. */ -#undef _MINIX - -/* Define to 2 if the system does not provide POSIX.1 features except with - this defined. 
*/ -#undef _POSIX_1_SOURCE - -/* Define to 1 if you need to in order for `stat' and other things to work. */ -#undef _POSIX_SOURCE - -/* Define to empty if `const' does not conform to ANSI C. */ -//#define const - -/* Define to a type if does not define. */ -//#undef mbstate_t - -/* Define to `long int' if does not define. */ -//#undef off_t - -/* Define to `unsigned int' if does not define. */ -//#undef size_t - - -#ifndef HAVE_UINT8_T -typedef unsigned char uint8_t; -#endif -#ifndef HAVE_UINT16_T -typedef unsigned short uint16_t; -#endif -#ifndef HAVE_UINT32_T -typedef unsigned int uint32_t; -#endif -#ifndef HAVE_INT32_T -typedef int int32_t; -#endif -#ifndef HAVE_UINT64_T -#if SIZEOF_LONG_LONG == 8 -typedef unsigned long long uint64_t; -#else -typedef unsigned long uint64_t; -#endif -#endif -#ifndef HAVE_INT64_T -#if SIZEOF_LONG_LONG == 8 -typedef long long int64_t; -#else -typedef long int64_t; -#endif -#endif - -#ifndef _SSIZE_T_DEFINED -#ifdef _WIN32 -#if defined(__MINGW32__) && !defined(__MINGW64__) -typedef int ssize_t; -#else -#include -typedef int64_t ssize_t; -#endif -#endif -#define _SSIZE_T_DEFINED -#endif - -#ifdef _WIN32 -#include - -#include -#endif - -/* Define as `fork' if `vfork' does not work. 
*/ -/* #undef vfork */ diff --git a/ports/libmagic/magic.def b/ports/libmagic/magic.def deleted file mode 100644 index f286a62ae487..000000000000 --- a/ports/libmagic/magic.def +++ /dev/null @@ -1,20 +0,0 @@ -LIBRARY libmagic -EXPORTS - magic_open - magic_close - magic_getpath - magic_file - magic_descriptor - magic_buffer - magic_error - magic_setflags - magic_version - magic_load - magic_load_buffers - magic_compile - magic_check - magic_list - magic_errno - magic_setparam - magic_getparam - getline diff --git a/ports/libmagic/portfile.cmake b/ports/libmagic/portfile.cmake index c40c3e2bac64..34ecdb150d1e 100644 --- a/ports/libmagic/portfile.cmake +++ b/ports/libmagic/portfile.cmake @@ -1,53 +1,62 @@ -set(PATCHES - "0001-Use-pcre2.patch" -) - if(VCPKG_TARGET_IS_WINDOWS) set(PATCHES - ${PATCHES} + "0001-Use-libtre.patch" + "0002-Change-zlib-lib-name-to-match-CMake-output.patch" "0003-Fix-WIN32-macro-checks.patch" "0004-Typedef-POSIX-types-on-Windows.patch" "0005-Include-dirent.h-for-S_ISREG-and-S_ISDIR.patch" "0006-Remove-Wrap-POSIX-headers.patch" "0007-Substitute-unistd-macros-for-MSVC.patch" "0008-Add-FILENO-defines.patch" - "0009-No-fcntl-in-magic.c.patch" "0010-Properly-check-for-the-presence-of-bitmasks.patch" "0011-Remove-pipe-related-functions-in-funcs.c.patch" - "0014-Define-POSIX-macros-if-missing.patch" + "0012-Convert-MSYS2-paths-to-Windows-paths.patch" + "0013-Check-for-backslash-in-argv-0-on-Windows.patch" "0015-MSYS2-Remove-ioctl-call.patch" + "0016-Fix-file_famagic-function.patch" ) endif() vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO file/file - REF FILE5_40 - SHA512 d76bfe5326e1b40368e055c2e049a24b4ffdbd727371f4f3aa1dd3f53787d16b88550b3cc71ecf02151e2fb3e567eb2598e4707badab8c391eb71113c2dcc319 + REF FILE5_45 + SHA512 fdd4c5d13d5ea1d25686c76d8ebc3252c54040c4871e3f0f623c4548b3841795d4e36050292a9453eedf0fbf932573890e9d6ac9fa63ccf577215598ae84b9ea HEAD_REF master PATCHES ${PATCHES} ) -file(COPY "${CMAKE_CURRENT_LIST_DIR}/CMakeLists.txt" 
DESTINATION "${SOURCE_PATH}") -file(COPY "${CMAKE_CURRENT_LIST_DIR}/unofficial-libmagic-config.cmake.in" DESTINATION "${SOURCE_PATH}") -file(COPY "${CMAKE_CURRENT_LIST_DIR}/magic.def" DESTINATION "${SOURCE_PATH}/src") -file(COPY "${CMAKE_CURRENT_LIST_DIR}/config.h" DESTINATION "${SOURCE_PATH}/src") +if(VCPKG_TARGET_IS_WINDOWS) + set(VCPKG_C_FLAGS "${VCPKG_C_FLAGS} -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS") + set(VCPKG_CXX_FLAGS "${VCPKG_CXX_FLAGS} -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS") +endif() -vcpkg_cmake_configure( - SOURCE_PATH ${SOURCE_PATH} -) +set(CONFIG_OPTIONS) -vcpkg_cmake_install() -vcpkg_copy_pdbs() -vcpkg_fixup_pkgconfig() -vcpkg_copy_tools(TOOL_NAMES file AUTO_CLEAN) -vcpkg_cmake_config_fixup( - CONFIG_PATH lib/cmake/unofficial-libmagic - PACKAGE_NAME unofficial-libmagic) +if(NOT "zlib" IN_LIST FEATURES) + list(APPEND CONFIG_OPTIONS "--disable-zlib") +endif() -file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") -file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share") -file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/share/${PORT}/man5") +if(NOT "bzip2" IN_LIST FEATURES) + list(APPEND CONFIG_OPTIONS "--disable-bzlib") +endif() + +if(NOT "lzma" IN_LIST FEATURES) + list(APPEND CONFIG_OPTIONS "--disable-xzlib") +endif() + +if(NOT "zstd" IN_LIST FEATURES) + list(APPEND CONFIG_OPTIONS "--disable-zstdlib") +endif() + +vcpkg_configure_make( + AUTOCONFIG + SOURCE_PATH "${SOURCE_PATH}" + OPTIONS + ${CONFIG_OPTIONS} + "--disable-lzlib" + "--disable-libseccomp" +) if(VCPKG_CROSSCOMPILING) vcpkg_add_to_path(PREPEND "${CURRENT_HOST_INSTALLED_DIR}/tools/libmagic/bin") @@ -55,23 +64,37 @@ elseif(VCPKG_TARGET_IS_WINDOWS AND VCPKG_LIBRARY_LINKAGE STREQUAL dynamic) set(EXTRA_ARGS "ADD_BIN_TO_PATH") endif() +vcpkg_install_make(${EXTRA_ARGS}) +vcpkg_copy_tool_dependencies("${CURRENT_PACKAGES_DIR}/tools/${PORT}/bin") +vcpkg_copy_tool_dependencies("${CURRENT_PACKAGES_DIR}/tools/${PORT}/debug/bin") +vcpkg_fixup_pkgconfig() + 
if(VCPKG_LIBRARY_LINKAGE STREQUAL static) file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/bin" "${CURRENT_PACKAGES_DIR}/debug/bin") endif() -set(UNOFFICIAL_PORT unofficial-${PORT}) - if(VCPKG_TARGET_IS_WINDOWS) if(NOT VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "release") - file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/tools/${UNOFFICIAL_PORT}/share/misc") - file(COPY "${CURRENT_PACKAGES_DIR}/share/${UNOFFICIAL_PORT}/magic.mgc" DESTINATION "${CURRENT_PACKAGES_DIR}/tools/${UNOFFICIAL_PORT}/share/misc") + file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/tools/${PORT}/share/misc") + file(COPY "${CURRENT_PACKAGES_DIR}/share/${PORT}/misc/magic.mgc" DESTINATION "${CURRENT_PACKAGES_DIR}/tools/${PORT}/share/misc") endif() if(NOT VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "debug") - file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/tools/${UNOFFICIAL_PORT}/debug/share/misc") - file(COPY "${CURRENT_PACKAGES_DIR}/share/${UNOFFICIAL_PORT}/magic.mgc" DESTINATION "${CURRENT_PACKAGES_DIR}/tools/${UNOFFICIAL_PORT}/debug/share/misc") + file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/tools/${PORT}/debug/share/misc") + file(COPY "${CURRENT_PACKAGES_DIR}/share/${PORT}/misc/magic.mgc" DESTINATION "${CURRENT_PACKAGES_DIR}/tools/${PORT}/debug/share/misc") endif() endif() +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share") +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/share/${PORT}/man5") + +include(CMakePackageConfigHelpers) +configure_package_config_file( + "${CMAKE_CURRENT_LIST_DIR}/unofficial-${PORT}-config.cmake.in" + "${CURRENT_PACKAGES_DIR}/share/unofficial-${PORT}/unofficial-${PORT}-config.cmake" + INSTALL_DESTINATION "share/unofficial-${PORT}" +) + # Handle copyright and usage vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/COPYING") file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}") diff --git a/ports/libmagic/unofficial-libmagic-config.cmake.in 
b/ports/libmagic/unofficial-libmagic-config.cmake.in index d3671f89b479..5ff3b0380aa7 100644 --- a/ports/libmagic/unofficial-libmagic-config.cmake.in +++ b/ports/libmagic/unofficial-libmagic-config.cmake.in @@ -1,12 +1,69 @@ @PACKAGE_INIT@ -include(CMakeFindDependencyMacro) -find_dependency(PCRE2 COMPONENTS 8BIT POSIX) +if(WIN32 AND "@VCPKG_LIBRARY_LINKAGE@" STREQUAL "static") + include(CMakeFindDependencyMacro) + find_dependency(unofficial-tre) +endif() -include("${CMAKE_CURRENT_LIST_DIR}/@TARGETS_EXPORT_NAME@.cmake") +# Compute the installation prefix relative to this file. +get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() -set(unofficial-libmagic_VERSION @CMAKE_PROJECT_VERSION@) -set(unofficial-libmagic_FOUND true) -set(unofficial-libmagic_DICTIONARY "${CMAKE_CURRENT_LIST_DIR}/magic.mgc") +if("@VCPKG_LIBRARY_LINKAGE@" STREQUAL "static") + add_library(unofficial::libmagic::libmagic STATIC IMPORTED) +else() + add_library(unofficial::libmagic::libmagic SHARED IMPORTED) +endif() -check_required_components("unofficial-libmagic") +set_target_properties(unofficial::libmagic::libmagic PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" +) + +if(WIN32 AND "@VCPKG_LIBRARY_LINKAGE@" STREQUAL "static") + set_target_properties(unofficial::libmagic::libmagic PROPERTIES + INTERFACE_LINK_LIBRARIES "\$" + ) +endif() + +macro(add_library_config config prefix) + set_property(TARGET unofficial::libmagic::libmagic APPEND PROPERTY IMPORTED_CONFIGURATIONS ${config}) + if("@VCPKG_LIBRARY_LINKAGE@" STREQUAL "static") + set_target_properties(unofficial::libmagic::libmagic PROPERTIES + IMPORTED_LOCATION_${config} "${_IMPORT_PREFIX}/${prefix}lib/@VCPKG_TARGET_STATIC_LIBRARY_PREFIX@magic@VCPKG_TARGET_STATIC_LIBRARY_SUFFIX@" + 
IMPORTED_LINK_INTERFACE_LANGUAGES_${config} "C" + ) + else() + if(WIN32) + set(library_dir "${_IMPORT_PREFIX}/${prefix}bin/") + set(soversion_suffix "-1") + set_target_properties(unofficial::libmagic::libmagic PROPERTIES + IMPORTED_IMPLIB_${config} "${_IMPORT_PREFIX}/${prefix}/lib/@VCPKG_TARGET_IMPORT_LIBRARY_PREFIX@magic@VCPKG_TARGET_IMPORT_LIBRARY_SUFFIX@" + ) + else() + set(library_dir "${_IMPORT_PREFIX}/${prefix}lib/") + endif() + set_target_properties(unofficial::libmagic::libmagic PROPERTIES + IMPORTED_LOCATION_${config} "${library_dir}@VCPKG_TARGET_SHARED_LIBRARY_PREFIX@magic${soversion_suffix}@VCPKG_TARGET_SHARED_LIBRARY_SUFFIX@" + ) + unset(soversion_suffix) + unset(library_dir) + endif() +endmacro() + +if("@VCPKG_BUILD_TYPE@" STREQUAL "" OR "@VCPKG_BUILD_TYPE@" STREQUAL "debug") + add_library_config(DEBUG "debug/") +endif() + +if("@VCPKG_BUILD_TYPE@" STREQUAL "" OR "@VCPKG_BUILD_TYPE@" STREQUAL "release") + add_library_config(RELEASE "") +endif() + +set_and_check(unofficial-libmagic_DICTIONARY "${_IMPORT_PREFIX}/share/libmagic/misc/magic.mgc") + +unset(_IMPORT_PREFIX) + +check_required_components(unofficial-libmagic) diff --git a/ports/libmagic/vcpkg.json b/ports/libmagic/vcpkg.json index 63944114b565..e809501e900c 100644 --- a/ports/libmagic/vcpkg.json +++ b/ports/libmagic/vcpkg.json @@ -1,26 +1,52 @@ { "name": "libmagic", - "version-string": "5.40", - "port-version": 1, + "version": "5.45", + "port-version": 2, "description": "This library can be used to classify files according to magic number tests.", "homepage": "https://github.com/file/file", + "license": "BSD-2-Clause", "dependencies": [ { - "name": "vcpkg-cmake", - "host": true + "name": "dirent", + "platform": "windows" }, { - "name": "vcpkg-cmake-config", - "host": true + "name": "getopt", + "platform": "windows" }, { - "name": "dirent", - "platform": "windows" + "name": "libmagic", + "host": true }, { - "name": "getopt", - "platform": "windows & !mingw" + "name": "tre", + "platform": "windows | 
mingw" + } + ], + "features": { + "zlib": { + "description": "Enable zlib support", + "dependencies": [ + "zlib" + ] + }, + "bzlib": { + "description": "Enable Bzip2 support", + "dependencies": [ + "bzip2" + ] + }, + "xzlib": { + "description": "Enable liblzma/xz support", + "dependencies": [ + "liblzma" + ] }, - "pcre2" - ] + "zstdlib": { + "description": "Enable zstdlib support", + "dependencies": [ + "zstd" + ] + } + } } diff --git a/ports/pcre2/fix-cmake.patch b/ports/pcre2/fix-cmake.patch deleted file mode 100644 index 93d2f7196957..000000000000 --- a/ports/pcre2/fix-cmake.patch +++ /dev/null @@ -1,334 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index cec7dfb..84d1769 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -100,6 +100,9 @@ - # 2021-08-28 PH increased minimum version - # 2021-08-28 PH added test for realpath() - # 2022-12-10 PH added support for pcre2posix_test -+# 2023-01-15 Carlo added C99 as the minimum required -+# 2023-06-03 Theodore used standard CMake constructs to export the library's targets. -+# 2023-08-06 PH added support for setting variable length lookbehind maximum - - # Increased minimum to 2.8.5 to support GNUInstallDirs. - # Increased minimum to 3.1 to support imported targets. 
-@@ -136,6 +139,7 @@ INCLUDE(CheckFunctionExists) - INCLUDE(CheckSymbolExists) - INCLUDE(CheckIncludeFile) - INCLUDE(CheckTypeSize) -+INCLUDE(CMakePackageConfigHelpers) - INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR - - CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H) -@@ -705,7 +709,9 @@ IF(PCRE2_BUILD_PCRE2_8) - VERSION ${LIBPCRE2_8_VERSION} - SOVERSION ${LIBPCRE2_8_SOVERSION}) - TARGET_COMPILE_DEFINITIONS(pcre2-8-static PUBLIC PCRE2_STATIC) -- TARGET_INCLUDE_DIRECTORIES(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR}) -+ TARGET_INCLUDE_DIRECTORIES(pcre2-8-static PUBLIC -+ $ -+ $) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-8-static Threads::Threads) - ENDIF(REQUIRE_PTHREAD) -@@ -718,8 +724,9 @@ IF(PCRE2_BUILD_PCRE2_8) - VERSION ${LIBPCRE2_POSIX_VERSION} - SOVERSION ${LIBPCRE2_POSIX_SOVERSION}) - TARGET_LINK_LIBRARIES(pcre2-posix-static pcre2-8-static) -- TARGET_COMPILE_DEFINITIONS(pcre2-posix-static PUBLIC PCRE2_STATIC) -- TARGET_INCLUDE_DIRECTORIES(pcre2-posix-static PUBLIC ${PROJECT_BINARY_DIR}) -+ TARGET_INCLUDE_DIRECTORIES(pcre2-posix-static PUBLIC -+ $ -+ $) - SET(targets ${targets} pcre2-posix-static) - - IF(MSVC) -@@ -736,7 +743,9 @@ IF(PCRE2_BUILD_PCRE2_8) - - IF(BUILD_SHARED_LIBS) - ADD_LIBRARY(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) -- TARGET_INCLUDE_DIRECTORIES(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR}) -+ TARGET_INCLUDE_DIRECTORIES(pcre2-8-shared PUBLIC -+ $ -+ $) - SET_TARGET_PROPERTIES(pcre2-8-shared PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" -@@ -749,7 +758,9 @@ IF(PCRE2_BUILD_PCRE2_8) - ENDIF(REQUIRE_PTHREAD) - SET(targets ${targets} pcre2-8-shared) - ADD_LIBRARY(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) -- TARGET_INCLUDE_DIRECTORIES(pcre2-posix-shared PUBLIC ${PROJECT_BINARY_DIR}) -+ TARGET_INCLUDE_DIRECTORIES(pcre2-posix-shared PUBLIC -+ $ -+ $) - 
SET_TARGET_PROPERTIES(pcre2-posix-shared PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" -@@ -786,7 +797,9 @@ ENDIF(PCRE2_BUILD_PCRE2_8) - IF(PCRE2_BUILD_PCRE2_16) - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) -- TARGET_INCLUDE_DIRECTORIES(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR}) -+ TARGET_INCLUDE_DIRECTORIES(pcre2-16-static PUBLIC -+ $ -+ $) - SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" -@@ -811,7 +824,9 @@ IF(PCRE2_BUILD_PCRE2_16) - - IF(BUILD_SHARED_LIBS) - ADD_LIBRARY(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) -- TARGET_INCLUDE_DIRECTORIES(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR}) -+ TARGET_INCLUDE_DIRECTORIES(pcre2-16-shared PUBLIC -+ $ -+ $) - SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" -@@ -848,7 +863,9 @@ ENDIF(PCRE2_BUILD_PCRE2_16) - IF(PCRE2_BUILD_PCRE2_32) - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) -- TARGET_INCLUDE_DIRECTORIES(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR}) -+ TARGET_INCLUDE_DIRECTORIES(pcre2-32-static PUBLIC -+ $ -+ $) - SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" -@@ -873,7 +890,9 @@ IF(PCRE2_BUILD_PCRE2_32) - - IF(BUILD_SHARED_LIBS) - ADD_LIBRARY(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) -- TARGET_INCLUDE_DIRECTORIES(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR}) -+ TARGET_INCLUDE_DIRECTORIES(pcre2-32-shared PUBLIC 
-+ $ -+ $) - SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" -@@ -1075,9 +1094,13 @@ ENDIF(PCRE2_BUILD_TESTS) - SET(CMAKE_INSTALL_ALWAYS 1) - - INSTALL(TARGETS ${targets} -- RUNTIME DESTINATION bin -+ EXPORT pcre2-targets -+ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -+INSTALL(EXPORT pcre2-targets -+ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/pcre2 -+ NAMESPACE pcre2::) - INSTALL(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) - INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config" - DESTINATION bin -@@ -1089,11 +1112,12 @@ INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include) - # CMake config files. - set(PCRE2_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in) - set(PCRE2_CONFIG_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config.cmake) --configure_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} @ONLY) --set(PCRE2_CONFIG_VERSION_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in) -+configure_package_config_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/pcre2) - set(PCRE2_CONFIG_VERSION_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config-version.cmake) --configure_file(${PCRE2_CONFIG_VERSION_IN} ${PCRE2_CONFIG_VERSION_OUT} @ONLY) --install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION cmake) -+write_basic_package_version_file(${PCRE2_CONFIG_VERSION_OUT} -+ VERSION ${PCRE2_MAJOR}.${PCRE2_MINOR}.0 -+ COMPATIBILITY SameMajorVersion) -+install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/pcre2) - - FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html) - FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1) -diff --git a/cmake/pcre2-config-version.cmake.in 
b/cmake/pcre2-config-version.cmake.in -deleted file mode 100644 -index dac149e..0000000 ---- a/cmake/pcre2-config-version.cmake.in -+++ /dev/null -@@ -1,15 +0,0 @@ --set(PACKAGE_VERSION_MAJOR @PCRE2_MAJOR@) --set(PACKAGE_VERSION_MINOR @PCRE2_MINOR@) --set(PACKAGE_VERSION_PATCH 0) --set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0) -- --# Check whether the requested PACKAGE_FIND_VERSION is compatible --if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR -- PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR) -- set(PACKAGE_VERSION_COMPATIBLE FALSE) --else() -- set(PACKAGE_VERSION_COMPATIBLE TRUE) -- if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION) -- set(PACKAGE_VERSION_EXACT TRUE) -- endif() --endif() -diff --git a/cmake/pcre2-config.cmake.in b/cmake/pcre2-config.cmake.in -index b313d6d..159669b 100644 ---- a/cmake/pcre2-config.cmake.in -+++ b/cmake/pcre2-config.cmake.in -@@ -5,11 +5,17 @@ - # - # Static vs. shared - # ----------------- --# To make use of the static library instead of the shared one, one needs -+# To force using the static library instead of the shared one, one needs - # to set the variable PCRE2_USE_STATIC_LIBS to ON before calling find_package. -+# If the variable is not set, the static library will be used if only that has -+# been built, otherwise the shared library will be used. -+# -+# The following components are supported: 8BIT, 16BIT, 32BIT and POSIX. -+# They used to be required but not anymore; all available targets will -+# be defined regardless of the requested components. - # Example: - # set(PCRE2_USE_STATIC_LIBS ON) --# find_package(PCRE2 CONFIG COMPONENTS 8BIT) -+# find_package(PCRE2 CONFIG) - # - # This will define the following variables: - # -@@ -23,70 +29,42 @@ - # PCRE2::32BIT - The 32 bit PCRE2 library. - # PCRE2::POSIX - The POSIX PCRE2 library. 
- --set(PCRE2_NON_STANDARD_LIB_PREFIX @NON_STANDARD_LIB_PREFIX@) --set(PCRE2_NON_STANDARD_LIB_SUFFIX @NON_STANDARD_LIB_SUFFIX@) --set(PCRE2_8BIT_NAME pcre2-8) --set(PCRE2_16BIT_NAME pcre2-16) --set(PCRE2_32BIT_NAME pcre2-32) --set(PCRE2_POSIX_NAME pcre2-posix) --find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory") --if (PCRE2_USE_STATIC_LIBS) -- if (MSVC) -- set(PCRE2_8BIT_NAME pcre2-8-static) -- set(PCRE2_16BIT_NAME pcre2-16-static) -- set(PCRE2_32BIT_NAME pcre2-32-static) -- set(PCRE2_POSIX_NAME pcre2-posix-static) -- endif () -+@PACKAGE_INIT@ - -- set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX}) -- set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) --else () -- set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX}) -- if (MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX) -- set(PCRE2_PREFIX "") -- endif () -+include(CMakeFindDependencyMacro) -+if("@REQUIRE_PTHREAD@") # REQUIRE_PTHREAD -+ find_dependency(Threads) -+endif() - -- set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) -- if (MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX) -- set(PCRE2_SUFFIX "-0.dll") -- endif () --endif () --find_library(PCRE2_8BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit PCRE2 library") --find_library(PCRE2_16BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "16 bit PCRE2 library") --find_library(PCRE2_32BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "32 bit PCRE2 library") --find_library(PCRE2_POSIX_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit POSIX PCRE2 library") --unset(PCRE2_NON_STANDARD_LIB_PREFIX) --unset(PCRE2_NON_STANDARD_LIB_SUFFIX) --unset(PCRE2_8BIT_NAME) --unset(PCRE2_16BIT_NAME) --unset(PCRE2_32BIT_NAME) --unset(PCRE2_POSIX_NAME) 
-+include("${CMAKE_CURRENT_LIST_DIR}/pcre2-targets.cmake") - - # Set version --if (PCRE2_INCLUDE_DIR) -- set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0") --endif () -+set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0") - --# Which components have been found. --if (PCRE2_8BIT_LIBRARY) -- set(PCRE2_8BIT_FOUND TRUE) --endif () --if (PCRE2_16BIT_LIBRARY) -- set(PCRE2_16BIT_FOUND TRUE) --endif () --if (PCRE2_32BIT_LIBRARY) -- set(PCRE2_32BIT_FOUND TRUE) --endif () --if (PCRE2_POSIX_LIBRARY) -- set(PCRE2_POSIX_FOUND TRUE) --endif () -- --# Check if at least one component has been specified. --list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS) --if (PCRE2_NCOMPONENTS LESS 1) -- message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.") --endif () --unset(PCRE2_NCOMPONENTS) -+# Chooses the linkage of the library to expose in the -+# unsuffixed edition of the target. -+macro(_pcre2_add_component_target component target) -+ # If the static library exists and either PCRE2_USE_STATIC_LIBS -+ # is defined, or the dynamic library does not exist, use the static library. -+ if(NOT TARGET PCRE2::${component}) -+ if(TARGET pcre2::pcre2-${target}-static AND (PCRE2_USE_STATIC_LIBS OR NOT TARGET pcre2::pcre2-${target}-shared)) -+ add_library(PCRE2::${component} ALIAS pcre2::pcre2-${target}-static) -+ set(PCRE2_${component}_FOUND TRUE) -+ # Otherwise use the dynamic library if it exists. 
-+ elseif(TARGET pcre2::pcre2-${target}-shared AND NOT PCRE2_USE_STATIC_LIBS) -+ add_library(PCRE2::${component} ALIAS pcre2::pcre2-${target}-shared) -+ set(PCRE2_${component}_FOUND TRUE) -+ endif() -+ if(PCRE2_${component}_FOUND) -+ get_target_property(PCRE2_${component}_LIBRARY PCRE2::${component} IMPORTED_LOCATION) -+ set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY}) -+ endif() -+ endif() -+endmacro() -+_pcre2_add_component_target(8BIT 8) -+_pcre2_add_component_target(16BIT 16) -+_pcre2_add_component_target(32BIT 32) -+_pcre2_add_component_target(POSIX posix) - - # When POSIX component has been specified make sure that also 8BIT component is specified. - set(PCRE2_8BIT_COMPONENT FALSE) -@@ -105,41 +83,5 @@ endif() - unset(PCRE2_8BIT_COMPONENT) - unset(PCRE2_POSIX_COMPONENT) - --include(FindPackageHandleStandardArgs) --set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") --find_package_handle_standard_args(PCRE2 -- FOUND_VAR PCRE2_FOUND -- REQUIRED_VARS PCRE2_INCLUDE_DIR -- HANDLE_COMPONENTS -- VERSION_VAR PCRE2_VERSION -- CONFIG_MODE --) -- --set(PCRE2_LIBRARIES) --if (PCRE2_FOUND) -- foreach(component ${PCRE2_FIND_COMPONENTS}) -- if (PCRE2_USE_STATIC_LIBS) -- add_library(PCRE2::${component} STATIC IMPORTED) -- target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC) -- else () -- add_library(PCRE2::${component} SHARED IMPORTED) -- endif () -- set_target_properties(PCRE2::${component} PROPERTIES -- IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}" -- INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}" -- ) -- if (component STREQUAL "POSIX") -- set_target_properties(PCRE2::${component} PROPERTIES -- INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" -- LINK_LIBRARIES "PCRE2::8BIT" -- ) -- endif () -- -- set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY}) -- mark_as_advanced(PCRE2_${component}_LIBRARY) -- endforeach() --endif () -- --mark_as_advanced( -- PCRE2_INCLUDE_DIR --) -+# Check for required 
components. -+check_required_components("PCRE2") diff --git a/ports/pcre2/no-static-suffix.patch b/ports/pcre2/no-static-suffix.patch deleted file mode 100644 index 7f41bcd566cf..000000000000 --- a/ports/pcre2/no-static-suffix.patch +++ /dev/null @@ -1,33 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index fa2181e..3bf5317 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -711,8 +711,8 @@ IF(PCRE2_BUILD_PCRE2_8) - SET(targets ${targets} pcre2-posix-static) - - IF(MSVC) -- SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static) -- SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static) -+ SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8) -+ SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix) - ELSE(MSVC) - SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8) - SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix) -@@ -777,7 +777,7 @@ IF(PCRE2_BUILD_PCRE2_16) - SET(targets ${targets} pcre2-16-static) - - IF(MSVC) -- SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static) -+ SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16) - ELSE(MSVC) - SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16) - ENDIF(MSVC) -@@ -829,7 +829,7 @@ IF(PCRE2_BUILD_PCRE2_32) - SET(targets ${targets} pcre2-32-static) - - IF(MSVC) -- SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static) -+ SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32) - ELSE(MSVC) - SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32) - ENDIF(MSVC) diff --git a/ports/pcre2/pcre2-10.35_fix-uwp.patch b/ports/pcre2/pcre2-10.35_fix-uwp.patch deleted file mode 100644 index 476dde0f6a4c..000000000000 --- a/ports/pcre2/pcre2-10.35_fix-uwp.patch +++ /dev/null @@ -1,10 +0,0 @@ ---- a/CMakeLists.txt 2020-05-09 16:43:10.000000000 +0200 -+++ b/CMakeLists.txt 2020-06-03 
20:57:17.026182500 +0200 -@@ -619,6 +619,7 @@ - - IF(MSVC) - ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS) -+ add_compile_options(/wd4146) - ENDIF(MSVC) - - SET(CMAKE_INCLUDE_CURRENT_DIR 1) diff --git a/ports/pcre2/portfile.cmake b/ports/pcre2/portfile.cmake deleted file mode 100644 index 5d6c5c39b6a6..000000000000 --- a/ports/pcre2/portfile.cmake +++ /dev/null @@ -1,73 +0,0 @@ -vcpkg_from_github( - OUT_SOURCE_PATH SOURCE_PATH - REPO PCRE2Project/pcre2 - REF "pcre2-${VERSION}" - SHA512 3d0ee66e23809d3da2fe2bf4ed6e20b0fb96c293a91668935f6319e8d02e480eeef33da01e08a7436a18a1a85a116d83186b953520f394c866aad3cea73c7f5c - HEAD_REF master - PATCHES - pcre2-10.35_fix-uwp.patch - no-static-suffix.patch - fix-cmake.patch -) - -string(COMPARE EQUAL "${VCPKG_LIBRARY_LINKAGE}" "static" BUILD_STATIC) -string(COMPARE EQUAL "${VCPKG_LIBRARY_LINKAGE}" "dynamic" INSTALL_PDB) -string(COMPARE EQUAL "${VCPKG_CRT_LINKAGE}" "static" BUILD_STATIC_CRT) - -vcpkg_check_features( - OUT_FEATURE_OPTIONS FEATURE_OPTIONS - FEATURES - jit PCRE2_SUPPORT_JIT -) - -vcpkg_cmake_configure( - SOURCE_PATH "${SOURCE_PATH}" - OPTIONS - ${FEATURE_OPTIONS} - -DBUILD_STATIC_LIBS=${BUILD_STATIC} - -DPCRE2_STATIC_RUNTIME=${BUILD_STATIC_CRT} - -DPCRE2_BUILD_PCRE2_8=ON - -DPCRE2_BUILD_PCRE2_16=ON - -DPCRE2_BUILD_PCRE2_32=ON - -DPCRE2_SUPPORT_UNICODE=ON - -DPCRE2_BUILD_TESTS=OFF - -DPCRE2_BUILD_PCRE2GREP=OFF - -DCMAKE_DISABLE_FIND_PACKAGE_BZip2=ON - -DCMAKE_DISABLE_FIND_PACKAGE_ZLIB=ON - -DCMAKE_DISABLE_FIND_PACKAGE_Readline=ON - -DCMAKE_DISABLE_FIND_PACKAGE_Editline=ON - -DINSTALL_MSVC_PDB=${INSTALL_PDB} - ) - -vcpkg_cmake_install() -vcpkg_copy_pdbs() - -file(READ "${CURRENT_PACKAGES_DIR}/include/pcre2.h" PCRE2_H) -if(BUILD_STATIC) - string(REPLACE "defined(PCRE2_STATIC)" "1" PCRE2_H "${PCRE2_H}") -else() - string(REPLACE "defined(PCRE2_STATIC)" "0" PCRE2_H "${PCRE2_H}") -endif() -file(WRITE "${CURRENT_PACKAGES_DIR}/include/pcre2.h" "${PCRE2_H}") - -vcpkg_fixup_pkgconfig() 
-vcpkg_cmake_config_fixup(CONFIG_PATH lib/cmake/${PORT}) - -file(REMOVE_RECURSE - "${CURRENT_PACKAGES_DIR}/man" - "${CURRENT_PACKAGES_DIR}/share/doc" - "${CURRENT_PACKAGES_DIR}/debug/include" - "${CURRENT_PACKAGES_DIR}/debug/man" - "${CURRENT_PACKAGES_DIR}/debug/share") - -if(BUILD_STATIC) - file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/bin" "${CURRENT_PACKAGES_DIR}/debug/bin") -elseif(VCPKG_TARGET_IS_WINDOWS) - vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/bin/pcre2-config" "${CURRENT_PACKAGES_DIR}" "`dirname $0`/..") - if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/bin/pcre2-config") - vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/debug/bin/pcre2-config" "${CURRENT_PACKAGES_DIR}" "`dirname $0`/../..") - endif() -endif() - -file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}") -vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/COPYING") diff --git a/ports/pcre2/usage b/ports/pcre2/usage deleted file mode 100644 index a8e97871bd52..000000000000 --- a/ports/pcre2/usage +++ /dev/null @@ -1,6 +0,0 @@ -The package pcre2 is compatible with built-in CMake targets: - - # Each component imports a target: - # TARGETS: pcre2::8BIT pcre2::16BIT pcre2::32BIT pcre2::POSIX - find_package(pcre2 CONFIG REQUIRED) - target_link_libraries(main PRIVATE pcre2::8BIT pcre2::POSIX) diff --git a/ports/pcre2/vcpkg.json b/ports/pcre2/vcpkg.json deleted file mode 100644 index 3dd6b9455d55..000000000000 --- a/ports/pcre2/vcpkg.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "name": "pcre2", - "version": "10.42", - "port-version": 1, - "description": "Regular Expression pattern matching using the same syntax and semantics as Perl 5.", - "homepage": "https://github.com/PCRE2Project/pcre2", - "license": "BSD-3-Clause", - "dependencies": [ - { - "name": "vcpkg-cmake", - "host": true - }, - { - "name": "vcpkg-cmake-config", - "host": true - } - ], - "default-features": [ - "platform-default-features" - ], - "features": { - "jit": { - "description": "Enable support 
for Just-In-Time compiling regex matchers", - "supports": "!emscripten" - }, - "platform-default-features": { - "description": "Enable default features", - "dependencies": [ - { - "name": "pcre2", - "features": [ - "jit" - ], - "platform": "!emscripten" - } - ] - } - } -} diff --git a/ports/triplets/arm64-osx-relwithdebinfo.cmake b/ports/triplets/arm64-osx-relwithdebinfo.cmake new file mode 100644 index 000000000000..592c7f2ae832 --- /dev/null +++ b/ports/triplets/arm64-osx-relwithdebinfo.cmake @@ -0,0 +1,10 @@ +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES arm64) +set(VCPKG_OSX_DEPLOYMENT_TARGET 11) + +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS "-g") diff --git a/ports/triplets/x64-linux-relwithdebinfo.cmake b/ports/triplets/x64-linux-relwithdebinfo.cmake new file mode 100644 index 000000000000..533c1f3928cb --- /dev/null +++ b/ports/triplets/x64-linux-relwithdebinfo.cmake @@ -0,0 +1,8 @@ +set(VCPKG_TARGET_ARCHITECTURE x64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Linux) + +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS "-g") diff --git a/ports/triplets/x64-osx-relwithdebinfo.cmake b/ports/triplets/x64-osx-relwithdebinfo.cmake new file mode 100644 index 000000000000..db1bfabf76d9 --- /dev/null +++ b/ports/triplets/x64-osx-relwithdebinfo.cmake @@ -0,0 +1,10 @@ +set(VCPKG_TARGET_ARCHITECTURE x64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES x86_64) +set(VCPKG_OSX_DEPLOYMENT_TARGET 11) + +set(VCPKG_CXX_FLAGS "-g") +set(VCPKG_C_FLAGS "-g") diff --git a/scripts/ci/build_libtiledb.sh b/scripts/ci/build_libtiledb.sh index f851a15830da..17adb2388b96 100644 --- a/scripts/ci/build_libtiledb.sh +++ b/scripts/ci/build_libtiledb.sh @@ -38,5 +38,6 @@ make -C tiledb install ls -la make -j4 -C tiledb tiledb_unit 
+make -j4 -C tiledb unit_vfs make -j4 -C tiledb tiledb_regression make -j4 -C tiledb all_link_complete diff --git a/scripts/find_heap_api_violations.py b/scripts/find_heap_api_violations.py index a0ffd80947eb..1bc28dd84638 100755 --- a/scripts/find_heap_api_violations.py +++ b/scripts/find_heap_api_violations.py @@ -101,11 +101,11 @@ # Contains per-file exceptions to violations of "make_unique". unique_ptr_exceptions = { - "*": ["tdb_unique_ptr", "tiledb_unique_ptr"], + "*": ["tdb_unique_ptr", "tiledb_unique_ptr", "tdb::pmr::unique_ptr"], "zstd_compressor.h": ["std::unique_ptr ctx_;", "std::unique_ptr ctx_;"], "posix.cc": ["static std::unique_ptr cwd_(getcwd(nullptr, 0), free);"], "curl.h": ["std::unique_ptr"], - "tile.h": ["std::unique_ptr data_;"], + "pmr.h": ["std::unique_ptr", "unique_ptr make_unique("], } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ec67498ef670..10cb67d82fda 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -4,7 +4,7 @@ # # The MIT License # -# Copyright (c) 2017-2022 TileDB, Inc. +# Copyright (c) 2017-2024 TileDB, Inc. 
# Copyright (c) 2016 MIT and Intel Corporation # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -119,7 +119,6 @@ set(TILEDB_UNIT_TEST_SOURCES src/test-cppapi-aggregates.cc src/test-cppapi-consolidation-plan.cc src/unit-average-cell-size.cc - src/unit-azure.cc src/unit-backwards_compat.cc src/unit-bufferlist.cc src/unit-capi-any.cc @@ -180,19 +179,16 @@ set(TILEDB_UNIT_TEST_SOURCES src/unit-filter-buffer.cc src/unit-filter-pipeline.cc src/unit-global-order.cc - src/unit-gcs.cc - src/unit-gs.cc - src/unit-hdfs-filesystem.cc src/unit-ordered-dim-label-reader.cc src/unit-tile-metadata.cc src/unit-tile-metadata-generator.cc + src/unit-query-plan.cc src/unit-ReadCellSlabIter.cc src/unit-Reader.cc src/unit-request-handlers.cc src/unit-resource-pool.cc src/unit-result-coords.cc src/unit-result-tile.cc - src/unit-s3-no-multipart.cc src/unit-s3.cc src/unit-sparse-global-order-reader.cc src/unit-sparse-unordered-with-dups-reader.cc @@ -234,6 +230,7 @@ if (TILEDB_CPP_API) src/unit-cppapi-nullable.cc src/unit-cppapi-partial-attribute-write.cc src/unit-cppapi-query.cc + src/unit-cppapi-query-condition-enumerations.cc src/unit-cppapi-query-condition-sets.cc src/cpp-integration-query-condition.cc src/unit-cppapi-schema.cc diff --git a/test/inputs/groups/group_v1/__group/__1708362383727_1708362383727_a464f3a3a7e740c8856a476ee4c66ce1 b/test/inputs/groups/group_v1/__group/__1708362383727_1708362383727_a464f3a3a7e740c8856a476ee4c66ce1 new file mode 100644 index 000000000000..48772da90954 Binary files /dev/null and b/test/inputs/groups/group_v1/__group/__1708362383727_1708362383727_a464f3a3a7e740c8856a476ee4c66ce1 differ diff --git a/test/inputs/groups/group_v1/__tiledb_group.tdb b/test/inputs/groups/group_v1/__tiledb_group.tdb new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/inputs/groups/group_v1/subgroup1/__tiledb_group.tdb b/test/inputs/groups/group_v1/subgroup1/__tiledb_group.tdb new file mode 100644 index 
000000000000..e69de29bb2d1 diff --git a/test/src/test-capi-consolidation-plan.cc b/test/src/test-capi-consolidation-plan.cc index bce48e219d1d..11e6ea64e6bd 100644 --- a/test/src/test-capi-consolidation-plan.cc +++ b/test/src/test-capi-consolidation-plan.cc @@ -30,6 +30,7 @@ * Tests the ConsolidationPlan API */ +#include #include "test/support/src/helpers.h" #include "tiledb/api/c_api/context/context_api_internal.h" #include "tiledb/sm/c_api/tiledb.h" @@ -42,17 +43,13 @@ using namespace tiledb; using namespace tiledb::test; -struct ConsolidationPlanFx { - // Constants. - const char* SPARSE_ARRAY_NAME = "test_deletes_array"; - - // TileDB context. - Context ctx_; - VFS vfs_; - - std::string key_ = "0123456789abcdeF0123456789abcdeF"; - const tiledb_encryption_type_t enc_type_ = TILEDB_AES_256_GCM; +#ifndef TILEDB_TESTS_ENABLE_REST +constexpr bool rest_tests = false; +#else +constexpr bool rest_tests = true; +#endif +struct ConsolidationPlanFx { // Constructors/destructors. ConsolidationPlanFx(); ~ConsolidationPlanFx(); @@ -69,20 +66,39 @@ struct ConsolidationPlanFx { void remove_array(const std::string& array_name); bool is_array(const std::string& array_name); void check_last_error(std::string expected); + + // TileDB context. + Context ctx_; + // Full URI initialized using fs_vec_ random temp directory. 
+ std::string array_name_; + + // Vector of supported filsystems + tiledb_vfs_handle_t* vfs_c_{nullptr}; + tiledb_ctx_handle_t* ctx_c_{nullptr}; + const std::vector> fs_vec_; + + std::string key_ = "0123456789abcdeF0123456789abcdeF"; + const tiledb_encryption_type_t enc_type_ = TILEDB_AES_256_GCM; }; ConsolidationPlanFx::ConsolidationPlanFx() - : vfs_(ctx_) { + : fs_vec_(test::vfs_test_get_fs_vec()) { Config config; config.set("sm.consolidation.buffer_size", "1000"); - ctx_ = Context(config); - vfs_ = VFS(ctx_); - - remove_sparse_array(); + REQUIRE( + test::vfs_test_init(fs_vec_, &ctx_c_, &vfs_c_, config.ptr().get()).ok()); + ctx_ = Context(ctx_c_); + std::string temp_dir = fs_vec_[0]->temp_dir(); + if constexpr (rest_tests) { + array_name_ = "tiledb://unit/"; + } + array_name_ += temp_dir + "test_consolidation_plan_array"; + test::vfs_test_create_temp_dir(ctx_c_, vfs_c_, temp_dir); } ConsolidationPlanFx::~ConsolidationPlanFx() { - remove_sparse_array(); + Array::delete_array(ctx_, array_name_); + REQUIRE(test::vfs_test_close(fs_vec_, ctx_c_, vfs_c_).ok()); } void ConsolidationPlanFx::create_sparse_array(bool allows_dups, bool encrypt) { @@ -115,9 +131,9 @@ void ConsolidationPlanFx::create_sparse_array(bool allows_dups, bool encrypt) { schema.set_coords_filter_list(filter_list); if (encrypt) { - Array::create(SPARSE_ARRAY_NAME, schema, enc_type_, key_); + Array::create(array_name_, schema, enc_type_, key_); } else { - Array::create(SPARSE_ARRAY_NAME, schema); + Array::create(array_name_, schema); } } @@ -132,16 +148,13 @@ void ConsolidationPlanFx::write_sparse( if (encrypt) { array = std::make_unique( ctx_, - SPARSE_ARRAY_NAME, + array_name_, TILEDB_WRITE, TemporalPolicy(TimeTravel, timestamp), EncryptionAlgorithm(AESGCM, key_.c_str())); } else { array = std::make_unique( - ctx_, - SPARSE_ARRAY_NAME, - TILEDB_WRITE, - TemporalPolicy(TimeTravel, timestamp)); + ctx_, array_name_, TILEDB_WRITE, TemporalPolicy(TimeTravel, timestamp)); } // Create query. 
@@ -152,28 +165,12 @@ void ConsolidationPlanFx::write_sparse( query.set_data_buffer("d2", dim2); // Submit/finalize the query. - query.submit(); - query.finalize(); + query.submit_and_finalize(); // Close array. array->close(); } -void ConsolidationPlanFx::remove_array(const std::string& array_name) { - if (!is_array(array_name)) - return; - - vfs_.remove_dir(array_name); -} - -void ConsolidationPlanFx::remove_sparse_array() { - remove_array(SPARSE_ARRAY_NAME); -} - -bool ConsolidationPlanFx::is_array(const std::string& array_name) { - return vfs_.is_dir(array_name); -} - void ConsolidationPlanFx::check_last_error(std::string expected) { const char* msg = "unset"; tiledb_error_t* err{nullptr}; @@ -188,11 +185,11 @@ void ConsolidationPlanFx::check_last_error(std::string expected) { TEST_CASE_METHOD( ConsolidationPlanFx, "CAPI: Consolidation plan", - "[capi][consolidation-plan]") { + "[capi][consolidation-plan][rest]") { create_sparse_array(); write_sparse({0, 1, 2, 3}, {1, 1, 1, 2}, {1, 2, 4, 3}, 1); - Array array{ctx_, SPARSE_ARRAY_NAME, TILEDB_READ}; + Array array{ctx_, array_name_, TILEDB_READ}; tiledb_consolidation_plan_t* consolidation_plan{}; CHECK( @@ -231,11 +228,11 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( ConsolidationPlanFx, "CAPI: Consolidation plan dump", - "[capi][consolidation-plan][dump]") { + "[capi][consolidation-plan][dump][rest]") { create_sparse_array(); write_sparse({0, 1, 2, 3}, {1, 1, 1, 2}, {1, 2, 4, 3}, 1); - Array array{ctx_, SPARSE_ARRAY_NAME, TILEDB_READ}; + Array array{ctx_, array_name_, TILEDB_READ}; tiledb_consolidation_plan_t* consolidation_plan{}; CHECK( diff --git a/test/src/test-cppapi-aggregates.cc b/test/src/test-cppapi-aggregates.cc index 07c4ee4a5a25..5d10e7c7e6b6 100644 --- a/test/src/test-cppapi-aggregates.cc +++ b/test/src/test-cppapi-aggregates.cc @@ -61,8 +61,12 @@ struct CppAggregatesFx { bool allow_dups_; bool set_ranges_; bool set_qc_; + bool use_dim_; + std::string dim_name_; tiledb_layout_t layout_; std::vector 
set_qc_values_; + std::vector use_dim_values_; + std::vector dim_name_values_; std::vector layout_values_; std::string key_ = "0123456789abcdeF0123456789abcdeF"; @@ -74,6 +78,7 @@ struct CppAggregatesFx { // Functions. void generate_test_params(); + void run_all_combinations(std::function fn); void create_dense_array(bool var = false, bool encrypt = false); void create_sparse_array(bool var = false, bool encrypt = false); void write_sparse( @@ -121,6 +126,7 @@ struct CppAggregatesFx { std::vector& dim2, std::vector& a1, std::vector& a1_validity, + std::function aggregate_fn, const bool validate_count = true); void validate_data_var( Query& query, @@ -161,6 +167,12 @@ void CppAggregatesFx::generate_test_params() { allow_dups_ = false; set_qc_values_ = {true, false}; layout_values_ = {TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR, TILEDB_GLOBAL_ORDER}; + use_dim_values_ = {true, false}; + dim_name_values_ = {"d2"}; + if (nullable_ || !std::is_same::value) { + use_dim_values_ = {false}; + dim_name_values_ = {"d1"}; + } } SECTION("sparse") { @@ -169,6 +181,34 @@ void CppAggregatesFx::generate_test_params() { allow_dups_ = GENERATE(true, false); set_qc_values_ = {false}; layout_values_ = {TILEDB_UNORDERED}; + use_dim_values_ = {true, false}; + if (nullable_ || !std::is_same::value) { + use_dim_values_ = {false}; + } + dim_name_values_ = {"d1"}; + } +} + +template +void CppAggregatesFx::run_all_combinations(std::function fn) { + for (bool use_dim : use_dim_values_) { + use_dim_ = use_dim; + for (std::string dim_name : dim_name_values_) { + dim_name_ = dim_name; + for (bool set_ranges : {true, false}) { + set_ranges_ = set_ranges; + for (bool request_data : {true, false}) { + request_data_ = request_data; + for (bool set_qc : set_qc_values_) { + set_qc_ = set_qc; + for (tiledb_layout_t layout : layout_values_) { + layout_ = layout; + fn(); + } + } + } + } + } } } @@ -771,6 +811,7 @@ void CppAggregatesFx::validate_data( std::vector& dim2, std::vector& a1, std::vector& a1_validity, + 
std::function aggregate_fn, const bool validate_count) { uint64_t expected_count = 0; std::vector expected_dim1; @@ -899,25 +940,50 @@ void CppAggregatesFx::validate_data( CppAggregatesFx::STRING_CELL_VAL_NUM : 1; - dim1.resize(expected_dim1.size()); - dim2.resize(expected_dim2.size()); - a1.resize(expected_a1.size()); - CHECK(dim1 == expected_dim1); - CHECK(dim2 == expected_dim2); - CHECK(a1 == expected_a1); + if (request_data_) { + dim1.resize(expected_dim1.size()); + dim2.resize(expected_dim2.size()); + a1.resize(expected_a1.size()); + CHECK(dim1 == expected_dim1); + CHECK(dim2 == expected_dim2); + CHECK(a1 == expected_a1); - if (nullable_) { - a1_validity.resize(expected_a1_validity.size()); - CHECK(a1_validity == expected_a1_validity); + if (nullable_) { + a1_validity.resize(expected_a1_validity.size()); + CHECK(a1_validity == expected_a1_validity); + } + + if (validate_count) { + auto result_el = query.result_buffer_elements_nullable(); + CHECK(std::get<1>(result_el["d1"]) == expected_count); + CHECK(std::get<1>(result_el["d2"]) == expected_count); + CHECK(std::get<1>(result_el["a1"]) == expected_count * cell_val_num); + if (nullable_) { + CHECK(std::get<2>(result_el["a1"]) == expected_count); + } + } } - if (validate_count) { - auto result_el = query.result_buffer_elements_nullable(); - CHECK(std::get<1>(result_el["d1"]) == expected_count); - CHECK(std::get<1>(result_el["d2"]) == expected_count); - CHECK(std::get<1>(result_el["a1"]) == expected_count * cell_val_num); - if (nullable_) { - CHECK(std::get<2>(result_el["a1"]) == expected_count); + // Call the aggregate function for all expected values. 
+ if (use_dim_ && dim_name_ == "d1") { + for (uint64_t i = 0; i < expected_a1_int.size(); i++) { + if (!set_qc_ || (expected_a1_int[i] != 4 && expected_a1_int[i] != 35)) { + aggregate_fn(expected_dim1[i]); + } + } + } else if (use_dim_ && dim_name_ == "d2") { + for (uint64_t i = 0; i < expected_a1_int.size(); i++) { + if (!set_qc_ || (expected_a1_int[i] != 4 && expected_a1_int[i] != 35)) { + aggregate_fn(expected_dim2[i]); + } + } + } else { + for (uint64_t i = 0; i < expected_a1_int.size(); i++) { + if (!set_qc_ || (expected_a1_int[i] != 4 && expected_a1_int[i] != 35)) { + if (!nullable_ || expected_a1_validity[i] == 1) { + aggregate_fn(expected_a1_int[i]); + } + } } } } @@ -1172,11 +1238,11 @@ void CppAggregatesFx::validate_tiles_read(Query& query, bool is_count) { } else if (set_ranges_) { // If we request range, we split all tiles, we'll have to read all instead // of using fragment metadata. - expected_num_tiles_read = is_count ? 0 : 5; + expected_num_tiles_read = is_count || use_dim_ ? 0 : 5; } else { // One space tile has two result tiles, we'll have to read them instead of // using fragment metadata. - expected_num_tiles_read = is_count ? 0 : 2; + expected_num_tiles_read = is_count || use_dim_ ? 0 : 2; } } else { if (request_data_) { @@ -1195,9 +1261,9 @@ void CppAggregatesFx::validate_tiles_read(Query& query, bool is_count) { // we read 2 dims * 4 tiles. For the attribute, we can process 2 tiles // with duplicates and 1 without using the fragment metadata. if (allow_dups_) { - expected_num_tiles_read = is_count ? 8 : 10; + expected_num_tiles_read = use_dim_ || is_count ? 8 : 10; } else { - expected_num_tiles_read = is_count ? 8 : 11; + expected_num_tiles_read = use_dim_ || is_count ? 8 : 11; } } else { if (allow_dups_) { @@ -1208,7 +1274,7 @@ void CppAggregatesFx::validate_tiles_read(Query& query, bool is_count) { // Arrays without duplicates need to run deduplication, so we read the // dimension tiles (2 dims * 5 tiles). 
Only one tile for the attribute // can be processed with fragment metadata only. - expected_num_tiles_read = is_count ? 10 : 14; + expected_num_tiles_read = use_dim_ || is_count ? 10 : 14; } } } @@ -1431,6 +1497,7 @@ TEST_CASE_METHOD( for (bool set_ranges : {true, false}) { set_ranges_ = set_ranges; + use_dim_ = use_dim_ && !set_ranges; for (bool request_data : {true, false}) { request_data_ = request_data; for (bool set_qc : set_qc_values_) { @@ -1472,7 +1539,10 @@ TEST_CASE_METHOD( // Check the results. uint64_t expected_count; - if (dense_) { + + if (use_dim_) { + expected_count = 999; + } else if (dense_) { expected_count = set_ranges ? 24 : 36; } else { if (set_ranges) { @@ -1487,7 +1557,8 @@ TEST_CASE_METHOD( CHECK(count[0] == expected_count); if (request_data) { - validate_data(query, dim1, dim2, a1, a1_validity); + validate_data( + query, dim1, dim2, a1, a1_validity, [&](uint64_t) -> void {}); } validate_tiles_read(query, true); @@ -1524,101 +1595,63 @@ TEMPLATE_LIST_TEST_CASE_METHOD( Array array{ CppAggregatesFx::ctx_, CppAggregatesFx::ARRAY_NAME, TILEDB_READ}; - for (bool set_ranges : {true, false}) { - CppAggregatesFx::set_ranges_ = set_ranges; - for (bool request_data : {true, false}) { - CppAggregatesFx::request_data_ = request_data; - for (bool set_qc : CppAggregatesFx::set_qc_values_) { - CppAggregatesFx::set_qc_ = set_qc; - for (tiledb_layout_t layout : CppAggregatesFx::layout_values_) { - CppAggregatesFx::layout_ = layout; - Query query(CppAggregatesFx::ctx_, array, TILEDB_READ); - - // Add a sum aggregator to the query. - QueryChannel default_channel = - QueryExperimental::get_default_channel(query); - ChannelOperation operation = - QueryExperimental::create_unary_aggregate( - query, "a1"); - default_channel.apply_aggregate("Sum", operation); + CppAggregatesFx::run_all_combinations([&]() -> void { + Query query(CppAggregatesFx::ctx_, array, TILEDB_READ); + + // Add a sum aggregator to the query. 
+ QueryChannel default_channel = + QueryExperimental::get_default_channel(query); + ChannelOperation operation = + QueryExperimental::create_unary_aggregate( + query, + CppAggregatesFx::use_dim_ ? CppAggregatesFx::dim_name_ : + "a1"); + default_channel.apply_aggregate("Sum", operation); + + CppAggregatesFx::set_ranges_and_condition_if_needed(array, query, false); + + // Set the data buffer for the aggregator. + uint64_t cell_size = sizeof(T); + std::vector::sum_type> sum(1); + std::vector sum_validity(1); + std::vector dim1(100); + std::vector dim2(100); + std::vector a1(100 * cell_size); + std::vector a1_validity(100); + query.set_layout(CppAggregatesFx::layout_); + query.set_data_buffer("Sum", sum); + if (CppAggregatesFx::nullable_) { + query.set_validity_buffer("Sum", sum_validity); + } - CppAggregatesFx::set_ranges_and_condition_if_needed( - array, query, false); + if (CppAggregatesFx::request_data_) { + query.set_data_buffer("d1", dim1); + query.set_data_buffer("d2", dim2); + query.set_data_buffer( + "a1", static_cast(a1.data()), a1.size() / cell_size); - // Set the data buffer for the aggregator. - uint64_t cell_size = sizeof(T); - std::vector::sum_type> sum(1); - std::vector sum_validity(1); - std::vector dim1(100); - std::vector dim2(100); - std::vector a1(100 * cell_size); - std::vector a1_validity(100); - query.set_layout(layout); - query.set_data_buffer("Sum", sum); - if (CppAggregatesFx::nullable_) { - query.set_validity_buffer("Sum", sum_validity); - } - - if (request_data) { - query.set_data_buffer("d1", dim1); - query.set_data_buffer("d2", dim2); - query.set_data_buffer( - "a1", static_cast(a1.data()), a1.size() / cell_size); + if (CppAggregatesFx::nullable_) { + query.set_validity_buffer("a1", a1_validity); + } + } - if (CppAggregatesFx::nullable_) { - query.set_validity_buffer("a1", a1_validity); - } - } + // Submit the query. + query.submit(); - // Submit the query. - query.submit(); + // Check the results. 
+ uint64_t expected = 0; + CppAggregatesFx::validate_data( + query, dim1, dim2, a1, a1_validity, [&](uint64_t v) -> void { + expected += v; + }); + typename tiledb::sm::sum_type_data::sum_type expected_sum = expected; - // Check the results. - typename tiledb::sm::sum_type_data::sum_type expected_sum; - if (CppAggregatesFx::dense_) { - if (CppAggregatesFx::nullable_) { - if (set_ranges) { - expected_sum = set_qc ? 197 : 201; - } else { - expected_sum = set_qc ? 315 : 319; - } - } else { - if (set_ranges) { - expected_sum = set_qc ? 398 : 402; - } else { - expected_sum = set_qc ? 591 : 630; - } - } - } else { - if (CppAggregatesFx::nullable_) { - if (set_ranges) { - expected_sum = 42; - } else { - expected_sum = 56; - } - } else { - if (set_ranges) { - expected_sum = CppAggregatesFx::allow_dups_ ? 88 : 81; - } else { - expected_sum = CppAggregatesFx::allow_dups_ ? 120 : 113; - } - } - } - - auto result_el = query.result_buffer_elements_nullable(); - CHECK(std::get<1>(result_el["Sum"]) == 1); - CHECK(sum[0] == expected_sum); - - if (request_data) { - CppAggregatesFx::validate_data( - query, dim1, dim2, a1, a1_validity); - } + auto result_el = query.result_buffer_elements_nullable(); + CHECK(std::get<1>(result_el["Sum"]) == 1); + CHECK(sum[0] == expected_sum); - CppAggregatesFx::validate_tiles_read(query); - } - } - } - } + CppAggregatesFx::validate_tiles_read(query); + }); // Close array. 
array.close(); @@ -1648,103 +1681,66 @@ TEMPLATE_LIST_TEST_CASE_METHOD( Array array{ CppAggregatesFx::ctx_, CppAggregatesFx::ARRAY_NAME, TILEDB_READ}; - for (bool set_ranges : {true, false}) { - CppAggregatesFx::set_ranges_ = set_ranges; - for (bool request_data : {true, false}) { - CppAggregatesFx::request_data_ = request_data; - for (bool set_qc : CppAggregatesFx::set_qc_values_) { - CppAggregatesFx::set_qc_ = set_qc; - for (tiledb_layout_t layout : CppAggregatesFx::layout_values_) { - CppAggregatesFx::layout_ = layout; - Query query(CppAggregatesFx::ctx_, array, TILEDB_READ); - - QueryChannel default_channel = - QueryExperimental::get_default_channel(query); - ChannelOperation operation = - QueryExperimental::create_unary_aggregate( - query, "a1"); - default_channel.apply_aggregate("Mean", operation); - - CppAggregatesFx::set_ranges_and_condition_if_needed( - array, query, false); - - // Set the data buffer for the aggregator. - uint64_t cell_size = sizeof(T); - std::vector mean(1); - std::vector mean_validity(1); - std::vector dim1(100); - std::vector dim2(100); - std::vector a1(100 * cell_size); - std::vector a1_validity(100); - query.set_layout(layout); - query.set_data_buffer("Mean", mean); - if (CppAggregatesFx::nullable_) { - query.set_validity_buffer("Mean", mean_validity); - } + CppAggregatesFx::run_all_combinations([&]() -> void { + Query query(CppAggregatesFx::ctx_, array, TILEDB_READ); + + QueryChannel default_channel = + QueryExperimental::get_default_channel(query); + ChannelOperation operation = + QueryExperimental::create_unary_aggregate( + query, + CppAggregatesFx::use_dim_ ? CppAggregatesFx::dim_name_ : + "a1"); + default_channel.apply_aggregate("Mean", operation); + + CppAggregatesFx::set_ranges_and_condition_if_needed(array, query, false); + + // Set the data buffer for the aggregator. 
+ uint64_t cell_size = sizeof(T); + std::vector mean(1); + std::vector mean_validity(1); + std::vector dim1(100); + std::vector dim2(100); + std::vector a1(100 * cell_size); + std::vector a1_validity(100); + query.set_layout(CppAggregatesFx::layout_); + query.set_data_buffer("Mean", mean); + if (CppAggregatesFx::nullable_) { + query.set_validity_buffer("Mean", mean_validity); + } - if (request_data) { - query.set_data_buffer("d1", dim1); - query.set_data_buffer("d2", dim2); - query.set_data_buffer( - "a1", static_cast(a1.data()), a1.size() / cell_size); + if (CppAggregatesFx::request_data_) { + query.set_data_buffer("d1", dim1); + query.set_data_buffer("d2", dim2); + query.set_data_buffer( + "a1", static_cast(a1.data()), a1.size() / cell_size); - if (CppAggregatesFx::nullable_) { - query.set_validity_buffer("a1", a1_validity); - } - } + if (CppAggregatesFx::nullable_) { + query.set_validity_buffer("a1", a1_validity); + } + } - // Submit the query. - query.submit(); + // Submit the query. + query.submit(); - // Check the results. - double expected_mean; - if (CppAggregatesFx::dense_) { - if (CppAggregatesFx::nullable_) { - if (set_ranges) { - expected_mean = set_qc ? (197.0 / 11.0) : (201.0 / 12.0); - } else { - expected_mean = set_qc ? (315.0 / 18.0) : (319.0 / 19.0); - } - } else { - if (set_ranges) { - expected_mean = set_qc ? (398.0 / 23.0) : (402.0 / 24.0); - } else { - expected_mean = set_qc ? (591.0 / 34.0) : (630.0 / 36.0); - } - } - } else { - if (CppAggregatesFx::nullable_) { - if (set_ranges) { - expected_mean = (42.0 / 4.0); - } else { - expected_mean = (56.0 / 8.0); - } - } else { - if (set_ranges) { - expected_mean = CppAggregatesFx::allow_dups_ ? (88.0 / 8.0) : - (81.0 / 7.0); - } else { - expected_mean = CppAggregatesFx::allow_dups_ ? - (120.0 / 16.0) : - (113.0 / 15.0); - } - } - } + // Check the results. 
+ uint64_t expected_sum = 0; + uint64_t expected_count = 0; + CppAggregatesFx::validate_data( + query, dim1, dim2, a1, a1_validity, [&](uint64_t v) -> void { + expected_sum += v; + expected_count++; + }); - auto result_el = query.result_buffer_elements_nullable(); - CHECK(std::get<1>(result_el["Mean"]) == 1); - CHECK(mean[0] == expected_mean); + double expected_mean = + static_cast(expected_sum) / static_cast(expected_count); - if (request_data) { - CppAggregatesFx::validate_data( - query, dim1, dim2, a1, a1_validity); - } + auto result_el = query.result_buffer_elements_nullable(); + CHECK(std::get<1>(result_el["Mean"]) == 1); + CHECK(mean[0] == expected_mean); - CppAggregatesFx::validate_tiles_read(query); - } - } - } - } + CppAggregatesFx::validate_tiles_read(query); + }); // Close array. array.close(); @@ -1787,114 +1783,81 @@ TEMPLATE_LIST_TEST_CASE( Array array{fx.ctx_, fx.ARRAY_NAME, TILEDB_READ}; - for (bool set_ranges : {true, false}) { - fx.set_ranges_ = set_ranges; - for (bool request_data : {true, false}) { - fx.request_data_ = request_data; - for (bool set_qc : fx.set_qc_values_) { - fx.set_qc_ = set_qc; - for (tiledb_layout_t layout : fx.layout_values_) { - fx.layout_ = layout; - Query query(fx.ctx_, array, TILEDB_READ); - - // Add a min/max aggregator to the query. - QueryChannel default_channel = - QueryExperimental::get_default_channel(query); - ChannelOperation operation = - QueryExperimental::create_unary_aggregate(query, "a1"); - default_channel.apply_aggregate("MinMax", operation); - - fx.set_ranges_and_condition_if_needed(array, query, false); - - // Set the data buffer for the aggregator. - uint64_t cell_size = std::is_same::value ? 
- fx.STRING_CELL_VAL_NUM : - sizeof(T); - std::vector min_max(cell_size); - std::vector min_max_validity(1); - std::vector dim1(100); - std::vector dim2(100); - std::vector a1(100 * cell_size); - std::vector a1_validity(100); - query.set_layout(layout); - if constexpr (std::is_same::value) { - query.set_data_buffer( - "MinMax", - static_cast(static_cast(min_max.data())), - min_max.size()); - } else { - query.set_data_buffer( - "MinMax", - static_cast(static_cast(min_max.data())), - min_max.size() / cell_size); - } - if (fx.nullable_) { - query.set_validity_buffer("MinMax", min_max_validity); - } + fx.run_all_combinations([&]() -> void { + Query query(fx.ctx_, array, TILEDB_READ); + + // Add a min/max aggregator to the query. + QueryChannel default_channel = + QueryExperimental::get_default_channel(query); + ChannelOperation operation = QueryExperimental::create_unary_aggregate( + query, fx.use_dim_ ? fx.dim_name_ : "a1"); + default_channel.apply_aggregate("MinMax", operation); + + fx.set_ranges_and_condition_if_needed(array, query, false); + + // Set the data buffer for the aggregator. + uint64_t cell_size = std::is_same::value ? 
+ fx.STRING_CELL_VAL_NUM : + sizeof(T); + std::vector min_max(cell_size); + std::vector min_max_validity(1); + std::vector dim1(100); + std::vector dim2(100); + std::vector a1(100 * cell_size); + std::vector a1_validity(100); + query.set_layout(fx.layout_); + if constexpr (std::is_same::value) { + query.set_data_buffer( + "MinMax", + static_cast(static_cast(min_max.data())), + min_max.size()); + } else { + query.set_data_buffer( + "MinMax", + static_cast(static_cast(min_max.data())), + min_max.size() / cell_size); + } + if (fx.nullable_) { + query.set_validity_buffer("MinMax", min_max_validity); + } - if (request_data) { - query.set_data_buffer("d1", dim1); - query.set_data_buffer("d2", dim2); - query.set_data_buffer( - "a1", static_cast(a1.data()), a1.size() / cell_size); + if (fx.request_data_) { + query.set_data_buffer("d1", dim1); + query.set_data_buffer("d2", dim2); + query.set_data_buffer( + "a1", static_cast(a1.data()), a1.size() / cell_size); - if (fx.nullable_) { - query.set_validity_buffer("a1", a1_validity); - } - } + if (fx.nullable_) { + query.set_validity_buffer("a1", a1_validity); + } + } - // Submit the query. - query.submit(); + // Submit the query. + query.submit(); - // Check the results. - std::vector expected_min_max; - if (fx.dense_) { - if (fx.nullable_) { - if (set_ranges) { - expected_min_max = - fx.make_data_buff({min ? (set_qc ? 6 : 4) : 28}); - } else { - expected_min_max = fx.make_data_buff({min ? 0 : 34}); - } - } else { - if (set_ranges) { - expected_min_max = fx.make_data_buff({min ? 3 : 29}); - } else { - expected_min_max = - fx.make_data_buff({min ? 0 : (set_qc ? 34 : 35)}); - } - } + // Check the results. + uint64_t expected = min ? std::numeric_limits::max() : + std::numeric_limits::min(); + fx.validate_data( + query, dim1, dim2, a1, a1_validity, [&](uint64_t v) -> void { + if (min) { + expected = std::min(expected, v); } else { - if (fx.nullable_) { - if (set_ranges) { - expected_min_max = fx.make_data_buff({min ? 
6 : 14}); - } else { - expected_min_max = fx.make_data_buff({min ? 0 : 14}); - } - } else { - if (set_ranges) { - expected_min_max = fx.make_data_buff({min ? 6 : 15}); - } else { - expected_min_max = fx.make_data_buff({min ? 0 : 15}); - } - } + expected = std::max(expected, v); } + }); - auto result_el = query.result_buffer_elements_nullable(); - CHECK( - std::get<1>(result_el["MinMax"]) == - (std::is_same::value ? 2 : 1)); - CHECK(min_max == expected_min_max); + std::vector expected_min_max = + fx.make_data_buff({static_cast(expected)}); - if (request_data) { - fx.validate_data(query, dim1, dim2, a1, a1_validity); - } + auto result_el = query.result_buffer_elements_nullable(); + CHECK( + std::get<1>(result_el["MinMax"]) == + (std::is_same::value ? 2 : 1)); + CHECK(min_max == expected_min_max); - fx.validate_tiles_read(query); - } - } - } - } + fx.validate_tiles_read(query); + }); // Close array. array.close(); @@ -2050,84 +2013,75 @@ TEMPLATE_LIST_TEST_CASE_METHOD( Array array{ CppAggregatesFx::ctx_, CppAggregatesFx::ARRAY_NAME, TILEDB_READ}; - for (bool set_ranges : {true, false}) { - CppAggregatesFx::set_ranges_ = set_ranges; - for (bool request_data : {true, false}) { - CppAggregatesFx::request_data_ = request_data; - for (bool set_qc : CppAggregatesFx::set_qc_values_) { - CppAggregatesFx::set_qc_ = set_qc; - for (tiledb_layout_t layout : CppAggregatesFx::layout_values_) { - CppAggregatesFx::layout_ = layout; - Query query(CppAggregatesFx::ctx_, array, TILEDB_READ); - - QueryChannel default_channel = - QueryExperimental::get_default_channel(query); - ChannelOperation operation = - QueryExperimental::create_unary_aggregate( - query, "a1"); - default_channel.apply_aggregate("NullCount", operation); - - CppAggregatesFx::set_ranges_and_condition_if_needed( - array, query, false); - - // Set the data buffer for the aggregator. - uint64_t cell_size = std::is_same::value ? 
- CppAggregatesFx::STRING_CELL_VAL_NUM : - sizeof(T); - std::vector null_count(1); - std::vector dim1(100); - std::vector dim2(100); - std::vector a1(100 * cell_size); - std::vector a1_validity(100); - query.set_layout(layout); - query.set_data_buffer("NullCount", null_count); - - if (request_data) { - query.set_data_buffer("d1", dim1); - query.set_data_buffer("d2", dim2); - query.set_data_buffer( - "a1", static_cast(a1.data()), a1.size() / cell_size); - query.set_validity_buffer("a1", a1_validity); - } - - // Submit the query. - query.submit(); - - // Check the results. - uint64_t expected_null_count; - if (CppAggregatesFx::dense_) { - if (set_qc) { - expected_null_count = 0; - } else { - if (set_ranges) { - expected_null_count = 12; - } else { - expected_null_count = 17; - } - } - } else { - if (set_ranges) { - expected_null_count = CppAggregatesFx::allow_dups_ ? 4 : 3; - } else { - expected_null_count = CppAggregatesFx::allow_dups_ ? 8 : 7; - } - } - - auto result_el = query.result_buffer_elements_nullable(); - CHECK(std::get<1>(result_el["NullCount"]) == 1); - CHECK(null_count[0] == expected_null_count); + CppAggregatesFx::run_all_combinations([&]() -> void { + Query query(CppAggregatesFx::ctx_, array, TILEDB_READ); + + QueryChannel default_channel = + QueryExperimental::get_default_channel(query); + ChannelOperation operation = + QueryExperimental::create_unary_aggregate( + query, "a1"); + default_channel.apply_aggregate("NullCount", operation); + + CppAggregatesFx::set_ranges_and_condition_if_needed(array, query, false); + + // Set the data buffer for the aggregator. + uint64_t cell_size = std::is_same::value ? 
+ CppAggregatesFx::STRING_CELL_VAL_NUM : + sizeof(T); + std::vector null_count(1); + std::vector dim1(100); + std::vector dim2(100); + std::vector a1(100 * cell_size); + std::vector a1_validity(100); + query.set_layout(CppAggregatesFx::layout_); + query.set_data_buffer("NullCount", null_count); + + if (CppAggregatesFx::request_data_) { + query.set_data_buffer("d1", dim1); + query.set_data_buffer("d2", dim2); + query.set_data_buffer( + "a1", static_cast(a1.data()), a1.size() / cell_size); + query.set_validity_buffer("a1", a1_validity); + } - if (request_data) { - CppAggregatesFx::validate_data( - query, dim1, dim2, a1, a1_validity); - } + // Submit the query. + query.submit(); - CppAggregatesFx::validate_tiles_read(query); - CppAggregatesFx::validate_tiles_read_null_count(query); + // Check the results. + uint64_t expected_null_count; + if (CppAggregatesFx::dense_) { + if (CppAggregatesFx::set_qc_) { + expected_null_count = 0; + } else { + if (CppAggregatesFx::set_ranges_) { + expected_null_count = 12; + } else { + expected_null_count = 17; } } + } else { + if (CppAggregatesFx::set_ranges_) { + expected_null_count = CppAggregatesFx::allow_dups_ ? 4 : 3; + } else { + expected_null_count = CppAggregatesFx::allow_dups_ ? 8 : 7; + } } - } + + auto result_el = query.result_buffer_elements_nullable(); + CHECK(std::get<1>(result_el["NullCount"]) == 1); + CHECK(null_count[0] == expected_null_count); + + if (CppAggregatesFx::request_data_) { + CppAggregatesFx::validate_data( + query, dim1, dim2, a1, a1_validity, [&](uint64_t) -> void { + expected_null_count++; + }); + } + + CppAggregatesFx::validate_tiles_read(query); + CppAggregatesFx::validate_tiles_read_null_count(query); + }); // Close array. 
array.close(); @@ -2595,7 +2549,13 @@ TEMPLATE_LIST_TEST_CASE_METHOD( CHECK(sum[0] == expected_sum); CppAggregatesFx::validate_data( - query, dim1, dim2, a1, a1_validity, false); + query, + dim1, + dim2, + a1, + a1_validity, + [&](uint64_t) -> void {}, + false); } } } diff --git a/test/src/unit-DenseTiler.cc b/test/src/unit-DenseTiler.cc index 785e07e8909f..f73f2834fed7 100644 --- a/test/src/unit-DenseTiler.cc +++ b/test/src/unit-DenseTiler.cc @@ -64,6 +64,9 @@ struct DenseTilerFx { tiledb_ctx_t* ctx_; tiledb_array_t* array_; + // Test MemoryTracker + shared_ptr tracker_; + // Constructors/Destructors DenseTilerFx(); ~DenseTilerFx(); @@ -86,7 +89,8 @@ struct DenseTilerFx { bool check_tile(WriterTile& tile, const std::vector& data); }; -DenseTilerFx::DenseTilerFx() { +DenseTilerFx::DenseTilerFx() + : tracker_(tiledb::test::create_test_memory_tracker()) { REQUIRE(tiledb_ctx_alloc(NULL, &ctx_) == TILEDB_OK); array_ = NULL; } @@ -211,7 +215,8 @@ TEST_CASE_METHOD( add_ranges({sub1}, sizeof(sub1), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test correctness of initialization CHECK(tiler1.tile_num() == 2); @@ -232,7 +237,8 @@ TEST_CASE_METHOD( add_ranges({sub2}, sizeof(sub2), &subarray2); // Create DenseTiler - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test correctness of initialization CHECK(tiler2.tile_num() == 1); @@ -278,7 +284,8 @@ TEST_CASE_METHOD( add_ranges({sub1}, sizeof(sub1), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan1_0 = tiler1.copy_plan(0); @@ -314,7 +321,8 @@ TEST_CASE_METHOD( add_ranges({sub2}, sizeof(sub2), &subarray2); // 
Create DenseTiler - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan2 = tiler2.copy_plan(0); @@ -338,7 +346,8 @@ TEST_CASE_METHOD( add_ranges({sub3}, sizeof(sub3), &subarray3); // Create DenseTiler - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan3 = tiler3.copy_plan(0); @@ -387,7 +396,8 @@ TEST_CASE_METHOD( add_ranges({sub1}, sizeof(sub1), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0( @@ -396,7 +406,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(!tiler1.get_tile(0, "foo", tile1_0).ok()); CHECK(!tiler1.get_tile(10, "a", tile1_0).ok()); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); @@ -410,7 +421,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1 = { 4, fill_value, fill_value, fill_value, fill_value}; @@ -428,7 +440,8 @@ TEST_CASE_METHOD( add_ranges({sub2}, sizeof(sub2), &subarray2); // Create DenseTiler - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile2( @@ -437,7 +450,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler2.get_tile(0, "a", tile2).ok()); std::vector c_data2 = {fill_value, 1, 2, 3, 4}; CHECK(check_tile(tile2.fixed_tile(), c_data2)); @@ -454,7 +468,8 @@ TEST_CASE_METHOD( add_ranges({sub3}, 
sizeof(sub3), &subarray3); // Create DenseTiler - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile3( @@ -463,7 +478,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(0, "a", tile3).ok()); std::vector c_data3 = {fill_value, 1, 2, 3, 4}; CHECK(check_tile(tile3.fixed_tile(), c_data3)); @@ -505,7 +521,8 @@ TEST_CASE_METHOD( add_ranges({sub1}, sizeof(sub1), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0( @@ -514,7 +531,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(!tiler1.get_tile(0, "foo", tile1_0).ok()); CHECK(!tiler1.get_tile(10, "a", tile1_0).ok()); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); @@ -528,7 +546,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1 = { 4, fill_value, fill_value, fill_value, fill_value}; @@ -571,7 +590,8 @@ TEST_CASE_METHOD( add_ranges({sub1}, sizeof(sub1), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0( @@ -580,7 +600,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(!tiler1.get_tile(0, "foo", tile1_0).ok()); CHECK(!tiler1.get_tile(10, "a", tile1_0).ok()); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); @@ -594,7 +615,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); 
CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1 = { 4, fill_value, fill_value, fill_value, fill_value}; @@ -641,7 +663,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test correctness of initialization CHECK(tiler1.tile_num() == 4); @@ -663,7 +686,8 @@ TEST_CASE_METHOD( add_ranges({sub2_0, sub2_1}, sizeof(sub2_0), &subarray2); // Create DenseTiler - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test correctness of initialization CHECK(tiler2.tile_num() == 1); @@ -685,7 +709,8 @@ TEST_CASE_METHOD( add_ranges({sub3_0, sub3_1}, sizeof(sub3_0), &subarray3); // Create DenseTiler - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test correctness of initialization CHECK(tiler3.tile_num() == 4); @@ -707,7 +732,8 @@ TEST_CASE_METHOD( add_ranges({sub4_0, sub4_1}, sizeof(sub4_0), &subarray4); // Create DenseTiler - DenseTiler tiler4(&buffers, &subarray4, &test::g_helper_stats); + DenseTiler tiler4( + tracker_, &buffers, &subarray4, &test::g_helper_stats); // Test correctness of initialization CHECK(tiler4.tile_num() == 1); @@ -757,7 +783,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test correctness of initialization CHECK(tiler1.tile_num() == 4); @@ -779,7 +806,8 @@ TEST_CASE_METHOD( add_ranges({sub2_0, sub2_1}, sizeof(sub2_0), &subarray2); // Create DenseTiler - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, 
&buffers, &subarray2, &test::g_helper_stats); // Test correctness of initialization CHECK(tiler2.tile_num() == 1); @@ -801,7 +829,8 @@ TEST_CASE_METHOD( add_ranges({sub3_0, sub3_1}, sizeof(sub3_0), &subarray3); // Create DenseTiler - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test correctness of initialization CHECK(tiler3.tile_num() == 4); @@ -823,7 +852,8 @@ TEST_CASE_METHOD( add_ranges({sub4_0, sub4_1}, sizeof(sub4_0), &subarray4); // Create DenseTiler - DenseTiler tiler4(&buffers, &subarray4, &test::g_helper_stats); + DenseTiler tiler4( + tracker_, &buffers, &subarray4, &test::g_helper_stats); // Test correctness of initialization CHECK(tiler4.tile_num() == 1); @@ -873,7 +903,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan1_0 = tiler1.copy_plan(0); @@ -932,7 +963,8 @@ TEST_CASE_METHOD( add_ranges({sub2_0, sub2_1}, sizeof(sub2_0), &subarray2); // Create DenseTiler - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan2_0 = tiler2.copy_plan(0); @@ -958,7 +990,8 @@ TEST_CASE_METHOD( add_ranges({sub3_0, sub3_1}, sizeof(sub3_0), &subarray3); // Create DenseTiler - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan3_0 = tiler3.copy_plan(0); @@ -1021,7 +1054,8 @@ TEST_CASE_METHOD( add_ranges({sub4_0, sub4_1}, sizeof(sub4_0), &subarray4); // Create DenseTiler - DenseTiler tiler4(&buffers, &subarray4, 
&test::g_helper_stats); + DenseTiler tiler4( + tracker_, &buffers, &subarray4, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan4_0 = tiler4.copy_plan(0); @@ -1076,7 +1110,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan1_0 = tiler1.copy_plan(0); @@ -1139,7 +1174,8 @@ TEST_CASE_METHOD( add_ranges({sub2_0, sub2_1}, sizeof(sub2_0), &subarray2); // Create DenseTiler - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan2_0 = tiler2.copy_plan(0); @@ -1166,7 +1202,8 @@ TEST_CASE_METHOD( add_ranges({sub3_0, sub3_1}, sizeof(sub3_0), &subarray3); // Create DenseTiler - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan3_0 = tiler3.copy_plan(0); @@ -1225,7 +1262,8 @@ TEST_CASE_METHOD( add_ranges({sub4_0, sub4_1}, sizeof(sub4_0), &subarray4); // Create DenseTiler - DenseTiler tiler4(&buffers, &subarray4, &test::g_helper_stats); + DenseTiler tiler4( + tracker_, &buffers, &subarray4, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan4_0 = tiler4.copy_plan(0); @@ -1279,7 +1317,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan1_0 = tiler1.copy_plan(0); @@ -1344,7 +1383,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, 
sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test correctness of copy plan for tile 0 auto copy_plan1_0 = tiler1.copy_plan(0); @@ -1410,7 +1450,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0( @@ -1419,7 +1460,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector c_data1_0(50); for (int i = 0; i <= 36; ++i) @@ -1439,7 +1481,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1(50); for (int i = 0; i <= 29; ++i) @@ -1461,7 +1504,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(2, "a", tile1_2).ok()); std::vector c_data1_2(50); for (int i = 0; i <= 6; ++i) @@ -1479,7 +1523,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(3, "a", tile1_3).ok()); std::vector c_data1_3(50); for (int i = 0; i <= 1; ++i) @@ -1504,7 +1549,8 @@ TEST_CASE_METHOD( buff_a = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; buff_a_size = sizeof(buff_a); buffers["a"] = QueryBuffer(&buff_a[0], nullptr, &buff_a_size, nullptr); - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile2_0( @@ -1513,7 +1559,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + 
Datatype::INT32, + tracker_); CHECK(tiler2.get_tile(0, "a", tile2_0).ok()); std::vector c_data2_0(50); for (int i = 0; i <= 21; ++i) @@ -1548,7 +1595,8 @@ TEST_CASE_METHOD( buff_a = {1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14, 5, 10, 15}; buff_a_size = sizeof(buff_a); buffers["a"] = QueryBuffer(&buff_a[0], nullptr, &buff_a_size, nullptr); - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile3_0( @@ -1557,7 +1605,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(0, "a", tile3_0).ok()); std::vector c_data3_0(50); for (int i = 0; i <= 36; ++i) @@ -1577,7 +1626,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(1, "a", tile3_1).ok()); std::vector c_data3_1(50); for (int i = 0; i <= 29; ++i) @@ -1599,7 +1649,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(2, "a", tile3_2).ok()); std::vector c_data3_2(50); for (int i = 0; i <= 6; ++i) @@ -1617,7 +1668,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(3, "a", tile3_3).ok()); std::vector c_data3_3(50); for (int i = 0; i <= 1; ++i) @@ -1642,7 +1694,8 @@ TEST_CASE_METHOD( buff_a = {1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10, 16, 5, 11, 17, 6, 12, 18}; buff_a_size = sizeof(buff_a); buffers["a"] = QueryBuffer(&buff_a[0], nullptr, &buff_a_size, nullptr); - DenseTiler tiler4(&buffers, &subarray4, &test::g_helper_stats); + DenseTiler tiler4( + tracker_, &buffers, &subarray4, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile4_0( @@ -1651,7 +1704,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler4.get_tile(0, "a", 
tile4_0).ok()); std::vector c_data4_0(50); for (int i = 0; i <= 21; ++i) @@ -1712,7 +1766,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0( @@ -1721,7 +1776,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector c_data1_0(50); for (int i = 0; i <= 37; ++i) @@ -1745,7 +1801,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1(50); for (int i = 0; i <= 34; ++i) @@ -1768,7 +1825,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(2, "a", tile1_2).ok()); std::vector c_data1_2(50); for (int i = 0; i <= 2; ++i) @@ -1790,7 +1848,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(3, "a", tile1_3).ok()); std::vector c_data1_3(50); c_data1_3[0] = 14; @@ -1817,7 +1876,8 @@ TEST_CASE_METHOD( buff_a = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; buff_a_size = sizeof(buff_a); buffers["a"] = QueryBuffer(&buff_a[0], nullptr, &buff_a_size, nullptr); - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile2_0( @@ -1826,7 +1886,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler2.get_tile(0, "a", tile2_0).ok()); std::vector c_data2_0(50); for (int i = 0; i <= 11; ++i) @@ -1879,7 +1940,8 @@ TEST_CASE_METHOD( buff_a = {1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14, 5, 
10, 15}; buff_a_size = sizeof(buff_a); buffers["a"] = QueryBuffer(&buff_a[0], nullptr, &buff_a_size, nullptr); - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile3_0( @@ -1888,7 +1950,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(0, "a", tile3_0).ok()); std::vector c_data3_0(50); for (int i = 0; i <= 37; ++i) @@ -1912,7 +1975,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(1, "a", tile3_1).ok()); std::vector c_data3_1(50); for (int i = 0; i <= 34; ++i) @@ -1935,7 +1999,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(2, "a", tile3_2).ok()); std::vector c_data3_2(50); for (int i = 0; i <= 2; ++i) @@ -1957,7 +2022,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(3, "a", tile3_3).ok()); std::vector c_data3_3(50); c_data3_3[0] = 14; @@ -1984,7 +2050,8 @@ TEST_CASE_METHOD( buff_a = {1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10, 16, 5, 11, 17, 6, 12, 18}; buff_a_size = sizeof(buff_a); buffers["a"] = QueryBuffer(&buff_a[0], nullptr, &buff_a_size, nullptr); - DenseTiler tiler4(&buffers, &subarray4, &test::g_helper_stats); + DenseTiler tiler4( + tracker_, &buffers, &subarray4, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile4_0( @@ -1993,7 +2060,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler4.get_tile(0, "a", tile4_0).ok()); std::vector c_data4_0(50); for (int i = 0; i <= 11; ++i) @@ -2075,7 +2143,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, 
&test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0( @@ -2084,7 +2153,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector c_data1_0(50); for (int i = 0; i <= 29; ++i) @@ -2100,7 +2170,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1(50); for (int i = 0; i <= 39; ++i) @@ -2152,7 +2223,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0( @@ -2161,7 +2233,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector c_data1_0(50); for (int i = 0; i <= 34; ++i) @@ -2177,7 +2250,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1(50); for (int i = 0; i <= 9; ++i) @@ -2223,7 +2297,8 @@ TEST_CASE_METHOD( add_ranges({sub1}, sizeof(sub1), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0( @@ -2232,7 +2307,8 @@ TEST_CASE_METHOD( false, false, 2 * sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector c_data1_0 = { fill_value, fill_value, fill_value, fill_value, 1, 11, 2, 22, 3, 33}; @@ -2245,7 +2321,8 @@ TEST_CASE_METHOD( false, false, 2 * sizeof(int32_t), - 
Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1 = { 4, @@ -2272,7 +2349,8 @@ TEST_CASE_METHOD( add_ranges({sub2}, sizeof(sub2), &subarray2); // Create DenseTiler - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile2( @@ -2281,7 +2359,8 @@ TEST_CASE_METHOD( false, false, 2 * sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler2.get_tile(0, "a", tile2).ok()); std::vector c_data2 = { fill_value, fill_value, 1, 11, 2, 22, 3, 33, 4, 44}; @@ -2299,7 +2378,8 @@ TEST_CASE_METHOD( add_ranges({sub3}, sizeof(sub3), &subarray3); // Create DenseTiler - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile3( @@ -2308,7 +2388,8 @@ TEST_CASE_METHOD( false, false, 2 * sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(0, "a", tile3).ok()); std::vector c_data3 = { fill_value, fill_value, 1, 11, 2, 22, 3, 33, 4, 44}; @@ -2354,7 +2435,8 @@ TEST_CASE_METHOD( add_ranges({sub1}, sizeof(sub1), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0_a1( @@ -2363,7 +2445,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a1", tile1_0_a1).ok()); std::vector c_data1_0_a1 = {fill_value, fill_value, 1, 2, 3}; CHECK(check_tile(tile1_0_a1.fixed_tile(), c_data1_0_a1)); @@ -2373,7 +2456,8 @@ TEST_CASE_METHOD( false, false, sizeof(double), - Datatype::FLOAT64); + Datatype::FLOAT64, + tracker_); CHECK(tiler1.get_tile(0, "a2", tile1_0_a2).ok()); std::vector 
c_data1_0_a2 = { double(fill_value), double(fill_value), 1.1, 2.2, 3.3}; @@ -2386,7 +2470,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a1", tile1_1_a1).ok()); std::vector c_data1_1_a1 = { 4, fill_value, fill_value, fill_value, fill_value}; @@ -2397,7 +2482,8 @@ TEST_CASE_METHOD( false, false, sizeof(double), - Datatype::FLOAT64); + Datatype::FLOAT64, + tracker_); CHECK(tiler1.get_tile(1, "a2", tile1_1_a2).ok()); std::vector c_data1_1_a2 = { 4.4, @@ -2419,7 +2505,8 @@ TEST_CASE_METHOD( add_ranges({sub2}, sizeof(sub2), &subarray2); // Create DenseTiler - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile2_a1( @@ -2428,7 +2515,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler2.get_tile(0, "a1", tile2_a1).ok()); std::vector c_data2_a1 = {fill_value, 1, 2, 3, 4}; CHECK(check_tile(tile2_a1.fixed_tile(), c_data2_a1)); @@ -2438,7 +2526,8 @@ TEST_CASE_METHOD( false, false, sizeof(double), - Datatype::FLOAT64); + Datatype::FLOAT64, + tracker_); CHECK(tiler2.get_tile(0, "a2", tile2_a2).ok()); std::vector c_data2_a2 = {double(fill_value), 1.1, 2.2, 3.3, 4.4}; CHECK(check_tile(tile2_a2.fixed_tile(), c_data2_a2)); @@ -2455,7 +2544,8 @@ TEST_CASE_METHOD( add_ranges({sub3}, sizeof(sub3), &subarray3); // Create DenseTiler - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile3_a1( @@ -2464,7 +2554,8 @@ TEST_CASE_METHOD( false, false, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(0, "a1", tile3_a1).ok()); std::vector c_data3_a1 = {fill_value, 1, 2, 3, 4}; CHECK(check_tile(tile3_a1.fixed_tile(), c_data3_a1)); @@ -2474,7 +2565,8 @@ 
TEST_CASE_METHOD( false, false, sizeof(double), - Datatype::FLOAT64); + Datatype::FLOAT64, + tracker_); CHECK(tiler3.get_tile(0, "a2", tile3_a2).ok()); std::vector c_data3_a2 = {double(fill_value), 1.1, 2.2, 3.3, 4.4}; CHECK(check_tile(tile3_a2.fixed_tile(), c_data3_a2)); @@ -2528,7 +2620,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0( @@ -2537,7 +2630,8 @@ TEST_CASE_METHOD( false, true, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector c_data1_0(50); for (int i = 0; i <= 36; ++i) @@ -2559,7 +2653,8 @@ TEST_CASE_METHOD( false, true, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1(50); for (int i = 0; i <= 29; ++i) @@ -2581,7 +2676,8 @@ TEST_CASE_METHOD( false, true, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(2, "a", tile1_2).ok()); std::vector c_data1_2(50); for (int i = 0; i <= 6; ++i) @@ -2600,7 +2696,8 @@ TEST_CASE_METHOD( false, true, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(3, "a", tile1_3).ok()); std::vector c_data1_3(50); c_data1_3[0] = 0; @@ -2632,7 +2729,8 @@ TEST_CASE_METHOD( &buff_a_size, nullptr, ValidityVector(&buff_a_n[0], &buff_a_n_size)); - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile2_0( @@ -2641,7 +2739,8 @@ TEST_CASE_METHOD( false, true, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler2.get_tile(0, "a", tile2_0).ok()); std::vector c_data2_0(50); for (int i = 0; i <= 21; ++i) @@ 
-2695,7 +2794,8 @@ TEST_CASE_METHOD( &buff_a_size, nullptr, ValidityVector(&buff_a_n[0], &buff_a_n_size)); - DenseTiler tiler3(&buffers, &subarray3, &test::g_helper_stats); + DenseTiler tiler3( + tracker_, &buffers, &subarray3, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile3_0( @@ -2704,7 +2804,8 @@ TEST_CASE_METHOD( false, true, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(0, "a", tile3_0).ok()); std::vector c_data3_0(50); for (int i = 0; i <= 36; ++i) @@ -2726,7 +2827,8 @@ TEST_CASE_METHOD( false, true, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(1, "a", tile3_1).ok()); std::vector c_data3_1(50); for (int i = 0; i <= 29; ++i) @@ -2748,7 +2850,8 @@ TEST_CASE_METHOD( false, true, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(2, "a", tile3_2).ok()); std::vector c_data3_2(50); for (int i = 0; i <= 6; ++i) @@ -2767,7 +2870,8 @@ TEST_CASE_METHOD( false, true, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler3.get_tile(3, "a", tile3_3).ok()); std::vector c_data3_3(50); c_data3_3[0] = 1; @@ -2799,7 +2903,8 @@ TEST_CASE_METHOD( &buff_a_size, nullptr, ValidityVector(&buff_a_n[0], &buff_a_n_size)); - DenseTiler tiler4(&buffers, &subarray4, &test::g_helper_stats); + DenseTiler tiler4( + tracker_, &buffers, &subarray4, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile4_0( @@ -2808,7 +2913,8 @@ TEST_CASE_METHOD( false, true, sizeof(int32_t), - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler4.get_tile(0, "a", tile4_0).ok()); std::vector c_data4_0(50); for (int i = 0; i <= 21; ++i) @@ -2887,7 +2993,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 
WriterTileTuple tile1_0( @@ -2896,7 +3003,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::STRING_ASCII); + Datatype::STRING_ASCII, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector c_data1_0_off(50); for (int i = 0; i <= 37; ++i) @@ -2936,7 +3044,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::STRING_ASCII); + Datatype::STRING_ASCII, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1_off(50); for (int i = 0; i <= 30; ++i) @@ -2984,7 +3093,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::STRING_ASCII); + Datatype::STRING_ASCII, + tracker_); CHECK(tiler1.get_tile(2, "a", tile1_2).ok()); std::vector c_data1_2_off(50); for (int i = 0; i <= 7; ++i) @@ -3015,7 +3125,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::STRING_ASCII); + Datatype::STRING_ASCII, + tracker_); CHECK(tiler1.get_tile(3, "a", tile1_3).ok()); std::vector c_data1_3_off(50); c_data1_3_off[0] = 0; @@ -3059,7 +3170,8 @@ TEST_CASE_METHOD( buff_a_val_size = buff_a_val.size(); buffers["a"] = QueryBuffer( &buff_a_off[0], &buff_a_val[0], &buff_a_off_size, &buff_a_val_size); - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile2_0( @@ -3068,7 +3180,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::STRING_ASCII); + Datatype::STRING_ASCII, + tracker_); CHECK(tiler2.get_tile(0, "a", tile2_0).ok()); std::vector c_data2_0_off(50); for (int i = 0; i <= 22; ++i) @@ -3222,7 +3335,8 @@ TEST_CASE_METHOD( add_ranges({sub1_0, sub1_1}, sizeof(sub1_0), &subarray1); // Create DenseTiler - DenseTiler tiler1(&buffers, &subarray1, &test::g_helper_stats); + DenseTiler tiler1( + tracker_, &buffers, &subarray1, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile1_0( @@ -3231,7 +3345,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector 
c_data1_0_off(50); for (int i = 0; i <= 37; ++i) @@ -3271,7 +3386,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1_off(50); for (int i = 0; i <= 30; ++i) @@ -3319,7 +3435,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(2, "a", tile1_2).ok()); std::vector c_data1_2_off(50); for (int i = 0; i <= 7; ++i) @@ -3350,7 +3467,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(3, "a", tile1_3).ok()); std::vector c_data1_3_off(50); c_data1_3_off[0] = 0; @@ -3414,7 +3532,8 @@ TEST_CASE_METHOD( buff_a_val_size = buff_a_val.size() * sizeof(int32_t); buffers["a"] = QueryBuffer( &buff_a_off[0], &buff_a_val[0], &buff_a_off_size, &buff_a_val_size); - DenseTiler tiler2(&buffers, &subarray2, &test::g_helper_stats); + DenseTiler tiler2( + tracker_, &buffers, &subarray2, &test::g_helper_stats); // Test get tile 0 WriterTileTuple tile2_0( @@ -3423,7 +3542,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::STRING_ASCII); + Datatype::STRING_ASCII, + tracker_); CHECK(tiler2.get_tile(0, "a", tile2_0).ok()); std::vector c_data2_0_off(50); for (int i = 0; i <= 22; ++i) @@ -3579,7 +3699,7 @@ TEST_CASE_METHOD( // Create DenseTiler DenseTiler tiler1( - &buffers, &subarray1, &test::g_helper_stats, "bytes", 64, true); + tracker_, &buffers, &subarray1, &test::g_helper_stats, "bytes", 64, true); // Test get tile 0 WriterTileTuple tile1_0( @@ -3588,7 +3708,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector c_data1_0_off(50); for (int i = 0; i <= 37; ++i) @@ -3628,7 +3749,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1_off(50); for (int i = 0; i <= 30; ++i) @@ 
-3676,7 +3798,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(2, "a", tile1_2).ok()); std::vector c_data1_2_off(50); for (int i = 0; i <= 7; ++i) @@ -3707,7 +3830,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(3, "a", tile1_3).ok()); std::vector c_data1_3_off(50); c_data1_3_off[0] = 0; @@ -3773,7 +3897,7 @@ TEST_CASE_METHOD( buffers["a"] = QueryBuffer( &buff_a_off[0], &buff_a_val[0], &buff_a_off_size, &buff_a_val_size); DenseTiler tiler2( - &buffers, &subarray2, &test::g_helper_stats, "bytes", 64, true); + tracker_, &buffers, &subarray2, &test::g_helper_stats, "bytes", 64, true); // Test get tile 0 WriterTileTuple tile2_0( @@ -3782,7 +3906,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler2.get_tile(0, "a", tile2_0).ok()); std::vector c_data2_0_off(50); for (int i = 0; i <= 22; ++i) @@ -3923,7 +4048,13 @@ TEST_CASE_METHOD( // Create DenseTiler DenseTiler tiler1( - &buffers, &subarray1, &test::g_helper_stats, "elements", 64, false); + tracker_, + &buffers, + &subarray1, + &test::g_helper_stats, + "elements", + 64, + false); // Test get tile 0 WriterTileTuple tile1_0( @@ -3932,7 +4063,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector c_data1_0_off(50); for (int i = 0; i <= 37; ++i) @@ -3972,7 +4104,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1_off(50); for (int i = 0; i <= 30; ++i) @@ -4020,7 +4153,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(2, "a", tile1_2).ok()); std::vector c_data1_2_off(50); for (int i = 0; i <= 7; ++i) @@ -4051,7 +4185,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + 
Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(3, "a", tile1_3).ok()); std::vector c_data1_3_off(50); c_data1_3_off[0] = 0; @@ -4099,7 +4234,13 @@ TEST_CASE_METHOD( buffers["a"] = QueryBuffer( &buff_a_off[0], &buff_a_val[0], &buff_a_off_size, &buff_a_val_size); DenseTiler tiler2( - &buffers, &subarray2, &test::g_helper_stats, "elements", 64, false); + tracker_, + &buffers, + &subarray2, + &test::g_helper_stats, + "elements", + 64, + false); // Test get tile 0 WriterTileTuple tile2_0( @@ -4108,7 +4249,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler2.get_tile(0, "a", tile2_0).ok()); std::vector c_data2_0_off(50); for (int i = 0; i <= 22; ++i) @@ -4249,7 +4391,13 @@ TEST_CASE_METHOD( // Create DenseTiler DenseTiler tiler1( - &buffers, &subarray1, &test::g_helper_stats, "elements", 32, false); + tracker_, + &buffers, + &subarray1, + &test::g_helper_stats, + "elements", + 32, + false); // Test get tile 0 WriterTileTuple tile1_0( @@ -4258,7 +4406,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(0, "a", tile1_0).ok()); std::vector c_data1_0_off(50); for (int i = 0; i <= 37; ++i) @@ -4298,7 +4447,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(1, "a", tile1_1).ok()); std::vector c_data1_1_off(50); for (int i = 0; i <= 30; ++i) @@ -4346,7 +4496,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(2, "a", tile1_2).ok()); std::vector c_data1_2_off(50); for (int i = 0; i <= 7; ++i) @@ -4377,7 +4528,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler1.get_tile(3, "a", tile1_3).ok()); std::vector c_data1_3_off(50); c_data1_3_off[0] = 0; @@ -4425,7 +4577,13 @@ TEST_CASE_METHOD( buffers["a"] = QueryBuffer( &buff_a_off[0], &buff_a_val[0], &buff_a_off_size, &buff_a_val_size); DenseTiler 
tiler2( - &buffers, &subarray2, &test::g_helper_stats, "elements", 32, false); + tracker_, + &buffers, + &subarray2, + &test::g_helper_stats, + "elements", + 32, + false); // Test get tile 0 WriterTileTuple tile2_0( @@ -4434,7 +4592,8 @@ TEST_CASE_METHOD( true, false, 1, - Datatype::INT32); + Datatype::INT32, + tracker_); CHECK(tiler2.get_tile(0, "a", tile2_0).ok()); std::vector c_data2_0_off(50); for (int i = 0; i <= 22; ++i) diff --git a/test/src/unit-ReadCellSlabIter.cc b/test/src/unit-ReadCellSlabIter.cc index ce047421e2d1..b84c0a58bec0 100644 --- a/test/src/unit-ReadCellSlabIter.cc +++ b/test/src/unit-ReadCellSlabIter.cc @@ -33,6 +33,7 @@ #include "test/support/src/helpers.h" #include "test/support/src/vfs_helpers.h" #include "tiledb/common/common.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array_schema/tile_domain.h" #include "tiledb/sm/c_api/tiledb_struct_def.h" #include "tiledb/sm/query/legacy/read_cell_slab_iter.h" @@ -63,6 +64,8 @@ struct ReadCellSlabIterFx { const char* ARRAY_NAME = "read_cell_slab_iter"; tiledb_array_t* array_ = nullptr; + shared_ptr tracker_; + ReadCellSlabIterFx(); ~ReadCellSlabIterFx(); @@ -82,7 +85,8 @@ struct ReadCellSlabIterFx { }; ReadCellSlabIterFx::ReadCellSlabIterFx() - : fs_vec_(vfs_test_get_fs_vec()) { + : fs_vec_(vfs_test_get_fs_vec()) + , tracker_(tiledb::test::create_test_memory_tracker()) { // Initialize vfs test REQUIRE(vfs_test_init(fs_vec_, &ctx_, &vfs_).ok()); @@ -162,7 +166,8 @@ void ReadCellSlabIterFx::create_result_space_tiles( tile_coords, array_tile_domain, frag_tile_domains, - result_space_tiles); + result_space_tiles, + tiledb::test::get_test_memory_tracker()); } void set_result_tile_dim( @@ -256,10 +261,10 @@ TEST_CASE_METHOD( shared_ptr fragment = make_shared( HERE(), nullptr, - nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); fragments.emplace_back(std::move(fragment)); @@ -330,10 +335,10 @@ 
TEST_CASE_METHOD( shared_ptr fragment = make_shared( HERE(), nullptr, - nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); fragments.emplace_back(std::move(fragment)); @@ -408,10 +413,10 @@ TEST_CASE_METHOD( shared_ptr fragment = make_shared( HERE(), nullptr, - nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); fragments.emplace_back(std::move(fragment)); } @@ -491,10 +496,10 @@ TEST_CASE_METHOD( shared_ptr fragment = make_shared( HERE(), nullptr, - nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); fragments.emplace_back(std::move(fragment)); } @@ -510,9 +515,12 @@ TEST_CASE_METHOD( // Create result coordinates std::vector result_coords; - ResultTile result_tile_2_0(1, 0, *fragments[0]); - ResultTile result_tile_3_0(2, 0, *fragments[0]); - ResultTile result_tile_3_1(2, 1, *fragments[1]); + ResultTile result_tile_2_0( + 1, 0, *fragments[0], tiledb::test::get_test_memory_tracker()); + ResultTile result_tile_3_0( + 2, 0, *fragments[0], tiledb::test::get_test_memory_tracker()); + ResultTile result_tile_3_1( + 2, 1, *fragments[1], tiledb::test::get_test_memory_tracker()); set_result_tile_dim( array_schema, result_tile_2_0, "d", 0, {{1000, 3, 1000, 5}}); @@ -711,10 +719,10 @@ TEST_CASE_METHOD( shared_ptr fragment = make_shared( HERE(), nullptr, - nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); fragments.emplace_back(std::move(fragment)); @@ -897,10 +905,10 @@ TEST_CASE_METHOD( shared_ptr fragment = make_shared( HERE(), nullptr, - nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); fragments.emplace_back(std::move(fragment)); @@ -1096,10 +1104,10 @@ 
TEST_CASE_METHOD( shared_ptr fragment = make_shared( HERE(), nullptr, - nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); fragments.emplace_back(std::move(fragment)); @@ -1342,10 +1350,10 @@ TEST_CASE_METHOD( shared_ptr fragment = make_shared( HERE(), nullptr, - nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); fragments.emplace_back(std::move(fragment)); } @@ -1361,8 +1369,10 @@ TEST_CASE_METHOD( // Create result coordinates std::vector result_coords; - ResultTile result_tile_3_0(2, 0, *fragments[0]); - ResultTile result_tile_3_1(2, 1, *fragments[1]); + ResultTile result_tile_3_0( + 2, 0, *fragments[0], tiledb::test::get_test_memory_tracker()); + ResultTile result_tile_3_1( + 2, 1, *fragments[1], tiledb::test::get_test_memory_tracker()); set_result_tile_dim( array_schema, result_tile_3_0, "d1", 0, {{1000, 3, 1000, 1000}}); diff --git a/test/src/unit-Reader.cc b/test/src/unit-Reader.cc index dbc2cd34287e..6b053017984e 100644 --- a/test/src/unit-Reader.cc +++ b/test/src/unit-Reader.cc @@ -31,12 +31,15 @@ */ #include "test/support/src/helpers.h" +#include "test/support/src/mem_helpers.h" #include "test/support/src/vfs_helpers.h" #include "tiledb/common/common.h" #include "tiledb/common/dynamic_memory/dynamic_memory.h" #include "tiledb/common/heap_memory.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/c_api/tiledb_struct_def.h" +#include "tiledb/sm/enums/array_type.h" #include "tiledb/sm/enums/encryption_type.h" #include "tiledb/sm/misc/types.h" #include "tiledb/sm/query/legacy/reader.h" @@ -68,12 +71,15 @@ struct ReaderFx { const char* ARRAY_NAME = "reader"; tiledb_array_t* array_ = nullptr; + shared_ptr tracker_; + ReaderFx(); ~ReaderFx(); }; ReaderFx::ReaderFx() - : fs_vec_(vfs_test_get_fs_vec()) { + : fs_vec_(vfs_test_get_fs_vec()) + , 
tracker_(tiledb::test::create_test_memory_tracker()) { // Initialize vfs test REQUIRE(vfs_test_init(fs_vec_, &ctx_, &vfs_).ok()); @@ -164,6 +170,8 @@ TEST_CASE_METHOD( Subarray subarray(&array, &g_helper_stats, g_helper_logger()); DefaultChannelAggregates default_channel_aggregates; auto params = StrategyParams( + array.memory_tracker(), + tracker_, context.storage_manager(), array.opened_array(), config, @@ -173,8 +181,7 @@ TEST_CASE_METHOD( Layout::ROW_MAJOR, condition, default_channel_aggregates, - false, - array.memory_tracker()); + false); Reader reader(&g_helper_stats, g_helper_logger(), params); unsigned dim_num = 2; auto size = 2 * sizeof(int32_t); @@ -235,17 +242,17 @@ TEST_CASE_METHOD( TileDomain array_tile_domain( UINT32_MAX, domain, dsd, tile_extents, layout); - auto d1{make_shared(HERE(), "d1", Datatype::INT32)}; + auto d1{make_shared(HERE(), "d1", Datatype::INT32, tracker_)}; CHECK(d1->set_domain(domain_vec).ok()); CHECK(d1->set_tile_extent(&tile_extents_vec[0]).ok()); - auto d2{make_shared(HERE(), "d2", Datatype::INT32)}; + auto d2{make_shared(HERE(), "d2", Datatype::INT32, tracker_)}; CHECK(d2->set_domain(&domain_vec[2]).ok()); CHECK(d2->set_tile_extent(&tile_extents_vec[1]).ok()); - auto dom{make_shared(HERE())}; + auto dom{make_shared(HERE(), tracker_)}; CHECK(dom->add_dimension(d1).ok()); CHECK(dom->add_dimension(d2).ok()); - auto schema = make_shared(HERE()); + auto schema = make_shared(HERE(), ArrayType::DENSE, tracker_); CHECK(schema->set_domain(dom).ok()); std::vector> fragments; @@ -253,10 +260,10 @@ TEST_CASE_METHOD( shared_ptr fragment = make_shared( HERE(), nullptr, - nullptr, schema, URI(), std::make_pair(0, 0), + tracker_, true); fragments.emplace_back(std::move(fragment)); } @@ -268,49 +275,39 @@ TEST_CASE_METHOD( tile_coords, array_tile_domain, frag_tile_domains, - result_space_tiles); + result_space_tiles, + tiledb::test::get_test_memory_tracker()); CHECK(result_space_tiles.size() == 6); - // Result tiles for fragment #1 - ResultTile 
result_tile_1_0_1(1, 0, *fragments[0]); - ResultTile result_tile_1_2_1(1, 2, *fragments[0]); - - // Result tiles for fragment #2 - ResultTile result_tile_1_0_2(2, 0, *fragments[1]); - - // Result tiles for fragment #3 - ResultTile result_tile_2_0_3(3, 0, *fragments[2]); - ResultTile result_tile_3_0_3(3, 2, *fragments[2]); - // Initialize result_space_tiles - ResultSpaceTile rst_1_0; + ResultSpaceTile rst_1_0(tiledb::test::get_test_memory_tracker()); rst_1_0.set_start_coords({3, 1}); rst_1_0.append_frag_domain(2, ds2); rst_1_0.append_frag_domain(1, ds1); - rst_1_0.set_result_tile(1, result_tile_1_0_1); - rst_1_0.set_result_tile(2, result_tile_1_0_2); - ResultSpaceTile rst_1_2; + rst_1_0.set_result_tile(1, 0, *fragments[0]); + rst_1_0.set_result_tile(2, 0, *fragments[1]); + ResultSpaceTile rst_1_2(tiledb::test::get_test_memory_tracker()); rst_1_2.set_start_coords({3, 11}); rst_1_2.append_frag_domain(1, ds1); - rst_1_2.set_result_tile(1, result_tile_1_2_1); - ResultSpaceTile rst_2_0; + rst_1_2.set_result_tile(1, 2, *fragments[0]); + ResultSpaceTile rst_2_0(tiledb::test::get_test_memory_tracker()); rst_2_0.set_start_coords({5, 1}); rst_2_0.append_frag_domain(3, ds3); - rst_2_0.set_result_tile(3, result_tile_2_0_3); - ResultSpaceTile rst_2_2; + rst_2_0.set_result_tile(3, 0, *fragments[2]); + ResultSpaceTile rst_2_2(tiledb::test::get_test_memory_tracker()); rst_2_2.set_start_coords({5, 11}); - ResultSpaceTile rst_3_0; + ResultSpaceTile rst_3_0(tiledb::test::get_test_memory_tracker()); rst_3_0.set_start_coords({7, 1}); rst_3_0.append_frag_domain(3, ds3); - rst_3_0.set_result_tile(3, result_tile_3_0_3); - ResultSpaceTile rst_3_2; + rst_3_0.set_result_tile(3, 2, *fragments[2]); + ResultSpaceTile rst_3_2(tiledb::test::get_test_memory_tracker()); rst_3_2.set_start_coords({7, 11}); // Check correctness - CHECK(result_space_tiles[(const int32_t*)&(tile_coords[0][0])] == rst_1_0); - CHECK(result_space_tiles[(const int32_t*)&(tile_coords[1][0])] == rst_1_2); - 
CHECK(result_space_tiles[(const int32_t*)&(tile_coords[2][0])] == rst_2_0); - CHECK(result_space_tiles[(const int32_t*)&(tile_coords[3][0])] == rst_2_2); - CHECK(result_space_tiles[(const int32_t*)&(tile_coords[4][0])] == rst_3_0); - CHECK(result_space_tiles[(const int32_t*)&(tile_coords[5][0])] == rst_3_2); + CHECK(result_space_tiles.at((const int32_t*)&(tile_coords[0][0])) == rst_1_0); + CHECK(result_space_tiles.at((const int32_t*)&(tile_coords[1][0])) == rst_1_2); + CHECK(result_space_tiles.at((const int32_t*)&(tile_coords[2][0])) == rst_2_0); + CHECK(result_space_tiles.at((const int32_t*)&(tile_coords[3][0])) == rst_2_2); + CHECK(result_space_tiles.at((const int32_t*)&(tile_coords[4][0])) == rst_3_0); + CHECK(result_space_tiles.at((const int32_t*)&(tile_coords[5][0])) == rst_3_2); } diff --git a/test/src/unit-azure.cc b/test/src/unit-azure.cc deleted file mode 100644 index f0ad9fbdc361..000000000000 --- a/test/src/unit-azure.cc +++ /dev/null @@ -1,386 +0,0 @@ -/** - * @file unit-azure.cc - * - * @section LICENSE - * - * The MIT License - * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * @section DESCRIPTION - * - * Tests for AZURE API filesystem functions. - */ - -#ifdef HAVE_AZURE - -#include -#include -#include "tiledb/common/filesystem/directory_entry.h" -#include "tiledb/common/thread_pool.h" -#include "tiledb/sm/config/config.h" -#include "tiledb/sm/filesystem/azure.h" -#include "tiledb/sm/global_state/unit_test_config.h" -#include "tiledb/sm/misc/tdb_time.h" - -#include -#include - -using namespace tiledb::common; -using namespace tiledb::sm; - -using ConfMap = std::map; -using ConfList = std::vector; - -static ConfList test_settings = { - {{"vfs.azure.storage_account_name", "devstoreaccount1"}, - {"vfs.azure.storage_account_key", - "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" - "K1SZFPTOtr/KBHBeksoGMGw=="}, - {"vfs.azure.blob_endpoint", "http://127.0.0.1:10000/devstoreaccount1"}}, - // Currently disabled because it does not work with the Azurite emulator - // The SAS path was manually tested against the Azure Blob Service. 
- //{{"vfs.azure.storage_account_name", "devstoreaccount2"}, - // {"vfs.azure.storage_sas_token", ""}, - // {"vfs.azure.blob_endpoint", "http://127.0.0.1:10000/devstoreaccount2"}} -}; - -struct AzureFx { - const std::string AZURE_PREFIX = "azure://"; - const tiledb::sm::URI AZURE_CONTAINER = - tiledb::sm::URI(AZURE_PREFIX + random_container_name("tiledb") + "/"); - const std::string TEST_DIR = AZURE_CONTAINER.to_string() + "tiledb_test_dir/"; - - tiledb::sm::Azure azure_; - ThreadPool thread_pool_{2}; - - AzureFx() = default; - ~AzureFx(); - - void init_azure(Config&& config, ConfMap); - - static std::string random_container_name(const std::string& prefix); -}; - -AzureFx::~AzureFx() { - // Empty container - bool is_empty; - REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok()); - if (!is_empty) { - REQUIRE(azure_.empty_container(AZURE_CONTAINER).ok()); - REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok()); - REQUIRE(is_empty); - } - - // Delete container - REQUIRE(azure_.remove_container(AZURE_CONTAINER).ok()); -} - -void AzureFx::init_azure(Config&& config, ConfMap settings) { - auto set_conf = [&](auto iter) { - std::string key = iter.first; - std::string val = iter.second; - REQUIRE(config.set(key, val).ok()); - }; - - // Set provided config settings for connection - std::for_each(settings.begin(), settings.end(), set_conf); - - // Initialize - REQUIRE(azure_.init(config, &thread_pool_).ok()); - - // Create container - bool is_container; - REQUIRE(azure_.is_container(AZURE_CONTAINER, &is_container).ok()); - if (is_container) { - REQUIRE(azure_.remove_container(AZURE_CONTAINER).ok()); - } - - REQUIRE(azure_.is_container(AZURE_CONTAINER, &is_container).ok()); - REQUIRE(!is_container); - REQUIRE(azure_.create_container(AZURE_CONTAINER).ok()); - - // Check if container is empty - bool is_empty; - REQUIRE(azure_.is_empty_container(AZURE_CONTAINER, &is_empty).ok()); - REQUIRE(is_empty); -} - -std::string 
AzureFx::random_container_name(const std::string& prefix) { - std::stringstream ss; - ss << prefix << "-" << std::this_thread::get_id() << "-" - << tiledb::sm::utils::time::timestamp_now_ms(); - return ss.str(); -} - -TEST_CASE_METHOD( - AzureFx, "Test Azure filesystem, file I/O", "[azure][multipart]") { - Config config; - const uint64_t max_parallel_ops = 2; - const uint64_t block_list_block_size = 4 * 1024 * 1024; - REQUIRE( - config.set("vfs.azure.max_parallel_ops", std::to_string(max_parallel_ops)) - .ok()); - REQUIRE(config - .set( - "vfs.azure.block_list_block_size", - std::to_string(block_list_block_size)) - .ok()); - - auto settings = - GENERATE(from_range(test_settings.begin(), test_settings.end())); - init_azure(std::move(config), settings); - - const uint64_t write_cache_max_size = - max_parallel_ops * block_list_block_size; - - // Prepare buffers - uint64_t buffer_size = write_cache_max_size * 5; - auto write_buffer = new char[buffer_size]; - for (uint64_t i = 0; i < buffer_size; i++) - write_buffer[i] = (char)('a' + (i % 26)); - uint64_t buffer_size_small = 1024 * 1024; - auto write_buffer_small = new char[buffer_size_small]; - for (uint64_t i = 0; i < buffer_size_small; i++) - write_buffer_small[i] = (char)('a' + (i % 26)); - - // Write to two files - auto largefile = TEST_DIR + "largefile"; - REQUIRE(azure_.write(URI(largefile), write_buffer, buffer_size).ok()); - REQUIRE( - azure_.write(URI(largefile), write_buffer_small, buffer_size_small).ok()); - auto smallfile = TEST_DIR + "smallfile"; - REQUIRE( - azure_.write(URI(smallfile), write_buffer_small, buffer_size_small).ok()); - - // Before flushing, the files do not exist - bool is_blob = false; - REQUIRE(azure_.is_blob(URI(largefile), &is_blob).ok()); - REQUIRE(!is_blob); - REQUIRE(azure_.is_blob(URI(smallfile), &is_blob).ok()); - REQUIRE(!is_blob); - - // Flush the files - REQUIRE(azure_.flush_blob(URI(largefile)).ok()); - REQUIRE(azure_.flush_blob(URI(smallfile)).ok()); - - // After flushing, the 
files exist - REQUIRE(azure_.is_blob(URI(largefile), &is_blob).ok()); - REQUIRE(is_blob); - REQUIRE(azure_.is_blob(URI(smallfile), &is_blob).ok()); - REQUIRE(is_blob); - - // Get file sizes - uint64_t nbytes = 0; - REQUIRE(azure_.blob_size(URI(largefile), &nbytes).ok()); - REQUIRE(nbytes == (buffer_size + buffer_size_small)); - REQUIRE(azure_.blob_size(URI(smallfile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_small); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - REQUIRE(azure_.read(URI(largefile), 0, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Read from a different offset - REQUIRE( - azure_.read(URI(largefile), 11, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - REQUIRE(allok); -} - -TEST_CASE_METHOD( - AzureFx, - "Test Azure filesystem, file I/O, no multipart", - "[azure][no_multipart]") { - Config config; - const uint64_t max_parallel_ops = 2; - const uint64_t block_list_block_size = 4 * 1024 * 1024; - REQUIRE(config.set("vfs.azure.use_block_list_upload", "false").ok()); - REQUIRE( - config.set("vfs.azure.max_parallel_ops", std::to_string(max_parallel_ops)) - .ok()); - REQUIRE(config - .set( - "vfs.azure.block_list_block_size", - std::to_string(block_list_block_size)) - .ok()); - - auto settings = - GENERATE(from_range(test_settings.begin(), test_settings.end())); - init_azure(std::move(config), settings); - - const uint64_t write_cache_max_size = - max_parallel_ops * block_list_block_size; - - // Prepare a large buffer that can fit in the write cache. 
- uint64_t large_buffer_size = write_cache_max_size; - auto large_write_buffer = new char[large_buffer_size]; - for (uint64_t i = 0; i < large_buffer_size; i++) - large_write_buffer[i] = (char)('a' + (i % 26)); - - // Prepare a small buffer that can fit in the write cache. - uint64_t small_buffer_size = write_cache_max_size / 1024; - auto small_write_buffer = new char[small_buffer_size]; - for (uint64_t i = 0; i < small_buffer_size; i++) - small_write_buffer[i] = (char)('a' + (i % 26)); - - // Prepare a buffer too large to fit in the write cache. - uint64_t oob_buffer_size = write_cache_max_size + 1; - auto oob_write_buffer = new char[oob_buffer_size]; - for (uint64_t i = 0; i < oob_buffer_size; i++) - oob_write_buffer[i] = (char)('a' + (i % 26)); - - auto large_file = TEST_DIR + "largefile"; - REQUIRE(azure_.write(URI(large_file), large_write_buffer, large_buffer_size) - .ok()); - - auto small_file_1 = TEST_DIR + "smallfile1"; - REQUIRE(azure_.write(URI(small_file_1), small_write_buffer, small_buffer_size) - .ok()); - - auto small_file_2 = TEST_DIR + "smallfile2"; - REQUIRE(azure_.write(URI(small_file_2), small_write_buffer, small_buffer_size) - .ok()); - REQUIRE(azure_.write(URI(small_file_2), small_write_buffer, small_buffer_size) - .ok()); - - auto oob_file = TEST_DIR + "oobfile"; - REQUIRE(!azure_.write(URI(oob_file), oob_write_buffer, oob_buffer_size).ok()); - - // Before flushing, the files do not exist - bool is_blob = false; - REQUIRE(azure_.is_blob(URI(large_file), &is_blob).ok()); - REQUIRE(!is_blob); - REQUIRE(azure_.is_blob(URI(small_file_1), &is_blob).ok()); - REQUIRE(!is_blob); - REQUIRE(azure_.is_blob(URI(small_file_2), &is_blob).ok()); - REQUIRE(!is_blob); - REQUIRE(azure_.is_blob(URI(oob_file), &is_blob).ok()); - REQUIRE(!is_blob); - - // Flush the files - REQUIRE(azure_.flush_blob(URI(small_file_1)).ok()); - REQUIRE(azure_.flush_blob(URI(small_file_2)).ok()); - REQUIRE(azure_.flush_blob(URI(large_file)).ok()); - - // After flushing, the files 
exist - REQUIRE(azure_.is_blob(URI(large_file), &is_blob).ok()); - REQUIRE(is_blob); - REQUIRE(azure_.is_blob(URI(small_file_1), &is_blob).ok()); - REQUIRE(is_blob); - REQUIRE(azure_.is_blob(URI(small_file_2), &is_blob).ok()); - REQUIRE(is_blob); - - // Get file sizes - uint64_t nbytes = 0; - REQUIRE(azure_.blob_size(URI(large_file), &nbytes).ok()); - CHECK(nbytes == large_buffer_size); - REQUIRE(azure_.blob_size(URI(small_file_1), &nbytes).ok()); - CHECK(nbytes == small_buffer_size); - REQUIRE(azure_.blob_size(URI(small_file_2), &nbytes).ok()); - CHECK(nbytes == (small_buffer_size + small_buffer_size)); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - REQUIRE( - azure_.read(URI(large_file), 0, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Read from a different offset - REQUIRE( - azure_.read(URI(large_file), 11, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - REQUIRE(allok); -} - -TEST_CASE( - "Test constructing Azure Blob Storage endpoint URIs", "[azure][uri]") { - std::string sas_token, custom_endpoint, expected_endpoint; - SECTION("No SAS token") { - sas_token = ""; - expected_endpoint = "https://devstoreaccount1.blob.core.windows.net"; - } - SECTION("SAS token without leading question mark") { - sas_token = "baz=qux&foo=bar"; - expected_endpoint = - "https://devstoreaccount1.blob.core.windows.net?baz=qux&foo=bar"; - } - SECTION("SAS token with leading question mark") { - sas_token = "?baz=qux&foo=bar"; - expected_endpoint = - "https://devstoreaccount1.blob.core.windows.net?baz=qux&foo=bar"; - } - SECTION("SAS token in both endpoint and config option") { - sas_token = 
"baz=qux&foo=bar"; - custom_endpoint = - "https://devstoreaccount1.blob.core.windows.net?baz=qux&foo=bar"; - expected_endpoint = - "https://devstoreaccount1.blob.core.windows.net?baz=qux&foo=bar"; - } - SECTION("No SAS token") { - sas_token = ""; - expected_endpoint = "https://devstoreaccount1.blob.core.windows.net"; - } - Config config; - REQUIRE( - config.set("vfs.azure.storage_account_name", "devstoreaccount1").ok()); - REQUIRE(config.set("vfs.azure.blob_endpoint", custom_endpoint).ok()); - REQUIRE(config.set("vfs.azure.storage_sas_token", sas_token).ok()); - tiledb::sm::Azure azure; - ThreadPool thread_pool(1); - REQUIRE(azure.init(config, &thread_pool).ok()); - REQUIRE(azure.client().GetUrl() == expected_endpoint); -} - -#endif diff --git a/test/src/unit-backwards_compat.cc b/test/src/unit-backwards_compat.cc index 685c942cf35e..425e7afdf7f4 100644 --- a/test/src/unit-backwards_compat.cc +++ b/test/src/unit-backwards_compat.cc @@ -34,12 +34,15 @@ #include #include "test/support/src/helpers.h" #include "test/support/src/serialization_wrappers.h" +#include "test/support/src/temporary_local_directory.h" #include "tiledb/common/common.h" +#include "tiledb/common/stdx_string.h" #include "tiledb/sm/cpp_api/tiledb" #include "tiledb/sm/cpp_api/tiledb_experimental" #include "tiledb/sm/misc/constants.h" #include +#include #include #include #include @@ -52,6 +55,9 @@ namespace { static const std::string arrays_dir = std::string(TILEDB_TEST_INPUTS_DIR) + "/arrays"; +static const std::string groups_dir = + std::string(TILEDB_TEST_INPUTS_DIR) + "/groups"; + template void set_query_coords( const Domain& domain, @@ -1428,3 +1434,53 @@ TEST_CASE( g, "u64", TILEDB_UINT64, 0x7777777777777777); } } + +TEST_CASE( + "Backwards compatibility: Test v1 groups", + "[backwards-compat][group][v1]") { + Context ctx; + VFS vfs(ctx); + + // Copy the group to a temporary directory because we will be modifying it. 
+ tiledb::sm::TemporaryLocalDirectory temp_dir; + std::filesystem::copy( + groups_dir + "/group_v1", + temp_dir.path(), + std::filesystem::copy_options::recursive); + + // Read the group + { + Group g{ctx, temp_dir.path(), TILEDB_READ}; + + CHECK(g.dump(false) != ""); + CHECK(g.member_count() == 1); + } + + // Add a member to the group + { + Group g{ctx, temp_dir.path(), TILEDB_WRITE}; + + Group::create(ctx, temp_dir.path() + "/subgroup2"); + + g.add_member("subgroup2", true, "subgroup2"); + + g.close(); + } + + // Read the group again + { + Group g{ctx, temp_dir.path(), TILEDB_READ}; + + CHECK(g.dump(false) != ""); + CHECK(g.member_count() == 2); + CHECK(g.member(1).name() == "subgroup2"); + } + + // Read the raw group details files + auto children = vfs.ls(temp_dir.path() + "/__group"); + CHECK(children.size() == 2); + std::sort(children.begin(), children.end()); + CHECK(!tiledb::sm::utils::parse::ends_with(children[0], "_1")); + // This is the file written by this test. + CHECK(tiledb::sm::utils::parse::ends_with(children[1], "_1")); +} diff --git a/test/src/unit-capi-array.cc b/test/src/unit-capi-array.cc index 3da9120f9110..123212deb8d6 100644 --- a/test/src/unit-capi-array.cc +++ b/test/src/unit-capi-array.cc @@ -79,6 +79,9 @@ using namespace tiledb::common; using namespace tiledb::sm; struct ArrayFx { + // The memory tracker + shared_ptr memory_tracker_; + // TileDB context tiledb_ctx_t* ctx_; tiledb_vfs_t* vfs_; @@ -116,7 +119,8 @@ static const std::string test_ca_file = std::string(TILEDB_TEST_INPUTS_DIR) + "/test_certs/public.crt"; ArrayFx::ArrayFx() - : fs_vec_(vfs_test_get_fs_vec()) { + : memory_tracker_(tiledb::test::create_test_memory_tracker()) + , fs_vec_(vfs_test_get_fs_vec()) { // Initialize vfs test REQUIRE(vfs_test_init(fs_vec_, &ctx_, &vfs_).ok()); } @@ -2289,8 +2293,8 @@ TEST_CASE_METHOD( // in array open v1 but with separate requests, so we simulate // this here by forcing metadata loading if (!array_v2) { - Metadata* metadata = nullptr; - 
CHECK(array->array_->metadata(&metadata).ok()); + auto metadata = &array->array_->metadata(); + CHECK(metadata != nullptr); array->array_->non_empty_domain(); } @@ -2340,7 +2344,7 @@ TEST_CASE_METHOD( Datatype type; const void* v_r; uint32_t v_num; - auto new_metadata = new_array->array_->unsafe_metadata(); + auto new_metadata = &new_array->array_->metadata(); new_metadata->get("aaa", &type, &v_num, &v_r); CHECK(static_cast(type) == TILEDB_INT32); CHECK(v_num == 1); @@ -2590,7 +2594,8 @@ TEST_CASE_METHOD( array->array_.get(), tiledb::sm::SerializationType::CAPNP, buff->buffer(), - ctx_->storage_manager()); + ctx_->storage_manager(), + memory_tracker_); REQUIRE(st.ok()); // 6. Server: Close array and clean up diff --git a/test/src/unit-cppapi-array.cc b/test/src/unit-cppapi-array.cc index effff473a670..12ec1d2f1dcb 100644 --- a/test/src/unit-cppapi-array.cc +++ b/test/src/unit-cppapi-array.cc @@ -2209,3 +2209,37 @@ TEST_CASE( CHECK(i > 0); } + +TEST_CASE("C++ API: Read empty array", "[cppapi][read-empty-array]") { + const std::string array_name_1d = "cpp_unit_array_1d"; + Context ctx; + VFS vfs(ctx); + + bool dups = GENERATE(true, false); + + if (vfs.is_dir(array_name_1d)) { + vfs.remove_dir(array_name_1d); + } + + ArraySchema schema(ctx, TILEDB_SPARSE); + Domain domain(ctx); + domain.add_dimension(Dimension::create(ctx, "d", {{0, 1000}}, 1001)); + schema.set_domain(domain); + schema.add_attribute(Attribute::create(ctx, "a")); + schema.set_allows_dups(dups); + Array::create(array_name_1d, schema); + Array array(ctx, array_name_1d, TILEDB_READ); + + std::vector d(1); + std::vector a(1); + Query q(ctx, array, TILEDB_READ); + q.set_layout(TILEDB_UNORDERED); + q.set_data_buffer("d", d); + q.set_data_buffer("a", a); + q.submit(); + array.close(); + + if (vfs.is_dir(array_name_1d)) { + vfs.remove_dir(array_name_1d); + } +} diff --git a/test/src/unit-cppapi-config.cc b/test/src/unit-cppapi-config.cc index 609ece4c7c0e..7be58bb59d32 100644 --- 
a/test/src/unit-cppapi-config.cc +++ b/test/src/unit-cppapi-config.cc @@ -33,17 +33,10 @@ #include #include +#include "test/support/src/helpers.h" #include "tiledb/sm/c_api/tiledb_serialization.h" #include "tiledb/sm/cpp_api/tiledb" -int setenv_local(const char* __name, const char* __value) { -#ifdef _WIN32 - return _putenv_s(__name, __value); -#else - return ::setenv(__name, __value, 1); -#endif -} - TEST_CASE("C++ API: Config", "[cppapi][config]") { tiledb::Config config; config["foo"] = "bar"; diff --git a/test/src/unit-cppapi-consolidation-with-timestamps.cc b/test/src/unit-cppapi-consolidation-with-timestamps.cc index 77aa4dedd6d5..4f6d614775ba 100644 --- a/test/src/unit-cppapi-consolidation-with-timestamps.cc +++ b/test/src/unit-cppapi-consolidation-with-timestamps.cc @@ -763,7 +763,7 @@ TEST_CASE_METHOD( // Will only allow to load two tiles out of 3. Config cfg; - cfg.set("sm.mem.total_budget", "9000"); + cfg.set("sm.mem.total_budget", "11000"); cfg.set("sm.mem.reader.sparse_global_order.ratio_coords", "0.4"); ctx_ = Context(cfg); @@ -822,7 +822,7 @@ TEST_CASE_METHOD( // Will only allow to load two tiles out of 3. Config cfg; - cfg.set("sm.mem.total_budget", "9000"); + cfg.set("sm.mem.total_budget", "11000"); cfg.set("sm.mem.reader.sparse_global_order.ratio_coords", "0.4"); ctx_ = Context(cfg); diff --git a/test/src/unit-cppapi-enumerations.cc b/test/src/unit-cppapi-enumerations.cc index 15990fc00590..baf04bbea852 100644 --- a/test/src/unit-cppapi-enumerations.cc +++ b/test/src/unit-cppapi-enumerations.cc @@ -522,7 +522,7 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( CPPEnumerationFx, - "CPP: Enumeration Query - Invalid Enumeration Value", + "CPP: Enumeration Query - Invalid Enumeration Value is Always False", "[enumeration][query][basic]") { create_array(); @@ -542,10 +542,64 @@ TEST_CASE_METHOD( .set_data_buffer("attr1", attr1) .set_condition(qc); - // Check that the error message is helpful to users. 
- auto matcher = Catch::Matchers::ContainsSubstring( - "Enumeration value not found for field 'attr1'"); - REQUIRE_THROWS_WITH(query.submit(), matcher); + REQUIRE_NOTHROW(query.submit()); + + std::vector dim_expect = {1, 2, 3, 4, 5}; + std::vector attr1_expect = { + INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN}; + + REQUIRE(dim == dim_expect); + REQUIRE(attr1 == attr1_expect); +} + +TEST_CASE_METHOD( + CPPEnumerationFx, + "CPP: Enumeration Query - Invalid Enumeration Value Accepted by EQ", + "[enumeration][query][basic]") { + create_array(); + + // Attempt to query with an enumeration value that isn't in the Enumeration + QueryCondition qc(ctx_); + qc.init("attr1", "alf", 3, TILEDB_EQ); + + // Execute the query condition against the array + std::vector dim(5); + std::vector attr1(5); + + auto array = Array(ctx_, uri_, TILEDB_READ); + Query query(ctx_, array); + query.add_range("dim", 1, 5) + .set_layout(TILEDB_ROW_MAJOR) + .set_data_buffer("dim", dim) + .set_data_buffer("attr1", attr1) + .set_condition(qc); + + CHECK_NOTHROW(query.submit()); +} + +TEST_CASE_METHOD( + CPPEnumerationFx, + "CPP: Enumeration Query - Invalid Enumeration Value Accepted by IN", + "[enumeration][query][basic]") { + create_array(); + + // Attempt to query with an enumeration value that isn't in the Enumeration + std::vector vals = {"alf", "fred"}; + auto qc = QueryConditionExperimental::create(ctx_, "attr1", vals, TILEDB_IN); + + // Execute the query condition against the array + std::vector dim(5); + std::vector attr1(5); + + auto array = Array(ctx_, uri_, TILEDB_READ); + Query query(ctx_, array); + query.add_range("dim", 1, 5) + .set_layout(TILEDB_ROW_MAJOR) + .set_data_buffer("dim", dim) + .set_data_buffer("attr1", attr1) + .set_condition(qc); + + CHECK_NOTHROW(query.submit()); } TEST_CASE_METHOD( @@ -572,7 +626,7 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( CPPEnumerationFx, "CPP: Enumeration Query - Attempt to query on empty enumeration", - "[enumeration][query][error]") { + 
"[enumeration][query][empty-results]") { create_array(true); // Attempt to query with an enumeration value that isn't in the Enumeration @@ -591,10 +645,7 @@ TEST_CASE_METHOD( .set_data_buffer("attr3", attr3) .set_condition(qc); - // Check that the error message is helpful to users. - auto matcher = Catch::Matchers::ContainsSubstring( - "Enumeration value not found for field 'attr3'"); - REQUIRE_THROWS_WITH(query.submit(), matcher); + REQUIRE_NOTHROW(query.submit()); } CPPEnumerationFx::CPPEnumerationFx() diff --git a/test/src/unit-cppapi-query-condition-enumerations.cc b/test/src/unit-cppapi-query-condition-enumerations.cc new file mode 100644 index 000000000000..872fedab7c76 --- /dev/null +++ b/test/src/unit-cppapi-query-condition-enumerations.cc @@ -0,0 +1,1169 @@ +/** + * @file unit-cppapi-query-condition-enumerations.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * Tests the C++ API for query condition related functions. + */ + +#include +#include +#include + +#include "test/support/src/ast_helpers.h" +#include "test/support/tdb_catch.h" +#include "tiledb/sm/c_api/tiledb_struct_def.h" +#include "tiledb/sm/cpp_api/tiledb" +#include "tiledb/sm/cpp_api/tiledb_experimental" +#include "tiledb/sm/serialization/query.h" + +#ifdef TILEDB_SERIALIZATION +#include +#include +#define GENERATE_SERIALIZATION() GENERATE(false, true) +#else +#define GENERATE_SERIALIZATION() false +#endif + +using namespace tiledb; + +/* + * The test fixture. See the first test for a basic example of expected + * usage. + */ + +struct EnmrQCCell; +struct ResultEnmrQCCell; + +using EnmrQCMatcher = std::function; +using EnmrQCCreator = std::function; + +class CPPQueryConditionEnumerationFx { + public: + CPPQueryConditionEnumerationFx(); + ~CPPQueryConditionEnumerationFx(); + + uint32_t run_test( + tiledb_array_type_t type, + bool serialize, + EnmrQCMatcher matcher, + EnmrQCCreator creator, + uint32_t num_rows = 1024); + + protected: + std::string uri_; + Context ctx_; + VFS vfs_; + + tiledb_array_type_t type_; + bool serialize_; + uint32_t num_rows_; + + // A fill value result. This is the value that a dense array query returns + // for a non-matching result. 
+ std::unique_ptr fill_; + + // Our random source + std::mt19937_64 rand_; + + // Enumeration helpers + std::unordered_map cell_type_values_; + std::unordered_map cycle_phase_values_; + std::unordered_map wavelength_values_; + + std::unordered_map cell_type_index_; + std::unordered_map cycle_phase_index_; + std::unordered_map wavelength_index_; + + // The data in the array represented as a vector of EnmrQCCell instances. + std::vector data_; + + // Private API + void create_array( + tiledb_array_type_t type, bool serialize, uint32_t num_rows); + void remove_array(); + void write_array(); + uint32_t check_read(EnmrQCMatcher matcher, EnmrQCCreator creator); + std::vector read_array(EnmrQCCreator creator); + std::vector generate_data(uint32_t num_rows); + void create_enumeration( + ArraySchema& schema, + const std::string& name, + const std::unordered_map& values, + bool ordered); + QueryCondition serialize_deserialize_qc(QueryCondition& qc); + void validate_query_condition(EnmrQCCreator creator); + std::unordered_map make_index( + std::unordered_map values); +}; + +/* + * Test Schema + * =========== + * + * row_id - A numeric integer in the range 1 - $NUM_ROWS + * sample_name - A random string with the format [A-J]{4}[0-9]{8} + * cell_type - An enumeration of cell types, listed below. + * cycle_phase - A nullable enumeration of cell cycle phase, listed below. + * wavelength - An ordered enumeration of laser wavelengths, listed below. + * luminosity - A float value in the range [0.0, 1.0] + * + * Cell Type Enumeration Values: + * + * For the non biologists: Endothelial cells have to do with blood vessels + * and epithelial has to do with skin and other membranes. Stem cells are + * progenitors that can become other types of cells, and neurons are cells + * in the brain. Muscle and bone cell types are both self documenting. 
+ * + * - endothelial + * - epithelial + * - muscle + * - bone + * - neuron + * - stem + * + * Cell Cycle Phases (These are actually real): + * + * Fun fact, G1 and G2 literally stand for Gap 1 and Gap 2. M stands for the + * mitosis/meiosis stage (i.e., cell division), S is the synthesis phase + * (i.e., when a cell is replicating its DNA in preparation to divide), while + * G1 and G2 are basically historical "We're not sure what's going on + * exactly" stages. I'm sure they know more now, but this entire anecdote is + * the only reason I remember the stages. + * + * Also, this enumeration is ordered in this test even though it really + * hasn't got an order since there's no obvious first step of the cycle given + * that it's actually the definition of a chicken-and-egg issue. + * + * - G1 + * - S + * - G2 + * - M + * + * Laser Wavelengths (Also real, but no, I don't have these memorized): + * + * N.B., the values are "355nm" or "552nm" for example. I've labeled each + * wavelength with its corresponding color only for reference for folks that + * haven't memorized the electromagnetic spectrum. + * + * Also, a quick background on the science of fluorescence microscopy and why + * wavelengths as an ordered enumeration is actually an interesting use case. + * First, the basic principle of fluorescence is that an atom or molecule can + * be excited by a photon of a certain frequency into a new state, which + * then after some time relaxes and emits a photon of a different wavelength. + * Anything that can do this is called a fluorophore. The important part here + * is that both the excitation and relaxation photons are set at + * specific wavelengths because physics. + * + * The result of all that is that you can detect fluorophores by shining + * one color of light on them and then looking for a specific *different* color + * of light being emitted.
With that knowledge, applying it to science is just + * a matter of tagging something of interest with a fluorophore and then + * setting up various light sources and wavelength filters and voila, you get + * a useful measurable signal. + * + * So back to lasers, given that we have specific wavelengths that are chosen + * based on what fluorophore we're using, we wouldn't want this to just be an + * integer. Allowing raw integral values means that there's a possibility we + * end up with data that's not one of our lasers due to data entry + * errors and so on. However, they're quite comparable as obviously the + * enumerated values are numeric in nature. + * + * - 355nm (ultraviolet) + * - 405nm (violet) + * - 488nm (blue) + * - 532nm (green) + * - 552nm (greener?) + * - 561nm (green-yellow) + * - 640nm (red) + */ + +struct EnmrQCCell { + EnmrQCCell(); + + uint32_t row_id; + std::string sample_name; + std::string cell_type; + std::string cycle_phase; + bool cycle_phase_valid; + std::string wavelength; + float luminosity; +}; + +// Used by test internals +struct ResultEnmrQCCell : public EnmrQCCell { + ResultEnmrQCCell(); + + // We're purposefully avoiding a copy constructor so that the single case + // we need to copy a fill value is made obvious. + void copy_fill(const std::unique_ptr& rhs); + + bool operator==(const EnmrQCCell& rhs); + bool valid; +}; + +/* + * Test case definitions start here.
+ */ + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Basic Tests", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto threshold = num_rows_ / 2; + auto matcher = [=](const EnmrQCCell& cell) { + return cell.row_id < threshold; + }; + auto creator = [=](Context& ctx) { + return QueryCondition::create(ctx, "row_id", threshold, TILEDB_LT); + }; + + run_test(type, serialize, matcher, creator); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Simple Enumeration Equality", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.cell_type == "bone"; + }; + auto creator = [](Context& ctx) { + return QueryCondition::create( + ctx, "cell_type", std::string("bone"), TILEDB_EQ); + }; + + run_test(type, serialize, matcher, creator); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Simple Enumeration Non-Equality", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.cell_type != "bone"; + }; + auto creator = [](Context& ctx) { + return QueryCondition::create( + ctx, "cell_type", std::string("bone"), TILEDB_NE); + }; + + run_test(type, serialize, matcher, creator); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Simple Enumeration Inequality", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.wavelength <= "532nm"; + }; + auto creator = [](Context& ctx) { + return QueryCondition::create( + ctx, "wavelength", std::string("532nm"), TILEDB_LE); + }; + + run_test(type, serialize, matcher, creator); 
+} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Simple Enumeration Equality to Invalid Value", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.cell_type == "fruit"; + }; + auto creator = [](Context& ctx) { + return QueryCondition::create( + ctx, "cell_type", std::string("fruit"), TILEDB_EQ); + }; + + // Assert that == invalid enumeration value matches nothing. + auto matched = run_test(type, serialize, matcher, creator); + REQUIRE(matched == 0); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Simple Enumeration Non-Equality to Invalid Value", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.cell_type != "fruit"; + }; + auto creator = [](Context& ctx) { + return QueryCondition::create( + ctx, "cell_type", std::string("fruit"), TILEDB_NE); + }; + + // Assert that != invalid value matches everything. + auto matched = run_test(type, serialize, matcher, creator); + REQUIRE(matched == num_rows_); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Enumeration Equality to Negated Invalid Value", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.cell_type == "fruit"; + }; + auto creator = [](Context& ctx) { + auto qc = QueryCondition::create( + ctx, "cell_type", std::string("fruit"), TILEDB_NE); + return qc.negate(); + }; + + // Assert that (not !=) invalid value matches nothing. 
+ auto matched = run_test(type, serialize, matcher, creator); + REQUIRE(matched == 0); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Enumeration Non-Equality to Negated Invalid Value", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.cell_type != "fruit"; + }; + auto creator = [](Context& ctx) { + auto qc = QueryCondition::create( + ctx, "cell_type", std::string("fruit"), TILEDB_EQ); + return qc.negate(); + }; + + // Assert that (not ==) invalid value matches everything + auto matched = run_test(type, serialize, matcher, creator); + REQUIRE(matched == num_rows_); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Enumeration Inequality with Invalid Value", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell&) { return false; }; + auto creator = [](Context& ctx) { + return QueryCondition::create( + ctx, "wavelength", std::string("6000nm"), TILEDB_LE); + }; + + // Assert that (<=) invalid value matches nothing. + auto matched = run_test(type, serialize, matcher, creator); + REQUIRE(matched == 0); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Enumeration Inequality with Negated Invalid Value", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell&) { return false; }; + auto creator = [](Context& ctx) { + auto qc = QueryCondition::create( + ctx, "wavelength", std::string("6000nm"), TILEDB_LE); + return qc.negate(); + }; + + // Assert that (not <=) invalid value matches nothing. 
+ auto matched = run_test(type, serialize, matcher, creator); + REQUIRE(matched == 0); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Enumeration IN Set with Invalid Member", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.cell_type == "bone" || cell.cell_type == "stem"; + }; + auto creator = [](Context& ctx) { + std::vector values = {"bone", "stem", "fish"}; + return QueryConditionExperimental::create( + ctx, "cell_type", values, TILEDB_IN); + }; + + run_test(type, serialize, matcher, creator); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Enumeration NOT_IN Set with Invalid Member", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.cell_type != "bone" && cell.cell_type != "stem"; + }; + auto creator = [](Context& ctx) { + std::vector values = {"bone", "stem", "fish"}; + return QueryConditionExperimental::create( + ctx, "cell_type", values, TILEDB_NOT_IN); + }; + + run_test(type, serialize, matcher, creator); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Enumeration IN Set with Negated Invalid Member", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.cell_type == "bone" || cell.cell_type == "stem"; + }; + auto creator = [](Context& ctx) { + std::vector values = {"bone", "stem", "fish"}; + auto qc = QueryConditionExperimental::create( + ctx, "cell_type", values, TILEDB_NOT_IN); + return qc.negate(); + }; + + run_test(type, serialize, matcher, creator); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Enumeration NOT IN Set with Negated Invalid Member", + 
"[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + return cell.cell_type != "bone" && cell.cell_type != "stem"; + }; + auto creator = [](Context& ctx) { + std::vector values = {"bone", "stem", "fish"}; + auto qc = + QueryConditionExperimental::create(ctx, "cell_type", values, TILEDB_IN); + return qc.negate(); + }; + + run_test(type, serialize, matcher, creator); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "RowID inequality AND Enumeration IN Set with Invalid Member", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + auto r1 = cell.row_id < 512; + auto r2 = cell.cell_type == "bone" || cell.cell_type == "stem"; + return r1 && r2; + }; + auto creator = [](Context& ctx) { + auto qc1 = QueryCondition::create(ctx, "row_id", 512, TILEDB_LT); + std::vector values = {"bone", "stem", "fish"}; + auto qc2 = + QueryConditionExperimental::create(ctx, "cell_type", values, TILEDB_IN); + return qc1.combine(qc2, TILEDB_AND); + }; + + run_test(type, serialize, matcher, creator); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "RowID inequality OR Enumeration NOT_IN Set with Invalid Member", + "[query-condition][enumeration][logic]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + auto matcher = [](const EnmrQCCell& cell) { + auto r1 = cell.row_id < 512; + auto r2 = cell.cell_type != "bone" && cell.cell_type != "stem"; + return r1 || r2; + }; + auto creator = [](Context& ctx) { + auto qc1 = QueryCondition::create(ctx, "row_id", 512, TILEDB_LT); + std::vector values = {"bone", "stem", "fish"}; + auto qc2 = QueryConditionExperimental::create( + ctx, "cell_type", values, TILEDB_NOT_IN); + return qc1.combine(qc2, TILEDB_OR); + }; + + 
run_test(type, serialize, matcher, creator); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Check error on negation of TILEDB_ALWAYS_TRUE after rewrite.", + "[query-condition][enumeration][logic][rewrite-error]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + + create_array(type, serialize, 1024); + + Array array(ctx_, uri_, TILEDB_READ); + auto core_array = array.ptr().get()->array_; + core_array->load_all_enumerations(); + + auto qc = + QueryCondition::create(ctx_, "cell_type", std::string("fish"), TILEDB_NE); + auto core_qc = qc.ptr().get()->query_condition_; + core_qc->rewrite_enumeration_conditions(core_array->array_schema_latest()); + + auto matcher = Catch::Matchers::ContainsSubstring( + "Invalid negation of rewritten query."); + REQUIRE_THROWS_WITH(qc.negate(), matcher); +} + +TEST_CASE_METHOD( + CPPQueryConditionEnumerationFx, + "Check error on negation of TILEDB_ALWAYS_FALSE after rewrite.", + "[query-condition][enumeration][logic][rewrite-error]") { + auto type = GENERATE(TILEDB_SPARSE, TILEDB_DENSE); + auto serialize = GENERATE_SERIALIZATION(); + + create_array(type, serialize, 1024); + + Array array(ctx_, uri_, TILEDB_READ); + auto core_array = array.ptr().get()->array_; + core_array->load_all_enumerations(); + + auto qc = + QueryCondition::create(ctx_, "cell_type", std::string("fish"), TILEDB_EQ); + auto core_qc = qc.ptr().get()->query_condition_; + core_qc->rewrite_enumeration_conditions(core_array->array_schema_latest()); + + auto matcher = Catch::Matchers::ContainsSubstring( + "Invalid negation of rewritten query."); + REQUIRE_THROWS_WITH(qc.negate(), matcher); +} + +TEST_CASE( + "Check error on creating a TILEDB_ALWAYS_TRUE QueryCondition", + "[query-condition][enumeration][logic][op-error]") { + Context ctx; + // TILEDB_ALWAYS_TRUE is not an exposed symbol so we even have to force + // the issue by knowing the internal value and casting it. 
+ auto op = static_cast(253); + auto matcher = Catch::Matchers::ContainsSubstring( + "Invalid use of internal operation: ALWAYS_TRUE"); + REQUIRE_THROWS_WITH(QueryCondition::create(ctx, "foo", 0, op), matcher); +} + +TEST_CASE( + "Check error on creating a TILEDB_ALWAYS_FALSE QueryCondition", + "[query-condition][enumeration][logic][op-error]") { + Context ctx; + // TILEDB_ALWAYS_FALSE is not an exposed symbol so we even have to force + // the issue by knowing the internal value and casting it. + auto op = static_cast(254); + auto matcher = Catch::Matchers::ContainsSubstring( + "Invalid use of internal operation: ALWAYS_FALSE"); + REQUIRE_THROWS_WITH(QueryCondition::create(ctx, "foo", 0, op), matcher); +} + +/* + * All code below here is test support implementation. + */ + +EnmrQCCell::EnmrQCCell() + : row_id(0) + , sample_name("Uninitialied Data Cell") + , cell_type("Uninitialised Data Cell") + , cycle_phase("Uninitialized Data Cell") + , wavelength("Uninitialized Data Cell") + , luminosity(3.14159f) { +} + +ResultEnmrQCCell::ResultEnmrQCCell() { + row_id = std::numeric_limits::max(); + sample_name = "Uninitialized Result Cell"; + cell_type = "Uninitialized Result Cell"; + cycle_phase = "Uninitialized Result Cell"; + wavelength = "Uninitialized Result Cell"; + luminosity = 1.618f; + valid = false; +} + +void ResultEnmrQCCell::copy_fill(const std::unique_ptr& rhs) { + row_id = rhs->row_id; + sample_name = rhs->sample_name; + cell_type = rhs->cell_type; + cycle_phase = rhs->cycle_phase; + wavelength = rhs->wavelength; + luminosity = rhs->luminosity; + valid = true; +} + +bool ResultEnmrQCCell::operator==(const EnmrQCCell& rhs) { + if (row_id != rhs.row_id) { + return false; + } + + if (sample_name != rhs.sample_name) { + return false; + } + + if (cell_type != rhs.cell_type) { + return false; + } + + if (cycle_phase != rhs.cycle_phase) { + return false; + } + + if (wavelength != rhs.wavelength) { + return false; + } + + if (luminosity != rhs.luminosity) { + return 
false; + } + + return true; +} + +std::ostream& operator<<(std::ostream& os, const EnmrQCCell& cell) { + return os << "EnmrQCCell{" + << "row_id: " << cell.row_id << ", " + << "sample_name: '" << cell.sample_name << "', " + << "cell_type: '" << cell.cell_type << "', " + << "cycle_phase: '" << cell.cycle_phase << "', " + << "cycle_phase_valid: " << (cell.cycle_phase_valid ? "yes" : "no") + << ", " + << "wavelength: '" << cell.wavelength << "', " + << "luminosity: " << cell.luminosity << "}"; +} + +CPPQueryConditionEnumerationFx::CPPQueryConditionEnumerationFx() + : uri_("query_condition_enumeration_array") + , vfs_(ctx_) { + remove_array(); + + // This is used for asserting the dense-non-match case. + fill_ = std::make_unique(); + fill_->sample_name = ""; + fill_->cell_type = ""; + fill_->cycle_phase = ""; + fill_->cycle_phase_valid = false; + fill_->wavelength = ""; + fill_->luminosity = std::numeric_limits::min(); + + std::random_device rdev; + rand_.seed(rdev()); + + cell_type_values_ = { + {"bone", 0}, + {"endothelial", 1}, + {"epithelial", 2}, + {"muscle", 3}, + {"neuron", 4}, + {"stem", 5}}; + + cycle_phase_values_ = {{"G1", 0}, {"S", 1}, {"G2", 2}, {"M", 3}}; + + wavelength_values_ = { + {"355nm", 0}, + {"405nm", 1}, + {"488nm", 2}, + {"532nm", 3}, + {"552nm", 4}, + {"561nm", 5}, + {"640nm", 6}}; + + cell_type_index_ = make_index(cell_type_values_); + cycle_phase_index_ = make_index(cycle_phase_values_); + wavelength_index_ = make_index(wavelength_values_); +} + +CPPQueryConditionEnumerationFx::~CPPQueryConditionEnumerationFx() { + remove_array(); +} + +uint32_t CPPQueryConditionEnumerationFx::run_test( + tiledb_array_type_t type, + bool serialize, + EnmrQCMatcher matcher, + EnmrQCCreator creator, + uint32_t num_rows) { + create_array(type, serialize, num_rows); + return check_read(matcher, creator); +} + +void CPPQueryConditionEnumerationFx::create_array( + tiledb_array_type_t type, bool serialize, uint32_t num_rows) { + type_ = type; + serialize_ = 
serialize; + num_rows_ = num_rows; + data_ = generate_data(num_rows_); + + // Create our array schema + ArraySchema schema(ctx_, type_); + + if (type_ == TILEDB_SPARSE) { + schema.set_capacity(num_rows_); + } + + // Create a single dimension row_id as uint32_t + auto dim = Dimension::create(ctx_, "row_id", {{1, num_rows_}}); + auto dom = Domain(ctx_); + dom.add_dimension(dim); + schema.set_domain(dom); + + // Create our enumerations + create_enumeration(schema, "cell_types", cell_type_index_, false); + create_enumeration(schema, "cycle_phases", cycle_phase_index_, true); + create_enumeration(schema, "wavelengths", wavelength_index_, true); + + // Create our attributes + auto sample_name = Attribute::create(ctx_, "sample_name"); + + auto cell_type = Attribute::create(ctx_, "cell_type"); + AttributeExperimental::set_enumeration_name(ctx_, cell_type, "cell_types"); + + auto cell_phase = Attribute::create(ctx_, "cycle_phase"); + AttributeExperimental::set_enumeration_name(ctx_, cell_phase, "cycle_phases"); + cell_phase.set_nullable(true); + + auto wavelength = Attribute::create(ctx_, "wavelength"); + AttributeExperimental::set_enumeration_name(ctx_, wavelength, "wavelengths"); + + auto luminosity = Attribute::create(ctx_, "luminosity"); + + schema.add_attributes( + sample_name, cell_type, cell_phase, wavelength, luminosity); + + // Create and write the array. 
+ Array::create(uri_, schema); + write_array(); +} + +void CPPQueryConditionEnumerationFx::write_array() { + Array array(ctx_, uri_, TILEDB_WRITE); + Query query(ctx_, array); + + std::vector row_ids(num_rows_); + std::iota(row_ids.begin(), row_ids.end(), 1); + + if (type_ == TILEDB_DENSE) { + Subarray subarray(ctx_, array); + subarray.add_range(0, 1, num_rows_); + query.set_subarray(subarray); + } else { + query.set_data_buffer("row_id", row_ids); + } + + // Generate our write buffers + std::vector names(num_rows_ * strlen("AAAA00000000")); + std::vector name_offsets(num_rows_); + std::vector cell_types(num_rows_); + std::vector cycle_phases(num_rows_); + std::vector cycle_phases_validity(num_rows_); + std::vector wavelengths(num_rows_); + std::vector luminosity(num_rows_); + + uint64_t name_offset = 0; + for (size_t i = 0; i < num_rows_; i++) { + auto& cell = data_[i]; + + std::memcpy( + names.data() + name_offset, + cell.sample_name.data(), + cell.sample_name.size()); + name_offsets[i] = name_offset; + name_offset += cell.sample_name.size(); + + cell_types[i] = cell_type_values_.at(cell.cell_type); + if (cell.cycle_phase_valid) { + cycle_phases[i] = cycle_phase_values_.at(cell.cycle_phase); + } else { + cycle_phases[i] = 254; + } + cycle_phases_validity[i] = cell.cycle_phase_valid ? 
1 : 0; + wavelengths[i] = wavelength_values_.at(cell.wavelength); + luminosity[i] = cell.luminosity; + } + + // Attach the buffers to our write query + query.set_data_buffer("sample_name", names) + .set_offsets_buffer("sample_name", name_offsets) + .set_data_buffer("cell_type", cell_types) + .set_data_buffer("cycle_phase", cycle_phases) + .set_validity_buffer("cycle_phase", cycle_phases_validity) + .set_data_buffer("wavelength", wavelengths) + .set_data_buffer("luminosity", luminosity); + + CHECK_NOTHROW(query.submit() == Query::Status::COMPLETE); + query.finalize(); + array.close(); +} + +uint32_t CPPQueryConditionEnumerationFx::check_read( + EnmrQCMatcher matcher, EnmrQCCreator creator) { + validate_query_condition(creator); + + // Calculate the number of matches to expect. + uint32_t should_match = 0; + for (auto& cell : data_) { + if (matcher(cell)) { + should_match += 1; + } + } + + auto results = read_array(creator); + + uint32_t num_matched = 0; + + for (size_t i = 0; i < num_rows_; i++) { + if (type_ == TILEDB_DENSE) { + // Dense reads always return a value where non-matching cells are just + // the fill values for all attributes. + if (matcher(data_[i])) { + REQUIRE(results[i] == data_[i]); + num_matched += 1; + } else { + REQUIRE(results[i] == *fill_.get()); + } + // Just an internal test assertion that all dense values are valid. + REQUIRE(results[i].valid); + } else { + // Sparse queries only return cells that match. We mark this with whether + // the ResultEnmrQCCell has its valid flag set or not. 
+ if (matcher(data_[i])) { + REQUIRE(results[i] == data_[i]); + num_matched += 1; + } else { + REQUIRE(results[i].valid == false); + } + } + } + + REQUIRE(num_matched == should_match); + + return num_matched; +} + +std::vector CPPQueryConditionEnumerationFx::read_array( + EnmrQCCreator creator) { + Array array(ctx_, uri_, TILEDB_READ); + Query query(ctx_, array); + + if (type_ == TILEDB_DENSE) { + Subarray subarray(ctx_, array); + subarray.add_range(0, 1, num_rows_); + query.set_subarray(subarray); + } else { + query.set_layout(TILEDB_GLOBAL_ORDER); + } + + std::vector row_ids(num_rows_); + std::vector sample_names(num_rows_ * 2 * strlen("AAAA00000000")); + std::vector sample_name_offsets(num_rows_); + std::vector cell_types(num_rows_); + std::vector cycle_phases(num_rows_); + std::vector cycle_phases_validity(num_rows_); + std::vector wavelengths(num_rows_); + std::vector luminosities(num_rows_); + + auto qc = creator(ctx_); + if (serialize_) { + qc = serialize_deserialize_qc(qc); + } + + query.set_condition(qc) + .set_data_buffer("row_id", row_ids) + .set_data_buffer("sample_name", sample_names) + .set_offsets_buffer("sample_name", sample_name_offsets) + .set_data_buffer("cell_type", cell_types) + .set_data_buffer("cycle_phase", cycle_phases) + .set_validity_buffer("cycle_phase", cycle_phases_validity) + .set_data_buffer("wavelength", wavelengths) + .set_data_buffer("luminosity", luminosities); + + REQUIRE(query.submit() == Query::Status::COMPLETE); + + auto table = query.result_buffer_elements(); + + row_ids.resize(table["row_id"].second); + sample_name_offsets.resize(table["sample_name"].first); + cell_types.resize(table["cell_type"].second); + cycle_phases.resize(table["cycle_phase"].second); + cycle_phases_validity.resize(table["cycle_phase"].second); + wavelengths.resize(table["wavelength"].second); + luminosities.resize(table["luminosity"].second); + + // Create our result cell instances + // + // Remember here that the default constructed instances are in 
the + // test-uninitialized state. + // + // The second thing to remember, is that this for loop is has two slightly + // different behaviors between dense and sparse queries. For spares, we can + // iterate over 0, 1, or up to num_rows_ matches. For dense, it always + // iterates of num_rows_ entries because non-matches are returned as fill + // values. + std::vector ret(num_rows_); + for (size_t i = 0; i < row_ids.size(); i++) { + auto row_id = row_ids[i]; + + // There are basically three states a result can be in. The first obvious + // case is when its a match and it should equal the cell in data_. The + // second obvious case is when its a non-match which should never ever match + // anything in data_. The third case that makes things weird is a non-match + // on a dense array which returns fill values. + // + // The logic below is dealing with each of those cases. Currently it relies + // on a bit of a hack. This is using an implementation detail to detect the + // difference between the non-match and dense-fill-values cases. We can do + // this because when we write null cycle phase values, we set the cycle + // phase enumeration value to 254. We do that on purpose to distinguish + // these cases. Core will return what we write regardless of the null-ness + // so we're abusing that for testing here. + // + // So lets dive in: + // + // If cycle_phase is 255, this is the dense-non-match case so the cell is + // a copy of the random dense_non_match_ instance.. + if (cycle_phases[i] == std::numeric_limits::max()) { + ret[i].copy_fill(fill_); + continue; + } + + // At this point the dense-non-match case is handled. So now all we have + // to worry about is match vs non-match cases. The following logic could + // easily seem redundant when we could just check one attribute and return + // a default constructed ResultEnmrQCCell or a copy of data_[i]. 
+ // + // However, we can't rely on the compiler defaults for asserting match + // semantics here because of the nullptr ternary logic involved in the + // cycle_phase case. That's a fancy way of saying (x < null) and + // (x > null) are both false and the compiler can't figure that out for us. + + // A subtle dense vs sparse issue here. We're setting result[i].valid to + // true because the default constructed value is false. This gives us + // an extra sparse/dense behavior assertion for free because a sparse + // non-match will be false in the results. + ret[row_id - 1].valid = true; + + // Calculate the sample name length even though we know its 12. + uint64_t name_length = 0; + if (i < sample_name_offsets.size() - 1) { + name_length = sample_name_offsets[i + 1] - sample_name_offsets[i]; + } else { + name_length = table["sample_name"].second - sample_name_offsets[i]; + } + + // Make sure we're dealing with the correct cell. + ret[row_id - 1].row_id = row_ids[i]; + + // Copy over the sample name. Either the whole "AAAA00000000" id or the + // empty string if we're on a fill value. + ret[row_id - 1].sample_name = + std::string(sample_names.data() + sample_name_offsets[i], name_length); + + // The cell_type attribute is non-nullable so the 255 distinguishes between + // match and non-match for this cell. + if (cell_types[i] == std::numeric_limits::max()) { + ret[row_id - 1].cell_type = ""; + } else { + ret[row_id - 1].cell_type = cell_type_index_.at(cell_types[i]); + } + + // This is a bit weird because there's null-ability logic involved with + // cell not matching logic. One thing to keep in mind, is that we write + // 254 as the data value when we mark a cycle phase as null. Currently + // TileDB repeats this value back so we can use it to deduce when we wrote + // null vs seeing an non-matching cell in the dense results. + if (cycle_phases_validity[i]) { + // We have a non-null cycle phase. 
+ ret[row_id - 1].cycle_phase = cycle_phase_index_.at(cycle_phases[i]); + ret[row_id - 1].cycle_phase_valid = true; + } else { + // A null cycle phase. The assertion on cycle_phases[i] here is testing + // our precondition that we know this should be null by the fact that + // core returns our invalid 254 value. + assert(cycle_phases[i] == 254); + ret[row_id - 1].cycle_phase = ""; + ret[row_id - 1].cycle_phase_valid = false; + } + + if (wavelengths[i] == std::numeric_limits::max()) { + // Cell didn't match, so wavelength gets the non-match value of an empty + // string. + ret[row_id - 1].wavelength = ""; + } else { + ret[row_id - 1].wavelength = wavelength_index_.at(wavelengths[i]); + } + + // This is a bit silly, but in the interest of preventing accidental + // matches, I'm using the nanf function to give a float that is NaN with + // the fraction as the leading digits of pi to help debugging. + // + // That is to say, if you start seeing NaN issues with this test, you can + // check the fraction to see if its a "real" NaN or a logic error because + // of how we're creating the NaN instance here. 
+ if (luminosities[i] == std::numeric_limits::min()) { + ret[row_id - 1].luminosity = nanf("3141592"); + } else { + ret[row_id - 1].luminosity = luminosities[i]; + } + } + + return ret; +} + +std::vector CPPQueryConditionEnumerationFx::generate_data( + uint32_t num_rows) { + std::vector ret(num_rows); + + std::uniform_int_distribution sn_rng(0, 9); + std::uniform_int_distribution ct_rng( + 0, static_cast(cell_type_values_.size()) - 1); + std::uniform_int_distribution cp_rng( + 0, static_cast(cycle_phase_values_.size()) - 1); + std::uniform_int_distribution wl_rng( + 0, static_cast(wavelength_values_.size()) - 1); + std::uniform_real_distribution lum_rng(0.0, 1.0); + + std::string sample_name = "AAAA00000000"; + + for (uint32_t i = 0; i < ret.size(); i++) { + ret[i].row_id = i + 1; + + for (size_t i = 0; i < sample_name.size(); i++) { + if (i < 4) { + sample_name[i] = 'A' + static_cast(sn_rng(rand_)); + } else { + sample_name[i] = '0' + static_cast(sn_rng(rand_)); + } + } + + ret[i].sample_name = sample_name; + REQUIRE(ret[i].sample_name.size() == 12); + ret[i].cell_type = cell_type_index_.at(static_cast(ct_rng(rand_))); + // A bit hacky, but I'm reusing the luminescence RNG to make the cycle + // phase null 30% of the time. 
+ if (lum_rng(rand_) < 0.3) { + ret[i].cycle_phase = ""; + ret[i].cycle_phase_valid = false; + } else { + ret[i].cycle_phase = + cycle_phase_index_.at(static_cast(cp_rng(rand_))); + ret[i].cycle_phase_valid = true; + } + ret[i].wavelength = + wavelength_index_.at(static_cast(wl_rng(rand_))); + ret[i].luminosity = lum_rng(rand_); + } + + return ret; +} + +#ifdef TILEDB_SERIALIZATION +QueryCondition CPPQueryConditionEnumerationFx::serialize_deserialize_qc( + QueryCondition& qc) { + using namespace tiledb::sm::serialization; + using Condition = tiledb::sm::serialization::capnp::Condition; + + auto qc_ptr = qc.ptr().get()->query_condition_; + + QueryCondition ret(ctx_); + auto ret_ptr = ret.ptr().get()->query_condition_; + + // Serialize the query condition. + ::capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + throw_if_not_ok(condition_to_capnp(*qc_ptr, &builder)); + + // Deserialize the query condition. + *ret_ptr = condition_from_capnp(builder); + REQUIRE(tiledb::test::ast_equal(ret_ptr->ast(), qc_ptr->ast())); + + return ret; +} +#else +QueryCondition CPPQueryConditionEnumerationFx::serialize_deserialize_qc( + QueryCondition&) { + throw std::logic_error("Unable to serialize when serialization is disabled."); +} +#endif + +void CPPQueryConditionEnumerationFx::create_enumeration( + ArraySchema& schema, + const std::string& name, + const std::unordered_map& index, + bool ordered) { + std::vector enmr_values; + for (uint8_t i = 0; i < static_cast(index.size()); i++) { + enmr_values.push_back(index.at(i)); + } + auto enmr = Enumeration::create(ctx_, name, enmr_values, ordered); + ArraySchemaExperimental::add_enumeration(ctx_, schema, enmr); +} + +void CPPQueryConditionEnumerationFx::validate_query_condition( + EnmrQCCreator creator) { + Array array(ctx_, uri_, TILEDB_READ); + auto core_array = array.ptr().get()->array_; + core_array->load_all_enumerations(); + + auto qc = creator(ctx_); + auto core_qc = qc.ptr().get()->query_condition_; + 
core_qc->rewrite_enumeration_conditions(core_array->array_schema_latest()); + + REQUIRE(core_qc->check(core_array->array_schema_latest()).ok()); +} + +std::unordered_map +CPPQueryConditionEnumerationFx::make_index( + std::unordered_map values) { + std::unordered_map ret; + for (auto& [name, idx] : values) { + assert(ret.find(idx) == ret.end()); + ret[idx] = name; + } + return ret; +} + +void CPPQueryConditionEnumerationFx::remove_array() { + if (vfs_.is_dir(uri_)) { + vfs_.remove_dir(uri_); + } +} diff --git a/test/src/unit-cppapi-query-condition-sets.cc b/test/src/unit-cppapi-query-condition-sets.cc index 76ba87d2918b..dadb33c08e61 100644 --- a/test/src/unit-cppapi-query-condition-sets.cc +++ b/test/src/unit-cppapi-query-condition-sets.cc @@ -260,6 +260,22 @@ TEST_CASE_METHOD( check_read(qc, [](const QCSetsCell& c) { return !(c.a2 == "wilma"); }); } +TEST_CASE_METHOD( + CPPQueryConditionFx, + "IN - String With Non-Enumeration Value", + "[query-condition][set][non-enum-value][string]") { + auto type = GENERATE( + TestArrayType::DENSE, TestArrayType::SPARSE, TestArrayType::LEGACY); + auto serialize = SERIALIZE_TESTS(); + create_array(type, serialize); + + std::vector values = {"wilma", "astro"}; + auto qc = + QueryConditionExperimental::create(ctx_, "attr2", values, TILEDB_NOT_IN); + + check_read(qc, [](const QCSetsCell& c) { return !(c.a2 == "wilma"); }); +} + TEST_CASE_METHOD( CPPQueryConditionFx, "NOT_IN - Enumeration", @@ -828,7 +844,7 @@ void CPPQueryConditionFx::rm_array() { } void CPPQueryConditionFx::generate_data() { - num_elements_ = 10; // * 1024; + num_elements_ = 1024; dim_values_.clear(); attr1_values_.clear(); @@ -1012,7 +1028,8 @@ std::vector CPPQueryConditionFx::to_vector( template T CPPQueryConditionFx::choose_value(std::vector& values) { auto rval = random(); - auto idx = static_cast(rval * values.size()); + // Note the `% values.size()` which handles when rval is 1.0 + auto idx = static_cast(rval * values.size()) % values.size(); return 
values[idx]; } diff --git a/test/src/unit-cppapi-schema-evolution.cc b/test/src/unit-cppapi-schema-evolution.cc index 651b667759bb..83a64f6e33e0 100644 --- a/test/src/unit-cppapi-schema-evolution.cc +++ b/test/src/unit-cppapi-schema-evolution.cc @@ -31,6 +31,7 @@ */ #include +#include "test/support/src/mem_helpers.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/array_schema_evolution.h" #include "tiledb/sm/array_schema/attribute.h" @@ -796,7 +797,8 @@ TEST_CASE( TEST_CASE( "SchemaEvolution Error Handling Tests", "[cppapi][schema][evolution][errors]") { - auto ase = make_shared(HERE()); + auto ase = make_shared( + HERE(), tiledb::test::create_test_memory_tracker()); REQUIRE_THROWS(ase->evolve_schema(nullptr)); REQUIRE_THROWS(ase->add_attribute(nullptr)); @@ -808,13 +810,19 @@ TEST_CASE( ase->set_timestamp_range(std::make_pair(1, 1)); auto schema = make_shared( - HERE(), tiledb::sm::ArrayType::SPARSE); + HERE(), + tiledb::sm::ArrayType::SPARSE, + tiledb::test::create_test_memory_tracker()); auto dim = make_shared( - HERE(), "dim1", tiledb::sm::Datatype::INT32); + HERE(), + "dim1", + tiledb::sm::Datatype::INT32, + tiledb::test::get_test_memory_tracker()); int range[2] = {0, 1000}; throw_if_not_ok(dim->set_domain(range)); - auto dom = make_shared(HERE()); + auto dom = make_shared( + HERE(), tiledb::test::get_test_memory_tracker()); throw_if_not_ok(dom->add_dimension(dim)); throw_if_not_ok(schema->set_domain(dom)); diff --git a/test/src/unit-cppapi-vfs.cc b/test/src/unit-cppapi-vfs.cc index 52bf25ac41f2..0a0c78bced4e 100644 --- a/test/src/unit-cppapi-vfs.cc +++ b/test/src/unit-cppapi-vfs.cc @@ -63,6 +63,7 @@ TEST_CASE("C++ API: Test VFS ls", "[cppapi][cppapi-vfs][cppapi-vfs-ls]") { std::string file2 = dir + "/file2"; std::string subdir = dir + "/subdir"; std::string subdir2 = dir + "/subdir2"; + std::string subdir_empty = dir + "/subdir_empty"; std::string subdir_file = subdir + "/file"; std::string subdir_file2 = subdir2 + "/file2"; 
@@ -71,6 +72,7 @@ TEST_CASE("C++ API: Test VFS ls", "[cppapi][cppapi-vfs][cppapi-vfs-ls]") { vfs.create_dir(dir); vfs.create_dir(subdir); vfs.create_dir(subdir2); + vfs.create_dir(subdir_empty); vfs.touch(file); vfs.touch(file2); vfs.touch(subdir_file); @@ -85,15 +87,17 @@ TEST_CASE("C++ API: Test VFS ls", "[cppapi][cppapi-vfs][cppapi-vfs-ls]") { file2 = tiledb::sm::path_win::uri_from_path(file2); subdir = tiledb::sm::path_win::uri_from_path(subdir); subdir2 = tiledb::sm::path_win::uri_from_path(subdir2); + subdir_empty = tiledb::sm::path_win::uri_from_path(subdir_empty); #endif // Check results std::sort(children.begin(), children.end()); - REQUIRE(children.size() == 4); + REQUIRE(children.size() == 5); CHECK(children[0] == file); CHECK(children[1] == file2); CHECK(children[2] == subdir); CHECK(children[3] == subdir2); + CHECK(children[4] == subdir_empty); // Clean up vfs.remove_dir(path); @@ -503,13 +507,18 @@ TEST_CASE( } } -TEST_CASE("CPP API: VFS ls_recursive filter", "[cppapi][vfs][ls-recursive]") { +using ls_recursive_test_types = + std::tuple; +TEMPLATE_LIST_TEST_CASE( + "CPP API: VFS ls_recursive filter", + "[cppapi][vfs][ls-recursive]", + ls_recursive_test_types) { using namespace tiledb::test; - S3Test s3_test({10, 100, 0}); - if (!s3_test.is_supported()) { + TestType test({10, 100, 0}); + if (!test.is_supported()) { return; } - auto expected_results = s3_test.expected_results(); + auto expected_results = test.expected_results(); vfs_config cfg; tiledb::Context ctx(tiledb::Config(&cfg.config)); @@ -534,14 +543,6 @@ TEST_CASE("CPP API: VFS ls_recursive filter", "[cppapi][vfs][ls-recursive]") { include = [](std::string_view, uint64_t) { return false; }; } - bool include_result = true; - SECTION("Custom filter (include half)") { - include = [&include_result](std::string_view, uint64_t) { - include_result = !include_result; - return include_result; - }; - } - SECTION("Custom filter (search for test_file_50)") { include = [](std::string_view object_name, 
uint64_t) { return object_name.find("test_file_50") != std::string::npos; @@ -553,21 +554,25 @@ TEST_CASE("CPP API: VFS ls_recursive filter", "[cppapi][vfs][ls-recursive]") { }; } SECTION("Custom filter (reject files over 50 bytes)") { - include = [](std::string_view, uint64_t size) { return size <= 50; }; + include = []([[maybe_unused]] std::string_view entry, uint64_t size) { + return size <= 50; + }; } // Test collecting results with LsInclude predicate. auto results = tiledb::VFSExperimental::ls_recursive_filter( - ctx, vfs, s3_test.temp_dir_.to_string(), include); + ctx, vfs, test.temp_dir_.to_string(), include); std::erase_if(expected_results, [&include](const auto& object) { return !include(object.first, object.second); }); + std::sort(results.begin(), results.end()); CHECK(results.size() == expected_results.size()); CHECK(expected_results == results); // Test collecting results with LsCallback, writing data into ls_objects. tiledb::VFSExperimental::ls_recursive( - ctx, vfs, s3_test.temp_dir_.to_string(), cb); + ctx, vfs, test.temp_dir_.to_string(), cb); + std::sort(ls_objects.begin(), ls_objects.end()); CHECK(ls_objects.size() == expected_results.size()); CHECK(expected_results == ls_objects); } diff --git a/test/src/unit-curl.cc b/test/src/unit-curl.cc index 7ae68275e38d..9241941ad2fb 100644 --- a/test/src/unit-curl.cc +++ b/test/src/unit-curl.cc @@ -31,13 +31,15 @@ */ #include -#include "tiledb/sm/rest/curl.h" -#ifdef _WIN32 -#include "tiledb/sm/filesystem/win.h" -#else -#include "tiledb/sm/filesystem/posix.h" -#endif +// clang-format off +#include "test/support/src/helpers.h" +#include "tiledb/sm/rest/rest_client.h" +#include "tiledb/sm/rest/curl.h" // Must be included last to avoid Windows.h +// clang-format on + +#include +#include using namespace tiledb::sm; @@ -91,3 +93,38 @@ TEST_CASE("CURL: Test curl's header parsing callback", "[curl]") { userdata.redirect_uri_map->find(ns_array)->second == "tiledb://my_username"); } + +TEST_CASE( + "RestClient: 
Remove trailing slash from rest_server_", "[rest-client]") { + std::string rest_server = + GENERATE("http://localhost:8080/", "http://localhost:8080//"); + tiledb::sm::Config cfg; + SECTION("rest.server_address set in Config") { + cfg.set("rest.server_address", rest_server).ok(); + } + SECTION("rest.server_address set in environment") { + setenv_local("TILEDB_REST_SERVER_ADDRESS", rest_server.c_str()); + } + SECTION("rest.server_address set by loaded config file") { + std::string cfg_file = "tiledb_config.txt"; + std::ofstream file(cfg_file); + file << "rest.server_address " << rest_server << std::endl; + file.close(); + cfg.load_from_file(cfg_file).ok(); + std::filesystem::remove(cfg_file); + } + + ThreadPool tp{1}; + ContextResources resources( + cfg, tiledb::test::g_helper_logger(), 1, 1, "test"); + tiledb::sm::RestClient rest_client; + REQUIRE(rest_client + .init( + &tiledb::test::g_helper_stats, + &cfg, + &tp, + tiledb::test::g_helper_logger(), + resources) + .ok()); + CHECK(rest_client.rest_server() == "http://localhost:8080"); +} diff --git a/test/src/unit-dimension.cc b/test/src/unit-dimension.cc index 990790d79418..dd8027052895 100644 --- a/test/src/unit-dimension.cc +++ b/test/src/unit-dimension.cc @@ -31,6 +31,7 @@ */ #include "test/support/src/helpers-dimension.h" +#include "test/support/src/mem_helpers.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/enums/datatype.h" #include "tiledb/sm/misc/hilbert.h" @@ -47,10 +48,11 @@ TEST_CASE( "Dimension: Test map_to_uint64, integers", "[dimension][map_to_uint64][int]") { // Create dimensions - Dimension d1("d1", Datatype::INT32); + auto memory_tracker = get_test_memory_tracker(); + Dimension d1("d1", Datatype::INT32, memory_tracker); int32_t dom1[] = {0, 100}; CHECK(d1.set_domain(dom1).ok()); - Dimension d2("d2", Datatype::INT32); + Dimension d2("d2", Datatype::INT32, memory_tracker); int32_t dom2[] = {0, 200}; CHECK(d2.set_domain(dom2).ok()); @@ -161,10 +163,11 @@ TEST_CASE( "Dimension: 
Test map_to_uint64, int32, negative", "[dimension][map_to_uint64][int32][negative]") { // Create dimensions - Dimension d1("d1", Datatype::INT32); + auto memory_tracker = get_test_memory_tracker(); + Dimension d1("d1", Datatype::INT32, memory_tracker); int32_t dom1[] = {-50, 50}; CHECK(d1.set_domain(dom1).ok()); - Dimension d2("d2", Datatype::INT32); + Dimension d2("d2", Datatype::INT32, memory_tracker); int32_t dom2[] = {-100, 100}; CHECK(d2.set_domain(dom2).ok()); @@ -275,10 +278,11 @@ TEST_CASE( "Dimension: Test map_to_uint64, float32", "[dimension][map_to_uint64][float32]") { // Create dimensions - Dimension d1("d1", Datatype::FLOAT32); + auto memory_tracker = get_test_memory_tracker(); + Dimension d1("d1", Datatype::FLOAT32, memory_tracker); float dom1[] = {0.0f, 1.0f}; CHECK(d1.set_domain(dom1).ok()); - Dimension d2("d2", Datatype::FLOAT32); + Dimension d2("d2", Datatype::FLOAT32, memory_tracker); float dom2[] = {0.0f, 2.0f}; CHECK(d2.set_domain(dom2).ok()); @@ -398,8 +402,9 @@ TEST_CASE( "Dimension: Test map_to_uint64, string", "[dimension][map_to_uint64][string]") { // Create dimensions - Dimension d1("d1", Datatype::STRING_ASCII); - Dimension d2("d2", Datatype::STRING_ASCII); + auto memory_tracker = get_test_memory_tracker(); + Dimension d1("d1", Datatype::STRING_ASCII, memory_tracker); + Dimension d2("d2", Datatype::STRING_ASCII, memory_tracker); // Create 2D hilbert curve (auxiliary here) Hilbert h(2); @@ -517,7 +522,8 @@ TEST_CASE( "Dimension: Test map_from_uint64, int32", "[dimension][map_from_uint64][int32]") { // Create dimensions - Dimension d1("d1", Datatype::INT32); + auto memory_tracker = get_test_memory_tracker(); + Dimension d1("d1", Datatype::INT32, memory_tracker); int32_t dom1[] = {0, 100}; CHECK(d1.set_domain(dom1).ok()); @@ -538,7 +544,8 @@ TEST_CASE( "Dimension: Test map_from_uint64, int32, negative", "[dimension][map_from_uint64][int32][negative]") { // Create dimensions - Dimension d1("d1", Datatype::INT32); + auto memory_tracker = 
get_test_memory_tracker(); + Dimension d1("d1", Datatype::INT32, memory_tracker); int32_t dom1[] = {-50, 50}; CHECK(d1.set_domain(dom1).ok()); @@ -559,7 +566,8 @@ TEST_CASE( "Dimension: Test map_from_uint64, float32", "[dimension][map_from_uint64][float32]") { // Create dimensions - Dimension d1("d1", Datatype::FLOAT32); + auto memory_tracker = get_test_memory_tracker(); + Dimension d1("d1", Datatype::FLOAT32, memory_tracker); float dom1[] = {0.0f, 1.0f}; CHECK(d1.set_domain(dom1).ok()); @@ -580,7 +588,8 @@ TEST_CASE( "Dimension: Test map_from_uint64, string", "[dimension][map_from_uint64][string]") { // Create dimensions - Dimension d1("d1", Datatype::STRING_ASCII); + auto memory_tracker = get_test_memory_tracker(); + Dimension d1("d1", Datatype::STRING_ASCII, memory_tracker); // Set number of buckets Hilbert h(2); @@ -611,7 +620,7 @@ double basic_verify_overlap_ratio( T range1_low, T range1_high, T range2_low, T range2_high) { auto r1 = TypedRange(range1_low, range1_high); auto r2 = TypedRange(range2_low, range2_high); - Dimension d("foo", RangeTraits::datatype); + Dimension d("foo", RangeTraits::datatype, get_test_memory_tracker()); auto ratio = d.overlap_ratio(r1, r2); CHECK(0.0 <= ratio); CHECK(ratio <= 1.0); diff --git a/test/src/unit-enumerations.cc b/test/src/unit-enumerations.cc index 6d9ae77052d3..c7f5747d0e24 100644 --- a/test/src/unit-enumerations.cc +++ b/test/src/unit-enumerations.cc @@ -32,7 +32,9 @@ #include +#include "test/support/src/mem_helpers.h" #include "test/support/tdb_catch.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array/array_directory.h" #include "tiledb/sm/array_schema/array_schema.h" @@ -101,7 +103,7 @@ struct EnumerationFx { void check_storage_deserialization(const std::vector& values); storage_size_t calculate_serialized_size(shared_ptr enmr); - WriterTile serialize_to_tile(shared_ptr enmr); + shared_ptr serialize_to_tile(shared_ptr enmr); template std::vector 
as_vector(shared_ptr enmr); @@ -114,7 +116,7 @@ struct EnumerationFx { shared_ptr get_array_schema_latest(); // Serialization helpers - ArraySchema ser_des_array_schema( + shared_ptr ser_des_array_schema( shared_ptr schema, bool client_side, SerializationType stype); @@ -139,6 +141,7 @@ struct EnumerationFx { void rm_array(); + shared_ptr memory_tracker_; URI uri_; Config cfg_; Context ctx_; @@ -149,6 +152,11 @@ template QueryCondition create_qc( const char* field_name, T condition_value, const QueryConditionOp& op); +QueryCondition create_qc( + const char* field_name, + std::vector values, + const QueryConditionOp& op); + /* ********************************* */ /* Testing Enumeration */ /* ********************************* */ @@ -156,7 +164,15 @@ QueryCondition create_qc( TEST_CASE_METHOD( EnumerationFx, "Create Empty Enumeration", "[enumeration][empty]") { Enumeration::create( - default_enmr_name, Datatype::INT32, 1, false, nullptr, 0, nullptr, 0); + default_enmr_name, + Datatype::INT32, + 1, + false, + nullptr, + 0, + nullptr, + 0, + memory_tracker_); } TEST_CASE_METHOD( @@ -171,7 +187,8 @@ TEST_CASE_METHOD( nullptr, 0, nullptr, - 0); + 0, + memory_tracker_); } TEST_CASE_METHOD( @@ -251,7 +268,9 @@ TEST_CASE_METHOD( nullptr, 0, &offsets, - sizeof(uint64_t)); + sizeof(uint64_t), + memory_tracker_); + std::vector values = {""}; check_enumeration( enmr, @@ -320,7 +339,8 @@ TEST_CASE_METHOD( values.data(), values.size() * sizeof(int), nullptr, - 0); + 0, + memory_tracker_); check_enumeration(enmr, default_enmr_name, values, Datatype::INT32, 2, false); } @@ -339,7 +359,8 @@ TEST_CASE_METHOD( nullptr, 10, nullptr, - 0), + 0, + memory_tracker_), matcher); } @@ -352,7 +373,15 @@ TEST_CASE_METHOD( "Invalid data size; must be non-zero for fixed size data."); REQUIRE_THROWS_WITH( Enumeration::create( - default_enmr_name, Datatype::INT32, 1, false, &val, 0, nullptr, 0), + default_enmr_name, + Datatype::INT32, + 1, + false, + &val, + 0, + nullptr, + 0, + memory_tracker_), 
matcher); } @@ -372,7 +401,8 @@ TEST_CASE_METHOD( val, strlen(val), nullptr, - 8), + 8, + memory_tracker_), matcher); } @@ -393,7 +423,8 @@ TEST_CASE_METHOD( val, strlen(val), &offset, - 0), + 0, + memory_tracker_), matcher); } @@ -414,7 +445,8 @@ TEST_CASE_METHOD( nullptr, 5, &offsets, - sizeof(uint64_t)), + sizeof(uint64_t), + memory_tracker_), matcher); } @@ -435,7 +467,8 @@ TEST_CASE_METHOD( nullptr, 5, &offsets, - sizeof(uint64_t)), + sizeof(uint64_t), + memory_tracker_), matcher); } @@ -457,7 +490,8 @@ TEST_CASE_METHOD( data, 2, &offsets, - sizeof(uint64_t)), + sizeof(uint64_t), + memory_tracker_), matcher); } @@ -474,7 +508,8 @@ TEST_CASE_METHOD( values.data(), values.size() * sizeof(int), nullptr, - 0)); + 0, + memory_tracker_)); } TEST_CASE_METHOD( @@ -490,7 +525,8 @@ TEST_CASE_METHOD( values.data(), values.size() * sizeof(int), nullptr, - 0)); + 0, + memory_tracker_)); } TEST_CASE_METHOD( @@ -507,7 +543,8 @@ TEST_CASE_METHOD( values.data(), values.size() * sizeof(int), nullptr, - 0)); + 0, + memory_tracker_)); } TEST_CASE_METHOD( @@ -523,7 +560,8 @@ TEST_CASE_METHOD( values.data(), values.size() * sizeof(int), nullptr, - 0)); + 0, + memory_tracker_)); } TEST_CASE_METHOD( @@ -539,7 +577,8 @@ TEST_CASE_METHOD( nullptr, values.size() * sizeof(int), nullptr, - 0)); + 0, + memory_tracker_)); } TEST_CASE_METHOD( @@ -555,7 +594,8 @@ TEST_CASE_METHOD( values.data(), 0, nullptr, - 0)); + 0, + memory_tracker_)); } TEST_CASE_METHOD( @@ -572,7 +612,8 @@ TEST_CASE_METHOD( data, strlen(data), nullptr, - offsets.size() * sizeof(uint64_t))); + offsets.size() * sizeof(uint64_t), + memory_tracker_)); } TEST_CASE_METHOD( @@ -589,7 +630,8 @@ TEST_CASE_METHOD( data, strlen(data), offsets.data(), - 0)); + 0, + memory_tracker_)); } TEST_CASE_METHOD( @@ -606,7 +648,8 @@ TEST_CASE_METHOD( values.data(), values.size() * sizeof(int), offsets.data(), - 0)); + 0, + memory_tracker_)); } TEST_CASE_METHOD( @@ -622,7 +665,8 @@ TEST_CASE_METHOD( values.data(), values.size() * sizeof(int), 
nullptr, - 100)); + 100, + memory_tracker_)); } TEST_CASE_METHOD( @@ -641,7 +685,8 @@ TEST_CASE_METHOD( data, strlen(data), offsets.data(), - 3)); + 3, + memory_tracker_)); } TEST_CASE_METHOD( @@ -659,7 +704,8 @@ TEST_CASE_METHOD( data, strlen(data), offsets.data(), - offsets.size() * sizeof(uint64_t))); + offsets.size() * sizeof(uint64_t), + memory_tracker_)); } TEST_CASE_METHOD( @@ -677,7 +723,8 @@ TEST_CASE_METHOD( values.data(), 3, nullptr, - 0)); + 0, + memory_tracker_)); } TEST_CASE_METHOD( @@ -747,7 +794,8 @@ TEST_CASE_METHOD( init_values.data(), init_values.size() * sizeof(int), nullptr, - 0); + 0, + memory_tracker_); auto enmr2 = extend_enumeration(enmr1, extend_values); check_enumeration( enmr2, default_enmr_name, final_values, Datatype::INT32, 2, false); @@ -939,12 +987,12 @@ TEST_CASE_METHOD( auto enmr = create_enumeration(values); auto tile = serialize_to_tile(enmr); - REQUIRE(tile.size() > 4); - auto data = tile.data(); + REQUIRE(tile->size() > 4); + auto data = tile->data(); memset(data, 1, 4); - Deserializer deserializer(tile.data(), tile.size()); - REQUIRE_THROWS(Enumeration::deserialize(deserializer)); + Deserializer deserializer(tile->data(), tile->size()); + REQUIRE_THROWS(Enumeration::deserialize(deserializer, memory_tracker_)); } TEST_CASE_METHOD( @@ -1114,9 +1162,8 @@ TEST_CASE_METHOD( auto enmr_path = schema->get_enumeration_path_name(enmr_name.value()); - MemoryTracker tracker; auto loaded = - ad->load_enumerations_from_paths({enmr_path}, enc_key_, tracker); + ad->load_enumerations_from_paths({enmr_path}, enc_key_, memory_tracker_); REQUIRE(loaded.size() == 1); auto enmr = loaded[0]; @@ -1138,7 +1185,6 @@ TEST_CASE_METHOD( auto schema = get_array_schema_latest(); auto ad = get_array_directory(); - MemoryTracker tracker; // Check that this function throws an exception when attempting to load // an unknown enumeration @@ -1147,7 +1193,8 @@ TEST_CASE_METHOD( auto windows_matcher = Catch::Matchers::ContainsSubstring( "The system cannot find the 
file specified."); REQUIRE_THROWS_WITH( - ad->load_enumerations_from_paths({"unknown_enmr"}, enc_key_, tracker), + ad->load_enumerations_from_paths( + {"unknown_enmr"}, enc_key_, memory_tracker_), posix_matcher || windows_matcher); } @@ -1163,21 +1210,20 @@ TEST_CASE_METHOD( auto enmr_name = schema->attribute("attr1")->get_enumeration_name(); auto enmr_path = schema->get_enumeration_path_name(enmr_name.value()); - MemoryTracker tracker; - tracker.set_budget(1); + memory_tracker_->set_budget(memory_tracker_->get_memory_usage() + 1); // Check that this function throws an exception when attempting to load // an enumeration that exceeds the memory budget. auto matcher = Catch::Matchers::ContainsSubstring( "Error loading enumeration; Insufficient memory budget;"); REQUIRE_THROWS_WITH( - ad->load_enumerations_from_paths({enmr_path}, enc_key_, tracker), + ad->load_enumerations_from_paths({enmr_path}, enc_key_, memory_tracker_), matcher); // Check that the fix is to increase the memory budget. - tracker.set_budget(std::numeric_limits::max()); + memory_tracker_->set_budget(std::numeric_limits::max()); REQUIRE_NOTHROW( - ad->load_enumerations_from_paths({enmr_path}, enc_key_, tracker)); + ad->load_enumerations_from_paths({enmr_path}, enc_key_, memory_tracker_)); } /* ********************************* */ @@ -1188,7 +1234,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Add Enumeration - Enumeration nullptr Error", "[enumeration][array-schema][error]") { - auto schema = make_shared(HERE()); + auto schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker_); REQUIRE_THROWS(schema->add_enumeration(nullptr)); } @@ -1196,7 +1243,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Add Basic Enumeration", "[enumeration][array-schema][basic]") { - auto schema = make_shared(HERE()); + auto schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker_); std::vector values = {1, 2, 3, 4, 5}; auto enmr = create_enumeration(values); @@ -1211,7 +1259,8 @@ TEST_CASE_METHOD( 
EnumerationFx, "ArraySchema - Get Enumeration", "[enumeration][array-schema][get]") { - auto schema = make_shared(HERE(), ArrayType::DENSE); + auto schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker_); std::vector values = {1, 2, 3, 4, 5}; auto enmr1 = create_enumeration(values); @@ -1225,7 +1274,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Get Missing Enumeration Error", "[enumeration][array-schema][error]") { - auto schema = make_shared(HERE(), ArrayType::SPARSE); + auto schema = + make_shared(HERE(), ArrayType::SPARSE, memory_tracker_); REQUIRE_THROWS(schema->get_enumeration("not_an_enumeration")); } @@ -1233,7 +1283,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Add Enumeration with Existing Enumeration of same Name", "[enumeration][array-schema][eror]") { - auto schema = make_shared(HERE(), ArrayType::SPARSE); + auto schema = + make_shared(HERE(), ArrayType::SPARSE, memory_tracker_); std::vector values = {1, 2, 3, 4, 5}; auto enmr = create_enumeration(values); @@ -1245,7 +1296,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Add Attribute with Missing Enumeration Error", "[enumeration][array-schema][eror]") { - auto schema = make_shared(HERE(), ArrayType::SPARSE); + auto schema = + make_shared(HERE(), ArrayType::SPARSE, memory_tracker_); auto attr = make_shared(HERE(), "an_attr", Datatype::INT32); attr->set_enumeration_name("not_an_enumeration"); REQUIRE(!schema->add_attribute(attr).ok()); @@ -1255,7 +1307,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Get All Enumeration Names Empty", "[enumeration][array-schema][get-all][empty]") { - auto schema = make_shared(HERE(), ArrayType::DENSE); + auto schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker_); auto enmr_names = schema->get_enumeration_names(); REQUIRE(enmr_names.size() == 0); } @@ -1264,7 +1317,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Get All Enumeration Names", "[enumeration][array-schema][get-all]") { - auto schema = make_shared(HERE(), 
ArrayType::DENSE); + auto schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker_); std::vector values = {1.0f, 1.1f, 1.2f, 1.3f, 1.4f}; auto enmr1 = create_enumeration(values); @@ -1283,7 +1337,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Attribute with Invalid Datatype", "[enumeration][array-schema][error][bad-attr-datatype]") { - auto schema = make_shared(HERE(), ArrayType::DENSE); + auto schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker_); std::vector values = {1, 2, 3, 4, 5}; auto enmr = create_enumeration(values); @@ -1298,7 +1353,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Attribute with Invalid Cell Val Num", "[enumeration][array-schema][error][bad-attr-cell-val-num]") { - auto schema = make_shared(HERE(), ArrayType::DENSE); + auto schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker_); std::vector values = {1, 2, 3, 4, 5}; auto enmr = create_enumeration(values); @@ -1314,7 +1370,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Store nullptr Enumeration Error", "[enumeration][array-schema][error][store-nullptr-enumeration]") { - auto schema = make_shared(HERE(), ArrayType::DENSE); + auto schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker_); REQUIRE_THROWS(schema->store_enumeration(nullptr)); } @@ -1322,7 +1379,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Store Enumeration Error", "[enumeration][array-schema][error][store-unknown-enumeration]") { - auto schema = make_shared(HERE(), ArrayType::DENSE); + auto schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker_); std::vector values = {1, 2, 3, 4, 5}; auto enmr = create_enumeration(values, false, Datatype::INT32, "unknown_enmr"); @@ -1333,7 +1391,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Store Enumeration Error - Already Loaded", "[enumeration][array-schema][error][store-loaded-enumeration]") { - auto schema = make_shared(HERE(), ArrayType::DENSE); + auto schema = + make_shared(HERE(), ArrayType::DENSE, 
memory_tracker_); std::vector values = {0, 1, 2, 100000000}; auto enmr = create_enumeration(values); @@ -1349,7 +1408,8 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Attribute Get Enumeration Name From Attribute", "[enumeration][array-schema][has-enumeration]") { - auto schema = make_shared(HERE(), ArrayType::SPARSE); + auto schema = + make_shared(HERE(), ArrayType::SPARSE, memory_tracker_); std::vector values = {"a", "spot", "of", "tea", "perhaps?"}; auto enmr = create_enumeration(values); @@ -1382,7 +1442,8 @@ TEST_CASE_METHOD( data.data(), data.size(), offsets.data(), - offsets.size() * constants::cell_var_offset_size); + offsets.size() * constants::cell_var_offset_size, + memory_tracker_); schema->add_enumeration(enmr); @@ -1391,6 +1452,18 @@ TEST_CASE_METHOD( REQUIRE_THROWS_WITH(schema->check(cfg_), matcher); } +TEST_CASE_METHOD( + EnumerationFx, + "ArraySchema - No Segfault on Check", + "[enumeration][array-schema][size-check]") { + create_array(); + auto array = get_array(QueryType::READ); + auto schema = array->array_schema_latest_ptr(); + // Schema has unloaded enumerations at this point. Make sure that check + // doesn't segfault. + REQUIRE_NOTHROW(schema->check(cfg_)); +} + TEST_CASE_METHOD( EnumerationFx, "ArraySchema - Many Large Enumerations", @@ -1411,7 +1484,8 @@ TEST_CASE_METHOD( data.data(), data.size(), offsets.data(), - offsets.size() * constants::cell_var_offset_size); + offsets.size() * constants::cell_var_offset_size, + memory_tracker_); schema->add_enumeration(enmr); } @@ -1426,10 +1500,8 @@ TEST_CASE_METHOD( "[enumeration][array-schema][copy-ctor]") { auto schema = create_schema(); - // Check that the schema is valid and that we can copy it using the - // copy constructor. + // Check that the schema is valid. 
CHECK_NOTHROW(schema->check(cfg_)); - CHECK_NOTHROW(make_shared(HERE(), *(schema.get()))); } TEST_CASE_METHOD( @@ -1508,7 +1580,8 @@ TEST_CASE_METHOD( auto array = get_array(QueryType::READ); array->load_all_enumerations(); - auto schema = make_shared(HERE(), array->array_schema_latest()); + auto schema = array->array_schema_latest().clone(); + auto enmr = create_empty_enumeration(Datatype::INT32, 1, false, "test_enmr"); auto matcher = Catch::Matchers::ContainsSubstring( @@ -1525,7 +1598,7 @@ TEST_CASE_METHOD( auto array = get_array(QueryType::READ); array->load_all_enumerations(); - auto schema = make_shared(HERE(), array->array_schema_latest()); + auto schema = array->array_schema_latest().clone(); auto enmr1 = schema->get_enumeration("test_enmr"); std::vector extra_values = {"manatee", "narwhal", "oppossum"}; @@ -1542,7 +1615,8 @@ TEST_CASE_METHOD( enmr2->data().data(), enmr2->data().size(), enmr2->offsets().data(), - enmr2->offsets().size()); + enmr2->offsets().size(), + memory_tracker_); auto matcher = Catch::Matchers::ContainsSubstring( "Enumeration path name for 'test_enmr' already exists in this schema."); @@ -1562,7 +1636,7 @@ TEST_CASE_METHOD( array->load_all_enumerations(); auto orig_schema = array->array_schema_latest_ptr(); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); auto attr3 = make_shared(HERE(), "attr3", Datatype::UINT32); ase->add_attribute(attr3); CHECK_NOTHROW(ase->evolve_schema(orig_schema)); @@ -1574,7 +1648,7 @@ TEST_CASE_METHOD( "[enumeration][array-schema-evolution][simple]") { create_array(); auto orig_schema = get_array_schema_latest(); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); std::vector values{0, 1, 2, 3, 4, 1000}; auto enmr = create_enumeration(values); @@ -1595,7 +1669,7 @@ TEST_CASE_METHOD( "[enumeration][array-schema-evolution][drop-add]") { create_array(); auto orig_schema = get_array_schema_latest(); - auto ase = make_shared(HERE()); + auto ase = 
make_shared(HERE(), memory_tracker_); std::vector values{0, 1, 2, 3, 4, 1000}; auto enmr = create_enumeration(values); @@ -1616,7 +1690,7 @@ TEST_CASE_METHOD( create_array(); auto orig_schema = get_array_schema_latest(); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); std::vector values{0, 1, 2, 3, 4, 1000}; auto enmr = create_enumeration(values); @@ -1635,7 +1709,7 @@ TEST_CASE_METHOD( "[enumeration][array-schema-evolution][enmr-to-add]") { create_array(); auto orig_schema = get_array_schema_latest(); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); std::vector values{0, 1, 2, 3, 4, 1000}; auto enmr1 = create_enumeration(values); @@ -1661,7 +1735,7 @@ TEST_CASE_METHOD( REQUIRE(old_enmr != nullptr); auto new_enmr = extend_enumeration(old_enmr, values_to_add); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); ase->extend_enumeration(new_enmr); CHECK_NOTHROW(ase->evolve_schema(orig_schema)); } @@ -1670,7 +1744,7 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchemaEvolution - Drop Enumeration", "[enumeration][array-schema-evolution][enmr-to-drop]") { - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); CHECK_NOTHROW(ase->drop_enumeration("test_enmr")); } @@ -1680,7 +1754,7 @@ TEST_CASE_METHOD( "[enumeration][array-schema-evolution][enmr-to-drop]") { create_array(); auto orig_schema = get_array_schema_latest(); - auto ase1 = make_shared(HERE()); + auto ase1 = make_shared(HERE(), memory_tracker_); std::vector values{0, 1, 2, 3, 4, 1000}; auto enmr1 = create_enumeration(values, false, Datatype::UINT64, "enmr"); @@ -1688,7 +1762,7 @@ TEST_CASE_METHOD( auto new_schema = ase1->evolve_schema(orig_schema); - auto ase2 = make_shared(HERE()); + auto ase2 = make_shared(HERE(), memory_tracker_); ase2->drop_enumeration("enmr"); CHECK_NOTHROW(ase2->evolve_schema(new_schema)); @@ -1698,7 +1772,7 @@ TEST_CASE_METHOD( EnumerationFx, 
"ArraySchemaEvolution - Drop Enumeration Repeated", "[enumeration][array-schema-evolution][enmr-to-drop-repeated]") { - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); CHECK_NOTHROW(ase->drop_enumeration("test_enmr")); CHECK_NOTHROW(ase->drop_enumeration("test_enmr")); } @@ -1707,7 +1781,7 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchemaEvolution - Drop Enumeration After Add", "[enumeration][array-schema-evolution][enmr-add-drop]") { - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); std::vector values{0, 1, 2, 3, 4, 1000}; auto enmr = create_enumeration(values, false, Datatype::UINT64, "enmr"); @@ -1722,7 +1796,7 @@ TEST_CASE_METHOD( "ArraySchemaEvolution - Enumeration to Add - nullptr", "[enumeration][array-schema-evolution][enmr-nullptr]") { create_array(); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); REQUIRE_THROWS(ase->add_enumeration(nullptr)); } @@ -1731,7 +1805,7 @@ TEST_CASE_METHOD( "ArraySchemaEvolution - Enumeration to Add - Already Added", "[enumeration][array-schema-evolution][enmr-already-added]") { create_array(); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); std::vector values{0, 1, 2, 3, 4, 1000}; auto enmr1 = create_enumeration(values, false, Datatype::UINT64, "enmr"); @@ -1745,7 +1819,7 @@ TEST_CASE_METHOD( "ArraySchemaEvolution - Enumeration to Add - Missing Name", "[enumeration][array-schema-evolution][missing-name]") { create_array(); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); REQUIRE(ase->enumeration_to_add("foo") == nullptr); } @@ -1755,7 +1829,7 @@ TEST_CASE_METHOD( "[enumeration][array-schema-evolution][enmr-still-in-use]") { create_array(); auto orig_schema = get_array_schema_latest(); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); ase->drop_enumeration("test_enmr"); REQUIRE_THROWS(ase->evolve_schema(orig_schema)); 
@@ -1770,7 +1844,7 @@ TEST_CASE_METHOD( auto attr3 = make_shared(HERE(), "attr3", Datatype::UINT32); attr3->set_enumeration_name("test_enmr"); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); ase->add_attribute(attr3); auto orig_schema = get_array_schema_latest(); @@ -1791,7 +1865,7 @@ TEST_CASE_METHOD( auto attr3 = make_shared(HERE(), "attr3", Datatype::INT8); attr3->set_enumeration_name("big_enmr"); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); ase->add_enumeration(enmr); ase->add_attribute(attr3); @@ -1813,7 +1887,7 @@ TEST_CASE_METHOD( auto attr3 = make_shared(HERE(), "attr3", Datatype::UINT8); attr3->set_enumeration_name("big_enmr"); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); ase->add_enumeration(enmr); ase->add_attribute(attr3); @@ -1825,7 +1899,7 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchemaEvolution - Extend Enumeration nullptr", "[enumeration][array-schema-evolution][extend][error]") { - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); auto matcher = Catch::Matchers::ContainsSubstring( "Cannot extend enumeration; Input enumeration is null"); REQUIRE_THROWS_WITH(ase->extend_enumeration(nullptr), matcher); @@ -1835,7 +1909,7 @@ TEST_CASE_METHOD( EnumerationFx, "ArraySchemaEvolution - Extend Enumeration Already Extended", "[enumeration][array-schema-evolution][extend][error]") { - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); std::vector values = {1, 2, 3, 4, 5}; auto enmr = create_enumeration(values); auto matcher = Catch::Matchers::ContainsSubstring( @@ -1889,6 +1963,81 @@ TEST_CASE_METHOD( REQUIRE(data2.rvalue_as() == 2); } +TEST_CASE_METHOD( + EnumerationFx, + "QueryCondition - Non-Enumeration Values Are Always False", + "[enumeration][query-condition][rewrite-enumeration-value]") { + create_array(); + auto array = get_array(QueryType::READ); + auto schema = 
array->array_schema_latest_ptr(); + + // This is normally invoked by the query class when not being tested. It's + // required here so that the enumeration's data is loaded. + array->get_enumeration("test_enmr"); + + // Create two copies of the same query condition for assertions + auto qc1 = create_qc("attr1", "cthulu", QueryConditionOp::EQ); + auto qc2 = qc1; + + qc2.rewrite_enumeration_conditions(*(schema.get())); + + // Assert that the rewritten tree keeps the expression kind and field name + // but carries different data, verifying that it has been rewritten. + auto& tree1 = qc1.ast(); + auto& tree2 = qc2.ast(); + + REQUIRE(tree1->is_expr() == false); + REQUIRE(tree1->get_field_name() == "attr1"); + + REQUIRE(tree2->is_expr() == tree1->is_expr()); + REQUIRE(tree2->get_field_name() == tree1->get_field_name()); + + auto data1 = tree1->get_data(); + auto data2 = tree2->get_data(); + REQUIRE(data2.size() != data1.size()); + + // "cthulu" is converted to a 4-byte int with value 0 + REQUIRE(data2.size() == 4); + REQUIRE(data2.rvalue_as() == 0); +} + +TEST_CASE_METHOD( + EnumerationFx, + "QueryCondition - Non-Enumeration Set Members Are Ignored", + "[enumeration][query-condition][rewrite-enumeration-value]") { + create_array(); + auto array = get_array(QueryType::READ); + auto schema = array->array_schema_latest_ptr(); + + // This is normally invoked by the query class when not being tested. It's + // required here so that the enumeration's data is loaded. + array->get_enumeration("test_enmr"); + + // Create two copies of the same query condition for assertions + std::vector vals = {"ant", "bat", "cthulhu"}; + auto qc1 = create_qc("attr1", vals, QueryConditionOp::IN); + auto qc2 = qc1; + + qc2.rewrite_enumeration_conditions(*(schema.get())); + + // Assert that the rewritten tree keeps the expression kind and field name + // but carries different data, verifying that it has been rewritten. 
+ auto& tree1 = qc1.ast(); + auto& tree2 = qc2.ast(); + + REQUIRE(tree1->is_expr() == false); + REQUIRE(tree1->get_field_name() == "attr1"); + + REQUIRE(tree2->is_expr() == tree1->is_expr()); + REQUIRE(tree2->get_field_name() == tree1->get_field_name()); + + auto data1 = tree1->get_data(); + auto data2 = tree2->get_data(); + REQUIRE(data2.size() != data1.size()); + REQUIRE(data2.size() == 8); + REQUIRE(tree2->get_offsets().size() == 16); +} + TEST_CASE_METHOD( EnumerationFx, "QueryCondition - Rewrite Enumeration Value After Extension", @@ -1903,18 +2052,17 @@ TEST_CASE_METHOD( auto qc1 = create_qc("attr1", std::string("gerbil"), QueryConditionOp::EQ); auto qc2 = qc1; - // Check that we fail the rewrite before extension. - auto matcher = Catch::Matchers::ContainsSubstring( - "Enumeration value not found for field 'attr1'"); - REQUIRE_THROWS_WITH( - qc1.rewrite_enumeration_conditions(*(schema.get())), matcher); + // Check that the value was converted to 0. + REQUIRE_NOTHROW(qc1.rewrite_enumeration_conditions(*(schema.get()))); + REQUIRE(qc1.ast()->get_op() == QueryConditionOp::ALWAYS_FALSE); + REQUIRE(qc1.ast()->get_data().rvalue_as() == 0); // Extend enumeration via schema evolution. 
std::vector values_to_add = {"firefly", "gerbil", "hamster"}; auto old_enmr = schema->get_enumeration("test_enmr"); auto new_enmr = extend_enumeration(old_enmr, values_to_add); - auto ase = make_shared(HERE()); + auto ase = make_shared(HERE(), memory_tracker_); ase->extend_enumeration(new_enmr); auto st = ctx_.storage_manager()->array_evolve_schema( array->array_uri(), ase.get(), array->get_encryption_key()); @@ -2081,11 +2229,11 @@ TEST_CASE_METHOD( auto schema2 = ser_des_array_schema(schema1, client_side, ser_type); auto all_names1 = schema1->get_enumeration_names(); - auto all_names2 = schema2.get_enumeration_names(); + auto all_names2 = schema2->get_enumeration_names(); REQUIRE(vec_cmp(all_names1, all_names2)); auto loaded_names1 = schema1->get_loaded_enumeration_names(); - auto loaded_names2 = schema2.get_loaded_enumeration_names(); + auto loaded_names2 = schema2->get_loaded_enumeration_names(); REQUIRE(vec_cmp(loaded_names1, loaded_names2)); // This is a new schema in RAM, so the loaded names should be the same @@ -2107,13 +2255,13 @@ TEST_CASE_METHOD( auto schema2 = ser_des_array_schema(schema1, client_side, ser_type); auto all_names1 = schema1->get_enumeration_names(); - auto all_names2 = schema2.get_enumeration_names(); + auto all_names2 = schema2->get_enumeration_names(); REQUIRE(vec_cmp(all_names1, all_names2)); // This schema was deserialized from disk without any enumerations loaded // so both of these should be empty. 
auto loaded_names1 = schema1->get_loaded_enumeration_names(); - auto loaded_names2 = schema2.get_loaded_enumeration_names(); + auto loaded_names2 = schema2->get_loaded_enumeration_names(); REQUIRE(loaded_names1.empty()); REQUIRE(loaded_names2.empty()); @@ -2131,7 +2279,15 @@ TEST_CASE_METHOD( auto schema1 = create_schema(); auto enmr1 = Enumeration::create( - "empty_fixed", Datatype::INT32, 1, false, nullptr, 0, nullptr, 0); + "empty_fixed", + Datatype::INT32, + 1, + false, + nullptr, + 0, + nullptr, + 0, + memory_tracker_); auto enmr2 = Enumeration::create( "empty_var", Datatype::STRING_ASCII, @@ -2140,7 +2296,8 @@ TEST_CASE_METHOD( nullptr, 0, nullptr, - 0); + 0, + memory_tracker_); schema1->add_enumeration(enmr1); schema1->add_enumeration(enmr2); @@ -2148,7 +2305,7 @@ TEST_CASE_METHOD( auto schema2 = ser_des_array_schema(schema1, client_side, ser_type); auto all_names1 = schema1->get_enumeration_names(); - auto all_names2 = schema2.get_enumeration_names(); + auto all_names2 = schema2->get_enumeration_names(); REQUIRE(vec_cmp(all_names1, all_names2)); } @@ -2168,7 +2325,7 @@ TEST_CASE_METHOD( auto attr = make_shared(HERE(), "ohai", Datatype::INT64); attr->set_enumeration_name("enmr2"); - ArraySchemaEvolution ase1; + ArraySchemaEvolution ase1(memory_tracker_); ase1.add_attribute(attr); ase1.add_enumeration(enmr1); ase1.add_enumeration(enmr2); @@ -2202,7 +2359,7 @@ TEST_CASE_METHOD( std::vector values2 = {1.0, 2.0, 3.0, 4.0, 5.0}; auto enmr2 = create_enumeration(values2, true, Datatype::FLOAT64, "enmr2"); - ArraySchemaEvolution ase1; + ArraySchemaEvolution ase1(memory_tracker_); ase1.extend_enumeration(enmr1); ase1.extend_enumeration(enmr2); @@ -2397,10 +2554,12 @@ struct TypeParams { }; EnumerationFx::EnumerationFx() - : uri_("enumeration_test_array") + : memory_tracker_(tiledb::test::create_test_memory_tracker()) + , uri_("enumeration_test_array") , ctx_(cfg_) { rm_array(); throw_if_not_ok(enc_key_.set_key(EncryptionType::NO_ENCRYPTION, nullptr, 0)); + 
// memory_tracker_ is already created in the member-initializer list. } EnumerationFx::~EnumerationFx() { @@ -2434,7 +2593,8 @@ shared_ptr EnumerationFx::create_enumeration( raw_values.data(), raw_values.size() * sizeof(uint8_t), nullptr, - 0); + 0, + memory_tracker_); } else if constexpr (std::is_pod_v) { return Enumeration::create( name, @@ -2444,7 +2604,8 @@ shared_ptr EnumerationFx::create_enumeration( values.data(), values.size() * sizeof(T), nullptr, - 0); + 0, + memory_tracker_); } else { uint64_t total_size = 0; for (auto v : values) { @@ -2470,14 +2631,23 @@ shared_ptr EnumerationFx::create_enumeration( data.data(), total_size, offsets.data(), - offsets.size() * sizeof(uint64_t)); + offsets.size() * sizeof(uint64_t), + memory_tracker_); } } shared_ptr EnumerationFx::create_empty_enumeration( Datatype type, uint32_t cell_val_num, bool ordered, std::string name) { return Enumeration::create( - name, type, cell_val_num, ordered, nullptr, 0, nullptr, 0); + name, + type, + cell_val_num, + ordered, + nullptr, + 0, + nullptr, + 0, + memory_tracker_); } template @@ -2541,7 +2711,7 @@ template void EnumerationFx::check_storage_serialization(const std::vector& values) { auto enmr = create_enumeration(values); auto tile = serialize_to_tile(enmr); - REQUIRE(tile.size() == calculate_serialized_size(enmr)); + REQUIRE(tile->size() == calculate_serialized_size(enmr)); } template @@ -2550,8 +2720,8 @@ void EnumerationFx::check_storage_deserialization( auto enmr = create_enumeration(values); auto tile = serialize_to_tile(enmr); - Deserializer deserializer(tile.data(), tile.size()); - auto deserialized = Enumeration::deserialize(deserializer); + Deserializer deserializer(tile->data(), tile->size()); + auto deserialized = Enumeration::deserialize(deserializer, memory_tracker_); REQUIRE(deserialized->name() == enmr->name()); REQUIRE(deserialized->path_name().empty() == false); @@ -2620,13 +2790,13 @@ storage_size_t EnumerationFx::calculate_serialized_size( return num_bytes; } 
-WriterTile EnumerationFx::serialize_to_tile( +shared_ptr EnumerationFx::serialize_to_tile( shared_ptr enmr) { SizeComputationSerializer size_serializer; enmr->serialize(size_serializer); - WriterTile tile{WriterTile::from_generic(size_serializer.size())}; - Serializer serializer(tile.data(), tile.size()); + auto tile{WriterTile::from_generic(size_serializer.size(), memory_tracker_)}; + Serializer serializer(tile->data(), tile->size()); enmr->serialize(serializer); return tile; @@ -2671,13 +2841,15 @@ std::vector EnumerationFx::as_vector(shared_ptr enmr) { shared_ptr EnumerationFx::create_schema() { // Create a schema to serialize - auto schema = make_shared(HERE(), ArrayType::SPARSE); + auto schema = + make_shared(HERE(), ArrayType::SPARSE, memory_tracker_); - auto dim = make_shared(HERE(), "dim1", Datatype::INT32); + auto dim = + make_shared(HERE(), "dim1", Datatype::INT32, memory_tracker_); int range[2] = {0, 1000}; throw_if_not_ok(dim->set_domain(range)); - auto dom = make_shared(HERE()); + auto dom = make_shared(HERE(), memory_tracker_); throw_if_not_ok(dom->add_dimension(dim)); throw_if_not_ok(schema->set_domain(dom)); @@ -2723,19 +2895,19 @@ shared_ptr EnumerationFx::get_array_directory() { shared_ptr EnumerationFx::get_array_schema_latest() { auto array_dir = get_array_directory(); - return array_dir->load_array_schema_latest(enc_key_); + return array_dir->load_array_schema_latest(enc_key_, memory_tracker_); } #ifdef TILEDB_SERIALIZATION -ArraySchema EnumerationFx::ser_des_array_schema( +shared_ptr EnumerationFx::ser_des_array_schema( shared_ptr schema, bool client_side, SerializationType stype) { Buffer buf; throw_if_not_ok(serialization::array_schema_serialize( *(schema.get()), stype, &buf, client_side)); - return serialization::array_schema_deserialize(stype, buf); + return serialization::array_schema_deserialize(stype, buf, memory_tracker_); } shared_ptr EnumerationFx::ser_des_array_schema_evolution( @@ -2745,8 +2917,8 @@ shared_ptr 
EnumerationFx::ser_des_array_schema_evolution( ase, stype, &buf, client_side)); ArraySchemaEvolution* ret; - throw_if_not_ok( - serialization::array_schema_evolution_deserialize(&ret, stype, buf)); + throw_if_not_ok(serialization::array_schema_evolution_deserialize( + &ret, stype, buf, memory_tracker_)); return shared_ptr(ret); } @@ -2778,13 +2950,13 @@ void EnumerationFx::ser_des_array( SerializationType stype) { Buffer buf; throw_if_not_ok(serialization::array_serialize(in, stype, &buf, client_side)); - throw_if_not_ok( - serialization::array_deserialize(out, stype, buf, ctx.storage_manager())); + throw_if_not_ok(serialization::array_deserialize( + out, stype, buf, ctx.storage_manager(), memory_tracker_)); } #else // No TILEDB_SERIALIZATION -ArraySchema EnumerationFx::ser_des_array_schema( +shared_ptr EnumerationFx::ser_des_array_schema( shared_ptr, bool, SerializationType) { throw std::logic_error("Serialization not enabled."); } @@ -2854,3 +3026,32 @@ QueryCondition create_qc( return ret; } + +QueryCondition create_qc( + const char* field_name, + std::vector values, + const QueryConditionOp& op) { + std::vector data; + std::vector offsets; + + uint64_t data_size = 0; + for (auto& val : values) { + data_size += val.size(); + } + + data.resize(data_size); + uint64_t curr_offset = 0; + for (auto& val : values) { + offsets.push_back(curr_offset); + memcpy(data.data() + curr_offset, val.data(), val.size()); + curr_offset += val.size(); + } + + return QueryCondition( + field_name, + data.data(), + data.size(), + offsets.data(), + offsets.size() * sizeof(uint64_t), + op); +} diff --git a/test/src/unit-filter-pipeline.cc b/test/src/unit-filter-pipeline.cc index 6e20ce2cb97d..84e0060b5f22 100644 --- a/test/src/unit-filter-pipeline.cc +++ b/test/src/unit-filter-pipeline.cc @@ -68,32 +68,41 @@ using namespace tiledb; using namespace tiledb::common; using namespace tiledb::sm; -WriterTile make_increasing_tile(const uint64_t nelts) { +shared_ptr make_increasing_tile( + 
const uint64_t nelts, shared_ptr tracker) { const uint64_t tile_size = nelts * sizeof(uint64_t); const uint64_t cell_size = sizeof(uint64_t); - WriterTile tile( - constants::format_version, Datatype::UINT64, cell_size, tile_size); + auto tile = make_shared( + HERE(), + constants::format_version, + Datatype::UINT64, + cell_size, + tile_size, + tracker); for (uint64_t i = 0; i < nelts; i++) { - CHECK_NOTHROW(tile.write(&i, i * sizeof(uint64_t), sizeof(uint64_t))); + CHECK_NOTHROW(tile->write(&i, i * sizeof(uint64_t), sizeof(uint64_t))); } return tile; } -WriterTile make_offsets_tile(std::vector& offsets) { +shared_ptr make_offsets_tile( + std::vector& offsets, shared_ptr tracker) { const uint64_t offsets_tile_size = offsets.size() * constants::cell_var_offset_size; - WriterTile offsets_tile( + auto offsets_tile = make_shared( + HERE(), constants::format_version, Datatype::UINT64, constants::cell_var_offset_size, - offsets_tile_size); + offsets_tile_size, + tracker); // Set up test data for (uint64_t i = 0; i < offsets.size(); i++) { - CHECK_NOTHROW(offsets_tile.write( + CHECK_NOTHROW(offsets_tile->write( &offsets[i], i * constants::cell_var_offset_size, constants::cell_var_offset_size)); @@ -102,16 +111,19 @@ WriterTile make_offsets_tile(std::vector& offsets) { return offsets_tile; } -Tile create_tile_for_unfiltering(uint64_t nelts, WriterTile& tile) { - Tile ret( - tile.format_version(), - tile.type(), - tile.cell_size(), +Tile create_tile_for_unfiltering( + uint64_t nelts, + shared_ptr tile, + shared_ptr tracker) { + return { + tile->format_version(), + tile->type(), + tile->cell_size(), 0, - tile.cell_size() * nelts, - tile.filtered_buffer().data(), - tile.filtered_buffer().size()); - return ret; + tile->cell_size() * nelts, + tile->filtered_buffer().data(), + tile->filtered_buffer().size(), + tracker}; } void run_reverse( @@ -136,509 +148,29 @@ void run_reverse( .ok()); } -/** - * Simple filter that modifies the input stream by adding 1 to every input - * 
element. - */ -class Add1InPlace : public tiledb::sm::Filter { - public: - // Just use a dummy filter type - Add1InPlace(Datatype filter_data_type) - : Filter(FilterType::FILTER_NONE, filter_data_type) { - } - - void dump(FILE* out) const override { - (void)out; - } - - Status run_forward( - const WriterTile&, - WriterTile* const, - FilterBuffer* input_metadata, - FilterBuffer* input, - FilterBuffer* output_metadata, - FilterBuffer* output) const override { - auto input_size = input->size(); - RETURN_NOT_OK(output->append_view(input)); - output->reset_offset(); - - uint64_t nelts = input_size / sizeof(uint64_t); - for (uint64_t i = 0; i < nelts; i++) { - uint64_t* val = output->value_ptr(); - *val += 1; - output->advance_offset(sizeof(uint64_t)); - } - - // Metadata not modified by this filter. - RETURN_NOT_OK(output_metadata->append_view(input_metadata)); - - return Status::Ok(); - } - - Status run_reverse( - const Tile&, - Tile*, - FilterBuffer* input_metadata, - FilterBuffer* input, - FilterBuffer* output_metadata, - FilterBuffer* output, - const tiledb::sm::Config& config) const override { - (void)config; - - auto input_size = input->size(); - RETURN_NOT_OK(output->append_view(input)); - output->reset_offset(); - - uint64_t nelts = input_size / sizeof(uint64_t); - for (uint64_t i = 0; i < nelts; i++) { - uint64_t* val = output->value_ptr(); - *val -= 1; - output->advance_offset(sizeof(uint64_t)); - } - - // Metadata not modified by this filter. - RETURN_NOT_OK(output_metadata->append_view(input_metadata)); - - return Status::Ok(); - } - - Add1InPlace* clone_impl() const override { - return new Add1InPlace(filter_data_type_); - } -}; - -/** - * Simple filter that increments every element of the input stream, writing the - * output to a new buffer. Does not modify the input stream. 
- */ -class Add1OutOfPlace : public tiledb::sm::Filter { - public: - // Just use a dummy filter type - Add1OutOfPlace(Datatype filter_data_type) - : Filter(FilterType::FILTER_NONE, filter_data_type) { - } - - void dump(FILE* out) const override { - (void)out; - } - - Status run_forward( - const WriterTile&, - WriterTile* const, - FilterBuffer* input_metadata, - FilterBuffer* input, - FilterBuffer* output_metadata, - FilterBuffer* output) const override { - auto input_size = input->size(); - auto nelts = input_size / sizeof(uint64_t); - - // Add a new output buffer. - RETURN_NOT_OK(output->prepend_buffer(input_size)); - output->reset_offset(); - - for (uint64_t i = 0; i < nelts; i++) { - uint64_t inc; - RETURN_NOT_OK(input->read(&inc, sizeof(uint64_t))); - inc++; - RETURN_NOT_OK(output->write(&inc, sizeof(uint64_t))); - } - - // Finish any remaining bytes to ensure no data loss. - auto rem = input_size % sizeof(uint64_t); - for (unsigned i = 0; i < rem; i++) { - char byte; - RETURN_NOT_OK(input->read(&byte, sizeof(char))); - RETURN_NOT_OK(output->write(&byte, sizeof(char))); - } - - // Metadata not modified by this filter. - RETURN_NOT_OK(output_metadata->append_view(input_metadata)); - - return Status::Ok(); - } - - Status run_reverse( - const Tile&, - Tile*, - FilterBuffer* input_metadata, - FilterBuffer* input, - FilterBuffer* output_metadata, - FilterBuffer* output, - const tiledb::sm::Config& config) const override { - (void)config; - - auto input_size = input->size(); - auto nelts = input->size() / sizeof(uint64_t); - - // Add a new output buffer. 
- RETURN_NOT_OK(output->prepend_buffer(input->size())); - output->reset_offset(); - - for (uint64_t i = 0; i < nelts; i++) { - uint64_t inc; - RETURN_NOT_OK(input->read(&inc, sizeof(uint64_t))); - inc--; - RETURN_NOT_OK(output->write(&inc, sizeof(uint64_t))); - } - - auto rem = input_size % sizeof(uint64_t); - for (unsigned i = 0; i < rem; i++) { - char byte; - RETURN_NOT_OK(input->read(&byte, sizeof(char))); - RETURN_NOT_OK(output->write(&byte, sizeof(char))); - } - - // Metadata not modified by this filter. - RETURN_NOT_OK(output_metadata->append_view(input_metadata)); - - return Status::Ok(); - } - - Add1OutOfPlace* clone_impl() const override { - return new Add1OutOfPlace(filter_data_type_); - } -}; - -/** - * Simple filter which computes the sum of its input and prepends the sum - * to the output. In reverse execute, checks that the sum is correct. - */ -class PseudoChecksumFilter : public tiledb::sm::Filter { - public: - // Just use a dummy filter type - PseudoChecksumFilter(Datatype filter_data_type) - : Filter(FilterType::FILTER_NONE, filter_data_type) { - } - - void dump(FILE* out) const override { - (void)out; - } - - Status run_forward( - const WriterTile&, - WriterTile* const, - FilterBuffer* input_metadata, - FilterBuffer* input, - FilterBuffer* output_metadata, - FilterBuffer* output) const override { - auto input_size = input->size(); - auto nelts = input_size / sizeof(uint64_t); - - // The input is unmodified by this filter. - RETURN_NOT_OK(output->append_view(input)); - - // Forward the existing metadata and prepend a metadata buffer for the - // checksum. 
- RETURN_NOT_OK(output_metadata->append_view(input_metadata)); - RETURN_NOT_OK(output_metadata->prepend_buffer(sizeof(uint64_t))); - output_metadata->reset_offset(); - - uint64_t sum = 0; - for (uint64_t i = 0; i < nelts; i++) { - uint64_t val; - RETURN_NOT_OK(input->read(&val, sizeof(uint64_t))); - sum += val; - } - - RETURN_NOT_OK(output_metadata->write(&sum, sizeof(uint64_t))); - - return Status::Ok(); - } - - Status run_reverse( - const Tile&, - Tile*, - FilterBuffer* input_metadata, - FilterBuffer* input, - FilterBuffer* output_metadata, - FilterBuffer* output, - const tiledb::sm::Config& config) const override { - (void)config; - - auto input_size = input->size(); - auto nelts = input_size / sizeof(uint64_t); - - uint64_t input_sum; - RETURN_NOT_OK(input_metadata->read(&input_sum, sizeof(uint64_t))); - - uint64_t sum = 0; - for (uint64_t i = 0; i < nelts; i++) { - uint64_t val; - RETURN_NOT_OK(input->read(&val, sizeof(uint64_t))); - sum += val; - } - - if (sum != input_sum) - return Status_FilterError("Filter error; sum does not match."); - - // The output metadata is just a view on the input metadata, skipping the - // checksum bytes. - RETURN_NOT_OK(output_metadata->append_view( - input_metadata, - sizeof(uint64_t), - input_metadata->size() - sizeof(uint64_t))); - - // The output data is just a view on the unmodified input. - RETURN_NOT_OK(output->append_view(input)); - - return Status::Ok(); - } - - PseudoChecksumFilter* clone_impl() const override { - return new PseudoChecksumFilter(filter_data_type_); - } -}; - -TEST_CASE("Filter: Test compression", "[filter][compression]") { - tiledb::sm::Config config; - - const uint64_t nelts = 100; - auto tile = make_increasing_tile(nelts); - - // Set up dummy array schema (needed by compressor filter for cell size, etc). 
- uint32_t dim_dom[] = {1, 10}; - auto dim{make_shared(HERE(), "", Datatype::INT32)}; - CHECK(dim->set_domain(dim_dom).ok()); - auto domain{make_shared(HERE())}; - CHECK(domain->add_dimension(dim).ok()); - tiledb::sm::ArraySchema schema; - tiledb::sm::Attribute attr("attr", Datatype::UINT64); - CHECK(schema.add_attribute(make_shared(HERE(), attr)) - .ok()); - CHECK(schema.set_domain(domain).ok()); - - FilterPipeline pipeline; - ThreadPool tp(4); - - SECTION("- Simple") { - pipeline.add_filter(Add1InPlace(Datatype::UINT64)); - pipeline.add_filter(Add1OutOfPlace(Datatype::UINT64)); - pipeline.add_filter( - CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); - - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); - // Check compression worked - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() < nelts * sizeof(uint64_t)); - - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); - run_reverse(config, tp, unfiltered_tile, pipeline); - - // Check all elements original values. - for (uint64_t i = 0; i < nelts; i++) { - uint64_t elt = 0; - CHECK_NOTHROW( - unfiltered_tile.read(&elt, i * sizeof(uint64_t), sizeof(uint64_t))); - CHECK(elt == i); - } - } - - SECTION("- With checksum stage") { - pipeline.add_filter(PseudoChecksumFilter(Datatype::UINT64)); - pipeline.add_filter( - CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); - - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); - // Check compression worked - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() < nelts * sizeof(uint64_t)); - - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); - run_reverse(config, tp, unfiltered_tile, pipeline); - - // Check all elements original values. 
- for (uint64_t i = 0; i < nelts; i++) { - uint64_t elt = 0; - CHECK_NOTHROW( - unfiltered_tile.read(&elt, i * sizeof(uint64_t), sizeof(uint64_t))); - CHECK(elt == i); - } - } - - SECTION("- With multiple stages") { - pipeline.add_filter(Add1InPlace(Datatype::UINT64)); - pipeline.add_filter(PseudoChecksumFilter(Datatype::UINT64)); - pipeline.add_filter(Add1OutOfPlace(Datatype::UINT64)); - pipeline.add_filter( - CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); - - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); - // Check compression worked - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() < nelts * sizeof(uint64_t)); - - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); - run_reverse(config, tp, unfiltered_tile, pipeline); - - // Check all elements original values. - for (uint64_t i = 0; i < nelts; i++) { - uint64_t elt = 0; - CHECK_NOTHROW( - unfiltered_tile.read(&elt, i * sizeof(uint64_t), sizeof(uint64_t))); - CHECK(elt == i); - } - } -} - -TEST_CASE("Filter: Test compression var", "[filter][compression][var]") { - tiledb::sm::Config config; - - const uint64_t nelts = 100; - auto tile = make_increasing_tile(nelts); - - // Set up test data - std::vector sizes{ - 0, - 32, // Chunk0: 4 cells. - 80, // 10 cells, still makes it into this chunk as current size < 50%. - 48, // Chunk1: 6 cells. - 88, // Chunk2: 11 cells, new size > 50% and > than 10 cells. - 56, // Chunk3: 7 cells. - 72, // Chunk4: 9 cells, new size > 50%. - 8, // Chunk4: 10 cell, full. - 80, // Chunk5: 10 cells. - 160, // Chunk6: 20 cells. - 16, // Chunk7: 2 cells. - 16, // Chunk7: 4 cells. - 16, // Chunk7: 6 cells. - 16, // Chunk7: 8 cells. - 16, // Chunk7: 10 cells. - }; // Chunk8: 12 cells. 
- - std::vector out_sizes{112, 48, 88, 56, 80, 80, 160, 80, 96}; - - std::vector offsets(sizes.size()); - uint64_t offset = 0; - for (uint64_t i = 0; i < offsets.size() - 1; i++) { - offsets[i] = offset; - offset += sizes[i + 1]; - } - offsets[offsets.size() - 1] = offset; - - auto offsets_tile = make_offsets_tile(offsets); - - // Set up dummy array schema (needed by compressor filter for cell size, etc). - uint32_t dim_dom[] = {1, 10}; - auto dim{make_shared(HERE(), "", Datatype::INT32)}; - CHECK(dim->set_domain(dim_dom).ok()); - auto domain{make_shared(HERE())}; - CHECK(domain->add_dimension(dim).ok()); - tiledb::sm::ArraySchema schema; - tiledb::sm::Attribute attr("attr", Datatype::UINT64); - CHECK(schema.add_attribute(make_shared(HERE(), attr)) - .ok()); - CHECK(schema.set_domain(domain).ok()); - - FilterPipeline pipeline; - ThreadPool tp(4); - - SECTION("- Simple") { - WriterTile::set_max_tile_chunk_size(80); - pipeline.add_filter(Add1InPlace(Datatype::UINT64)); - pipeline.add_filter(Add1OutOfPlace(Datatype::UINT64)); - pipeline.add_filter( - CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); - - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) - .ok()); - // Check number of chunks - CHECK(tile.size() == 0); - CHECK( - tile.filtered_buffer().value_at_as(0) == - 9); // Number of chunks - - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); - run_reverse(config, tp, unfiltered_tile, pipeline); - - // Check all elements original values. 
- for (uint64_t i = 0; i < nelts; i++) { - uint64_t elt = 0; - CHECK_NOTHROW( - unfiltered_tile.read(&elt, i * sizeof(uint64_t), sizeof(uint64_t))); - CHECK(elt == i); - } - } - - SECTION("- With checksum stage") { - WriterTile::set_max_tile_chunk_size(80); - pipeline.add_filter(PseudoChecksumFilter(Datatype::UINT64)); - pipeline.add_filter( - CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); - - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) - .ok()); - // Check number of chunks - CHECK(tile.size() == 0); - CHECK( - tile.filtered_buffer().value_at_as(0) == - 9); // Number of chunks - - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); - run_reverse(config, tp, unfiltered_tile, pipeline); - - // Check all elements original values. - for (uint64_t i = 0; i < nelts; i++) { - uint64_t elt = 0; - CHECK_NOTHROW( - unfiltered_tile.read(&elt, i * sizeof(uint64_t), sizeof(uint64_t))); - CHECK(elt == i); - } - } - - SECTION("- With multiple stages") { - WriterTile::set_max_tile_chunk_size(80); - pipeline.add_filter(Add1InPlace(Datatype::UINT64)); - pipeline.add_filter(PseudoChecksumFilter(Datatype::UINT64)); - pipeline.add_filter(Add1OutOfPlace(Datatype::UINT64)); - pipeline.add_filter( - CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); - - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) - .ok()); - // Check number of chunks - CHECK(tile.size() == 0); - CHECK( - tile.filtered_buffer().value_at_as(0) == - 9); // Number of chunks - - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); - run_reverse(config, tp, unfiltered_tile, pipeline); - - // Check all elements original values. 
- for (uint64_t i = 0; i < nelts; i++) { - uint64_t elt = 0; - CHECK_NOTHROW( - unfiltered_tile.read(&elt, i * sizeof(uint64_t), sizeof(uint64_t))); - CHECK(elt == i); - } - } - - WriterTile::set_max_tile_chunk_size(constants::max_tile_chunk_size); -} - TEST_CASE( "Filter: Test skip checksum validation", "[filter][skip-checksum-validation]") { tiledb::sm::Config config; REQUIRE(config.set("sm.skip_checksum_validation", "true").ok()); + auto tracker = tiledb::test::create_test_memory_tracker(); + const uint64_t nelts = 100; - auto tile = make_increasing_tile(nelts); + auto tile = make_increasing_tile(nelts, tracker); // MD5 FilterPipeline md5_pipeline; ThreadPool tp(4); ChecksumMD5Filter md5_filter(Datatype::UINT64); md5_pipeline.add_filter(md5_filter); - CHECK(md5_pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp) - .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK( + md5_pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, md5_pipeline); for (uint64_t n = 0; n < nelts; n++) { @@ -649,18 +181,18 @@ TEST_CASE( } // SHA256 - auto tile2 = make_increasing_tile(nelts); + auto tile2 = make_increasing_tile(nelts, tracker); FilterPipeline sha_256_pipeline; ChecksumMD5Filter sha_256_filter(Datatype::UINT64); sha_256_pipeline.add_filter(sha_256_filter); - CHECK( - sha_256_pipeline.run_forward(&test::g_helper_stats, &tile2, nullptr, &tp) - .ok()); - CHECK(tile2.size() == 0); - CHECK(tile2.filtered_buffer().size() != 0); + CHECK(sha_256_pipeline + .run_forward(&test::g_helper_stats, tile2.get(), nullptr, &tp) + .ok()); + CHECK(tile2->size() == 0); + CHECK(tile2->filtered_buffer().size() != 0); - auto unfiltered_tile2 = 
create_tile_for_unfiltering(nelts, tile2); + auto unfiltered_tile2 = create_tile_for_unfiltering(nelts, tile2, tracker); run_reverse(config, tp, unfiltered_tile2, sha_256_pipeline); for (uint64_t n = 0; n < nelts; n++) { uint64_t elt = 0; @@ -673,6 +205,8 @@ TEST_CASE( TEST_CASE("Filter: Test bit width reduction", "[filter][bit-width-reduction]") { tiledb::sm::Config config; + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set up test data const uint64_t nelts = 1000; @@ -681,13 +215,13 @@ TEST_CASE("Filter: Test bit width reduction", "[filter][bit-width-reduction]") { pipeline.add_filter(BitWidthReductionFilter(Datatype::UINT64)); SECTION("- Single stage") { - auto tile = make_increasing_tile(nelts); + auto tile = make_increasing_tile(nelts, tracker); - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); // Sanity check number of windows value uint64_t offset = 0; @@ -697,7 +231,7 @@ TEST_CASE("Filter: Test bit width reduction", "[filter][bit-width-reduction]") { offset += sizeof(uint32_t); // First chunk metadata size CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == nelts * sizeof(uint64_t)); // Original length offset += sizeof(uint32_t); @@ -707,14 +241,14 @@ TEST_CASE("Filter: Test bit width reduction", "[filter][bit-width-reduction]") { (nelts * sizeof(uint64_t)) / max_win_size + uint32_t(bool((nelts * sizeof(uint64_t)) % max_win_size)); CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == expected_num_win); // Number of windows // Check compression worked - auto compressed_size = tile.filtered_buffer().size(); + auto compressed_size = tile->filtered_buffer().size(); 
CHECK(compressed_size < nelts * sizeof(uint64_t)); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -728,17 +262,18 @@ TEST_CASE("Filter: Test bit width reduction", "[filter][bit-width-reduction]") { std::vector window_sizes = { 32, 64, 128, 256, 437, 512, 1024, 2000}; for (auto window_size : window_sizes) { - auto tile = make_increasing_tile(nelts); + auto tile = make_increasing_tile(nelts, tracker); pipeline.get_filter()->set_max_window_size( window_size); - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp) - .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK( + pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -756,24 +291,26 @@ TEST_CASE("Filter: Test bit width reduction", "[filter][bit-width-reduction]") { std::uniform_int_distribution<> rng(0, std::numeric_limits::max()); INFO("Random element seed: " << seed); - WriterTile tile( + auto tile = make_shared( + HERE(), constants::format_version, Datatype::UINT64, sizeof(uint64_t), - nelts * sizeof(uint64_t)); + nelts * sizeof(uint64_t), + tracker); // Set up test data for (uint64_t i = 0; i < nelts; i++) { uint64_t val = (uint64_t)rng(gen); - CHECK_NOTHROW(tile.write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); + CHECK_NOTHROW(tile->write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); } - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); - CHECK(tile.size() == 0); - 
CHECK(tile.filtered_buffer().size() != 0); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -792,24 +329,26 @@ TEST_CASE("Filter: Test bit width reduction", "[filter][bit-width-reduction]") { std::numeric_limits::max()); INFO("Random element seed: " << seed); - WriterTile tile( + auto tile = make_shared( + HERE(), constants::format_version, Datatype::UINT32, sizeof(uint32_t), - nelts * sizeof(uint32_t)); + nelts * sizeof(uint32_t), + tracker); // Set up test data for (uint64_t i = 0; i < nelts; i++) { uint32_t val = (uint32_t)rng(gen); - CHECK_NOTHROW(tile.write(&val, i * sizeof(uint32_t), sizeof(uint32_t))); + CHECK_NOTHROW(tile->write(&val, i * sizeof(uint32_t), sizeof(uint32_t))); } - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { int32_t elt = 0; @@ -820,24 +359,26 @@ TEST_CASE("Filter: Test bit width reduction", "[filter][bit-width-reduction]") { } SECTION("- Byte overflow") { - WriterTile tile( + auto tile = make_shared( + HERE(), constants::format_version, Datatype::UINT64, sizeof(uint64_t), - nelts * sizeof(uint64_t)); + nelts * sizeof(uint64_t), + tracker); // Set up test data for (uint64_t i = 0; i < 
nelts; i++) { uint64_t val = i % 257; - CHECK_NOTHROW(tile.write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); + CHECK_NOTHROW(tile->write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); } - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -853,6 +394,8 @@ TEST_CASE( "[filter][bit-width-reduction][var]") { tiledb::sm::Config config; + auto tracker = tiledb::test::create_test_memory_tracker(); + const uint64_t nelts = 100; // Set up test data @@ -892,37 +435,39 @@ TEST_CASE( pipeline.add_filter(BitWidthReductionFilter(Datatype::UINT64)); SECTION("- Single stage") { - auto tile = make_increasing_tile(nelts); - auto offsets_tile = make_offsets_tile(offsets); + auto tile = make_increasing_tile(nelts, tracker); + auto offsets_tile = make_offsets_tile(offsets, tracker); WriterTile::set_max_tile_chunk_size(80); - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile.get(), offsets_tile.get(), &tp) .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); uint64_t offset = 0; CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == 9); // Number of chunks offset += sizeof(uint64_t); for (uint64_t i = 0; i < 9; i++) { CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == out_sizes[i]); 
// Chunk orig size offset += sizeof(uint32_t); CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == out_sizes[i] / 8); // Chunk filtered size offset += sizeof(uint32_t); - uint32_t md_size = tile.filtered_buffer().value_at_as(offset); + uint32_t md_size = tile->filtered_buffer().value_at_as(offset); offset += sizeof(uint32_t); CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == out_sizes[i]); // Original length offset += sizeof(uint32_t); @@ -932,7 +477,7 @@ TEST_CASE( auto expected_num_win = out_sizes[i] / max_win_size + uint32_t(bool(out_sizes[0] % max_win_size)); CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == expected_num_win); // Number of windows offset += md_size - sizeof(uint32_t); @@ -940,16 +485,16 @@ TEST_CASE( // Check all elements are good. uint8_t el = 0; for (uint64_t j = 0; j < out_sizes[i] / sizeof(uint64_t); j++) { - CHECK(tile.filtered_buffer().value_at_as(offset) == el++); + CHECK(tile->filtered_buffer().value_at_as(offset) == el++); offset += sizeof(uint8_t); } } // Check compression worked - auto compressed_size = tile.filtered_buffer().size(); + auto compressed_size = tile->filtered_buffer().size(); CHECK(compressed_size < nelts * sizeof(uint64_t)); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -964,18 +509,19 @@ TEST_CASE( std::vector window_sizes = { 32, 64, 128, 256, 437, 512, 1024, 2000}; for (auto window_size : window_sizes) { - auto tile = make_increasing_tile(nelts); - auto offsets_tile = make_offsets_tile(offsets); + auto tile = make_increasing_tile(nelts, tracker); + auto offsets_tile = make_offsets_tile(offsets, tracker); pipeline.get_filter()->set_max_window_size( 
window_size); - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) - .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile.get(), offsets_tile.get(), &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -994,25 +540,29 @@ TEST_CASE( std::uniform_int_distribution<> rng(0, std::numeric_limits::max()); INFO("Random element seed: " << seed); - WriterTile tile( + auto tile = make_shared( + HERE(), constants::format_version, Datatype::UINT64, sizeof(uint64_t), - nelts * sizeof(uint64_t)); - auto offsets_tile = make_offsets_tile(offsets); + nelts * sizeof(uint64_t), + tracker); + auto offsets_tile = make_offsets_tile(offsets, tracker); // Set up test data for (uint64_t i = 0; i < nelts; i++) { uint64_t val = (uint64_t)rng(gen); - CHECK_NOTHROW(tile.write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); + CHECK_NOTHROW(tile->write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); } - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile.get(), offsets_tile.get(), &tp) .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1032,16 +582,18 @@ TEST_CASE( std::numeric_limits::max()); INFO("Random element seed: " << seed); - WriterTile tile( + 
auto tile = make_shared( + HERE(), constants::format_version, Datatype::UINT32, sizeof(uint32_t), - nelts * sizeof(uint32_t)); + nelts * sizeof(uint32_t), + tracker); // Set up test data for (uint64_t i = 0; i < nelts; i++) { uint32_t val = (uint32_t)rng(gen); - CHECK_NOTHROW(tile.write(&val, i * sizeof(uint32_t), sizeof(uint32_t))); + CHECK_NOTHROW(tile->write(&val, i * sizeof(uint32_t), sizeof(uint32_t))); } std::vector offsets32(offsets); @@ -1053,7 +605,8 @@ TEST_CASE( constants::format_version, Datatype::UINT64, constants::cell_var_offset_size, - offsets_tile_size); + offsets_tile_size, + tracker); // Set up test data for (uint64_t i = 0; i < offsets.size(); i++) { @@ -1063,13 +616,14 @@ TEST_CASE( constants::cell_var_offset_size)); } - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile32, &tp) - .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile.get(), &offsets_tile32, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { int32_t elt = 0; @@ -1081,26 +635,30 @@ TEST_CASE( SECTION("- Byte overflow") { WriterTile::set_max_tile_chunk_size(80); - WriterTile tile( + auto tile = make_shared( + HERE(), constants::format_version, Datatype::UINT64, sizeof(uint64_t), - nelts * sizeof(uint64_t)); + nelts * sizeof(uint64_t), + tracker); // Set up test data for (uint64_t i = 0; i < nelts; i++) { uint64_t val = i % 257; - CHECK_NOTHROW(tile.write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); + CHECK_NOTHROW(tile->write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); } - auto offsets_tile = make_offsets_tile(offsets); + auto offsets_tile = make_offsets_tile(offsets, tracker); - 
CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile.get(), offsets_tile.get(), &tp) .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1116,6 +674,8 @@ TEST_CASE( TEST_CASE("Filter: Test positive-delta encoding", "[filter][positive-delta]") { tiledb::sm::Config config; + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set up test data const uint64_t nelts = 1000; @@ -1124,12 +684,12 @@ TEST_CASE("Filter: Test positive-delta encoding", "[filter][positive-delta]") { pipeline.add_filter(PositiveDeltaFilter(Datatype::UINT64)); SECTION("- Single stage") { - auto tile = make_increasing_tile(nelts); - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); + auto tile = make_increasing_tile(nelts, tracker); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); auto pipeline_metadata_size = sizeof(uint64_t) + 3 * sizeof(uint32_t); @@ -1137,7 +697,7 @@ TEST_CASE("Filter: Test positive-delta encoding", "[filter][positive-delta]") { offset += sizeof(uint64_t); // Number of chunks offset += sizeof(uint32_t); // First chunk orig size offset += sizeof(uint32_t); // First chunk filtered size - auto filter_metadata_size = tile.filtered_buffer().value_at_as( + auto filter_metadata_size = tile->filtered_buffer().value_at_as( offset); // First chunk metadata size offset += sizeof(uint32_t); @@ -1147,16 +707,16 @@ 
TEST_CASE("Filter: Test positive-delta encoding", "[filter][positive-delta]") { (nelts * sizeof(uint64_t)) / max_win_size + uint32_t(bool((nelts * sizeof(uint64_t)) % max_win_size)); CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == expected_num_win); // Number of windows // Check encoded size - auto encoded_size = tile.filtered_buffer().size(); + auto encoded_size = tile->filtered_buffer().size(); CHECK( encoded_size == pipeline_metadata_size + filter_metadata_size + nelts * sizeof(uint64_t)); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1170,16 +730,17 @@ TEST_CASE("Filter: Test positive-delta encoding", "[filter][positive-delta]") { std::vector window_sizes = { 32, 64, 128, 256, 437, 512, 1024, 2000}; for (auto window_size : window_sizes) { - auto tile = make_increasing_tile(nelts); + auto tile = make_increasing_tile(nelts, tracker); pipeline.get_filter()->set_max_window_size( window_size); - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp) - .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK( + pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1191,14 +752,14 @@ TEST_CASE("Filter: Test positive-delta encoding", "[filter][positive-delta]") { } SECTION("- Error on non-positive delta data") { - auto tile = make_increasing_tile(nelts); + auto tile = make_increasing_tile(nelts, tracker); for 
(uint64_t i = 0; i < nelts; i++) { auto val = nelts - i; - CHECK_NOTHROW(tile.write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); + CHECK_NOTHROW(tile->write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); } - CHECK( - !pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); + CHECK(!pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); } } @@ -1207,6 +768,8 @@ TEST_CASE( "[filter][positive-delta][var]") { tiledb::sm::Config config; + auto tracker = tiledb::test::create_test_memory_tracker(); + const uint64_t nelts = 100; // Set up test data @@ -1243,34 +806,36 @@ TEST_CASE( pipeline.add_filter(PositiveDeltaFilter(Datatype::UINT64)); SECTION("- Single stage") { - auto tile = make_increasing_tile(nelts); - auto offsets_tile = make_offsets_tile(offsets); + auto tile = make_increasing_tile(nelts, tracker); + auto offsets_tile = make_offsets_tile(offsets, tracker); WriterTile::set_max_tile_chunk_size(80); - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile.get(), offsets_tile.get(), &tp) .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); uint64_t offset = 0; CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == 9); // Number of chunks offset += sizeof(uint64_t); uint64_t total_md_size = 0; for (uint64_t i = 0; i < 9; i++) { CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == out_sizes[i]); // Chunk orig size offset += sizeof(uint32_t); CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == out_sizes[i]); // Chunk filtered size offset += sizeof(uint32_t); - uint32_t md_size = tile.filtered_buffer().value_at_as(offset); + uint32_t md_size = tile->filtered_buffer().value_at_as(offset); 
offset += sizeof(uint32_t); total_md_size += md_size; @@ -1280,7 +845,7 @@ TEST_CASE( (nelts * sizeof(uint64_t)) / max_win_size + uint32_t(bool((nelts * sizeof(uint64_t)) % max_win_size)); CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == expected_num_win); // Number of windows offset += md_size; @@ -1288,7 +853,7 @@ TEST_CASE( // Check all elements are good. for (uint64_t j = 0; j < out_sizes[i] / sizeof(uint64_t); j++) { CHECK( - tile.filtered_buffer().value_at_as(offset) == + tile->filtered_buffer().value_at_as(offset) == (j == 0 ? 0 : 1)); offset += sizeof(uint64_t); } @@ -1296,12 +861,12 @@ TEST_CASE( // Check encoded size auto pipeline_metadata_size = sizeof(uint64_t) + 9 * 3 * sizeof(uint32_t); - auto encoded_size = tile.filtered_buffer().size(); + auto encoded_size = tile->filtered_buffer().size(); CHECK( encoded_size == pipeline_metadata_size + total_md_size + nelts * sizeof(uint64_t)); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1316,19 +881,20 @@ TEST_CASE( std::vector window_sizes = { 32, 64, 128, 256, 437, 512, 1024, 2000}; for (auto window_size : window_sizes) { - auto tile = make_increasing_tile(nelts); - auto offsets_tile = make_offsets_tile(offsets); + auto tile = make_increasing_tile(nelts, tracker); + auto offsets_tile = make_offsets_tile(offsets, tracker); pipeline.get_filter()->set_max_window_size( window_size); - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) - .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile.get(), offsets_tile.get(), &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = 
create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1340,18 +906,19 @@ TEST_CASE( } SECTION("- Error on non-positive delta data") { - auto tile = make_increasing_tile(nelts); - auto offsets_tile = make_offsets_tile(offsets); + auto tile = make_increasing_tile(nelts, tracker); + auto offsets_tile = make_offsets_tile(offsets, tracker); WriterTile::set_max_tile_chunk_size(80); for (uint64_t i = 0; i < nelts; i++) { auto val = nelts - i; - CHECK_NOTHROW(tile.write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); + CHECK_NOTHROW(tile->write(&val, i * sizeof(uint64_t), sizeof(uint64_t))); } - CHECK( - !pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) - .ok()); + CHECK(!pipeline + .run_forward( + &test::g_helper_stats, tile.get(), offsets_tile.get(), &tp) + .ok()); } WriterTile::set_max_tile_chunk_size(constants::max_tile_chunk_size); @@ -1360,21 +927,23 @@ TEST_CASE( TEST_CASE("Filter: Test bitshuffle", "[filter][bitshuffle]") { tiledb::sm::Config config; + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set up test data const uint64_t nelts = 1000; - auto tile = make_increasing_tile(nelts); + auto tile = make_increasing_tile(nelts, tracker); FilterPipeline pipeline; ThreadPool tp(4); pipeline.add_filter(BitshuffleFilter(Datatype::UINT64)); SECTION("- Single stage") { - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, 
pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1388,23 +957,25 @@ TEST_CASE("Filter: Test bitshuffle", "[filter][bitshuffle]") { const uint32_t nelts2 = 1001; const uint64_t tile_size2 = nelts2 * sizeof(uint32_t); - WriterTile tile2( + auto tile2 = make_shared( + HERE(), constants::format_version, Datatype::UINT32, sizeof(uint32_t), - tile_size2); + tile_size2, + tracker); // Set up test data for (uint32_t i = 0; i < nelts2; i++) { - CHECK_NOTHROW(tile2.write(&i, i * sizeof(uint32_t), sizeof(uint32_t))); + CHECK_NOTHROW(tile2->write(&i, i * sizeof(uint32_t), sizeof(uint32_t))); } - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile2, nullptr, &tp).ok()); - CHECK(tile2.size() == 0); - CHECK(tile2.filtered_buffer().size() != 0); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile2.get(), nullptr, &tp) + .ok()); + CHECK(tile2->size() == 0); + CHECK(tile2->filtered_buffer().size() != 0); - auto unfiltered_tile2 = create_tile_for_unfiltering(nelts2, tile2); + auto unfiltered_tile2 = create_tile_for_unfiltering(nelts2, tile2, tracker); run_reverse(config, tp, unfiltered_tile2, pipeline); for (uint64_t i = 0; i < nelts2; i++) { uint32_t elt = 0; @@ -1418,8 +989,10 @@ TEST_CASE("Filter: Test bitshuffle", "[filter][bitshuffle]") { TEST_CASE("Filter: Test bitshuffle var", "[filter][bitshuffle][var]") { tiledb::sm::Config config; + auto tracker = tiledb::test::create_test_memory_tracker(); + const uint64_t nelts = 100; - auto tile = make_increasing_tile(nelts); + auto tile = make_increasing_tile(nelts, tracker); // Set up test data std::vector sizes{ @@ -1450,7 +1023,7 @@ TEST_CASE("Filter: Test bitshuffle var", "[filter][bitshuffle][var]") { } offsets[offsets.size() - 1] = offset; - auto offsets_tile = make_offsets_tile(offsets); + auto offsets_tile = make_offsets_tile(offsets, tracker); FilterPipeline pipeline; ThreadPool tp(4); @@ -1458,12 +1031,14 @@ TEST_CASE("Filter: Test bitshuffle var", "[filter][bitshuffle][var]") { SECTION("- 
Single stage") { WriterTile::set_max_tile_chunk_size(80); - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile.get(), offsets_tile.get(), &tp) .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1478,24 +1053,27 @@ TEST_CASE("Filter: Test bitshuffle var", "[filter][bitshuffle][var]") { const uint32_t nelts2 = 1001; const uint64_t tile_size2 = nelts2 * sizeof(uint32_t); - WriterTile tile2( + auto tile2 = make_shared( + HERE(), constants::format_version, Datatype::UINT32, sizeof(uint32_t), - tile_size2); + tile_size2, + tracker); // Set up test data for (uint32_t i = 0; i < nelts2; i++) { - CHECK_NOTHROW(tile2.write(&i, i * sizeof(uint32_t), sizeof(uint32_t))); + CHECK_NOTHROW(tile2->write(&i, i * sizeof(uint32_t), sizeof(uint32_t))); } - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile2, &offsets_tile, &tp) - .ok()); - CHECK(tile2.size() == 0); - CHECK(tile2.filtered_buffer().size() != 0); + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile2.get(), offsets_tile.get(), &tp) + .ok()); + CHECK(tile2->size() == 0); + CHECK(tile2->filtered_buffer().size() != 0); - auto unfiltered_tile2 = create_tile_for_unfiltering(nelts2, tile2); + auto unfiltered_tile2 = create_tile_for_unfiltering(nelts2, tile2, tracker); run_reverse(config, tp, unfiltered_tile2, pipeline); for (uint64_t i = 0; i < nelts2; i++) { uint32_t elt = 0; @@ -1511,21 +1089,23 @@ TEST_CASE("Filter: Test bitshuffle var", "[filter][bitshuffle][var]") { TEST_CASE("Filter: Test byteshuffle", "[filter][byteshuffle]") { tiledb::sm::Config config; + auto 
tracker = tiledb::test::create_test_memory_tracker(); + // Set up test data const uint64_t nelts = 1000; - auto tile = make_increasing_tile(nelts); + auto tile = make_increasing_tile(nelts, tracker); FilterPipeline pipeline; ThreadPool tp(4); pipeline.add_filter(ByteshuffleFilter(Datatype::UINT64)); SECTION("- Single stage") { - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1539,23 +1119,25 @@ TEST_CASE("Filter: Test byteshuffle", "[filter][byteshuffle]") { const uint32_t nelts2 = 1001; const uint64_t tile_size2 = nelts2 * sizeof(uint32_t); - WriterTile tile2( + auto tile2 = make_shared( + HERE(), constants::format_version, Datatype::UINT32, sizeof(uint32_t), - tile_size2); + tile_size2, + tracker); // Set up test data for (uint32_t i = 0; i < nelts2; i++) { - CHECK_NOTHROW(tile2.write(&i, i * sizeof(uint32_t), sizeof(uint32_t))); + CHECK_NOTHROW(tile2->write(&i, i * sizeof(uint32_t), sizeof(uint32_t))); } - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile2, nullptr, &tp).ok()); - CHECK(tile2.size() == 0); - CHECK(tile2.filtered_buffer().size() != 0); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile2.get(), nullptr, &tp) + .ok()); + CHECK(tile2->size() == 0); + CHECK(tile2->filtered_buffer().size() != 0); - auto unfiltered_tile2 = create_tile_for_unfiltering(nelts2, tile2); + auto unfiltered_tile2 = create_tile_for_unfiltering(nelts2, tile2, tracker); run_reverse(config, tp, unfiltered_tile2, pipeline); for (uint64_t i = 0; i < 
nelts2; i++) { uint32_t elt = 0; @@ -1569,8 +1151,10 @@ TEST_CASE("Filter: Test byteshuffle", "[filter][byteshuffle]") { TEST_CASE("Filter: Test byteshuffle var", "[filter][byteshuffle][var]") { tiledb::sm::Config config; + auto tracker = tiledb::test::create_test_memory_tracker(); + const uint64_t nelts = 100; - auto tile = make_increasing_tile(nelts); + auto tile = make_increasing_tile(nelts, tracker); // Set up test data std::vector sizes{ @@ -1601,7 +1185,7 @@ TEST_CASE("Filter: Test byteshuffle var", "[filter][byteshuffle][var]") { } offsets[offsets.size() - 1] = offset; - auto offsets_tile = make_offsets_tile(offsets); + auto offsets_tile = make_offsets_tile(offsets, tracker); FilterPipeline pipeline; ThreadPool tp(4); @@ -1609,12 +1193,14 @@ TEST_CASE("Filter: Test byteshuffle var", "[filter][byteshuffle][var]") { SECTION("- Single stage") { WriterTile::set_max_tile_chunk_size(80); - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, &offsets_tile, &tp) + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile.get(), offsets_tile.get(), &tp) .ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1629,24 +1215,27 @@ TEST_CASE("Filter: Test byteshuffle var", "[filter][byteshuffle][var]") { const uint32_t nelts2 = 1001; const uint64_t tile_size2 = nelts2 * sizeof(uint32_t); - WriterTile tile2( + auto tile2 = make_shared( + HERE(), constants::format_version, Datatype::UINT32, sizeof(uint32_t), - tile_size2); + tile_size2, + tracker); // Set up test data for (uint32_t i = 0; i < nelts2; i++) { - CHECK_NOTHROW(tile2.write(&i, i * sizeof(uint32_t), sizeof(uint32_t))); + CHECK_NOTHROW(tile2->write(&i, i * 
sizeof(uint32_t), sizeof(uint32_t))); } - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile2, &offsets_tile, &tp) - .ok()); - CHECK(tile2.size() == 0); - CHECK(tile2.filtered_buffer().size() != 0); + CHECK(pipeline + .run_forward( + &test::g_helper_stats, tile2.get(), offsets_tile.get(), &tp) + .ok()); + CHECK(tile2->size() == 0); + CHECK(tile2->filtered_buffer().size() != 0); - auto unfiltered_tile2 = create_tile_for_unfiltering(nelts2, tile2); + auto unfiltered_tile2 = create_tile_for_unfiltering(nelts2, tile2, tracker); run_reverse(config, tp, unfiltered_tile2, pipeline); for (uint64_t i = 0; i < nelts2; i++) { uint32_t elt = 0; @@ -1662,9 +1251,11 @@ TEST_CASE("Filter: Test byteshuffle var", "[filter][byteshuffle][var]") { TEST_CASE("Filter: Test encryption", "[filter][encryption]") { tiledb::sm::Config config; + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set up test data const uint64_t nelts = 1000; - auto tile = make_increasing_tile(nelts); + auto tile = make_increasing_tile(nelts, tracker); SECTION("- AES-256-GCM") { FilterPipeline pipeline; @@ -1672,8 +1263,8 @@ TEST_CASE("Filter: Test encryption", "[filter][encryption]") { pipeline.add_filter(EncryptionAES256GCMFilter(Datatype::UINT64)); // No key set - CHECK( - !pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); + CHECK(!pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); // Create and set a key char key[32]; @@ -1683,12 +1274,12 @@ TEST_CASE("Filter: Test encryption", "[filter][encryption]") { filter->set_key(key); // Check success - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto 
unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; @@ -1698,25 +1289,26 @@ TEST_CASE("Filter: Test encryption", "[filter][encryption]") { } // Check error decrypting with wrong key. - tile = make_increasing_tile(nelts); - CHECK( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); + tile = make_increasing_tile(nelts, tracker); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); key[0]++; filter->set_key(key); - unfiltered_tile = create_tile_for_unfiltering(nelts, tile); - run_reverse(config, tp, unfiltered_tile, pipeline, false); + auto unfiltered_tile2 = create_tile_for_unfiltering(nelts, tile, tracker); + run_reverse(config, tp, unfiltered_tile2, pipeline, false); // Fix key and check success. - unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile3 = create_tile_for_unfiltering(nelts, tile, tracker); + key[0]--; filter->set_key(key); - run_reverse(config, tp, unfiltered_tile, pipeline); + run_reverse(config, tp, unfiltered_tile3, pipeline); for (uint64_t i = 0; i < nelts; i++) { uint64_t elt = 0; CHECK_NOTHROW( - unfiltered_tile.read(&elt, i * sizeof(uint64_t), sizeof(uint64_t))); + unfiltered_tile3.read(&elt, i * sizeof(uint64_t), sizeof(uint64_t))); CHECK(elt == i); } } @@ -1726,6 +1318,8 @@ template void testing_float_scaling_filter() { tiledb::sm::Config config; + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set up test data const uint64_t nelts = 100; const uint64_t tile_size = nelts * sizeof(FloatingType); @@ -1747,7 +1341,8 @@ void testing_float_scaling_filter() { } } - WriterTile tile(constants::format_version, t, cell_size, tile_size); + auto tile = make_shared( + HERE(), constants::format_version, t, cell_size, tile_size, tracker); std::vector float_result_vec; double scale = 2.53; @@ -1761,7 +1356,7 @@ void 
testing_float_scaling_filter() { for (uint64_t i = 0; i < nelts; i++) { FloatingType f = dis(gen); CHECK_NOTHROW( - tile.write(&f, i * sizeof(FloatingType), sizeof(FloatingType))); + tile->write(&f, i * sizeof(FloatingType), sizeof(FloatingType))); IntType val = static_cast(round( (f - static_cast(foffset)) / @@ -1785,13 +1380,14 @@ void testing_float_scaling_filter() { ->set_option(FilterOption::SCALE_FLOAT_OFFSET, &foffset) .ok()); - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); // Check new size and number of chunks - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { FloatingType elt = 0.0f; @@ -1822,12 +1418,15 @@ template void testing_xor_filter(Datatype t) { tiledb::sm::Config config; + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set up test data const uint64_t nelts = 100; const uint64_t tile_size = nelts * sizeof(T); const uint64_t cell_size = sizeof(T); - WriterTile tile(constants::format_version, t, cell_size, tile_size); + auto tile = make_shared( + HERE(), constants::format_version, t, cell_size, tile_size, tracker); // Setting up the random number generator for the XOR filter testing. 
std::mt19937_64 gen(0x57A672DE); @@ -1838,7 +1437,7 @@ void testing_xor_filter(Datatype t) { for (uint64_t i = 0; i < nelts; i++) { T val = static_cast(dis(gen)); - CHECK_NOTHROW(tile.write(&val, i * sizeof(T), sizeof(T))); + CHECK_NOTHROW(tile->write(&val, i * sizeof(T), sizeof(T))); results.push_back(val); } @@ -1846,13 +1445,14 @@ void testing_xor_filter(Datatype t) { ThreadPool tp(4); pipeline.add_filter(XORFilter(t)); - CHECK(pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); + CHECK(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); // Check new size and number of chunks - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile); + auto unfiltered_tile = create_tile_for_unfiltering(nelts, tile, tracker); run_reverse(config, tp, unfiltered_tile, pipeline); for (uint64_t i = 0; i < nelts; i++) { T elt = 0; @@ -1891,6 +1491,7 @@ TEST_CASE("Filter: Test XOR", "[filter][xor]") { TEST_CASE("Filter: Pipeline filtered output types", "[filter][pipeline]") { FilterPipeline pipeline; + auto tracker = tiledb::test::create_test_memory_tracker(); SECTION("- DoubleDelta filter reinterprets float->int32") { pipeline.add_filter(CompressionFilter( @@ -1949,22 +1550,25 @@ TEST_CASE("Filter: Pipeline filtered output types", "[filter][pipeline]") { // Initial type of tile is float. 
std::vector data = { 1.0f, 2.1f, 3.2f, 4.3f, 5.4f, 6.5f, 7.6f, 8.7f, 9.8f, 10.9f}; - WriterTile tile( + auto tile = make_shared( + HERE(), constants::format_version, Datatype::FLOAT32, sizeof(float), - sizeof(float) * data.size()); + sizeof(float) * data.size(), + tracker); for (size_t i = 0; i < data.size(); i++) { - CHECK_NOTHROW(tile.write(&data[i], i * sizeof(float), sizeof(float))); + CHECK_NOTHROW(tile->write(&data[i], i * sizeof(float), sizeof(float))); } ThreadPool tp(4); - REQUIRE( - pipeline.run_forward(&test::g_helper_stats, &tile, nullptr, &tp).ok()); - CHECK(tile.size() == 0); - CHECK(tile.filtered_buffer().size() != 0); + REQUIRE(pipeline.run_forward(&test::g_helper_stats, tile.get(), nullptr, &tp) + .ok()); + CHECK(tile->size() == 0); + CHECK(tile->filtered_buffer().size() != 0); - auto unfiltered_tile = create_tile_for_unfiltering(data.size(), tile); + auto unfiltered_tile = + create_tile_for_unfiltering(data.size(), tile, tracker); ChunkData chunk_data; unfiltered_tile.load_chunk_data(chunk_data); REQUIRE(pipeline diff --git a/test/src/unit-gcs.cc b/test/src/unit-gcs.cc deleted file mode 100644 index e8065a9e665a..000000000000 --- a/test/src/unit-gcs.cc +++ /dev/null @@ -1,476 +0,0 @@ -/** - * @file unit-gcs.cc - * - * @section LICENSE - * - * The MIT License - * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * @section DESCRIPTION - * - * Tests for GCS API filesystem functions. - */ - -#ifdef HAVE_GCS - -#include -#include "tiledb/common/filesystem/directory_entry.h" -#include "tiledb/common/thread_pool.h" -#include "tiledb/sm/config/config.h" -#include "tiledb/sm/filesystem/gcs.h" -#include "tiledb/sm/global_state/unit_test_config.h" -#include "tiledb/sm/misc/tdb_math.h" -#include "tiledb/sm/misc/tdb_time.h" -#include "tiledb/sm/misc/utils.h" - -#include - -using namespace tiledb::common; -using namespace tiledb::sm; - -struct GCSFx { - const std::string GCS_PREFIX = "gcs://"; - const tiledb::sm::URI GCS_BUCKET = - tiledb::sm::URI(GCS_PREFIX + random_bucket_name("tiledb") + "/"); - const std::string TEST_DIR = GCS_BUCKET.to_string() + "tiledb_test_dir/"; - - tiledb::sm::GCS gcs_; - ThreadPool thread_pool_{2}; - - GCSFx() = default; - ~GCSFx(); - - void init_gcs(Config&& config); - - static std::string random_bucket_name(const std::string& prefix); -}; - -GCSFx::~GCSFx() { - // Empty bucket - bool is_empty; - REQUIRE(gcs_.is_empty_bucket(GCS_BUCKET, &is_empty).ok()); - if (!is_empty) { - REQUIRE(gcs_.empty_bucket(GCS_BUCKET).ok()); - REQUIRE(gcs_.is_empty_bucket(GCS_BUCKET, &is_empty).ok()); - REQUIRE(is_empty); - } - - // Delete bucket - REQUIRE(gcs_.remove_bucket(GCS_BUCKET).ok()); -} - -void GCSFx::init_gcs(Config&& config) { - REQUIRE(config.set("vfs.gcs.project_id", "TODO").ok()); - REQUIRE(gcs_.init(config, &thread_pool_).ok()); - - // Create bucket - bool is_bucket; - 
REQUIRE(gcs_.is_bucket(GCS_BUCKET, &is_bucket).ok()); - if (is_bucket) { - REQUIRE(gcs_.remove_bucket(GCS_BUCKET).ok()); - } - - REQUIRE(gcs_.is_bucket(GCS_BUCKET, &is_bucket).ok()); - REQUIRE(!is_bucket); - REQUIRE(gcs_.create_bucket(GCS_BUCKET).ok()); - - // Check if bucket is empty - bool is_empty; - REQUIRE(gcs_.is_empty_bucket(GCS_BUCKET, &is_empty).ok()); - REQUIRE(is_empty); -} - -std::string GCSFx::random_bucket_name(const std::string& prefix) { - std::stringstream ss; - ss << prefix << "-" << std::this_thread::get_id() << "-" - << tiledb::sm::utils::time::timestamp_now_ms(); - return ss.str(); -} - -TEST_CASE_METHOD(GCSFx, "Test GCS init", "[gcs]") { - try { - Config config; - REQUIRE(config.set("vfs.gcs.use_multi_part_upload", "true").ok()); - init_gcs(std::move(config)); - } catch (...) { - INFO( - "GCS initialization failed. In order to run GCS tests, be sure to " - "source scripts/run-gcs.sh in this shell session before starting test " - "runner."); - REQUIRE(false); - } -} - -TEST_CASE_METHOD( - GCSFx, - "Test GCS filesystem I/O, multipart, serial", - "[gcs][multipart][serial]") { - Config config; - const uint64_t max_parallel_ops = 1; - const uint64_t multi_part_size = 4 * 1024 * 1024; - REQUIRE( - config.set("vfs.gcs.max_parallel_ops", std::to_string(max_parallel_ops)) - .ok()); - REQUIRE(config.set("vfs.gcs.use_multi_part_upload", "true").ok()); - REQUIRE(config.set("vfs.gcs.multi_part_size", std::to_string(multi_part_size)) - .ok()); - init_gcs(std::move(config)); - - const uint64_t write_cache_max_size = max_parallel_ops * multi_part_size; - - // Prepare buffers - uint64_t buffer_size_large = write_cache_max_size; - auto write_buffer_large = new char[buffer_size_large]; - for (uint64_t i = 0; i < buffer_size_large; i++) - write_buffer_large[i] = (char)('a' + (i % 26)); - uint64_t buffer_size_small = 1024 * 1024; - auto write_buffer_small = new char[buffer_size_small]; - for (uint64_t i = 0; i < buffer_size_small; i++) - write_buffer_small[i] = 
(char)('a' + (i % 26)); - - // Write to two files - auto largefile = TEST_DIR + "largefile"; - REQUIRE( - gcs_.write(URI(largefile), write_buffer_large, buffer_size_large).ok()); - REQUIRE( - gcs_.write(URI(largefile), write_buffer_small, buffer_size_small).ok()); - auto smallfile = TEST_DIR + "smallfile"; - REQUIRE( - gcs_.write(URI(smallfile), write_buffer_small, buffer_size_small).ok()); - - // Before flushing, the files do not exist - bool is_object = false; - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(!is_object); - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(!is_object); - - // Flush the files - REQUIRE(gcs_.flush_object(URI(largefile)).ok()); - REQUIRE(gcs_.flush_object(URI(smallfile)).ok()); - - // After flushing, the files exist - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(is_object); - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(is_object); - - // Get file sizes - uint64_t nbytes = 0; - REQUIRE(gcs_.object_size(URI(largefile), &nbytes).ok()); - REQUIRE(nbytes == (buffer_size_large + buffer_size_small)); - REQUIRE(gcs_.object_size(URI(smallfile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_small); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - REQUIRE(gcs_.read(URI(largefile), 0, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Read from a different offset - REQUIRE(gcs_.read(URI(largefile), 11, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - REQUIRE(allok); -} - -TEST_CASE_METHOD( - GCSFx, "Test GCS filesystem I/O, non-multipart", "[gcs][non-multipart]") { - Config 
config; - const uint64_t max_parallel_ops = 1; - const uint64_t write_cache_max_size = 4 * 1024 * 1024; - REQUIRE( - config.set("vfs.gcs.max_parallel_ops", std::to_string(max_parallel_ops)) - .ok()); - REQUIRE(config.set("vfs.gcs.use_multi_part_upload", "false").ok()); - REQUIRE(config - .set( - "vfs.gcs.max_direct_upload_size", - std::to_string(write_cache_max_size)) - .ok()); - init_gcs(std::move(config)); - - // Prepare buffers - uint64_t buffer_size_large = write_cache_max_size; - auto write_buffer_large = new char[buffer_size_large]; - for (uint64_t i = 0; i < buffer_size_large; i++) - write_buffer_large[i] = (char)('a' + (i % 26)); - uint64_t buffer_size_small = 1024 * 1024; - auto write_buffer_small = new char[buffer_size_small]; - for (uint64_t i = 0; i < buffer_size_small; i++) - write_buffer_small[i] = (char)('a' + (i % 26)); - - // Write to two files - auto largefile = TEST_DIR + "largefile"; - REQUIRE( - gcs_.write(URI(largefile), write_buffer_large, buffer_size_large).ok()); - REQUIRE( - !gcs_.write(URI(largefile), write_buffer_small, buffer_size_small).ok()); - auto smallfile = TEST_DIR + "smallfile"; - REQUIRE( - gcs_.write(URI(smallfile), write_buffer_small, buffer_size_small).ok()); - - // Before flushing, the file does not exist - bool is_object = false; - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(!is_object); - - // Flush the file - REQUIRE(gcs_.flush_object(URI(smallfile)).ok()); - - // After flushing, the file exists - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(is_object); - - // Get file size - uint64_t nbytes = 0; - REQUIRE(gcs_.object_size(URI(smallfile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_small); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - REQUIRE(gcs_.read(URI(smallfile), 0, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != 
static_cast('a' + i)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Read from a different offset - REQUIRE(gcs_.read(URI(smallfile), 11, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - REQUIRE(allok); -} - -TEST_CASE_METHOD( - GCSFx, - "Test GCS filesystem I/O, multipart, concurrent", - "[gcs][multipart][concurrent]") { - Config config; - const uint64_t max_parallel_ops = 4; - const uint64_t multi_part_size = 4 * 1024 * 1024; - REQUIRE( - config.set("vfs.gcs.max_parallel_ops", std::to_string(max_parallel_ops)) - .ok()); - REQUIRE(config.set("vfs.gcs.use_multi_part_upload", "true").ok()); - REQUIRE(config.set("vfs.gcs.multi_part_size", std::to_string(multi_part_size)) - .ok()); - init_gcs(std::move(config)); - - const uint64_t write_cache_max_size = max_parallel_ops * multi_part_size; - - // Prepare buffers - uint64_t buffer_size_large = write_cache_max_size; - auto write_buffer_large = new char[buffer_size_large]; - for (uint64_t i = 0; i < buffer_size_large; i++) - write_buffer_large[i] = (char)('a' + (i % 26)); - uint64_t buffer_size_small = 1024 * 1024; - auto write_buffer_small = new char[buffer_size_small]; - for (uint64_t i = 0; i < buffer_size_small; i++) - write_buffer_small[i] = (char)('a' + (i % 26)); - - // Write to two files - auto largefile = TEST_DIR + "largefile"; - REQUIRE( - gcs_.write(URI(largefile), write_buffer_large, buffer_size_large).ok()); - REQUIRE( - gcs_.write(URI(largefile), write_buffer_small, buffer_size_small).ok()); - auto smallfile = TEST_DIR + "smallfile"; - REQUIRE( - gcs_.write(URI(smallfile), write_buffer_small, buffer_size_small).ok()); - - // Before flushing, the files do not exist - bool is_object = false; - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(!is_object); - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - 
REQUIRE(!is_object); - - // Flush the files - REQUIRE(gcs_.flush_object(URI(largefile)).ok()); - REQUIRE(gcs_.flush_object(URI(smallfile)).ok()); - - // After flushing, the files exist - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(is_object); - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(is_object); - - // Get file sizes - uint64_t nbytes = 0; - REQUIRE(gcs_.object_size(URI(largefile), &nbytes).ok()); - REQUIRE(nbytes == (buffer_size_large + buffer_size_small)); - REQUIRE(gcs_.object_size(URI(smallfile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_small); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - REQUIRE(gcs_.read(URI(largefile), 0, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Read from a different offset - REQUIRE(gcs_.read(URI(largefile), 11, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - REQUIRE(allok); -} - -TEST_CASE_METHOD( - GCSFx, - "Test GCS filesystem I/O, multipart, composition", - "[gcs][multipart][composition]") { - Config config; - const uint64_t max_parallel_ops = 4; - const uint64_t multi_part_size = 4 * 1024; - REQUIRE( - config.set("vfs.gcs.max_parallel_ops", std::to_string(max_parallel_ops)) - .ok()); - REQUIRE(config.set("vfs.gcs.use_multi_part_upload", "true").ok()); - REQUIRE(config.set("vfs.gcs.multi_part_size", std::to_string(multi_part_size)) - .ok()); - init_gcs(std::move(config)); - - const uint64_t write_cache_max_size = max_parallel_ops * multi_part_size; - - // Prepare a buffer that will write 200 (50 * 4 threads) objects. - // The maximum number of objects per composition operation is 32. 
- uint64_t buffer_size_large = 50 * write_cache_max_size; - auto write_buffer_large = new char[buffer_size_large]; - for (uint64_t i = 0; i < buffer_size_large; i++) - write_buffer_large[i] = (char)('a' + (i % 26)); - - // Write to the file - auto largefile = TEST_DIR + "largefile"; - REQUIRE( - gcs_.write(URI(largefile), write_buffer_large, buffer_size_large).ok()); - - // Before flushing, the file does not exist - bool is_object = false; - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(!is_object); - - // Flush the file - REQUIRE(gcs_.flush_object(URI(largefile)).ok()); - - // After flushing, the file exists - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(is_object); - - // Get file size - uint64_t nbytes = 0; - REQUIRE(gcs_.object_size(URI(largefile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_large); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - REQUIRE(gcs_.read(URI(largefile), 0, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Read from a different offset - REQUIRE(gcs_.read(URI(largefile), 11, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Prepare a buffer that will overwrite the original with a smaller - // size. 
- uint64_t buffer_size_overwrite = 10 * write_cache_max_size; - auto write_buffer_overwrite = new char[buffer_size_overwrite]; - for (uint64_t i = 0; i < buffer_size_overwrite; i++) - write_buffer_overwrite[i] = (char)('a' + (i % 26)); - - // Write to the file - REQUIRE( - gcs_.write(URI(largefile), write_buffer_overwrite, buffer_size_overwrite) - .ok()); - - // Flush the file - REQUIRE(gcs_.flush_object(URI(largefile)).ok()); - - // After flushing, the file exists - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(is_object); - - // Get file size - nbytes = 0; - REQUIRE(gcs_.object_size(URI(largefile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_overwrite); -} - -#endif diff --git a/test/src/unit-gs.cc b/test/src/unit-gs.cc deleted file mode 100644 index c1cc1b367daf..000000000000 --- a/test/src/unit-gs.cc +++ /dev/null @@ -1,575 +0,0 @@ -/** - * @file unit-gs.cc - * - * @section LICENSE - * - * The MIT License - * - * @copyright Copyright (c) 2017-2022 TileDB, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * @section DESCRIPTION - * - * Tests for GCS API filesystem functions. (gs extension) - */ - -#ifdef HAVE_GCS - -#include -#include "tiledb/common/thread_pool.h" -#include "tiledb/sm/config/config.h" -#include "tiledb/sm/filesystem/gcs.h" -#include "tiledb/sm/global_state/unit_test_config.h" -#include "tiledb/sm/misc/tdb_math.h" -#include "tiledb/sm/misc/tdb_time.h" -#include "tiledb/sm/misc/utils.h" - -#include - -using namespace tiledb::common; -using namespace tiledb::sm; - -struct GSFx { - const std::string GCS_PREFIX = "gs://"; - const tiledb::sm::URI GCS_BUCKET = - tiledb::sm::URI(GCS_PREFIX + random_bucket_name("tiledb") + "/"); - const std::string TEST_DIR = GCS_BUCKET.to_string() + "tiledb_test_dir/"; - - tiledb::sm::GCS gcs_; - ThreadPool thread_pool_{2}; - - GSFx() = default; - ~GSFx(); - - void init_gcs(Config&& config); - - static std::string random_bucket_name(const std::string& prefix); -}; - -GSFx::~GSFx() { - // Empty bucket - bool is_empty; - REQUIRE(gcs_.is_empty_bucket(GCS_BUCKET, &is_empty).ok()); - if (!is_empty) { - REQUIRE(gcs_.empty_bucket(GCS_BUCKET).ok()); - REQUIRE(gcs_.is_empty_bucket(GCS_BUCKET, &is_empty).ok()); - REQUIRE(is_empty); - } - - // Delete bucket - REQUIRE(gcs_.remove_bucket(GCS_BUCKET).ok()); -} - -void GSFx::init_gcs(Config&& config) { - REQUIRE(config.set("vfs.gcs.project_id", "TODO").ok()); - REQUIRE(gcs_.init(config, &thread_pool_).ok()); - - // Create bucket - bool is_bucket; - REQUIRE(gcs_.is_bucket(GCS_BUCKET, &is_bucket).ok()); - if (is_bucket) { - REQUIRE(gcs_.remove_bucket(GCS_BUCKET).ok()); - } - - REQUIRE(gcs_.is_bucket(GCS_BUCKET, &is_bucket).ok()); - REQUIRE(!is_bucket); - REQUIRE(gcs_.create_bucket(GCS_BUCKET).ok()); - - // Check if 
bucket is empty - bool is_empty; - REQUIRE(gcs_.is_empty_bucket(GCS_BUCKET, &is_empty).ok()); - REQUIRE(is_empty); -} - -std::string GSFx::random_bucket_name(const std::string& prefix) { - std::stringstream ss; - ss << prefix << "-" << std::this_thread::get_id() << "-" - << tiledb::sm::utils::time::timestamp_now_ms(); - return ss.str(); -} - -TEST_CASE_METHOD(GSFx, "Test GS filesystem, file management", "[gs]") { - Config config; - REQUIRE(config.set("vfs.gcs.use_multi_part_upload", "true").ok()); - init_gcs(std::move(config)); - - /* Create the following file hierarchy: - * - * TEST_DIR/dir/subdir/file1 - * TEST_DIR/dir/subdir/file2 - * TEST_DIR/dir/file3 - * TEST_DIR/file4 - * TEST_DIR/file5 - */ - auto dir = TEST_DIR + "dir/"; - auto dir2 = TEST_DIR + "dir2/"; - auto subdir = dir + "subdir/"; - auto file1 = subdir + "file1"; - auto file2 = subdir + "file2"; - auto file3 = dir + "file3"; - auto file4 = TEST_DIR + "file4"; - auto file5 = TEST_DIR + "file5"; - auto file6 = TEST_DIR + "file6"; - - // Check that bucket is empty - bool is_empty; - REQUIRE(gcs_.is_empty_bucket(GCS_BUCKET, &is_empty).ok()); - REQUIRE(is_empty); - - // Continue building the hierarchy - bool is_object = false; - REQUIRE(gcs_.touch(URI(file1)).ok()); - REQUIRE(gcs_.is_object(URI(file1), &is_object).ok()); - REQUIRE(is_object); - REQUIRE(gcs_.touch(URI(file2)).ok()); - REQUIRE(gcs_.is_object(URI(file2), &is_object).ok()); - REQUIRE(is_object); - REQUIRE(gcs_.touch(URI(file3)).ok()); - REQUIRE(gcs_.is_object(URI(file3), &is_object).ok()); - REQUIRE(is_object); - REQUIRE(gcs_.touch(URI(file4)).ok()); - REQUIRE(gcs_.is_object(URI(file4), &is_object).ok()); - REQUIRE(is_object); - REQUIRE(gcs_.touch(URI(file5)).ok()); - REQUIRE(gcs_.is_object(URI(file5), &is_object).ok()); - REQUIRE(is_object); - - // Check that bucket is not empty - REQUIRE(gcs_.is_empty_bucket(GCS_BUCKET, &is_empty).ok()); - REQUIRE(!is_empty); - - // Check invalid file - REQUIRE(gcs_.is_object(URI(TEST_DIR + "foo"), 
&is_object).ok()); - REQUIRE(!is_object); - - // List with prefix - std::vector paths; - REQUIRE(gcs_.ls(URI(TEST_DIR), &paths).ok()); - REQUIRE(paths.size() == 3); - paths.clear(); - REQUIRE(gcs_.ls(URI(dir), &paths).ok()); - REQUIRE(paths.size() == 2); - paths.clear(); - REQUIRE(gcs_.ls(URI(subdir), &paths).ok()); - REQUIRE(paths.size() == 2); - paths.clear(); - REQUIRE(gcs_.ls(GCS_BUCKET, &paths, "").ok()); // No delimiter - REQUIRE(paths.size() == 5); - - // Check if a directory exists - bool is_dir = false; - REQUIRE(gcs_.is_dir(URI(file1), &is_dir).ok()); - REQUIRE(!is_dir); // Not a dir - REQUIRE(gcs_.is_dir(URI(file4), &is_dir).ok()); - REQUIRE(!is_dir); // Not a dir - REQUIRE(gcs_.is_dir(URI(dir), &is_dir).ok()); - REQUIRE(is_dir); // This is viewed as a dir - REQUIRE(gcs_.is_dir(URI(TEST_DIR + "dir"), &is_dir).ok()); - REQUIRE(is_dir); // This is viewed as a dir - - // Move file - REQUIRE(gcs_.move_object(URI(file5), URI(file6)).ok()); - REQUIRE(gcs_.is_object(URI(file5), &is_object).ok()); - REQUIRE(!is_object); - REQUIRE(gcs_.is_object(URI(file6), &is_object).ok()); - REQUIRE(is_object); - paths.clear(); - REQUIRE(gcs_.ls(GCS_BUCKET, &paths, "").ok()); // No delimiter - REQUIRE(paths.size() == 5); - - // Move directory - REQUIRE(gcs_.move_dir(URI(dir), URI(dir2)).ok()); - REQUIRE(gcs_.is_dir(URI(dir), &is_dir).ok()); - REQUIRE(!is_dir); - REQUIRE(gcs_.is_dir(URI(dir2), &is_dir).ok()); - REQUIRE(is_dir); - paths.clear(); - REQUIRE(gcs_.ls(GCS_BUCKET, &paths, "").ok()); // No delimiter - REQUIRE(paths.size() == 5); - - // Remove files - REQUIRE(gcs_.remove_object(URI(file4)).ok()); - REQUIRE(gcs_.is_object(URI(file4), &is_object).ok()); - REQUIRE(!is_object); - - // Remove directories - REQUIRE(gcs_.remove_dir(URI(dir2)).ok()); - REQUIRE(gcs_.is_object(URI(file1), &is_object).ok()); - REQUIRE(!is_object); - REQUIRE(gcs_.is_object(URI(file2), &is_object).ok()); - REQUIRE(!is_object); - REQUIRE(gcs_.is_object(URI(file3), &is_object).ok()); - 
REQUIRE(!is_object); -} - -TEST_CASE_METHOD( - GSFx, - "Test GS filesystem I/O, multipart, serial", - "[gs][multipart][serial]") { - Config config; - const uint64_t max_parallel_ops = 1; - const uint64_t multi_part_size = 4 * 1024 * 1024; - REQUIRE( - config.set("vfs.gcs.max_parallel_ops", std::to_string(max_parallel_ops)) - .ok()); - REQUIRE(config.set("vfs.gcs.use_multi_part_upload", "true").ok()); - REQUIRE(config.set("vfs.gcs.multi_part_size", std::to_string(multi_part_size)) - .ok()); - init_gcs(std::move(config)); - - const uint64_t write_cache_max_size = max_parallel_ops * multi_part_size; - - // Prepare buffers - uint64_t buffer_size_large = write_cache_max_size; - auto write_buffer_large = new char[buffer_size_large]; - for (uint64_t i = 0; i < buffer_size_large; i++) - write_buffer_large[i] = (char)('a' + (i % 26)); - uint64_t buffer_size_small = 1024 * 1024; - auto write_buffer_small = new char[buffer_size_small]; - for (uint64_t i = 0; i < buffer_size_small; i++) - write_buffer_small[i] = (char)('a' + (i % 26)); - - // Write to two files - auto largefile = TEST_DIR + "largefile"; - REQUIRE( - gcs_.write(URI(largefile), write_buffer_large, buffer_size_large).ok()); - REQUIRE( - gcs_.write(URI(largefile), write_buffer_small, buffer_size_small).ok()); - auto smallfile = TEST_DIR + "smallfile"; - REQUIRE( - gcs_.write(URI(smallfile), write_buffer_small, buffer_size_small).ok()); - - // Before flushing, the files do not exist - bool is_object = false; - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(!is_object); - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(!is_object); - - // Flush the files - REQUIRE(gcs_.flush_object(URI(largefile)).ok()); - REQUIRE(gcs_.flush_object(URI(smallfile)).ok()); - - // After flushing, the files exist - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(is_object); - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(is_object); - - // Get file sizes 
- uint64_t nbytes = 0; - REQUIRE(gcs_.object_size(URI(largefile), &nbytes).ok()); - REQUIRE(nbytes == (buffer_size_large + buffer_size_small)); - REQUIRE(gcs_.object_size(URI(smallfile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_small); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - REQUIRE(gcs_.read(URI(largefile), 0, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Read from a different offset - REQUIRE(gcs_.read(URI(largefile), 11, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - REQUIRE(allok); -} - -TEST_CASE_METHOD( - GSFx, "Test GS filesystem I/O, non-multipart", "[gs][non-multipart]") { - Config config; - const uint64_t max_parallel_ops = 1; - const uint64_t write_cache_max_size = 4 * 1024 * 1024; - REQUIRE( - config.set("vfs.gcs.max_parallel_ops", std::to_string(max_parallel_ops)) - .ok()); - REQUIRE(config.set("vfs.gcs.use_multi_part_upload", "false").ok()); - REQUIRE(config - .set( - "vfs.gcs.max_direct_upload_size", - std::to_string(write_cache_max_size)) - .ok()); - init_gcs(std::move(config)); - - // Prepare buffers - uint64_t buffer_size_large = write_cache_max_size; - auto write_buffer_large = new char[buffer_size_large]; - for (uint64_t i = 0; i < buffer_size_large; i++) - write_buffer_large[i] = (char)('a' + (i % 26)); - uint64_t buffer_size_small = 1024 * 1024; - auto write_buffer_small = new char[buffer_size_small]; - for (uint64_t i = 0; i < buffer_size_small; i++) - write_buffer_small[i] = (char)('a' + (i % 26)); - - // Write to two files - auto largefile = TEST_DIR + "largefile"; - REQUIRE( - gcs_.write(URI(largefile), write_buffer_large, 
buffer_size_large).ok()); - REQUIRE( - !gcs_.write(URI(largefile), write_buffer_small, buffer_size_small).ok()); - auto smallfile = TEST_DIR + "smallfile"; - REQUIRE( - gcs_.write(URI(smallfile), write_buffer_small, buffer_size_small).ok()); - - // Before flushing, the file does not exist - bool is_object = false; - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(!is_object); - - // Flush the file - REQUIRE(gcs_.flush_object(URI(smallfile)).ok()); - - // After flushing, the file exists - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(is_object); - - // Get file size - uint64_t nbytes = 0; - REQUIRE(gcs_.object_size(URI(smallfile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_small); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - REQUIRE(gcs_.read(URI(smallfile), 0, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Read from a different offset - REQUIRE(gcs_.read(URI(smallfile), 11, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - REQUIRE(allok); -} - -TEST_CASE_METHOD( - GSFx, - "Test GS filesystem I/O, multipart, concurrent", - "[gs][multipart][concurrent]") { - Config config; - const uint64_t max_parallel_ops = 4; - const uint64_t multi_part_size = 4 * 1024 * 1024; - REQUIRE( - config.set("vfs.gcs.max_parallel_ops", std::to_string(max_parallel_ops)) - .ok()); - REQUIRE(config.set("vfs.gcs.use_multi_part_upload", "true").ok()); - REQUIRE(config.set("vfs.gcs.multi_part_size", std::to_string(multi_part_size)) - .ok()); - init_gcs(std::move(config)); - - const uint64_t write_cache_max_size = max_parallel_ops * multi_part_size; - - // Prepare 
buffers - uint64_t buffer_size_large = write_cache_max_size; - auto write_buffer_large = new char[buffer_size_large]; - for (uint64_t i = 0; i < buffer_size_large; i++) - write_buffer_large[i] = (char)('a' + (i % 26)); - uint64_t buffer_size_small = 1024 * 1024; - auto write_buffer_small = new char[buffer_size_small]; - for (uint64_t i = 0; i < buffer_size_small; i++) - write_buffer_small[i] = (char)('a' + (i % 26)); - - // Write to two files - auto largefile = TEST_DIR + "largefile"; - REQUIRE( - gcs_.write(URI(largefile), write_buffer_large, buffer_size_large).ok()); - REQUIRE( - gcs_.write(URI(largefile), write_buffer_small, buffer_size_small).ok()); - auto smallfile = TEST_DIR + "smallfile"; - REQUIRE( - gcs_.write(URI(smallfile), write_buffer_small, buffer_size_small).ok()); - - // Before flushing, the files do not exist - bool is_object = false; - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(!is_object); - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(!is_object); - - // Flush the files - REQUIRE(gcs_.flush_object(URI(largefile)).ok()); - REQUIRE(gcs_.flush_object(URI(smallfile)).ok()); - - // After flushing, the files exist - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(is_object); - REQUIRE(gcs_.is_object(URI(smallfile), &is_object).ok()); - REQUIRE(is_object); - - // Get file sizes - uint64_t nbytes = 0; - REQUIRE(gcs_.object_size(URI(largefile), &nbytes).ok()); - REQUIRE(nbytes == (buffer_size_large + buffer_size_small)); - REQUIRE(gcs_.object_size(URI(smallfile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_small); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - REQUIRE(gcs_.read(URI(largefile), 0, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Read from a 
different offset - REQUIRE(gcs_.read(URI(largefile), 11, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - REQUIRE(allok); -} - -TEST_CASE_METHOD( - GSFx, - "Test GS filesystem I/O, multipart, composition", - "[gs][multipart][composition]") { - Config config; - const uint64_t max_parallel_ops = 4; - const uint64_t multi_part_size = 4 * 1024; - REQUIRE( - config.set("vfs.gcs.max_parallel_ops", std::to_string(max_parallel_ops)) - .ok()); - REQUIRE(config.set("vfs.gcs.use_multi_part_upload", "true").ok()); - REQUIRE(config.set("vfs.gcs.multi_part_size", std::to_string(multi_part_size)) - .ok()); - init_gcs(std::move(config)); - - const uint64_t write_cache_max_size = max_parallel_ops * multi_part_size; - - // Prepare a buffer that will write 200 (50 * 4 threads) objects. - // The maximum number of objects per composition operation is 32. 
- uint64_t buffer_size_large = 50 * write_cache_max_size; - auto write_buffer_large = new char[buffer_size_large]; - for (uint64_t i = 0; i < buffer_size_large; i++) - write_buffer_large[i] = (char)('a' + (i % 26)); - - // Write to the file - auto largefile = TEST_DIR + "largefile"; - REQUIRE( - gcs_.write(URI(largefile), write_buffer_large, buffer_size_large).ok()); - - // Before flushing, the file does not exist - bool is_object = false; - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(!is_object); - - // Flush the file - REQUIRE(gcs_.flush_object(URI(largefile)).ok()); - - // After flushing, the file exists - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(is_object); - - // Get file size - uint64_t nbytes = 0; - REQUIRE(gcs_.object_size(URI(largefile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_large); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - REQUIRE(gcs_.read(URI(largefile), 0, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Read from a different offset - REQUIRE(gcs_.read(URI(largefile), 11, read_buffer, 26, 0, &bytes_read).ok()); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - REQUIRE(allok); - - // Prepare a buffer that will overwrite the original with a smaller - // size. 
- uint64_t buffer_size_overwrite = 10 * write_cache_max_size; - auto write_buffer_overwrite = new char[buffer_size_overwrite]; - for (uint64_t i = 0; i < buffer_size_overwrite; i++) - write_buffer_overwrite[i] = (char)('a' + (i % 26)); - - // Write to the file - REQUIRE( - gcs_.write(URI(largefile), write_buffer_overwrite, buffer_size_overwrite) - .ok()); - - // Flush the file - REQUIRE(gcs_.flush_object(URI(largefile)).ok()); - - // After flushing, the file exists - REQUIRE(gcs_.is_object(URI(largefile), &is_object).ok()); - REQUIRE(is_object); - - // Get file size - nbytes = 0; - REQUIRE(gcs_.object_size(URI(largefile), &nbytes).ok()); - REQUIRE(nbytes == buffer_size_overwrite); -} - -#endif diff --git a/test/src/unit-hdfs-filesystem.cc b/test/src/unit-hdfs-filesystem.cc deleted file mode 100644 index 4715ccb098f3..000000000000 --- a/test/src/unit-hdfs-filesystem.cc +++ /dev/null @@ -1,139 +0,0 @@ -/** - * @file hdfs-unit-filesystem.cc - * - * @section LICENSE - * - * The MIT License - * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * @section DESCRIPTION - * - * Tests for HDFS API filesystem functions. - */ - -#ifdef HAVE_HDFS - -#include -#include "tiledb/common/filesystem/directory_entry.h" -#include "tiledb/sm/config/config.h" -#include "tiledb/sm/filesystem/hdfs_filesystem.h" -#include "tiledb/sm/filesystem/uri.h" - -#include -#include - -using namespace tiledb::common; -using namespace tiledb::sm; -using namespace tiledb::sm::hdfs; - -TEST_CASE("Test HDFS filesystem", "[hdfs]") { - Config config; - HDFS hdfs; - - Status st = hdfs.init(config); - REQUIRE(st.ok()); - - bool is_dir; - st = hdfs.is_dir(URI("hdfs:///tiledb_test"), &is_dir); - CHECK(st.ok()); - if (is_dir) { - st = hdfs.remove_dir(URI("hdfs:///tiledb_test")); - CHECK(st.ok()); - } - - st = hdfs.create_dir(URI("hdfs:///tiledb_test")); - CHECK(st.ok()); - - CHECK(hdfs.is_dir(URI("hdfs:///tiledb_test"), &is_dir).ok()); - CHECK(is_dir); - - st = hdfs.create_dir(URI("hdfs:///tiledb_test")); - CHECK(!st.ok()); - - st = hdfs.touch(URI("hdfs:///tiledb_test_file")); - CHECK(st.ok()); - - bool is_file; - CHECK(hdfs.is_file(URI("hdfs:///tiledb_test_file"), &is_file).ok()); - CHECK(is_file); - - st = hdfs.remove_file(URI("hdfs:///tiledb_test_file")); - CHECK(st.ok()); - - st = hdfs.touch(URI("hdfs:///tiledb_test/tiledb_test_file")); - CHECK(st.ok()); - - uint64_t buffer_size = 100000; - auto write_buffer = new char[buffer_size]; - for (uint64_t i = 0; i < buffer_size; i++) { - write_buffer[i] = 'a' + (i % 26); - } - st = hdfs.write( - URI("hdfs:///tiledb_test/tiledb_test_file"), write_buffer, buffer_size); - CHECK(st.ok()); - - auto read_buffer = new char[26]; - st = hdfs.read( - URI("hdfs:///tiledb_test/tiledb_test_file"), 0, read_buffer, 26); - CHECK(st.ok()); 
- - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - CHECK(allok); - - st = hdfs.read( - URI("hdfs:///tiledb_test/tiledb_test_file"), 11, read_buffer, 26); - CHECK(st.ok()); - - allok = true; - for (int i = 0; i < 26; ++i) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - CHECK(allok); - - uint64_t nbytes = 0; - st = hdfs.file_size(URI("hdfs:///tiledb_test/tiledb_test_file"), &nbytes); - CHECK(st.ok()); - CHECK(nbytes == buffer_size); - - st = hdfs.remove_file(URI("hdfs:///tiledb_test/i_dont_exist")); - CHECK(!st.ok()); - - st = hdfs.remove_file(URI("hdfs:///tiledb_test/tiledb_test_file")); - CHECK(st.ok()); - - st = hdfs.remove_dir(URI("hdfs:///tiledb_test")); - CHECK(st.ok()); - - st = hdfs.disconnect(); - CHECK(st.ok()); -} - -#endif diff --git a/test/src/unit-query-plan.cc b/test/src/unit-query-plan.cc new file mode 100644 index 000000000000..f3fe5ec7fe02 --- /dev/null +++ b/test/src/unit-query-plan.cc @@ -0,0 +1,421 @@ +/** + * @file unit-query-plan.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2023 TileDB Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * Fuctional test for Query Plan locally and via REST. + */ + +#include "external/include/nlohmann/json.hpp" +#include "test/support/src/vfs_helpers.h" +#include "test/support/tdb_catch.h" +#include "tiledb/sm/c_api/tiledb_struct_def.h" +#include "tiledb/sm/cpp_api/tiledb" +#include "tiledb/sm/cpp_api/tiledb_experimental" + +using namespace tiledb; + +#ifndef TILEDB_TESTS_ENABLE_REST +constexpr bool rest_tests = false; +#else +constexpr bool rest_tests = true; +#endif + +struct QueryPlanFx { + QueryPlanFx(); + ~QueryPlanFx(); + + void create_dense_array(const std::string& array_name); + void create_sparse_array(const std::string& array_name); + + // Vector of supported filsystems + tiledb_ctx_handle_t* ctx_c_{nullptr}; + tiledb_vfs_handle_t* vfs_c_{nullptr}; + const std::vector> fs_vec_; + + std::string temp_dir_; + std::string abs_uri_; + std::string uri_; + Context ctx_; +}; + +TEST_CASE_METHOD( + QueryPlanFx, + "C API: tiledb_query_get_plan API lifecycle checks", + "[query_plan][lifecycle][rest]") { + create_dense_array("queryplan_array_lifecycle"); + + tiledb_array_t* array; + REQUIRE(tiledb_array_alloc(ctx_c_, uri_.c_str(), &array) == TILEDB_OK); + REQUIRE(tiledb_array_open(ctx_c_, array, TILEDB_READ) == TILEDB_OK); + + tiledb_query_t* query; + REQUIRE(tiledb_query_alloc(ctx_c_, array, TILEDB_READ, &query) == TILEDB_OK); + + CHECK(tiledb_query_set_layout(ctx_c_, query, TILEDB_ROW_MAJOR) == TILEDB_OK); + + int64_t dom[] = {1, 2, 1, 2}; + 
CHECK(tiledb_query_set_subarray(ctx_c_, query, &dom) == TILEDB_OK); + + std::vector d(4); + uint64_t size = 1; + CHECK( + tiledb_query_set_data_buffer(ctx_c_, query, "a1", d.data(), &size) == + TILEDB_OK); + + tiledb_string_handle_t* string_handle; + CHECK(tiledb_query_get_plan(ctx_c_, query, &string_handle) == TILEDB_OK); + + // API lifecycle checks + // It's not possible to set subarrays, layout, query condition or new buffers + // once the query plan got generated. + CHECK(tiledb_query_set_subarray(ctx_c_, query, &dom) == TILEDB_ERR); + CHECK(tiledb_query_set_layout(ctx_c_, query, TILEDB_COL_MAJOR) == TILEDB_ERR); + tiledb_query_condition_t* qc; + CHECK(tiledb_query_condition_alloc(ctx_c_, &qc) == TILEDB_OK); + int32_t val = 10000; + CHECK( + tiledb_query_condition_init( + ctx_c_, qc, "a1", &val, sizeof(int32_t), TILEDB_LT) == TILEDB_OK); + CHECK(tiledb_query_set_condition(ctx_c_, query, qc) == TILEDB_ERR); + CHECK( + tiledb_query_set_data_buffer(ctx_c_, query, "a2", d.data(), &size) == + TILEDB_ERR); + + // But it's possible to set existing buffers to accomodate existing + // query INCOMPLETEs functionality + CHECK( + tiledb_query_set_data_buffer(ctx_c_, query, "a1", d.data(), &size) == + TILEDB_OK); + + REQUIRE(tiledb_string_free(&string_handle) == TILEDB_OK); + REQUIRE(tiledb_array_close(ctx_c_, array) == TILEDB_OK); + tiledb_query_free(&query); + tiledb_array_free(&array); +} + +TEST_CASE_METHOD( + QueryPlanFx, + "C API: Query plan basic bahaviour", + "[query_plan][read][rest]") { + create_dense_array("queryplan_array_read"); + + tiledb_array_t* array; + REQUIRE(tiledb_array_alloc(ctx_c_, uri_.c_str(), &array) == TILEDB_OK); + REQUIRE(tiledb_array_open(ctx_c_, array, TILEDB_READ) == TILEDB_OK); + + tiledb_query_t* query; + REQUIRE(tiledb_query_alloc(ctx_c_, array, TILEDB_READ, &query) == TILEDB_OK); + + CHECK(tiledb_query_set_layout(ctx_c_, query, TILEDB_ROW_MAJOR) == TILEDB_OK); + + int64_t dom[] = {1, 2, 1, 2}; + CHECK(tiledb_query_set_subarray(ctx_c_, 
query, &dom) == TILEDB_OK); + + std::vector d(4); + uint64_t size = 1; + CHECK( + tiledb_query_set_data_buffer(ctx_c_, query, "a1", d.data(), &size) == + TILEDB_OK); + CHECK( + tiledb_query_set_data_buffer(ctx_c_, query, "a2", d.data(), &size) == + TILEDB_OK); + + tiledb_string_handle_t* string_handle; + const char* data; + size_t len; + CHECK(tiledb_query_get_plan(ctx_c_, query, &string_handle) == TILEDB_OK); + CHECK(tiledb_string_view(string_handle, &data, &len) == TILEDB_OK); + + // This throws if the query plan is not valid JSON + std::string str_plan(data, len); + nlohmann::json json_plan = nlohmann::json::parse(str_plan); + std::string array_uri_from_json = json_plan["TileDB Query Plan"]["Array.URI"]; + + CHECK( + json_plan["TileDB Query Plan"]["Array.URI"] == + tiledb::sm::URI(uri_, true).to_string()); + ; + CHECK(json_plan["TileDB Query Plan"]["Array.Type"] == "dense"); + if (!array_uri_from_json.starts_with("tiledb://")) { + CHECK( + json_plan["TileDB Query Plan"]["VFS.Backend"] == + tiledb::sm::URI(uri_).backend_name()); + } + CHECK(json_plan["TileDB Query Plan"]["Query.Layout"] == "row-major"); + CHECK(json_plan["TileDB Query Plan"]["Query.Strategy.Name"] == "DenseReader"); + CHECK( + json_plan["TileDB Query Plan"]["Query.Attributes"] == + std::vector({"a1", "a2"})); + CHECK( + json_plan["TileDB Query Plan"]["Query.Dimensions"] == + std::vector({"dim_1", "dim_2"})); + + REQUIRE(tiledb_string_free(&string_handle) == TILEDB_OK); + REQUIRE(tiledb_array_close(ctx_c_, array) == TILEDB_OK); + tiledb_query_free(&query); + tiledb_array_free(&array); +} + +TEST_CASE_METHOD( + QueryPlanFx, "C API: Query plan write query", "[query_plan][write][rest]") { + create_sparse_array("queryplan_array_write"); + + tiledb_array_t* array; + REQUIRE(tiledb_array_alloc(ctx_c_, uri_.c_str(), &array) == TILEDB_OK); + REQUIRE(tiledb_array_open(ctx_c_, array, TILEDB_WRITE) == TILEDB_OK); + + tiledb_query_t* query; + REQUIRE(tiledb_query_alloc(ctx_c_, array, TILEDB_WRITE, &query) == 
TILEDB_OK); + + CHECK( + tiledb_query_set_layout(ctx_c_, query, TILEDB_GLOBAL_ORDER) == TILEDB_OK); + + std::vector coords = {1, 2, 3}; + uint64_t coords_size = coords.size() * sizeof(uint64_t); + std::vector a = {1, 2, 3}; + uint64_t a_size = a.size() * sizeof(int); + std::vector b = {1, 2, 3}; + uint64_t b_size = b.size() * sizeof(int); + + CHECK( + tiledb_query_set_data_buffer( + ctx_c_, query, "a", (void*)a.data(), &a_size) == TILEDB_OK); + CHECK( + tiledb_query_set_data_buffer( + ctx_c_, query, "b", (void*)b.data(), &b_size) == TILEDB_OK); + CHECK( + tiledb_query_set_data_buffer( + ctx_c_, + query, + tiledb::test::TILEDB_COORDS, + (void*)coords.data(), + &coords_size) == TILEDB_OK); + + tiledb_string_handle_t* string_handle; + const char* data; + size_t len; + CHECK(tiledb_query_get_plan(ctx_c_, query, &string_handle) == TILEDB_OK); + CHECK(tiledb_string_view(string_handle, &data, &len) == TILEDB_OK); + + // This throws if the query plan is not valid JSON + std::string str_plan(data, len); + nlohmann::json json_plan = nlohmann::json::parse(str_plan); + std::string array_uri_from_json = json_plan["TileDB Query Plan"]["Array.URI"]; + + CHECK( + json_plan["TileDB Query Plan"]["Array.URI"] == + tiledb::sm::URI(uri_, true).to_string()); + CHECK(json_plan["TileDB Query Plan"]["Array.Type"] == "sparse"); + if (!array_uri_from_json.starts_with("tiledb://")) { + CHECK( + json_plan["TileDB Query Plan"]["VFS.Backend"] == + tiledb::sm::URI(uri_).backend_name()); + } + CHECK(json_plan["TileDB Query Plan"]["Query.Layout"] == "global-order"); + CHECK( + json_plan["TileDB Query Plan"]["Query.Strategy.Name"] == + "GlobalOrderWriter"); + CHECK( + json_plan["TileDB Query Plan"]["Query.Attributes"] == + std::vector({"__coords", "a", "b"})); + CHECK( + json_plan["TileDB Query Plan"]["Query.Dimensions"] == + std::vector()); + + REQUIRE(tiledb_string_free(&string_handle) == TILEDB_OK); + REQUIRE(tiledb_array_close(ctx_c_, array) == TILEDB_OK); + tiledb_query_free(&query); + 
tiledb_array_free(&array); +} + +QueryPlanFx::QueryPlanFx() + : fs_vec_(test::vfs_test_get_fs_vec()) { + auto rc = test::vfs_test_init(fs_vec_, &ctx_c_, &vfs_c_); + if (!rc.ok()) { + throw std::runtime_error("Error initializing vfs in test set up."); + } + + ctx_ = Context(ctx_c_); + temp_dir_ = fs_vec_[0]->temp_dir(); + test::vfs_test_create_temp_dir(ctx_c_, vfs_c_, temp_dir_); +} + +QueryPlanFx::~QueryPlanFx() { + test::vfs_test_remove_temp_dir(ctx_c_, vfs_c_, temp_dir_); + test::vfs_test_close(fs_vec_, ctx_c_, vfs_c_).ok(); + tiledb_vfs_free(&vfs_c_); +} + +void QueryPlanFx::create_dense_array(const std::string& array_name) { + if constexpr (rest_tests) { + uri_ = "tiledb://unit/"; + } + + abs_uri_ = temp_dir_ + "/" + array_name; + uri_ += abs_uri_; + + // Create array schema + tiledb_array_schema_t* array_schema; + int rc = tiledb_array_schema_alloc(ctx_c_, TILEDB_DENSE, &array_schema); + REQUIRE(rc == TILEDB_OK); + + // Set schema members + rc = tiledb_array_schema_set_capacity(ctx_c_, array_schema, 10000); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_schema_set_cell_order( + ctx_c_, array_schema, TILEDB_ROW_MAJOR); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_schema_set_tile_order( + ctx_c_, array_schema, TILEDB_ROW_MAJOR); + REQUIRE(rc == TILEDB_OK); + + // Create dimensions + tiledb_dimension_t* d1; + uint64_t dim_domain[] = {1, 10, 1, 10}; + uint64_t extents[] = {5, 5}; + rc = tiledb_dimension_alloc( + ctx_c_, "dim_1", TILEDB_INT64, &dim_domain[0], &extents[0], &d1); + REQUIRE(rc == TILEDB_OK); + tiledb_dimension_t* d2; + rc = tiledb_dimension_alloc( + ctx_c_, "dim_2", TILEDB_INT64, &dim_domain[2], &extents[1], &d2); + REQUIRE(rc == TILEDB_OK); + + // Set domain + tiledb_domain_t* domain; + rc = tiledb_domain_alloc(ctx_c_, &domain); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_domain_add_dimension(ctx_c_, domain, d1); + REQUIRE(rc == TILEDB_OK); + tiledb_datatype_t domain_type; + rc = tiledb_domain_get_type(ctx_c_, domain, &domain_type); + REQUIRE(rc == 
TILEDB_OK); + REQUIRE(domain_type == TILEDB_INT64); + rc = tiledb_domain_add_dimension(ctx_c_, domain, d2); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_schema_set_domain(ctx_c_, array_schema, domain); + REQUIRE(rc == TILEDB_OK); + + // Add attributes + tiledb_attribute_t* a1; + rc = tiledb_attribute_alloc(ctx_c_, "a1", TILEDB_INT32, &a1); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_schema_add_attribute(ctx_c_, array_schema, a1); + REQUIRE(rc == TILEDB_OK); + tiledb_attribute_t* a2; + rc = tiledb_attribute_alloc(ctx_c_, "a2", TILEDB_INT32, &a2); + REQUIRE(rc == TILEDB_OK); + rc = tiledb_array_schema_add_attribute(ctx_c_, array_schema, a2); + REQUIRE(rc == TILEDB_OK); + + // Create array + rc = tiledb_array_create(ctx_c_, uri_.c_str(), array_schema); + REQUIRE(rc == TILEDB_OK); + + // Clean up + tiledb_array_schema_free(&array_schema); + tiledb_attribute_free(&a1); + tiledb_attribute_free(&a2); + tiledb_dimension_free(&d1); + tiledb_dimension_free(&d2); + tiledb_domain_free(&domain); +} + +void QueryPlanFx::create_sparse_array(const std::string& array_name) { + if constexpr (rest_tests) { + uri_ = "tiledb://unit/"; + } + + abs_uri_ = temp_dir_ + "/" + array_name; + uri_ += abs_uri_; + + // Create dimensions + uint64_t tile_extents[] = {2, 2}; + uint64_t dim_domain[] = {1, 10, 1, 10}; + + tiledb_dimension_t* d1; + int rc = tiledb_dimension_alloc( + ctx_c_, "d1", TILEDB_UINT64, &dim_domain[0], &tile_extents[0], &d1); + CHECK(rc == TILEDB_OK); + tiledb_dimension_t* d2; + rc = tiledb_dimension_alloc( + ctx_c_, "d2", TILEDB_UINT64, &dim_domain[2], &tile_extents[1], &d2); + CHECK(rc == TILEDB_OK); + + // Create domain + tiledb_domain_t* domain; + rc = tiledb_domain_alloc(ctx_c_, &domain); + CHECK(rc == TILEDB_OK); + rc = tiledb_domain_add_dimension(ctx_c_, domain, d1); + CHECK(rc == TILEDB_OK); + rc = tiledb_domain_add_dimension(ctx_c_, domain, d2); + CHECK(rc == TILEDB_OK); + + // Create attributes + tiledb_attribute_t* a; + rc = tiledb_attribute_alloc(ctx_c_, 
"a", TILEDB_INT32, &a); + CHECK(rc == TILEDB_OK); + tiledb_attribute_t* b; + rc = tiledb_attribute_alloc(ctx_c_, "b", TILEDB_INT32, &b); + CHECK(rc == TILEDB_OK); + + // Create array schema + tiledb_array_schema_t* array_schema; + rc = tiledb_array_schema_alloc(ctx_c_, TILEDB_SPARSE, &array_schema); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_schema_set_cell_order( + ctx_c_, array_schema, TILEDB_ROW_MAJOR); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_schema_set_tile_order( + ctx_c_, array_schema, TILEDB_ROW_MAJOR); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_schema_set_capacity(ctx_c_, array_schema, 4); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_schema_set_domain(ctx_c_, array_schema, domain); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_schema_add_attribute(ctx_c_, array_schema, a); + CHECK(rc == TILEDB_OK); + rc = tiledb_array_schema_add_attribute(ctx_c_, array_schema, b); + CHECK(rc == TILEDB_OK); + + // Check array schema + rc = tiledb_array_schema_check(ctx_c_, array_schema); + CHECK(rc == TILEDB_OK); + + // Create array + rc = tiledb_array_create(ctx_c_, uri_.c_str(), array_schema); + CHECK(rc == TILEDB_OK); + + // Clean up + tiledb_attribute_free(&a); + tiledb_attribute_free(&b); + tiledb_dimension_free(&d1); + tiledb_dimension_free(&d2); + tiledb_domain_free(&domain); + tiledb_array_schema_free(&array_schema); +} diff --git a/test/src/unit-request-handlers.cc b/test/src/unit-request-handlers.cc index f3378026adc3..8585cf1a9a61 100644 --- a/test/src/unit-request-handlers.cc +++ b/test/src/unit-request-handlers.cc @@ -32,8 +32,10 @@ #ifdef TILEDB_SERIALIZATION +#include "test/support/src/mem_helpers.h" #include "test/support/tdb_catch.h" #include "tiledb/api/c_api/buffer/buffer_api_internal.h" +#include "tiledb/api/c_api/context/context_api_internal.h" #include "tiledb/api/c_api/string/string_api_internal.h" #include "tiledb/sm/array_schema/enumeration.h" #include "tiledb/sm/c_api/tiledb_serialization.h" @@ -60,6 +62,7 @@ struct RequestHandlerFx { 
shared_ptr get_array(QueryType type); + shared_ptr memory_tracker_; URI uri_; Config cfg_; Context ctx_; @@ -72,7 +75,7 @@ struct HandleLoadArraySchemaRequestFx : RequestHandlerFx { } virtual shared_ptr create_schema() override; - ArraySchema call_handler( + shared_ptr call_handler( serialization::LoadArraySchemaRequest req, SerializationType stype); shared_ptr create_string_enumeration( @@ -94,11 +97,13 @@ struct HandleConsolidationPlanRequestFx : RequestHandlerFx { } virtual shared_ptr create_schema() override { - auto schema = make_shared(HERE(), ArrayType::SPARSE); - auto dim = make_shared(HERE(), "dim1", Datatype::INT32); + auto schema = + make_shared(HERE(), ArrayType::SPARSE, memory_tracker_); + auto dim = make_shared( + HERE(), "dim1", Datatype::INT32, memory_tracker_); int range[2] = {0, 1000}; throw_if_not_ok(dim->set_domain(range)); - auto dom = make_shared(HERE()); + auto dom = make_shared(HERE(), memory_tracker_); throw_if_not_ok(dom->add_dimension(dim)); throw_if_not_ok(schema->set_domain(dom)); return schema; @@ -118,8 +123,8 @@ TEST_CASE_METHOD( create_array(); auto schema = call_handler(serialization::LoadArraySchemaRequest(false), stype); - REQUIRE(schema.has_enumeration("enmr")); - REQUIRE(schema.get_loaded_enumeration_names().size() == 0); + REQUIRE(schema->has_enumeration("enmr")); + REQUIRE(schema->get_loaded_enumeration_names().size() == 0); } TEST_CASE_METHOD( @@ -131,10 +136,10 @@ TEST_CASE_METHOD( create_array(); auto schema = call_handler(serialization::LoadArraySchemaRequest(true), stype); - REQUIRE(schema.has_enumeration("enmr")); - REQUIRE(schema.get_loaded_enumeration_names().size() == 1); - REQUIRE(schema.get_loaded_enumeration_names()[0] == "enmr"); - REQUIRE(schema.get_enumeration("enmr") != nullptr); + REQUIRE(schema->has_enumeration("enmr")); + REQUIRE(schema->get_loaded_enumeration_names().size() == 1); + REQUIRE(schema->get_loaded_enumeration_names()[0] == "enmr"); + REQUIRE(schema->get_enumeration("enmr") != nullptr); } 
TEST_CASE_METHOD( @@ -333,7 +338,8 @@ TEST_CASE_METHOD( /* ********************************* */ RequestHandlerFx::RequestHandlerFx(const std::string uri) - : uri_(uri) + : memory_tracker_(tiledb::test::create_test_memory_tracker()) + , uri_(uri) , ctx_(cfg_) { delete_array(); throw_if_not_ok(enc_key_.set_key(EncryptionType::NO_ENCRYPTION, nullptr, 0)); @@ -389,17 +395,20 @@ HandleLoadArraySchemaRequestFx::create_string_enumeration( data.data(), total_size, offsets.data(), - offsets.size() * sizeof(uint64_t)); + offsets.size() * sizeof(uint64_t), + tiledb::test::create_test_memory_tracker()); } shared_ptr HandleLoadArraySchemaRequestFx::create_schema() { // Create a schema to serialize - auto schema = make_shared(HERE(), ArrayType::SPARSE); - auto dim = make_shared(HERE(), "dim1", Datatype::INT32); + auto schema = + make_shared(HERE(), ArrayType::SPARSE, memory_tracker_); + auto dim = + make_shared(HERE(), "dim1", Datatype::INT32, memory_tracker_); int range[2] = {0, 1000}; throw_if_not_ok(dim->set_domain(range)); - auto dom = make_shared(HERE()); + auto dom = make_shared(HERE(), memory_tracker_); throw_if_not_ok(dom->add_dimension(dim)); throw_if_not_ok(schema->set_domain(dom)); @@ -414,7 +423,7 @@ shared_ptr HandleLoadArraySchemaRequestFx::create_schema() { return schema; } -ArraySchema HandleLoadArraySchemaRequestFx::call_handler( +shared_ptr HandleLoadArraySchemaRequestFx::call_handler( serialization::LoadArraySchemaRequest req, SerializationType stype) { // If this looks weird, its because we're using the public C++ API to create // these objets instead of the internal APIs elsewhere in this test suite. 
@@ -436,22 +445,25 @@ ArraySchema HandleLoadArraySchemaRequestFx::call_handler( REQUIRE(rval == TILEDB_OK); return serialization::deserialize_load_array_schema_response( - stype, resp_buf->buffer()); + stype, resp_buf->buffer(), memory_tracker_); } shared_ptr HandleQueryPlanRequestFx::create_schema() { - auto schema = make_shared(HERE(), ArrayType::DENSE); + auto schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker_); schema->set_capacity(10000); throw_if_not_ok(schema->set_cell_order(Layout::ROW_MAJOR)); throw_if_not_ok(schema->set_tile_order(Layout::ROW_MAJOR)); uint32_t dim_domain[] = {1, 10, 1, 10}; - auto dim1 = make_shared(HERE(), "dim1", Datatype::INT32); + auto dim1 = make_shared( + HERE(), "dim1", Datatype::INT32, tiledb::test::get_test_memory_tracker()); throw_if_not_ok(dim1->set_domain(&dim_domain[0])); - auto dim2 = make_shared(HERE(), "dim2", Datatype::INT32); + auto dim2 = make_shared( + HERE(), "dim2", Datatype::INT32, tiledb::test::get_test_memory_tracker()); throw_if_not_ok(dim2->set_domain(&dim_domain[2])); - auto dom = make_shared(HERE()); + auto dom = make_shared(HERE(), memory_tracker_); throw_if_not_ok(dom->add_dimension(dim1)); throw_if_not_ok(dom->add_dimension(dim2)); throw_if_not_ok(schema->set_domain(dom)); diff --git a/test/src/unit-result-coords.cc b/test/src/unit-result-coords.cc index 8e0044401b74..b4cbe94daba7 100644 --- a/test/src/unit-result-coords.cc +++ b/test/src/unit-result-coords.cc @@ -29,6 +29,8 @@ * * Tests for the ResultCoords classes. 
*/ + +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/c_api/tiledb.h" #include "tiledb/sm/c_api/tiledb_struct_def.h" @@ -54,7 +56,7 @@ struct CResultCoordsFx { std::string array_name_; const char* ARRAY_NAME = "test_result_coords"; tiledb_array_t* array_; - std::unique_ptr frag_md_; + std::shared_ptr frag_md_; CResultCoordsFx(uint64_t num_cells); ~CResultCoordsFx(); @@ -104,13 +106,14 @@ CResultCoordsFx::CResultCoordsFx(uint64_t num_cells) { rc = tiledb_array_open(ctx_, array_, TILEDB_READ); REQUIRE(rc == TILEDB_OK); - frag_md_.reset(new FragmentMetadata( - nullptr, + frag_md_ = make_shared( + HERE(), nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), - true)); + tiledb::test::create_test_memory_tracker(), + true); } CResultCoordsFx::~CResultCoordsFx() { @@ -164,24 +167,30 @@ TEST_CASE_METHOD( CResultCoordsFxSmall, "GlobalOrderResultCoords: test max_slab_length", "[globalorderresultcoords][max_slab_length]") { - GlobalOrderResultTile tile(0, 0, false, false, *fx_.frag_md_); + GlobalOrderResultTile tile( + 0, + 0, + false, + false, + *fx_.frag_md_, + tiledb::test::get_test_memory_tracker()); // Test max_slab_length with no bitmap. GlobalOrderResultCoords rc1(&tile, 1); REQUIRE(rc1.max_slab_length() == 4); // Test max_slab_length with bitmap 1. - tile.bitmap() = {0, 1, 1, 1, 1}; + tile.bitmap().assign({0, 1, 1, 1, 1}); tile.count_cells(); REQUIRE(rc1.max_slab_length() == 4); // Test max_slab_length with bitmap 2. - tile.bitmap() = {0, 1, 1, 1, 0}; + tile.bitmap().assign({0, 1, 1, 1, 0}); tile.count_cells(); REQUIRE(rc1.max_slab_length() == 3); // Test max_slab_length with bitmap 3. 
- tile.bitmap() = {0, 1, 1, 1, 0}; + tile.bitmap().assign({0, 1, 1, 1, 0}); tile.count_cells(); rc1.pos_ = 0; REQUIRE(rc1.max_slab_length() == 0); @@ -191,7 +200,13 @@ TEST_CASE_METHOD( CResultCoordsFxSmall, "GlobalOrderResultCoords: test max_slab_length with comparator", "[globalorderresultcoords][max_slab_length_with_comp]") { - GlobalOrderResultTile tile(0, 0, false, false, *fx_.frag_md_); + GlobalOrderResultTile tile( + 0, + 0, + false, + false, + *fx_.frag_md_, + tiledb::test::get_test_memory_tracker()); Cmp cmp; // Test max_slab_length with no bitmap and comparator. @@ -199,19 +214,19 @@ TEST_CASE_METHOD( REQUIRE(rc1.max_slab_length(GlobalOrderResultCoords(&tile, 3), cmp) == 2); // Test max_slab_length with bitmap and comparator 1. - tile.bitmap() = {0, 1, 1, 1, 1}; + tile.bitmap().assign({0, 1, 1, 1, 1}); tile.count_cells(); REQUIRE(rc1.max_slab_length(GlobalOrderResultCoords(&tile, 10), cmp) == 4); REQUIRE(rc1.max_slab_length(GlobalOrderResultCoords(&tile, 3), cmp) == 2); // Test max_slab_length with bitmap and comparator 2. - tile.bitmap() = {0, 1, 1, 1, 0}; + tile.bitmap().assign({0, 1, 1, 1, 0}); tile.count_cells(); REQUIRE(rc1.max_slab_length(GlobalOrderResultCoords(&tile, 10), cmp) == 3); REQUIRE(rc1.max_slab_length(GlobalOrderResultCoords(&tile, 3), cmp) == 2); // Test max_slab_length with bitmap and comparator 3. 
- tile.bitmap() = {0, 1, 1, 1, 0}; + tile.bitmap().assign({0, 1, 1, 1, 0}); tile.count_cells(); rc1.pos_ = 0; REQUIRE(rc1.max_slab_length(GlobalOrderResultCoords(&tile, 3), cmp) == 0); @@ -221,7 +236,13 @@ TEST_CASE_METHOD( CResultCoordsFxLarge, "GlobalOrderResultCoords: test max_slab_length with comparator, large tile", "[globalorderresultcoords][max_slab_length_with_comp]") { - GlobalOrderResultTile tile(0, 0, false, false, *fx_.frag_md_); + GlobalOrderResultTile tile( + 0, + 0, + false, + false, + *fx_.frag_md_, + tiledb::test::get_test_memory_tracker()); Cmp cmp; GlobalOrderResultCoords rc1(&tile, 1); @@ -238,11 +259,17 @@ TEST_CASE_METHOD( CResultCoordsFxSmall, "GlobalOrderResultCoords: advance_to_next_cell", "[globalorderresultcoords][advance_to_next_cell]") { - GlobalOrderResultTile tile(0, 0, false, false, *fx_.frag_md_); + GlobalOrderResultTile tile( + 0, + 0, + false, + false, + *fx_.frag_md_, + tiledb::test::get_test_memory_tracker()); Cmp cmp; GlobalOrderResultCoords rc1(&tile, 0); - tile.bitmap() = {0, 1, 1, 0, 1}; + tile.bitmap().assign({0, 1, 1, 0, 1}); tile.count_cells(); REQUIRE(rc1.advance_to_next_cell() == true); REQUIRE(rc1.pos_ == 1); @@ -254,7 +281,7 @@ TEST_CASE_METHOD( // Recreate to test that we don't move pos_ on the first call. GlobalOrderResultCoords rc2(&tile, 0); - tile.bitmap() = {1, 1, 1, 0, 0}; + tile.bitmap().assign({1, 1, 1, 0, 0}); tile.count_cells(); REQUIRE(rc2.advance_to_next_cell() == true); REQUIRE(rc2.pos_ == 0); diff --git a/test/src/unit-result-tile.cc b/test/src/unit-result-tile.cc index 77677e6a1d3c..046cb1c8e7af 100644 --- a/test/src/unit-result-tile.cc +++ b/test/src/unit-result-tile.cc @@ -29,6 +29,8 @@ * * Tests for the ResultTile classes. 
*/ + +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/c_api/tiledb.h" #include "tiledb/sm/c_api/tiledb_struct_def.h" #include "tiledb/sm/misc/types.h" @@ -59,13 +61,15 @@ struct CResultTileFx { std::string array_name_; const char* ARRAY_NAME = "test_result_coords"; tiledb_array_t* array_; - std::unique_ptr frag_md_; + std::shared_ptr frag_md_; + shared_ptr memory_tracker_; CResultTileFx(); ~CResultTileFx(); }; -CResultTileFx::CResultTileFx() { +CResultTileFx::CResultTileFx() + : memory_tracker_(tiledb::test::get_test_memory_tracker()) { tiledb_config_t* config; tiledb_error_t* error = nullptr; REQUIRE(tiledb_config_alloc(&config, &error) == TILEDB_OK); @@ -107,13 +111,17 @@ CResultTileFx::CResultTileFx() { rc = tiledb_array_open(ctx_, array_, TILEDB_READ); REQUIRE(rc == TILEDB_OK); - frag_md_.reset(new FragmentMetadata( - nullptr, + // Create test memory tracker. + memory_tracker_ = tiledb::test::create_test_memory_tracker(); + + frag_md_ = make_shared( + HERE(), nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), - false)); + memory_tracker_, + false); } CResultTileFx::~CResultTileFx() { @@ -159,7 +167,8 @@ TEST_CASE_METHOD( REQUIRE(rc == TILEDB_OK); tiledb_domain_free(&domain); - UnorderedWithDupsResultTile tile(0, 0, *frag_md_); + UnorderedWithDupsResultTile tile( + 0, 0, *frag_md_, tiledb::test::get_test_memory_tracker()); // Check the function with an empty bitmap. CHECK(tile.result_num_between_pos(2, 10) == 8); @@ -187,13 +196,13 @@ TEST_CASE_METHOD( auto& array_schema = array_->array_->array_schema_latest(); FragmentMetadata frag_md( - nullptr, nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + memory_tracker_, true); - ResultTile rt(0, 0, frag_md); + ResultTile rt(0, 0, frag_md, tiledb::test::get_test_memory_tracker()); // Make sure cell_num() will return the correct value. 
if (!first_dim) { @@ -269,10 +278,13 @@ TEST_CASE_METHOD( exp_result_count = {0, 1, 1, 1, 1, 1, 1, 0}; } - std::vector range_indexes(ranges.size()); + tdb::pmr::vector range_indexes( + ranges.size(), memory_tracker_->get_resource(MemoryType::DIMENSIONS)); std::iota(range_indexes.begin(), range_indexes.end(), 0); - std::vector result_count(num_cells, 1); + auto resource = tiledb::test::get_test_memory_tracker()->get_resource( + MemoryType::TILE_BITMAP); + tdb::pmr::vector result_count(num_cells, 1, resource); ResultTile::compute_results_count_sparse_string( &rt, dim_idx, @@ -297,13 +309,13 @@ TEST_CASE_METHOD( auto& array_schema = array_->array_->array_schema_latest(); FragmentMetadata frag_md( - nullptr, nullptr, array_->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + memory_tracker_, true); - ResultTile rt(0, 0, frag_md); + ResultTile rt(0, 0, frag_md, tiledb::test::get_test_memory_tracker()); // Make sure cell_num() will return the correct value. if (!first_dim) { @@ -409,10 +421,13 @@ TEST_CASE_METHOD( exp_result_count = {0, 1, 2, 1, 0, 1, 3, 2}; } - std::vector range_indexes(ranges.size()); + tdb::pmr::vector range_indexes( + ranges.size(), memory_tracker_->get_resource(MemoryType::DIMENSIONS)); std::iota(range_indexes.begin(), range_indexes.end(), 0); - std::vector result_count(num_cells, 1); + auto resource = tiledb::test::get_test_memory_tracker()->get_resource( + MemoryType::TILE_BITMAP); + tdb::pmr::vector result_count(num_cells, 1, resource); ResultTile::compute_results_count_sparse_string( &rt, dim_idx, diff --git a/test/src/unit-s3-no-multipart.cc b/test/src/unit-s3-no-multipart.cc deleted file mode 100644 index 2c973cf29b75..000000000000 --- a/test/src/unit-s3-no-multipart.cc +++ /dev/null @@ -1,210 +0,0 @@ -/** - * @file unit-s3-no-multipart.cc - * - * @section LICENSE - * - * The MIT License - * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * @section DESCRIPTION - * - * Tests for S3 API filesystem functions. 
- */ - -#ifdef HAVE_S3 - -#include -#include "test/support/src/helpers.h" -#include "tiledb/common/thread_pool.h" -#include "tiledb/sm/config/config.h" -#include "tiledb/sm/filesystem/s3.h" -#include "tiledb/sm/global_state/unit_test_config.h" -#include "tiledb/sm/misc/tdb_time.h" - -#include -#include - -using namespace tiledb::test; -using namespace tiledb::common; -using namespace tiledb::sm; - -struct S3DirectFx { - S3DirectFx(); - ~S3DirectFx(); - static Config set_config_params(); - - const std::string S3_PREFIX = "s3://"; - const tiledb::sm::URI S3_BUCKET = - tiledb::sm::URI(S3_PREFIX + "tiledb-" + random_label() + "/"); - const std::string TEST_DIR = S3_BUCKET.to_string() + "tiledb_test_dir/"; - ThreadPool thread_pool_{2}; - tiledb::sm::S3 s3_{&g_helper_stats, &thread_pool_, set_config_params()}; -}; - -S3DirectFx::S3DirectFx() { - // Create bucket - bool exists = s3_.is_bucket(S3_BUCKET); - if (exists) - REQUIRE_NOTHROW(s3_.remove_bucket(S3_BUCKET)); - - exists = s3_.is_bucket(S3_BUCKET); - REQUIRE(!exists); - REQUIRE_NOTHROW(s3_.create_bucket(S3_BUCKET)); - - // Check if bucket is empty - bool is_empty = s3_.is_empty_bucket(S3_BUCKET); - CHECK(is_empty); -} - -S3DirectFx::~S3DirectFx() { - // Empty bucket - bool is_empty = s3_.is_empty_bucket(S3_BUCKET); - if (!is_empty) { - CHECK_NOTHROW(s3_.empty_bucket(S3_BUCKET)); - is_empty = s3_.is_empty_bucket(S3_BUCKET); - CHECK(is_empty); - } - - // Delete bucket - CHECK_NOTHROW(s3_.remove_bucket(S3_BUCKET)); - CHECK(s3_.disconnect().ok()); -} - -Config S3DirectFx::set_config_params() { - // Connect - Config config; -#ifndef TILEDB_TESTS_AWS_S3_CONFIG - REQUIRE(config.set("vfs.s3.endpoint_override", "localhost:9999").ok()); - REQUIRE(config.set("vfs.s3.scheme", "https").ok()); - REQUIRE(config.set("vfs.s3.use_virtual_addressing", "false").ok()); - REQUIRE(config.set("vfs.s3.verify_ssl", "false").ok()); -#endif - REQUIRE(config.set("vfs.s3.max_parallel_ops", "1").ok()); - // set max buffer size to 10 MB - 
REQUIRE(config.set("vfs.s3.multipart_part_size", "10000000").ok()); - REQUIRE(config.set("vfs.s3.use_multipart_upload", "false").ok()); - return config; -} - -TEST_CASE_METHOD( - S3DirectFx, - "Test S3 filesystem, file I/O with multipart API disabled", - "[s3]") { - // Prepare buffers - uint64_t buffer_size = 5 * 1024 * 1024; - auto write_buffer = new char[buffer_size]; - for (uint64_t i = 0; i < buffer_size; i++) - write_buffer[i] = (char)('a' + (i % 26)); - uint64_t buffer_size_small = 1024 * 1024; - auto write_buffer_small = new char[buffer_size_small]; - for (uint64_t i = 0; i < buffer_size_small; i++) - write_buffer_small[i] = (char)('a' + (i % 26)); - - // Write to two files - auto largefile = TEST_DIR + "largefile"; - CHECK_NOTHROW(s3_.write(URI(largefile), write_buffer, buffer_size)); - CHECK_NOTHROW( - s3_.write(URI(largefile), write_buffer_small, buffer_size_small)); - auto smallfile = TEST_DIR + "smallfile"; - CHECK_NOTHROW( - s3_.write(URI(smallfile), write_buffer_small, buffer_size_small)); - - // Before flushing, the files do not exist - bool exists = false; - CHECK(s3_.is_object(URI(largefile), &exists).ok()); - CHECK(!exists); - CHECK(s3_.is_object(URI(smallfile), &exists).ok()); - CHECK(!exists); - - // Flush the files - CHECK(s3_.flush_object(URI(largefile)).ok()); - CHECK(s3_.flush_object(URI(smallfile)).ok()); - - // After flushing, the files exist - CHECK(s3_.is_object(URI(largefile), &exists).ok()); - CHECK(exists); - CHECK(s3_.is_object(URI(smallfile), &exists).ok()); - CHECK(exists); - - // Get file sizes - uint64_t nbytes = 0; - CHECK(s3_.object_size(URI(largefile), &nbytes).ok()); - CHECK(nbytes == (buffer_size + buffer_size_small)); - CHECK(s3_.object_size(URI(smallfile), &nbytes).ok()); - CHECK(nbytes == buffer_size_small); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - CHECK_NOTHROW( - s3_.read_impl(URI(largefile), 0, read_buffer, 26, 0, &bytes_read)); - assert(26 == bytes_read); - bool 
allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - CHECK(allok); - - // Read from a different offset - CHECK_NOTHROW( - s3_.read_impl(URI(largefile), 11, read_buffer, 26, 0, &bytes_read)); - assert(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - CHECK(allok); - - // Try to write 11 MB file, should fail with given buffer configuration - auto badfile = TEST_DIR + "badfile"; - auto badbuffer = (char*)malloc(11000000); - CHECK_THROWS((s3_.write(URI(badfile), badbuffer, 11000000))); -} - -TEST_CASE_METHOD( - S3DirectFx, "Validate vfs.s3.custom_headers.*", "[s3][custom-headers]") { - Config cfg = set_config_params(); - - // Check the edge case of a key matching the ConfigIter prefix. - REQUIRE(cfg.set("vfs.s3.custom_headers.", "").ok()); - - // Set an unexpected value for Content-MD5, which minio should reject - REQUIRE(cfg.set("vfs.s3.custom_headers.Content-MD5", "unexpected").ok()); - - // Recreate a new S3 client because config is not dynamic - tiledb::sm::S3 s3{&g_helper_stats, &thread_pool_, cfg}; - auto uri = URI(TEST_DIR + "writefailure"); - - // This is a buffered write, which is why it should not throw. - CHECK_NOTHROW(s3.write(uri, "Validate s3 custom headers", 26)); - - auto matcher = Catch::Matchers::ContainsSubstring( - "The Content-Md5 you specified is not valid."); - REQUIRE_THROWS_WITH(s3.flush_object(uri), matcher); -} -#endif diff --git a/test/src/unit-s3.cc b/test/src/unit-s3.cc index cf05f8a9b3c4..aa2c68a593cd 100644 --- a/test/src/unit-s3.cc +++ b/test/src/unit-s3.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -102,79 +102,6 @@ Config S3Fx::set_config_params() { return config; } -TEST_CASE_METHOD(S3Fx, "Test S3 filesystem, file I/O", "[s3]") { - // Prepare buffers - uint64_t buffer_size = 5 * 1024 * 1024; - auto write_buffer = new char[buffer_size]; - for (uint64_t i = 0; i < buffer_size; i++) - write_buffer[i] = (char)('a' + (i % 26)); - uint64_t buffer_size_small = 1024 * 1024; - auto write_buffer_small = new char[buffer_size_small]; - for (uint64_t i = 0; i < buffer_size_small; i++) - write_buffer_small[i] = (char)('a' + (i % 26)); - - // Write to two files - auto largefile = TEST_DIR + "largefile"; - CHECK_NOTHROW(s3_.write(URI(largefile), write_buffer, buffer_size)); - CHECK_NOTHROW( - s3_.write(URI(largefile), write_buffer_small, buffer_size_small)); - auto smallfile = TEST_DIR + "smallfile"; - CHECK_NOTHROW( - s3_.write(URI(smallfile), write_buffer_small, buffer_size_small)); - - // Before flushing, the files do not exist - bool exists = false; - CHECK(s3_.is_object(URI(largefile), &exists).ok()); - CHECK(!exists); - CHECK(s3_.is_object(URI(smallfile), &exists).ok()); - CHECK(!exists); - - // Flush the files - CHECK(s3_.flush_object(URI(largefile)).ok()); - CHECK(s3_.flush_object(URI(smallfile)).ok()); - - // After flushing, the files exist - CHECK(s3_.is_object(URI(largefile), &exists).ok()); - CHECK(exists); - CHECK(s3_.is_object(URI(smallfile), &exists).ok()); - CHECK(exists); - - // Get file sizes - uint64_t nbytes = 0; - CHECK(s3_.object_size(URI(largefile), &nbytes).ok()); - CHECK(nbytes == (buffer_size + buffer_size_small)); - CHECK(s3_.object_size(URI(smallfile), &nbytes).ok()); - CHECK(nbytes == buffer_size_small); - - // Read from the beginning - auto read_buffer = new char[26]; - uint64_t bytes_read = 0; - CHECK_NOTHROW( - s3_.read_impl(URI(largefile), 0, read_buffer, 26, 0, &bytes_read)); - 
CHECK(26 == bytes_read); - bool allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + i)) { - allok = false; - break; - } - } - CHECK(allok); - - // Read from a different offset - CHECK_NOTHROW( - s3_.read_impl(URI(largefile), 11, read_buffer, 26, 0, &bytes_read)); - CHECK(26 == bytes_read); - allok = true; - for (int i = 0; i < 26; i++) { - if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { - allok = false; - break; - } - } - CHECK(allok); -} - TEST_CASE_METHOD(S3Fx, "Test S3 multiupload abort path", "[s3]") { // Prepare a large buffer uint64_t buffer_size = 100 * 1024 * 1024; diff --git a/test/src/unit-sparse-global-order-reader.cc b/test/src/unit-sparse-global-order-reader.cc index 582f59268ad4..77580430b1e6 100644 --- a/test/src/unit-sparse-global-order-reader.cc +++ b/test/src/unit-sparse-global-order-reader.cc @@ -777,7 +777,7 @@ TEST_CASE_METHOD( // Two result tile (2 * (~1200 + 8) will be bigger than the per fragment // budget (1000). - total_budget_ = "10000"; + total_budget_ = "12000"; ratio_coords_ = "0.30"; update_config(); @@ -1348,7 +1348,7 @@ TEST_CASE_METHOD( // Two result tile (2 * (~1200 + 8) will be bigger than the per fragment // budget (1000). 
- total_budget_ = "10000"; + total_budget_ = "12000"; ratio_coords_ = "0.30"; update_config(); diff --git a/test/src/unit-sparse-unordered-with-dups-reader.cc b/test/src/unit-sparse-unordered-with-dups-reader.cc index 74daab6ff01e..3f3192690679 100644 --- a/test/src/unit-sparse-unordered-with-dups-reader.cc +++ b/test/src/unit-sparse-unordered-with-dups-reader.cc @@ -32,6 +32,7 @@ #include "test/support/src/helpers.h" #include "tiledb/common/common.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/c_api/tiledb.h" #include "tiledb/sm/c_api/tiledb_struct_def.h" #include "tiledb/sm/query/query_buffer.h" @@ -767,10 +768,10 @@ CSparseUnorderedWithDupsVarDataFx::open_default_array_1d_with_fragments( shared_ptr fragment = make_shared( HERE(), nullptr, - nullptr, array->array_->array_schema_latest_ptr(), URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); fragments.emplace_back(std::move(fragment)); @@ -1526,21 +1527,23 @@ TEST_CASE_METHOD( auto&& [array, fragments] = open_default_array_1d_with_fragments(capacity); // Make a vector of tiles. - std::vector> rt; + std::list> rt; for (uint64_t t = 0; t < num_tiles; t++) { - rt.emplace_back(0, t, *fragments[0]); + rt.emplace_back( + 0, t, *fragments[0], tiledb::test::get_test_memory_tracker()); // Allocate and set the bitmap if required. if (bitmaps[t].size() > 0) { - rt.back().bitmap() = bitmaps[t]; + rt.back().bitmap().assign(bitmaps[t].begin(), bitmaps[t].end()); rt.back().count_cells(); } } // Create the result_tiles pointer vector. std::vector result_tiles(rt.size()); - for (uint64_t i = 0; i < rt.size(); i++) { - result_tiles[i] = &rt[i]; + uint64_t i = 0; + for (auto& t : rt) { + result_tiles[i++] = &t; } // Create a Query buffer. @@ -1742,21 +1745,23 @@ TEST_CASE_METHOD( auto&& [array, fragments] = open_default_array_1d_with_fragments(capacity); // Make a vector of tiles. 
- std::vector> rt; + std::list> rt; for (uint64_t t = 0; t < num_tiles; t++) { - rt.emplace_back(0, t, *fragments[0]); + rt.emplace_back( + 0, t, *fragments[0], tiledb::test::get_test_memory_tracker()); // Allocate and set the bitmap if required. if (bitmaps[t].size() > 0) { - rt.back().bitmap() = bitmaps[t]; + rt.back().bitmap().assign(bitmaps[t].begin(), bitmaps[t].end()); rt.back().count_cells(); } } // Create the result_tiles pointer vector. std::vector result_tiles(rt.size()); - for (uint64_t i = 0; i < rt.size(); i++) { - result_tiles[i] = &rt[i]; + uint64_t i = 0; + for (auto& t : rt) { + result_tiles[i++] = &t; } // Call the function. diff --git a/test/src/unit-tile-metadata-generator.cc b/test/src/unit-tile-metadata-generator.cc index b754f7fdac96..8ededa58c32c 100644 --- a/test/src/unit-tile-metadata-generator.cc +++ b/test/src/unit-tile-metadata-generator.cc @@ -32,10 +32,13 @@ #include +#include #include #include "test/support/src/helpers.h" +#include "test/support/src/mem_helpers.h" #include "tiledb/common/common.h" #include "tiledb/sm/cpp_api/tiledb" +#include "tiledb/sm/enums/array_type.h" #include "tiledb/sm/tile/tile_metadata_generator.h" #include "tiledb/sm/tile/writer_tile_tuple.h" @@ -80,7 +83,8 @@ TEMPLATE_LIST_TEST_CASE( // Generate the array schema. uint64_t num_cells = empty_tile ? 0 : 1000; - ArraySchema schema; + ArraySchema schema( + ArrayType::DENSE, tiledb::test::create_test_memory_tracker()); schema.set_capacity(num_cells); Attribute a("a", tiledb_type); a.set_cell_val_num(cell_val_num); @@ -103,7 +107,8 @@ TEMPLATE_LIST_TEST_CASE( false, nullable, cell_val_num * sizeof(T), - tiledb_type); + tiledb_type, + tiledb::test::create_test_memory_tracker()); auto tile_buff = writer_tile.fixed_tile().data_as(); uint8_t* nullable_buff = nullptr; if (nullable) { @@ -258,14 +263,22 @@ TEMPLATE_LIST_TEST_CASE( auto type = tiledb::impl::type_to_tiledb(); // Generate the array schema. 
- ArraySchema schema; + ArraySchema schema( + ArrayType::DENSE, tiledb::test::create_test_memory_tracker()); schema.set_capacity(4); Attribute a("a", (Datatype)type.tiledb_type); CHECK(schema.add_attribute(make_shared(HERE(), a)).ok()); // Initialize a new tile. auto tiledb_type = static_cast(type.tiledb_type); - WriterTileTuple writer_tile(schema, 4, false, false, sizeof(T), tiledb_type); + WriterTileTuple writer_tile( + schema, + 4, + false, + false, + sizeof(T), + tiledb_type, + tiledb::test::create_test_memory_tracker()); auto tile_buff = writer_tile.fixed_tile().data_as(); // Once an overflow happens, the computation should abort, try to add a few @@ -291,7 +304,13 @@ TEMPLATE_LIST_TEST_CASE( if constexpr (std::is_signed_v) { // Initialize a new tile. WriterTileTuple writer_tile( - schema, 4, false, false, sizeof(T), tiledb_type); + schema, + 4, + false, + false, + sizeof(T), + tiledb_type, + tiledb::test::create_test_memory_tracker()); auto tile_buff = writer_tile.fixed_tile().data_as(); // Once an overflow happens, the computation should abort, try to add a few @@ -329,11 +348,12 @@ TEST_CASE( bool empty_tile = test == "empty tile"; uint64_t max_string_size = 100; - uint64_t num_strings = 2000; + int num_strings = 2000; // Generate the array schema. uint64_t num_cells = empty_tile ? 0 : 20; - ArraySchema schema; + ArraySchema schema( + ArrayType::DENSE, tiledb::test::create_test_memory_tracker()); schema.set_capacity(num_cells); Attribute a("a", Datatype::STRING_ASCII); a.set_cell_val_num(constants::var_num); @@ -342,7 +362,7 @@ TEST_CASE( // Generate random, sorted strings for the string ascii type. std::vector strings; strings.reserve(num_strings); - for (uint64_t i = 0; i < num_strings; i++) { + for (int i = 0; i < num_strings; i++) { strings.emplace_back(tiledb::test::random_string(rand() % max_string_size)); } std::sort(strings.begin(), strings.end()); @@ -358,7 +378,13 @@ TEST_CASE( // Initialize tile. 
WriterTileTuple writer_tile( - schema, num_cells, true, nullable, 1, Datatype::CHAR); + schema, + num_cells, + true, + nullable, + 1, + Datatype::CHAR, + tiledb::test::create_test_memory_tracker()); auto offsets_tile_buff = writer_tile.offset_tile().data_as(); // Initialize a new nullable tile. @@ -433,7 +459,8 @@ TEST_CASE( "TileMetadataGenerator: var data tiles same string, different lengths", "[tile-metadata-generator][var-data][same-length]") { // Generate the array schema. - ArraySchema schema; + ArraySchema schema( + ArrayType::DENSE, tiledb::test::create_test_memory_tracker()); schema.set_capacity(2); Attribute a("a", Datatype::CHAR); a.set_cell_val_num(constants::var_num); @@ -441,7 +468,14 @@ TEST_CASE( // Store '123' and '12' // Initialize offsets tile. - WriterTileTuple writer_tile(schema, 2, true, false, 1, Datatype::CHAR); + WriterTileTuple writer_tile( + schema, + 2, + true, + false, + 1, + Datatype::CHAR, + tiledb::test::create_test_memory_tracker()); auto offsets_tile_buff = writer_tile.offset_tile().data_as(); offsets_tile_buff[0] = 0; offsets_tile_buff[1] = 3; diff --git a/test/src/unit-tile-metadata.cc b/test/src/unit-tile-metadata.cc index f478b2d30953..8b92b13f9b0e 100644 --- a/test/src/unit-tile-metadata.cc +++ b/test/src/unit-tile-metadata.cc @@ -748,12 +748,12 @@ struct CPPVarTileMetadataFx { std::default_random_engine random_engine; uint64_t max_string_size = 100; - uint64_t num_strings = 2000; + int num_strings = 2000; if (f == 0) { // Generate random, sorted strings for the string ascii type. strings_.reserve(num_strings); - for (uint64_t i = 0; i < num_strings; i++) { + for (int i = 0; i < num_strings; i++) { strings_.emplace_back( tiledb::test::random_string(rand() % max_string_size)); } diff --git a/test/src/unit-vfs.cc b/test/src/unit-vfs.cc index ae6fa1d250b6..b1c9b8b1f54c 100644 --- a/test/src/unit-vfs.cc +++ b/test/src/unit-vfs.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2018-2023 TileDB, Inc. 
+ * @copyright Copyright (c) 2018-2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -33,8 +33,13 @@ #include #include "test/support/src/helpers.h" #include "test/support/src/temporary_local_directory.h" +#ifdef HAVE_AZURE +#include +#include "tiledb/sm/filesystem/azure.h" +#endif #include "test/support/src/vfs_helpers.h" #include "tiledb/sm/filesystem/vfs.h" +#include "tiledb/sm/global_state/unit_test_config.h" #ifdef _WIN32 #include "tiledb/sm/filesystem/path_win.h" #endif @@ -53,15 +58,25 @@ void require_tiledb_ok(Status st) { REQUIRE(st.ok()); } -void require_tiledb_err(Status st) { - REQUIRE(!st.ok()); -} - -Config set_config_params() { +Config set_config_params( + bool disable_multipart = false, uint64_t parallel_ops = 1) { Config config; if constexpr (tiledb::sm::filesystem::gcs_enabled) { require_tiledb_ok(config.set("vfs.gcs.project_id", "TODO")); + if (parallel_ops != 1) { + require_tiledb_ok( + config.set("vfs.gcs.max_parallel_ops", std::to_string(parallel_ops))); + require_tiledb_ok(config.set( + "vfs.gcs.multi_part_size", std::to_string(4 * 1024 * 1024))); + } + if (disable_multipart) { + require_tiledb_ok( + config.set("vfs.gcs.max_parallel_ops", std::to_string(1))); + require_tiledb_ok(config.set("vfs.gcs.use_multi_part_upload", "false")); + require_tiledb_ok(config.set( + "vfs.gcs.max_direct_upload_size", std::to_string(4 * 1024 * 1024))); + } } if constexpr (tiledb::sm::filesystem::s3_enabled) { @@ -69,6 +84,11 @@ Config set_config_params() { require_tiledb_ok(config.set("vfs.s3.scheme", "https")); require_tiledb_ok(config.set("vfs.s3.use_virtual_addressing", "false")); require_tiledb_ok(config.set("vfs.s3.verify_ssl", "false")); + if (disable_multipart) { + require_tiledb_ok(config.set("vfs.s3.max_parallel_ops", "1")); + require_tiledb_ok(config.set("vfs.s3.multipart_part_size", "10000000")); + 
require_tiledb_ok(config.set("vfs.s3.use_multipart_upload", "false")); + } } if constexpr (tiledb::sm::filesystem::azure_enabled) { @@ -87,11 +107,29 @@ Config set_config_params() { // require_tiledb_ok(config.set("vfs.azure.storage_sas_token", "")); // require_tiledb_ok(config.set( // "vfs.azure.blob_endpoint", "http://127.0.0.1:10000/devstoreaccount2")); + if (parallel_ops != 1) { + require_tiledb_ok(config.set( + "vfs.azure.max_parallel_ops", std::to_string(parallel_ops))); + require_tiledb_ok(config.set( + "vfs.azure.block_list_block_size", std::to_string(4 * 1024 * 1024))); + } + if (disable_multipart) { + require_tiledb_ok(config.set("vfs.azure.use_block_list_upload", "false")); + } } return config; } +std::string local_path() { + std::string local_prefix = ""; + if constexpr (!tiledb::sm::filesystem::windows_enabled) { + local_prefix = "file://"; + } + + return local_prefix + unit_vfs_dir_.path(); +} + TEST_CASE("VFS: Test long local paths", "[vfs]") { ThreadPool compute_tp(4); ThreadPool io_tp(4); @@ -99,11 +137,7 @@ TEST_CASE("VFS: Test long local paths", "[vfs]") { SECTION("- Deep hierarchy") { // Create a nested path with a long total length - std::string local_prefix = ""; - if constexpr (!tiledb::sm::filesystem::windows_enabled) { - local_prefix = "file://"; - } - std::string tmpdir = local_prefix + unit_vfs_dir_.path(); + std::string tmpdir = local_path(); bool success = true; while (tmpdir.size() < 512) { tmpdir += "subdir/"; @@ -113,8 +147,8 @@ TEST_CASE("VFS: Test long local paths", "[vfs]") { } } - // On some Windows platforms, the path length of a directory must be <= 248 - // chars. On others (that have opted in to a configuration that allows + // On some Windows platforms, the path length of a directory must be <= + // 248 chars. On others (that have opted in to a configuration that allows // long paths) the limit is ~32,767. Here we check for either case. 
if (success) { // Check we can create files within the deep hierarchy @@ -138,11 +172,7 @@ TEST_CASE("VFS: Test long local paths", "[vfs]") { for (unsigned i = 0; i < 256; i++) { name += "x"; } - std::string local_prefix = ""; - if constexpr (!tiledb::sm::filesystem::windows_enabled) { - local_prefix = "file://"; - } - std::string tmpdir = local_prefix + unit_vfs_dir_.path(); + std::string tmpdir = local_path(); URI testfile(tmpdir + name); // Creating the URI and checking its existence is fine on posix @@ -162,45 +192,21 @@ TEST_CASE("VFS: Test long local paths", "[vfs]") { } } -TEST_CASE("VFS: URI semantics and file management", "[vfs][uri]") { +using AllBackends = + std::tuple; +TEMPLATE_LIST_TEST_CASE( + "VFS: URI semantics and file management", "[vfs][uri]", AllBackends) { + TestType fs({0}); + if (!fs.is_supported()) { + return; + } + ThreadPool compute_tp(4); ThreadPool io_tp(4); Config config = set_config_params(); VFS vfs{&g_helper_stats, &compute_tp, &io_tp, config}; - // Sections to test each enabled filesystem - URI path; - std::string local_prefix = ""; - SECTION("Filesystem: Local") { - if constexpr (!tiledb::sm::filesystem::windows_enabled) { - local_prefix = "file://"; - } - path = URI(local_prefix + unit_vfs_dir_.path()); - } - - if constexpr (tiledb::sm::filesystem::gcs_enabled) { - SECTION("Filesystem: GCS") { - path = URI("gcs://vfs-" + random_label() + "/"); - } - } - - if constexpr (tiledb::sm::filesystem::s3_enabled) { - SECTION("Filesystem: S3") { - path = URI("s3://vfs-" + random_label() + "/"); - } - } - - if constexpr (tiledb::sm::filesystem::hdfs_enabled) { - SECTION("Filesystem: HDFS") { - path = URI("hdfs:///vfs-" + random_label() + "/"); - } - } - - if constexpr (tiledb::sm::filesystem::azure_enabled) { - SECTION("Filesystem: Azure") { - path = URI("azure://vfs-" + random_label() + "/"); - } - } + URI path = fs.temp_dir_.add_trailing_slash(); // Set up bool exists = false; @@ -399,16 +405,142 @@ TEST_CASE("VFS: URI semantics and file 
management", "[vfs][uri]") { } } +TEMPLATE_LIST_TEST_CASE("VFS: File I/O", "[vfs][uri][file_io]", AllBackends) { + TestType fs({0}); + if (!fs.is_supported()) { + return; + } + + bool disable_multipart = GENERATE(true, false); + uint64_t max_parallel_ops = 1; + uint64_t chunk_size = 1024 * 1024; + int multiplier = 5; + + URI path = fs.temp_dir_.add_trailing_slash(); + + if constexpr ( + std::is_same::value || + std::is_same::value) { + chunk_size = 4 * 1024 * 1024; + multiplier = 1; + + if (!disable_multipart) { + max_parallel_ops = GENERATE(1, 4); + } + } + + if constexpr (std::is_same::value) { + max_parallel_ops = 2; + chunk_size = 4 * 1024 * 1024; + if (disable_multipart) { + multiplier = 1; + } + } + + ThreadPool compute_tp(4); + ThreadPool io_tp(4); + Config config = set_config_params(disable_multipart, max_parallel_ops); + VFS vfs{&g_helper_stats, &compute_tp, &io_tp, config}; + + // Set up + bool exists = false; + if (path.is_gcs() || path.is_s3() || path.is_azure()) { + require_tiledb_ok(vfs.is_bucket(path, &exists)); + if (exists) { + require_tiledb_ok(vfs.remove_bucket(path)); + } + require_tiledb_ok(vfs.create_bucket(path)); + } else { + require_tiledb_ok(vfs.is_dir(path, &exists)); + if (exists) { + require_tiledb_ok(vfs.remove_dir(path)); + } + require_tiledb_ok(vfs.create_dir(path)); + } + + // Prepare buffers + uint64_t buffer_size = multiplier * max_parallel_ops * chunk_size; + auto write_buffer = new char[buffer_size]; + for (uint64_t i = 0; i < buffer_size; i++) + write_buffer[i] = (char)('a' + (i % 26)); + uint64_t buffer_size_small = 1024 * 1024; + auto write_buffer_small = new char[buffer_size_small]; + for (uint64_t i = 0; i < buffer_size_small; i++) + write_buffer_small[i] = (char)('a' + (i % 26)); + + // Write to two files + URI largefile = URI(path.to_string() + "largefile"); + require_tiledb_ok(vfs.write(largefile, write_buffer, buffer_size)); + URI smallfile = URI(path.to_string() + "smallfile"); + require_tiledb_ok( + 
vfs.write(smallfile, write_buffer_small, buffer_size_small)); + + // On non-local and hdfs systems, before flushing, the files do not exist + if (!(path.is_file() || path.is_hdfs())) { + require_tiledb_ok(vfs.is_file(largefile, &exists)); + CHECK(!exists); + require_tiledb_ok(vfs.is_file(smallfile, &exists)); + CHECK(!exists); + + // Flush the files + require_tiledb_ok(vfs.close_file(largefile)); + require_tiledb_ok(vfs.close_file(smallfile)); + } + + // After flushing, the files exist + require_tiledb_ok(vfs.is_file(largefile, &exists)); + CHECK(exists); + require_tiledb_ok(vfs.is_file(smallfile, &exists)); + CHECK(exists); + + // Get file sizes + uint64_t nbytes = 0; + require_tiledb_ok(vfs.file_size(largefile, &nbytes)); + CHECK(nbytes == (buffer_size)); + require_tiledb_ok(vfs.file_size(smallfile, &nbytes)); + CHECK(nbytes == buffer_size_small); + + // Read from the beginning + auto read_buffer = new char[26]; + require_tiledb_ok(vfs.read(largefile, 0, read_buffer, 26)); + bool allok = true; + for (int i = 0; i < 26; i++) { + if (read_buffer[i] != static_cast('a' + i)) { + allok = false; + break; + } + } + CHECK(allok); + + // Read from a different offset + require_tiledb_ok(vfs.read(largefile, 11, read_buffer, 26)); + allok = true; + for (int i = 0; i < 26; i++) { + if (read_buffer[i] != static_cast('a' + (i + 11) % 26)) { + allok = false; + break; + } + } + CHECK(allok); + + // Clean up + if (path.is_gcs() || path.is_s3() || path.is_azure()) { + require_tiledb_ok(vfs.remove_bucket(path)); + require_tiledb_ok(vfs.is_bucket(path, &exists)); + REQUIRE(!exists); + } else { + require_tiledb_ok(vfs.remove_dir(path)); + require_tiledb_ok(vfs.is_dir(path, &exists)); + REQUIRE(!exists); + } +} + TEST_CASE("VFS: test ls_with_sizes", "[vfs][ls-with-sizes]") { ThreadPool compute_tp(4); ThreadPool io_tp(4); VFS vfs_ls{&g_helper_stats, &compute_tp, &io_tp, Config{}}; - std::string local_prefix = ""; - if constexpr (!tiledb::sm::filesystem::windows_enabled) { - local_prefix 
= "file://"; - } - std::string path = local_prefix + unit_vfs_dir_.path(); + std::string path = local_path(); std::string dir = path + "ls_dir"; std::string file = dir + "/file"; std::string subdir = dir + "/subdir"; @@ -542,3 +674,67 @@ TEST_CASE( Catch::Matchers::ContainsSubstring("Throwing FileFilter")); } } + +#ifdef HAVE_AZURE +TEST_CASE("VFS: Construct Azure Blob Storage endpoint URIs", "[azure][uri]") { + std::string sas_token, custom_endpoint, expected_endpoint; + SECTION("No SAS token") { + sas_token = ""; + expected_endpoint = "https://devstoreaccount1.blob.core.windows.net"; + } + SECTION("SAS token without leading question mark") { + sas_token = "baz=qux&foo=bar"; + expected_endpoint = + "https://devstoreaccount1.blob.core.windows.net?baz=qux&foo=bar"; + } + SECTION("SAS token with leading question mark") { + sas_token = "?baz=qux&foo=bar"; + expected_endpoint = + "https://devstoreaccount1.blob.core.windows.net?baz=qux&foo=bar"; + } + SECTION("SAS token in both endpoint and config option") { + sas_token = "baz=qux&foo=bar"; + custom_endpoint = + "https://devstoreaccount1.blob.core.windows.net?baz=qux&foo=bar"; + expected_endpoint = + "https://devstoreaccount1.blob.core.windows.net?baz=qux&foo=bar"; + } + SECTION("No SAS token") { + sas_token = ""; + expected_endpoint = "https://devstoreaccount1.blob.core.windows.net"; + } + Config config; + require_tiledb_ok( + config.set("vfs.azure.storage_account_name", "devstoreaccount1")); + require_tiledb_ok(config.set("vfs.azure.blob_endpoint", custom_endpoint)); + require_tiledb_ok(config.set("vfs.azure.storage_sas_token", sas_token)); + tiledb::sm::Azure azure; + ThreadPool thread_pool(1); + require_tiledb_ok(azure.init(config, &thread_pool)); + REQUIRE(azure.client().GetUrl() == expected_endpoint); +} +#endif + +#ifdef HAVE_S3 +TEST_CASE("Validate vfs.s3.custom_headers.*", "[s3][custom-headers]") { + Config cfg = set_config_params(true); + + // Check the edge case of a key matching the ConfigIter prefix. 
+ REQUIRE(cfg.set("vfs.s3.custom_headers.", "").ok()); + + // Set an unexpected value for Content-MD5, which minio should reject + REQUIRE(cfg.set("vfs.s3.custom_headers.Content-MD5", "unexpected").ok()); + + // Recreate a new S3 client because config is not dynamic + ThreadPool thread_pool(2); + S3 s3{&g_helper_stats, &thread_pool, cfg}; + auto uri = URI("s3://tiledb-" + random_label() + "/writefailure"); + + // This is a buffered write, which is why it should not throw. + CHECK_NOTHROW(s3.write(uri, "Validate s3 custom headers", 26)); + + auto matcher = Catch::Matchers::ContainsSubstring( + "The Content-Md5 you specified is not valid."); + REQUIRE_THROWS_WITH(s3.flush_object(uri), matcher); +} +#endif diff --git a/test/support/CMakeLists.txt b/test/support/CMakeLists.txt index 977d65a60480..96a59b151318 100644 --- a/test/support/CMakeLists.txt +++ b/test/support/CMakeLists.txt @@ -41,6 +41,8 @@ set(TILEDB_TEST_SUPPORT_SOURCES src/helpers.h src/helpers.cc src/helpers-dimension.h + src/mem_helpers.h + src/mem_helpers.cc src/serialization_wrappers.cc src/temporary_local_directory.cc src/vfs_helpers.cc @@ -65,6 +67,7 @@ if (NOT MSVC) endif() target_link_libraries(tiledb_test_support_lib PRIVATE $) +target_link_libraries(tiledb_test_support_lib PRIVATE baseline) target_include_directories( tiledb_test_support_lib BEFORE PRIVATE @@ -109,4 +112,9 @@ commence(object_library tdb_catch) this_target_object_libraries(seedable_global_PRNG) conclude(object_library) +commence(object_library mem_helpers) + this_target_sources(src/mem_helpers.cc) + this_target_object_libraries(baseline) +conclude(object_library) + add_test_subdirectory() diff --git a/test/support/src/helpers.cc b/test/support/src/helpers.cc index 72c60f954e6b..f7192213ad9e 100644 --- a/test/support/src/helpers.cc +++ b/test/support/src/helpers.cc @@ -45,6 +45,7 @@ #include "tiledb/api/c_api/context/context_api_external.h" #include "tiledb/api/c_api/context/context_api_internal.h" #include "tiledb/common/logger.h" 
+#include "tiledb/common/memory_tracker.h" #include "tiledb/common/stdx_string.h" #include "tiledb/sm/c_api/tiledb_struct_def.h" #include "tiledb/sm/cpp_api/tiledb" @@ -56,6 +57,14 @@ #include "tiledb/sm/serialization/array.h" #include "tiledb/sm/serialization/query.h" +int setenv_local(const char* __name, const char* __value) { +#ifdef _WIN32 + return _putenv_s(__name, __value); +#else + return ::setenv(__name, __value, 1); +#endif +} + std::mutex catch2_macro_mutex; namespace tiledb::test { @@ -1592,7 +1601,8 @@ int array_open_wrapper( (*open_array)->array_.get(), tiledb::sm::SerializationType::CAPNP, buff->buffer(), - client_ctx->storage_manager()); + client_ctx->storage_manager(), + tiledb::test::create_test_memory_tracker()); REQUIRE(st.ok()); // 6. Server: Close array and clean up diff --git a/test/support/src/helpers.h b/test/support/src/helpers.h index f45054d4e429..ddf3bc581a7c 100644 --- a/test/support/src/helpers.h +++ b/test/support/src/helpers.h @@ -35,6 +35,7 @@ #include #include "test/support/src/coords_workaround.h" +#include "test/support/src/mem_helpers.h" #include "tiledb.h" #include "tiledb/common/common.h" #include "tiledb/common/random/random_label.h" @@ -51,6 +52,15 @@ #include #include +/** + * Helper function to set environment variables across platforms. + * + * @param __name Name of the environment variable. + * @param __value Value of the environment variable. + * @return 0 on success, -1 on error. + */ +int setenv_local(const char* __name, const char* __value); + // A mutex for protecting the thread-unsafe Catch2 macros. extern std::mutex catch2_macro_mutex; diff --git a/test/support/src/mem_helpers.cc b/test/support/src/mem_helpers.cc new file mode 100644 index 000000000000..50a18ad5b26e --- /dev/null +++ b/test/support/src/mem_helpers.cc @@ -0,0 +1,59 @@ +/** + * @file mem_helpers.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines some test suite helper functions, specific to memory + * tracking. 
+ */ + +#include "tiledb/common/memory_tracker.h" + +namespace tiledb::test { + +shared_ptr get_test_memory_tracker() { + class MemoryTrackerCreator : public sm::MemoryTracker { + public: + MemoryTrackerCreator() + : sm::MemoryTracker() { + } + + static shared_ptr get_instance() { + static shared_ptr tracker{ + new MemoryTrackerCreator()}; + return tracker; + } + }; + + return MemoryTrackerCreator::get_instance(); +} + +shared_ptr create_test_memory_tracker() { + return get_test_memory_tracker(); +} + +} // namespace tiledb::test diff --git a/test/support/src/mem_helpers.h b/test/support/src/mem_helpers.h new file mode 100644 index 000000000000..363a1e78a76c --- /dev/null +++ b/test/support/src/mem_helpers.h @@ -0,0 +1,60 @@ +/** + * @file mem_helpers.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * @section DESCRIPTION + * + * This file declares some test suite helper functions specific to memory + * tracking. + */ + +#ifndef TILEDB_MEM_HELPERS_H +#define TILEDB_MEM_HELPERS_H + +#include "tiledb/common/memory_tracker.h" + +namespace tiledb::test { + +/** + * Helper function get the test instance of a shared_ptr + * + * This is the preferred function. The create_test_memory_tracker will be + * replaced shortly and only serves as a proxy to this function while we + * transition the first few PRs to use this new function. + * + * The reasoning here is that creating memory trackers has turned out to be a + * bit of a footgun with lifetime issues. + */ +shared_ptr get_test_memory_tracker(); + +/** + * Helper function to create test instances of shared_ptr + */ +shared_ptr create_test_memory_tracker(); + +} // namespace tiledb::test + +#endif // TILEDB_MEM_HELPERS_H diff --git a/test/support/src/vfs_helpers.cc b/test/support/src/vfs_helpers.cc index 14bde730bd1a..98d831136cd5 100644 --- a/test/support/src/vfs_helpers.cc +++ b/test/support/src/vfs_helpers.cc @@ -497,6 +497,24 @@ LocalFsTest::LocalFsTest(const std::vector& test_tree) temp_dir_ = tiledb::test::test_dir(prefix_ + tiledb::sm::Posix::current_dir() + "/"); #endif + + vfs_.create_dir(temp_dir_).ok(); + // TODO: We could refactor to remove duplication with S3Test() + for (size_t i = 1; i <= test_tree_.size(); i++) { + sm::URI path = temp_dir_.join_path("subdir_" + std::to_string(i)); + vfs_.create_dir(path).ok(); + expected_results().emplace_back(path.to_string(), 0); + for (size_t j = 1; j <= test_tree_[i - 1]; j++) { + auto object_uri = path.join_path("test_file_" + std::to_string(j)); + vfs_.touch(object_uri).ok(); + std::string data(j * 10, 'a'); + vfs_.open_file(object_uri, sm::VFSMode::VFS_WRITE).ok(); + vfs_.write(object_uri, data.data(), data.size()).ok(); + vfs_.close_file(object_uri).ok(); + expected_results().emplace_back(object_uri.to_string(), data.size()); + } + } + 
std::sort(expected_results().begin(), expected_results().end()); } } // namespace tiledb::test diff --git a/test/support/src/vfs_helpers.h b/test/support/src/vfs_helpers.h index c343f023d1a0..bce5c43d70ca 100644 --- a/test/support/src/vfs_helpers.h +++ b/test/support/src/vfs_helpers.h @@ -914,11 +914,19 @@ class GCSTest : public VFSTestBase { } }; +/** Stub test object for tiledb::sm::GS functionality. */ +class GSTest : public VFSTestBase { + public: + explicit GSTest(const std::vector& test_tree) + : VFSTestBase(test_tree, "gs://") { + } +}; + /** Stub test object for tiledb::sm::HDFS functionality. */ class HDFSTest : public VFSTestBase { public: explicit HDFSTest(const std::vector& test_tree) - : VFSTestBase(test_tree, "hdfs://") { + : VFSTestBase(test_tree, "hdfs:///") { } }; diff --git a/tiledb/sm/filesystem/ls_scanner.cc b/test/support/test/compile_mem_helpers_main.cc similarity index 88% rename from tiledb/sm/filesystem/ls_scanner.cc rename to test/support/test/compile_mem_helpers_main.cc index bcccd82870ec..df5adeb2dc5f 100644 --- a/tiledb/sm/filesystem/ls_scanner.cc +++ b/test/support/test/compile_mem_helpers_main.cc @@ -1,5 +1,5 @@ /** - * @file ls_scanner.cc + * @file test/support/test/compile_mem_helpers_main.cc * * @section LICENSE * @@ -24,10 +24,11 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. - * - * @section DESCRIPTION - * - * This defines the LsScanner class and related types used for VFS. 
*/ -#include "ls_scanner.h" +#include "../src/mem_helpers.h" + +int main() { + tiledb::test::get_test_memory_tracker(); + return 0; +} diff --git a/tiledb/CMakeLists.txt b/tiledb/CMakeLists.txt index b0cb663a04f4..bdb707f371b5 100644 --- a/tiledb/CMakeLists.txt +++ b/tiledb/CMakeLists.txt @@ -193,7 +193,6 @@ set(TILEDB_CORE_SOURCES ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/uri.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/vfs.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/vfs_file_handle.cc - ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/ls_scanner.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/win.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filesystem/filesystem_base.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filter/bit_width_reduction_filter.cc @@ -236,6 +235,7 @@ set(TILEDB_CORE_SOURCES ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/misc/tdb_time.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/misc/types.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/misc/utils.cc + ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/misc/uuid.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/misc/win_constants.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/query/ast/query_ast.cc ${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/query/deletes_and_updates/deletes_and_updates.cc @@ -431,6 +431,8 @@ target_link_libraries(TILEDB_CORE_OBJECTS INTERFACE object_store_definitions) ############################################################ # provide actions/target for preparation of magic.mgc data for embedding/build +find_package(Magic_EP REQUIRED) + set(MGC_GZIPPED_BIN_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/..") set(MGC_GZIPPED_BIN_OUTPUT_FILE "${MGC_GZIPPED_BIN_OUTPUT_DIRECTORY}/magic_mgc_gzipped.bin") set(MGC_GZIPPED_BIN_INPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/sm/misc") @@ -449,7 +451,7 @@ add_dependencies(TILEDB_CORE_OBJECTS gen_mgc_unarch) add_custom_target( update-embedded-magic-data - COMMAND "$" < "${libmagic_DICTIONARY}" "${MGC_GZIPPED_BIN_OUTPUT_FILE}" + COMMAND tdb_gzip_embedded_data ${libmagic_DICTIONARY} 
${MGC_GZIPPED_BIN_OUTPUT_FILE} # need to work in 'local' directory with no prefix paths so no paths are included in archive WORKING_DIRECTORY "${MGC_GZIPPED_BIN_OUTPUT_DIRECTORY}" COMMAND ${CMAKE_COMMAND} -E tar cvj "magic_mgc_gzipped.bin.tar.bz2" "magic_mgc_gzipped.bin" @@ -600,7 +602,6 @@ find_package(LZ4_EP REQUIRED) find_package(Spdlog_EP REQUIRED) find_package(Zlib_EP REQUIRED) find_package(Zstd_EP REQUIRED) -find_package(Magic_EP REQUIRED) target_link_libraries(TILEDB_CORE_OBJECTS_ILIB INTERFACE BZip2::BZip2 @@ -1037,20 +1038,4 @@ configure_file( install(FILES ${CMAKE_CURRENT_BINARY_DIR}/tiledb.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) -# Packaging configuration -configure_file ("${PROJECT_SOURCE_DIR}/cmake/inputs/CustomCPackOptions.cmake.in" - "${PROJECT_BINARY_DIR}/CustomCPackOptions.cmake" - @ONLY) -set (CPACK_PROJECT_CONFIG_FILE - "${PROJECT_BINARY_DIR}/CustomCPackOptions.cmake") - -# Not all options can be set in CustomCPackOptions.cmake -if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - set(CPACK_SOURCE_GENERATOR "ZIP") - set(CPACK_GENERATOR "ZIP") -else() - set(CPACK_SOURCE_GENERATOR "TGZ") - set(CPACK_GENERATOR "TGZ") -endif() - -include(CPack) +include(${CMAKE_SOURCE_DIR}/cmake/package.cmake) \ No newline at end of file diff --git a/tiledb/api/c_api/datatype/datatype_api.cc b/tiledb/api/c_api/datatype/datatype_api.cc index 854b49fe2a56..fd19e9b0e7e1 100644 --- a/tiledb/api/c_api/datatype/datatype_api.cc +++ b/tiledb/api/c_api/datatype/datatype_api.cc @@ -45,11 +45,7 @@ capi_return_t tiledb_datatype_to_str( capi_return_t tiledb_datatype_from_str( const char* str, tiledb_datatype_t* datatype) { - tiledb::sm::Datatype val = tiledb::sm::Datatype::UINT8; - if (!tiledb::sm::datatype_enum(str, &val).ok()) { - return TILEDB_ERR; - } - *datatype = (tiledb_datatype_t)val; + *datatype = (tiledb_datatype_t)tiledb::sm::datatype_enum(str); return TILEDB_OK; } diff --git a/tiledb/api/c_api/dimension/dimension_api.cc 
b/tiledb/api/c_api/dimension/dimension_api.cc index c8a831d7a040..c42341e36e84 100644 --- a/tiledb/api/c_api/dimension/dimension_api.cc +++ b/tiledb/api/c_api/dimension/dimension_api.cc @@ -34,10 +34,13 @@ #include "../filter_list/filter_list_api_internal.h" #include "dimension_api_external.h" #include "dimension_api_internal.h" +#include "tiledb/api/c_api_support/exception_wrapper/exception_wrapper.h" +#include "tiledb/common/memory_tracker.h" namespace tiledb::api { int32_t tiledb_dimension_alloc( + tiledb_ctx_t* ctx, const char* name, tiledb_datatype_t type, const void* dim_domain, @@ -47,8 +50,10 @@ int32_t tiledb_dimension_alloc( throw CAPIStatusException("Dimension name must not be NULL"); } ensure_output_pointer_is_valid(dim); + auto memory_tracker = ctx->resources().create_memory_tracker(); + memory_tracker->set_type(sm::MemoryTrackerType::ARRAY_CREATE); *dim = tiledb_dimension_handle_t::make_handle( - name, static_cast(type)); + name, static_cast(type), memory_tracker); try { (*dim)->set_domain(dim_domain); (*dim)->set_tile_extent(tile_extent); @@ -149,7 +154,8 @@ CAPI_INTERFACE( const void* dim_domain, const void* tile_extent, tiledb_dimension_t** dim) { - return api_entry_context( + return tiledb::api::api_entry_with_context< + tiledb::api::tiledb_dimension_alloc>( ctx, name, type, dim_domain, tile_extent, dim); } diff --git a/tiledb/api/c_api/dimension/dimension_api_internal.h b/tiledb/api/c_api/dimension/dimension_api_internal.h index fe96c866e5e9..03ca1657d826 100644 --- a/tiledb/api/c_api/dimension/dimension_api_internal.h +++ b/tiledb/api/c_api/dimension/dimension_api_internal.h @@ -33,9 +33,11 @@ #ifndef TILEDB_CAPI_DIMENSION_INTERNAL_H #define TILEDB_CAPI_DIMENSION_INTERNAL_H +#include #include "dimension_api_external.h" #include "tiledb/api/c_api_support/handle/handle.h" #include "tiledb/common/common.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array_schema/dimension.h" /** @@ -63,8 +65,12 @@ struct tiledb_dimension_handle_t 
*/ static constexpr std::string_view object_type_name{"dimension"}; - tiledb_dimension_handle_t(const std::string& name, tiledb::sm::Datatype type) - : dimension_(make_shared(HERE(), name, type)) { + tiledb_dimension_handle_t( + const std::string& name, + tiledb::sm::Datatype type, + shared_ptr memory_tracker) + : dimension_(make_shared( + HERE(), name, type, memory_tracker)) { } /** diff --git a/tiledb/api/c_api/dimension/test/compile_capi_dimension_stub_main.cc b/tiledb/api/c_api/dimension/test/compile_capi_dimension_stub_main.cc index c407374aa149..d106ef1a5b1e 100644 --- a/tiledb/api/c_api/dimension/test/compile_capi_dimension_stub_main.cc +++ b/tiledb/api/c_api/dimension/test/compile_capi_dimension_stub_main.cc @@ -30,6 +30,6 @@ int main() { std::string name("foo"); - tiledb_dimension_handle_t x{name, tiledb::sm::Datatype::UINT32}; + tiledb_dimension_handle_t x{name, tiledb::sm::Datatype::UINT32, nullptr}; return 0; } diff --git a/tiledb/api/c_api/domain/domain_api.cc b/tiledb/api/c_api/domain/domain_api.cc index 3a694a97c117..00238c143fc8 100644 --- a/tiledb/api/c_api/domain/domain_api.cc +++ b/tiledb/api/c_api/domain/domain_api.cc @@ -30,12 +30,16 @@ #include "../dimension/dimension_api_internal.h" #include "domain_api_external.h" #include "domain_api_internal.h" +#include "tiledb/common/memory_tracker.h" namespace tiledb::api { -int32_t tiledb_domain_alloc(tiledb_domain_handle_t** domain) { +int32_t tiledb_domain_alloc( + tiledb_ctx_t* ctx, tiledb_domain_handle_t** domain) { ensure_output_pointer_is_valid(domain); - *domain = tiledb_domain_handle_t::make_handle(); + auto memory_tracker = ctx->resources().create_memory_tracker(); + memory_tracker->set_type(tiledb::sm::MemoryTrackerType::ARRAY_CREATE); + *domain = tiledb_domain_handle_t::make_handle(memory_tracker); return TILEDB_OK; } @@ -147,7 +151,8 @@ int32_t tiledb_domain_dump(const tiledb_domain_t* domain, FILE* out) { using tiledb::api::api_entry_context; CAPI_INTERFACE(domain_alloc, tiledb_ctx_t* ctx, 
tiledb_domain_t** domain) { - return api_entry_context(ctx, domain); + return tiledb::api::api_entry_with_context( + ctx, domain); } CAPI_INTERFACE_VOID(domain_free, tiledb_domain_t** domain) { diff --git a/tiledb/api/c_api/domain/domain_api_internal.h b/tiledb/api/c_api/domain/domain_api_internal.h index 76a427ce8ace..dabd36e402c4 100644 --- a/tiledb/api/c_api/domain/domain_api_internal.h +++ b/tiledb/api/c_api/domain/domain_api_internal.h @@ -35,6 +35,10 @@ #include "tiledb/api/c_api_support/handle/handle.h" #include "tiledb/sm/array_schema/domain.h" +namespace tiledb::sm { +class MemoryTracker; +} + struct tiledb_domain_handle_t : public tiledb::api::CAPIHandle { private: @@ -54,8 +58,9 @@ struct tiledb_domain_handle_t * `class Domain` is principally a container for `Dimension` objects. Domain * handles are first constructed as empty containers. */ - tiledb_domain_handle_t() - : domain_{make_shared(HERE())} { + explicit tiledb_domain_handle_t( + shared_ptr memory_tracker) + : domain_{make_shared(HERE(), memory_tracker)} { } /** diff --git a/tiledb/api/c_api/domain/test/compile_capi_domain_stub_main.cc b/tiledb/api/c_api/domain/test/compile_capi_domain_stub_main.cc index c5959eecd7bd..6b6a726109e7 100644 --- a/tiledb/api/c_api/domain/test/compile_capi_domain_stub_main.cc +++ b/tiledb/api/c_api/domain/test/compile_capi_domain_stub_main.cc @@ -29,7 +29,6 @@ #include "../domain_api_internal.h" int main() { - // Domain is easy to deal with because it has a default constructor. 
- tiledb_domain_handle_t x{}; + tiledb_domain_handle_t x{shared_ptr()}; return 0; } diff --git a/tiledb/api/c_api/enumeration/enumeration_api.cc b/tiledb/api/c_api/enumeration/enumeration_api.cc index d73759bef31c..1946969fe199 100644 --- a/tiledb/api/c_api/enumeration/enumeration_api.cc +++ b/tiledb/api/c_api/enumeration/enumeration_api.cc @@ -34,10 +34,12 @@ #include "../string/string_api_internal.h" #include "enumeration_api_experimental.h" #include "enumeration_api_internal.h" +#include "tiledb/common/memory_tracker.h" namespace tiledb::api { capi_return_t tiledb_enumeration_alloc( + tiledb_ctx_t* ctx, const char* name, tiledb_datatype_t type, uint32_t cell_val_num, @@ -65,6 +67,9 @@ capi_return_t tiledb_enumeration_alloc( } try { + auto memory_tracker = ctx->context().resources().create_memory_tracker(); + memory_tracker->set_type(tiledb::sm::MemoryTrackerType::ENUMERATION_CREATE); + *enumeration = tiledb_enumeration_handle_t::make_handle( std::string(name), datatype, @@ -73,7 +78,8 @@ capi_return_t tiledb_enumeration_alloc( data, data_size, offsets, - offsets_size); + offsets_size, + memory_tracker); } catch (...) 
{ *enumeration = nullptr; throw; @@ -179,6 +185,9 @@ capi_return_t tiledb_enumeration_dump( using tiledb::api::api_entry_context; using tiledb::api::api_entry_void; +template +constexpr auto api_entry = tiledb::api::api_entry_with_context; + CAPI_INTERFACE( enumeration_alloc, tiledb_ctx_t* ctx, @@ -191,7 +200,7 @@ CAPI_INTERFACE( const void* offsets, uint64_t offsets_size, tiledb_enumeration_t** enumeration) { - return api_entry_context( + return api_entry( ctx, name, type, diff --git a/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc b/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc index 0b7139553f2a..d06771a93436 100644 --- a/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc +++ b/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc @@ -32,7 +32,15 @@ int main() { try { tiledb_enumeration_handle_t e{ - "fooo", tiledb::sm::Datatype::INT32, 1, 0, nullptr, 0, nullptr, 0}; + "fooo", + tiledb::sm::Datatype::INT32, + 1, + 0, + nullptr, + 0, + nullptr, + 0, + nullptr}; } catch (...) { } return 0; diff --git a/tiledb/api/c_api/group/group_api.cc b/tiledb/api/c_api/group/group_api.cc index 0191f6acad7e..74bf9f123007 100644 --- a/tiledb/api/c_api/group/group_api.cc +++ b/tiledb/api/c_api/group/group_api.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. 
* @copyright Copyright (c) 2016 MIT and Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -540,14 +540,9 @@ capi_return_t tiledb_serialize_group_metadata( auto buf = tiledb_buffer_handle_t::make_handle(); // Get metadata to serialize, this will load it if it does not exist - tiledb::sm::Metadata* metadata; - auto st = group->group().metadata(&metadata); - if (!st.ok()) { - tiledb_buffer_handle_t::break_handle(buf); - throw StatusException(st); - } + auto metadata = group->group().metadata(); - st = tiledb::sm::serialization::metadata_serialize( + auto st = tiledb::sm::serialization::metadata_serialize( metadata, static_cast(serialize_type), &(buf->buffer())); diff --git a/tiledb/api/c_api/query/query_api_internal.h b/tiledb/api/c_api/query/query_api_internal.h index dcc24f111549..404c2bfae8ae 100644 --- a/tiledb/api/c_api/query/query_api_internal.h +++ b/tiledb/api/c_api/query/query_api_internal.h @@ -57,8 +57,8 @@ inline void ensure_query_is_valid(tiledb_query_t* query) { * * @param query A sm::Query pointer */ -inline void ensure_query_is_not_initialized(const tiledb::sm::Query* query) { - if (query->status() != sm::QueryStatus::UNINITIALIZED) { +inline void ensure_query_is_not_initialized(const tiledb::sm::Query& query) { + if (query.status() != sm::QueryStatus::UNINITIALIZED) { throw CAPIStatusException( "argument `query` is at a too late state of its lifetime"); } @@ -72,7 +72,8 @@ inline void ensure_query_is_not_initialized(const tiledb::sm::Query* query) { */ inline void ensure_query_is_not_initialized(tiledb_query_t* query) { ensure_query_is_valid(query); - ensure_query_is_not_initialized(query->query_); + // Indirection safe because previous statement will throw otherwise + ensure_query_is_not_initialized(*query->query_); } } // namespace tiledb::api diff --git a/tiledb/api/c_api/query_aggregate/query_aggregate_api.cc b/tiledb/api/c_api/query_aggregate/query_aggregate_api.cc index 
786d663b8160..65912d21cfe2 100644 --- a/tiledb/api/c_api/query_aggregate/query_aggregate_api.cc +++ b/tiledb/api/c_api/query_aggregate/query_aggregate_api.cc @@ -161,11 +161,8 @@ capi_return_t tiledb_query_get_default_channel( tiledb_ctx_t*, tiledb_query_t* query, tiledb_query_channel_t** channel) { ensure_query_is_valid(query); ensure_output_pointer_is_valid(channel); - - // We don't have an internal representation of a channel, - // the default channel is currently just a hashmap, so only pass the query - // to the channel constructor to be carried until next the api call. - *channel = tiledb_query_channel_handle_t::make_handle(query); + *channel = tiledb_query_channel_handle_t::make_handle( + query->query_->default_channel()); return TILEDB_OK; } @@ -190,10 +187,29 @@ capi_return_t tiledb_create_unary_aggregate( tiledb_query_field_handle_t::make_handle(query, input_field_name); tiledb_query_field_handle_t::break_handle(field); + const auto is_dense_dim{schema.dense() && schema.is_dim(field_name)}; + const auto cell_order{schema.cell_order()}; + + // Get the dimension index for the dense case. It is used below to know if the + // dimension to be aggregated is the last dimension for ROW_MAJOR or first for + // COL_MAJOR. This is used at the aggregate level to know if we need to change + // the dimension value when we move cells. + unsigned dim_idx = 0; + if (is_dense_dim) { + dim_idx = schema.domain().get_dimension_index(field_name); + } + + const bool is_slab_dim = + is_dense_dim && ((cell_order == sm::Layout::ROW_MAJOR) ?
+ (dim_idx == schema.dim_num() - 1) : + (dim_idx == 0)); + auto fi = tiledb::sm::FieldInfo( field_name, schema.var_size(field_name), schema.is_nullable(field_name), + is_dense_dim, + is_slab_dim, schema.cell_val_num(field_name), schema.type(field_name)); @@ -209,7 +225,7 @@ capi_return_t tiledb_channel_apply_aggregate( const char* output_field_name, const tiledb_channel_operation_t* operation) { ensure_query_channel_is_valid(channel); - ensure_query_is_not_initialized(channel->query_); + ensure_query_is_not_initialized(channel->query()); ensure_output_field_is_valid(output_field_name); ensure_operation_is_valid(operation); channel->add_aggregate(output_field_name, operation); diff --git a/tiledb/api/c_api/query_aggregate/query_aggregate_api_internal.h b/tiledb/api/c_api/query_aggregate/query_aggregate_api_internal.h index 1c736f077b03..c33d27ae9676 100644 --- a/tiledb/api/c_api/query_aggregate/query_aggregate_api_internal.h +++ b/tiledb/api/c_api/query_aggregate/query_aggregate_api_internal.h @@ -39,6 +39,7 @@ #include "tiledb/sm/c_api/tiledb_struct_def.h" #include "tiledb/sm/query/query.h" #include "tiledb/sm/query/readers/aggregators/operation.h" +#include "tiledb/sm/query/readers/aggregators/query_channel.h" struct tiledb_channel_operation_handle_t : public tiledb::api::CAPIHandle { @@ -71,6 +72,11 @@ struct tiledb_channel_operation_handle_t } }; +/* Forward declaration */ +namespace tiledb::sm { +class Query; +} + struct tiledb_query_channel_handle_t : public tiledb::api::CAPIHandle { /** @@ -78,9 +84,10 @@ struct tiledb_query_channel_handle_t */ static constexpr std::string_view object_type_name{"tiledb_query_channel_t"}; - public: - tiledb::sm::Query* query_; + private: + std::shared_ptr channel_; + public: /** * Default constructor doesn't make sense */ @@ -90,14 +97,16 @@ struct tiledb_query_channel_handle_t * Ordinary constructor.
* @param query The query object that owns the channel */ - tiledb_query_channel_handle_t(tiledb_query_t* query) - : query_(query->query_) { + tiledb_query_channel_handle_t( + std::shared_ptr channel) + : channel_(channel) { } inline void add_aggregate( const char* output_field, const tiledb_channel_operation_handle_t* operation) { - if (query_->is_aggregate(output_field)) { + auto& query{channel_->query()}; + if (query.is_aggregate(output_field)) { throw tiledb::api::CAPIStatusException( "An aggregate operation for output field: " + std::string(output_field) + " already exists."); @@ -105,9 +114,13 @@ struct tiledb_query_channel_handle_t // Add the aggregator the the default channel as this is the only channel // type we currently support - query_->add_aggregator_to_default_channel( + query.add_aggregator_to_default_channel( output_field, operation->aggregator()); } + + inline tiledb::sm::Query& query() { + return channel_->query(); + } }; struct tiledb_channel_operator_handle_t diff --git a/tiledb/api/c_api/query_field/query_field_api.cc b/tiledb/api/c_api/query_field/query_field_api.cc index 6e396af2e35c..03fc140003b7 100644 --- a/tiledb/api/c_api/query_field/query_field_api.cc +++ b/tiledb/api/c_api/query_field/query_field_api.cc @@ -53,6 +53,7 @@ tiledb_query_field_handle_t::tiledb_query_field_handle_t( tiledb_query_t* query, const char* field_name) : query_(query->query_) , field_name_(field_name) { + bool is_aggregate{false}; if (field_name_ == tiledb::sm::constants::coords) { field_origin_ = std::make_shared(); type_ = query_->array_schema().domain().dimension_ptr(0)->type(); @@ -72,6 +73,7 @@ tiledb_query_field_handle_t::tiledb_query_field_handle_t( cell_val_num_ = query_->array_schema().dimension_ptr(field_name_)->cell_val_num(); } else if (query_->is_aggregate(field_name_)) { + is_aggregate = true; field_origin_ = std::make_shared(); auto aggregate = query_->get_aggregate(field_name_).value(); type_ = aggregate->output_datatype(); @@ -80,8 +82,16 @@ 
tiledb_query_field_handle_t::tiledb_query_field_handle_t( } else { throw tiledb::api::CAPIStatusException("There is no field " + field_name_); } - - channel_ = tiledb_query_channel_handle_t::make_handle(query); + /* + * We have no `class QueryField` that would already know its own aggregate, + * so we mirror the channel selection process that `class Query` has + * responsibility for. + */ + if (is_aggregate) { + channel_ = query_->aggegate_channel(); + } else { + channel_ = query_->default_channel(); + } } namespace tiledb::api { diff --git a/tiledb/api/c_api/query_field/query_field_api_internal.h b/tiledb/api/c_api/query_field/query_field_api_internal.h index 3e50160537e1..3ac79f9f4c09 100644 --- a/tiledb/api/c_api/query_field/query_field_api_internal.h +++ b/tiledb/api/c_api/query_field/query_field_api_internal.h @@ -73,7 +73,7 @@ struct tiledb_query_field_handle_t std::shared_ptr field_origin_; tiledb::sm::Datatype type_; uint32_t cell_val_num_; - tiledb_query_channel_handle_t* channel_; + std::shared_ptr channel_; public: /** @@ -87,10 +87,6 @@ struct tiledb_query_field_handle_t */ tiledb_query_field_handle_t(tiledb_query_t* query, const char* field_name); - ~tiledb_query_field_handle_t() { - tiledb_query_channel_handle_t::break_handle(channel_); - } - tiledb_field_origin_t origin() { return field_origin_->origin(); } @@ -101,7 +97,7 @@ struct tiledb_query_field_handle_t return cell_val_num_; } tiledb_query_channel_handle_t* channel() { - return channel_; + return tiledb_query_channel_handle_t::make_handle(channel_); } }; diff --git a/tiledb/api/c_api/query_field/test/unit_capi_query_field.cc b/tiledb/api/c_api/query_field/test/unit_capi_query_field.cc index 40e1042065e0..e9105f50fb97 100644 --- a/tiledb/api/c_api/query_field/test/unit_capi_query_field.cc +++ b/tiledb/api/c_api/query_field/test/unit_capi_query_field.cc @@ -32,25 +32,38 @@ #include #include "test/support/src/helpers.h" -#include "test/support/src/vfs_helpers.h" +#include 
"test/support/src/temporary_local_directory.h" #include "tiledb/api/c_api/config/config_api_internal.h" #include "tiledb/api/c_api/context/context_api_internal.h" #include "tiledb/api/c_api/query_field/query_field_api_external_experimental.h" using namespace tiledb::test; -struct QueryFieldFx : TemporaryDirectoryFixture { +class QueryFieldFx { + tiledb::sm::TemporaryLocalDirectory tmpslash{}; + std::string test_array_name{tmpslash.path() + "queryfield_array"}; + void write_sparse_array(const std::string& path); + void create_sparse_array(const std::string& path); + + protected: + /** TileDB context */ + tiledb_ctx_t* ctx; + + public: QueryFieldFx() { + if (tiledb_ctx_alloc(nullptr, &ctx) != TILEDB_OK) { + throw std::runtime_error("Failed to allocate context"); + } create_sparse_array(array_name()); write_sparse_array(array_name()); } + ~QueryFieldFx() { + (void)tiledb_ctx_free(&ctx); + } std::string array_name() { - return temp_dir_ + "queryfield_array"; + return test_array_name; } - - void write_sparse_array(const std::string& path); - void create_sparse_array(const std::string& path); }; void QueryFieldFx::write_sparse_array(const std::string& array_name) { @@ -179,15 +192,13 @@ void QueryFieldFx::create_sparse_array(const std::string& array_name) { TEST_CASE_METHOD( QueryFieldFx, - "C API: argument validation", + "C API: argument validation, tiledb_query_get_field", "[capi][query_field][get][args]") { tiledb_array_t* array = nullptr; - ; REQUIRE(tiledb_array_alloc(ctx, array_name().c_str(), &array) == TILEDB_OK); REQUIRE(tiledb_array_open(ctx, array, TILEDB_READ) == TILEDB_OK); tiledb_query_t* query = nullptr; - ; REQUIRE(tiledb_query_alloc(ctx, array, TILEDB_READ, &query) == TILEDB_OK); tiledb_query_field_t* field = nullptr; @@ -213,22 +224,20 @@ TEST_CASE_METHOD( } // Clean up - tiledb_query_free(&query); + (void)tiledb_query_free(&query); CHECK(tiledb_array_close(ctx, array) == TILEDB_OK); - tiledb_array_free(&array); + (void)tiledb_array_free(&array); } 
TEST_CASE_METHOD( QueryFieldFx, - "C API: argument validation", + "C API: argument validation, query field properties", "[capi][query_field][access][args]") { tiledb_array_t* array = nullptr; - ; REQUIRE(tiledb_array_alloc(ctx, array_name().c_str(), &array) == TILEDB_OK); REQUIRE(tiledb_array_open(ctx, array, TILEDB_READ) == TILEDB_OK); tiledb_query_t* query = nullptr; - ; REQUIRE(tiledb_query_alloc(ctx, array, TILEDB_READ, &query) == TILEDB_OK); tiledb_query_field_t* field = nullptr; @@ -268,13 +277,12 @@ TEST_CASE_METHOD( // Clean up CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); - tiledb_query_free(&query); + (void)tiledb_query_free(&query); CHECK(tiledb_array_close(ctx, array) == TILEDB_OK); - tiledb_array_free(&array); + (void)tiledb_array_free(&array); } -TEST_CASE_METHOD( - QueryFieldFx, "C API: argument validation", "[capi][query_field]") { +TEST_CASE_METHOD(QueryFieldFx, "C API: get_field", "[capi][query_field]") { tiledb_array_t* array = nullptr; REQUIRE(tiledb_array_alloc(ctx, array_name().c_str(), &array) == TILEDB_OK); REQUIRE(tiledb_array_open(ctx, array, TILEDB_READ) == TILEDB_OK); @@ -292,75 +300,94 @@ TEST_CASE_METHOD( uint32_t cell_val_num = 0; tiledb_query_channel_t* channel = nullptr; - // Errors out when the field doesn't exist - CHECK( - tiledb_query_get_field(ctx, query, "non_existent", &field) == TILEDB_ERR); - - // Check field api works on dimension field - REQUIRE(tiledb_query_get_field(ctx, query, "d1", &field) == TILEDB_OK); - - REQUIRE(tiledb_field_datatype(ctx, field, &type) == TILEDB_OK); - CHECK(type == TILEDB_UINT64); - REQUIRE(tiledb_field_origin(ctx, field, &origin) == TILEDB_OK); - CHECK(origin == TILEDB_DIMENSION_FIELD); - REQUIRE(tiledb_field_cell_val_num(ctx, field, &cell_val_num) == TILEDB_OK); - CHECK(cell_val_num == 1); - - // Check field api works on aggregate field - REQUIRE(tiledb_field_channel(ctx, field, &channel) == TILEDB_OK); - REQUIRE( - tiledb_channel_apply_aggregate( - ctx, channel, "Count", 
tiledb_aggregate_count) == TILEDB_OK); - uint64_t count = 0; - uint64_t size = 8; - REQUIRE( - tiledb_query_set_data_buffer(ctx, query, "Count", &count, &size) == - TILEDB_OK); - REQUIRE(tiledb_query_submit(ctx, query) == TILEDB_OK); - CHECK(count == 9); - CHECK(tiledb_query_channel_free(ctx, &channel) == TILEDB_OK); - CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); + SECTION("Non-existent field") { + // Errors out when the field doesn't exist + CHECK( + tiledb_query_get_field(ctx, query, "non_existent", &field) == + TILEDB_ERR); + }; + + SECTION("Dimension field") { + // Check field api works on dimension field + REQUIRE(tiledb_query_get_field(ctx, query, "d1", &field) == TILEDB_OK); + REQUIRE(tiledb_field_datatype(ctx, field, &type) == TILEDB_OK); + CHECK(type == TILEDB_UINT64); + REQUIRE(tiledb_field_origin(ctx, field, &origin) == TILEDB_OK); + CHECK(origin == TILEDB_DIMENSION_FIELD); + REQUIRE(tiledb_field_cell_val_num(ctx, field, &cell_val_num) == TILEDB_OK); + CHECK(cell_val_num == 1); + CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); + } - // Check field api works on timestamp field - REQUIRE( - tiledb_query_get_field(ctx, query, "__timestamps", &field) == TILEDB_OK); - REQUIRE(tiledb_field_datatype(ctx, field, &type) == TILEDB_OK); - CHECK(type == TILEDB_UINT64); - REQUIRE(tiledb_field_origin(ctx, field, &origin) == TILEDB_OK); - CHECK(origin == TILEDB_ATTRIBUTE_FIELD); - REQUIRE(tiledb_field_cell_val_num(ctx, field, &cell_val_num) == TILEDB_OK); - CHECK(cell_val_num == 1); - CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); + SECTION("Timestamp field") { + // Check field api works on timestamp field + REQUIRE( + tiledb_query_get_field(ctx, query, "__timestamps", &field) == + TILEDB_OK); + REQUIRE(tiledb_field_datatype(ctx, field, &type) == TILEDB_OK); + CHECK(type == TILEDB_UINT64); + REQUIRE(tiledb_field_origin(ctx, field, &origin) == TILEDB_OK); + CHECK(origin == TILEDB_ATTRIBUTE_FIELD); + 
REQUIRE(tiledb_field_cell_val_num(ctx, field, &cell_val_num) == TILEDB_OK); + CHECK(cell_val_num == 1); + CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); + } - // Check field api works on coords field - REQUIRE(tiledb_query_get_field(ctx, query, "__coords", &field) == TILEDB_OK); - REQUIRE(tiledb_field_datatype(ctx, field, &type) == TILEDB_OK); - CHECK(type == TILEDB_UINT64); - REQUIRE(tiledb_field_origin(ctx, field, &origin) == TILEDB_OK); - CHECK(origin == TILEDB_DIMENSION_FIELD); - REQUIRE(tiledb_field_cell_val_num(ctx, field, &cell_val_num) == TILEDB_OK); - CHECK(cell_val_num == 1); - CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); + SECTION("\"coords\" field") { + // Check field api works on coords field + REQUIRE( + tiledb_query_get_field(ctx, query, "__coords", &field) == TILEDB_OK); + REQUIRE(tiledb_field_datatype(ctx, field, &type) == TILEDB_OK); + CHECK(type == TILEDB_UINT64); + REQUIRE(tiledb_field_origin(ctx, field, &origin) == TILEDB_OK); + CHECK(origin == TILEDB_DIMENSION_FIELD); + REQUIRE(tiledb_field_cell_val_num(ctx, field, &cell_val_num) == TILEDB_OK); + CHECK(cell_val_num == 1); + CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); + } - // Check field api works on attribute field - REQUIRE(tiledb_query_get_field(ctx, query, "c", &field) == TILEDB_OK); - REQUIRE(tiledb_field_datatype(ctx, field, &type) == TILEDB_OK); - CHECK(type == TILEDB_STRING_ASCII); - REQUIRE(tiledb_field_origin(ctx, field, &origin) == TILEDB_OK); - CHECK(origin == TILEDB_ATTRIBUTE_FIELD); - REQUIRE(tiledb_field_cell_val_num(ctx, field, &cell_val_num) == TILEDB_OK); - CHECK(cell_val_num == TILEDB_VAR_NUM); - CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); + SECTION("Attribute field") { + // Check field api works on attribute field + REQUIRE(tiledb_query_get_field(ctx, query, "c", &field) == TILEDB_OK); + REQUIRE(tiledb_field_datatype(ctx, field, &type) == TILEDB_OK); + CHECK(type == TILEDB_STRING_ASCII); + REQUIRE(tiledb_field_origin(ctx, 
field, &origin) == TILEDB_OK); + CHECK(origin == TILEDB_ATTRIBUTE_FIELD); + REQUIRE(tiledb_field_cell_val_num(ctx, field, &cell_val_num) == TILEDB_OK); + CHECK(cell_val_num == TILEDB_VAR_NUM); + CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); + } - // Check field api works on aggregate field - REQUIRE(tiledb_query_get_field(ctx, query, "Count", &field) == TILEDB_OK); - REQUIRE(tiledb_field_datatype(ctx, field, &type) == TILEDB_OK); - CHECK(type == TILEDB_UINT64); - REQUIRE(tiledb_field_origin(ctx, field, &origin) == TILEDB_OK); - CHECK(origin == TILEDB_AGGREGATE_FIELD); - REQUIRE(tiledb_field_cell_val_num(ctx, field, &cell_val_num) == TILEDB_OK); - CHECK(cell_val_num == 1); - CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); + SECTION("Aggregate field") { + // Check field api works on aggregate field + REQUIRE( + tiledb_query_get_default_channel(ctx, query, &channel) == TILEDB_OK); + REQUIRE( + tiledb_channel_apply_aggregate( + ctx, channel, "Count", tiledb_aggregate_count) == TILEDB_OK); + SECTION("validate") { + // Check field api works on aggregate field + REQUIRE(tiledb_query_get_field(ctx, query, "Count", &field) == TILEDB_OK); + REQUIRE(tiledb_field_datatype(ctx, field, &type) == TILEDB_OK); + CHECK(type == TILEDB_UINT64); + REQUIRE(tiledb_field_origin(ctx, field, &origin) == TILEDB_OK); + CHECK(origin == TILEDB_AGGREGATE_FIELD); + REQUIRE( + tiledb_field_cell_val_num(ctx, field, &cell_val_num) == TILEDB_OK); + CHECK(cell_val_num == 1); + CHECK(tiledb_query_field_free(ctx, &field) == TILEDB_OK); + } + SECTION("run query") { + uint64_t count = 0; + uint64_t size = 8; + REQUIRE( + tiledb_query_set_data_buffer(ctx, query, "Count", &count, &size) == + TILEDB_OK); + REQUIRE(tiledb_query_submit(ctx, query) == TILEDB_OK); + CHECK(count == 9); + } + CHECK(tiledb_query_channel_free(ctx, &channel) == TILEDB_OK); + } // Clean up tiledb_query_free(&query); diff --git a/tiledb/api/c_api/query_plan/query_plan_api.cc 
b/tiledb/api/c_api/query_plan/query_plan_api.cc index cde57b7f9340..47c5ecdc4b27 100644 --- a/tiledb/api/c_api/query_plan/query_plan_api.cc +++ b/tiledb/api/c_api/query_plan/query_plan_api.cc @@ -48,14 +48,7 @@ capi_return_t tiledb_query_get_plan( throw CAPIStatusException("argument `query` may not be nullptr"); } - if ((*query->query_).array()->is_remote()) { - throw std::logic_error( - "Failed to create a query plan; Remote arrays" - "are not currently supported."); - } - sm::QueryPlan plan(*query->query_); - *rv = tiledb_string_handle_t::make_handle(plan.dump_json()); return TILEDB_OK; diff --git a/tiledb/api/c_api/query_plan/test/unit_capi_query_plan.cc b/tiledb/api/c_api/query_plan/test/unit_capi_query_plan.cc index 8a794b1ddba2..3c60adf798c0 100644 --- a/tiledb/api/c_api/query_plan/test/unit_capi_query_plan.cc +++ b/tiledb/api/c_api/query_plan/test/unit_capi_query_plan.cc @@ -31,7 +31,6 @@ */ #include -#include "external/include/nlohmann/json.hpp" #include "test/support/src/vfs_helpers.h" #include "tiledb/api/c_api/query_plan/query_plan_api_external_experimental.h" @@ -108,71 +107,6 @@ void QueryPlanFx::create_dense_array(const std::string& path) { tiledb_domain_free(&domain); } -void QueryPlanFx::create_sparse_array(const std::string& array_name) { - // Create dimensions - uint64_t tile_extents[] = {2, 2}; - uint64_t dim_domain[] = {1, 10, 1, 10}; - - tiledb_dimension_t* d1; - int rc = tiledb_dimension_alloc( - ctx, "d1", TILEDB_UINT64, &dim_domain[0], &tile_extents[0], &d1); - CHECK(rc == TILEDB_OK); - tiledb_dimension_t* d2; - rc = tiledb_dimension_alloc( - ctx, "d2", TILEDB_UINT64, &dim_domain[2], &tile_extents[1], &d2); - CHECK(rc == TILEDB_OK); - - // Create domain - tiledb_domain_t* domain; - rc = tiledb_domain_alloc(ctx, &domain); - CHECK(rc == TILEDB_OK); - rc = tiledb_domain_add_dimension(ctx, domain, d1); - CHECK(rc == TILEDB_OK); - rc = tiledb_domain_add_dimension(ctx, domain, d2); - CHECK(rc == TILEDB_OK); - - // Create attributes - 
tiledb_attribute_t* a; - rc = tiledb_attribute_alloc(ctx, "a", TILEDB_INT32, &a); - CHECK(rc == TILEDB_OK); - tiledb_attribute_t* b; - rc = tiledb_attribute_alloc(ctx, "b", TILEDB_INT32, &b); - CHECK(rc == TILEDB_OK); - - // Create array schema - tiledb_array_schema_t* array_schema; - rc = tiledb_array_schema_alloc(ctx, TILEDB_SPARSE, &array_schema); - CHECK(rc == TILEDB_OK); - rc = tiledb_array_schema_set_cell_order(ctx, array_schema, TILEDB_ROW_MAJOR); - CHECK(rc == TILEDB_OK); - rc = tiledb_array_schema_set_tile_order(ctx, array_schema, TILEDB_ROW_MAJOR); - CHECK(rc == TILEDB_OK); - rc = tiledb_array_schema_set_capacity(ctx, array_schema, 4); - CHECK(rc == TILEDB_OK); - rc = tiledb_array_schema_set_domain(ctx, array_schema, domain); - CHECK(rc == TILEDB_OK); - rc = tiledb_array_schema_add_attribute(ctx, array_schema, a); - CHECK(rc == TILEDB_OK); - rc = tiledb_array_schema_add_attribute(ctx, array_schema, b); - CHECK(rc == TILEDB_OK); - - // Check array schema - rc = tiledb_array_schema_check(ctx, array_schema); - CHECK(rc == TILEDB_OK); - - // Create array - rc = tiledb_array_create(ctx, array_name.c_str(), array_schema); - CHECK(rc == TILEDB_OK); - - // Clean up - tiledb_attribute_free(&a); - tiledb_attribute_free(&b); - tiledb_dimension_free(&d1); - tiledb_dimension_free(&d2); - tiledb_domain_free(&domain); - tiledb_array_schema_free(&array_schema); -} - TEST_CASE_METHOD( QueryPlanFx, "C API: tiledb_query_get_plan argument validation", @@ -200,183 +134,3 @@ TEST_CASE_METHOD( tiledb_query_free(&query); tiledb_array_free(&array); } - -TEST_CASE_METHOD( - QueryPlanFx, - "C API: tiledb_query_get_plan API lifecycle checks", - "[capi][query_plan]") { - std::string array_name = temp_dir_ + "queryplan_array"; - create_dense_array(array_name); - - tiledb_array_t* array; - REQUIRE(tiledb_array_alloc(ctx, array_name.c_str(), &array) == TILEDB_OK); - REQUIRE(tiledb_array_open(ctx, array, TILEDB_READ) == TILEDB_OK); - - tiledb_query_t* query; - 
REQUIRE(tiledb_query_alloc(ctx, array, TILEDB_READ, &query) == TILEDB_OK); - - CHECK(tiledb_query_set_layout(ctx, query, TILEDB_ROW_MAJOR) == TILEDB_OK); - - int64_t dom[] = {1, 2, 1, 2}; - CHECK(tiledb_query_set_subarray(ctx, query, &dom) == TILEDB_OK); - - std::vector d(4); - uint64_t size = 1; - CHECK( - tiledb_query_set_data_buffer(ctx, query, "a1", d.data(), &size) == - TILEDB_OK); - - tiledb_string_handle_t* string_handle; - CHECK(tiledb_query_get_plan(ctx, query, &string_handle) == TILEDB_OK); - - // API lifecycle checks - // It's not possible to set subarrays, layout, query condition or new buffers - // once the query plan got generated. - CHECK(tiledb_query_set_subarray(ctx, query, &dom) == TILEDB_ERR); - CHECK(tiledb_query_set_layout(ctx, query, TILEDB_COL_MAJOR) == TILEDB_ERR); - tiledb_query_condition_t* qc; - CHECK(tiledb_query_condition_alloc(ctx, &qc) == TILEDB_OK); - int32_t val = 10000; - CHECK( - tiledb_query_condition_init( - ctx, qc, "a1", &val, sizeof(int32_t), TILEDB_LT) == TILEDB_OK); - CHECK(tiledb_query_set_condition(ctx, query, qc) == TILEDB_ERR); - CHECK( - tiledb_query_set_data_buffer(ctx, query, "a2", d.data(), &size) == - TILEDB_ERR); - - // But it's possible to set existing buffers to accomodate existing - // query INCOMPLETEs functionality - CHECK( - tiledb_query_set_data_buffer(ctx, query, "a1", d.data(), &size) == - TILEDB_OK); - - REQUIRE(tiledb_string_free(&string_handle) == TILEDB_OK); - REQUIRE(tiledb_array_close(ctx, array) == TILEDB_OK); - tiledb_query_free(&query); - tiledb_array_free(&array); -} - -TEST_CASE_METHOD( - QueryPlanFx, - "C API: Query plan basic bahaviour", - "[capi][query_plan][basic1]") { - std::string array_name = temp_dir_ + "queryplan_array"; - create_dense_array(array_name); - - tiledb_array_t* array; - REQUIRE(tiledb_array_alloc(ctx, array_name.c_str(), &array) == TILEDB_OK); - REQUIRE(tiledb_array_open(ctx, array, TILEDB_READ) == TILEDB_OK); - - tiledb_query_t* query; - REQUIRE(tiledb_query_alloc(ctx, 
array, TILEDB_READ, &query) == TILEDB_OK); - - CHECK(tiledb_query_set_layout(ctx, query, TILEDB_ROW_MAJOR) == TILEDB_OK); - - int64_t dom[] = {1, 2, 1, 2}; - CHECK(tiledb_query_set_subarray(ctx, query, &dom) == TILEDB_OK); - - std::vector d(4); - uint64_t size = 1; - CHECK( - tiledb_query_set_data_buffer(ctx, query, "a1", d.data(), &size) == - TILEDB_OK); - CHECK( - tiledb_query_set_data_buffer(ctx, query, "a2", d.data(), &size) == - TILEDB_OK); - - tiledb_string_handle_t* string_handle; - const char* data; - size_t len; - CHECK(tiledb_query_get_plan(ctx, query, &string_handle) == TILEDB_OK); - CHECK(tiledb_string_view(string_handle, &data, &len) == TILEDB_OK); - - // This throws if the query plan is not valid JSON - std::string str_plan(data, len); - nlohmann::json json_plan = nlohmann::json::parse(str_plan); - - CHECK(json_plan["TileDB Query Plan"]["Array.URI"] == array_name); - CHECK(json_plan["TileDB Query Plan"]["Array.Type"] == "dense"); - CHECK( - json_plan["TileDB Query Plan"]["VFS.Backend"] == - tiledb::sm::URI(array_name).backend_name()); - CHECK(json_plan["TileDB Query Plan"]["Query.Layout"] == "row-major"); - CHECK(json_plan["TileDB Query Plan"]["Query.Strategy.Name"] == "DenseReader"); - CHECK( - json_plan["TileDB Query Plan"]["Query.Attributes"] == - std::vector({"a1", "a2"})); - CHECK( - json_plan["TileDB Query Plan"]["Query.Dimensions"] == - std::vector({"dim_1", "dim_2"})); - - REQUIRE(tiledb_string_free(&string_handle) == TILEDB_OK); - REQUIRE(tiledb_array_close(ctx, array) == TILEDB_OK); - tiledb_query_free(&query); - tiledb_array_free(&array); -} - -TEST_CASE_METHOD( - QueryPlanFx, - "C API: Query plan basic bahaviour 2", - "[capi][query_plan][basic2]") { - std::string array_name = temp_dir_ + "queryplan_array"; - create_sparse_array(array_name); - - tiledb_array_t* array; - REQUIRE(tiledb_array_alloc(ctx, array_name.c_str(), &array) == TILEDB_OK); - REQUIRE(tiledb_array_open(ctx, array, TILEDB_WRITE) == TILEDB_OK); - - tiledb_query_t* query; - 
REQUIRE(tiledb_query_alloc(ctx, array, TILEDB_WRITE, &query) == TILEDB_OK); - - CHECK(tiledb_query_set_layout(ctx, query, TILEDB_GLOBAL_ORDER) == TILEDB_OK); - - std::vector coords = {1, 2, 3}; - uint64_t coords_size = coords.size() * sizeof(uint64_t); - std::vector a = {1, 2, 3}; - uint64_t a_size = a.size() * sizeof(int); - std::vector b = {1, 2, 3}; - uint64_t b_size = b.size() * sizeof(int); - - CHECK( - tiledb_query_set_data_buffer(ctx, query, "a", (void*)a.data(), &a_size) == - TILEDB_OK); - CHECK( - tiledb_query_set_data_buffer(ctx, query, "b", (void*)b.data(), &b_size) == - TILEDB_OK); - CHECK( - tiledb_query_set_data_buffer( - ctx, query, TILEDB_COORDS, (void*)coords.data(), &coords_size) == - TILEDB_OK); - - tiledb_string_handle_t* string_handle; - const char* data; - size_t len; - CHECK(tiledb_query_get_plan(ctx, query, &string_handle) == TILEDB_OK); - CHECK(tiledb_string_view(string_handle, &data, &len) == TILEDB_OK); - - // This throws if the query plan is not valid JSON - std::string str_plan(data, len); - nlohmann::json json_plan = nlohmann::json::parse(str_plan); - - CHECK(json_plan["TileDB Query Plan"]["Array.URI"] == array_name); - CHECK(json_plan["TileDB Query Plan"]["Array.Type"] == "sparse"); - CHECK( - json_plan["TileDB Query Plan"]["VFS.Backend"] == - tiledb::sm::URI(array_name).backend_name()); - CHECK(json_plan["TileDB Query Plan"]["Query.Layout"] == "global-order"); - CHECK( - json_plan["TileDB Query Plan"]["Query.Strategy.Name"] == - "GlobalOrderWriter"); - CHECK( - json_plan["TileDB Query Plan"]["Query.Attributes"] == - std::vector({"__coords", "a", "b"})); - CHECK( - json_plan["TileDB Query Plan"]["Query.Dimensions"] == - std::vector()); - - REQUIRE(tiledb_string_free(&string_handle) == TILEDB_OK); - REQUIRE(tiledb_array_close(ctx, array) == TILEDB_OK); - tiledb_query_free(&query); - tiledb_array_free(&array); -} diff --git a/tiledb/api/c_api/vfs/vfs_api_experimental.h b/tiledb/api/c_api/vfs/vfs_api_experimental.h index 
b70f97629c7d..dba16811ae40 100644 --- a/tiledb/api/c_api/vfs/vfs_api_experimental.h +++ b/tiledb/api/c_api/vfs/vfs_api_experimental.h @@ -58,7 +58,8 @@ typedef int32_t (*tiledb_ls_callback_t)( * on error. The callback is responsible for writing gathered entries into the * `data` buffer, for example using a pointer to a user-defined struct. * - * Currently only S3 is supported, and the `path` must be a valid S3 URI. + * Currently only Posix and S3 are supported, and the `path` must be a valid URI + * for one of those filesystems. * * **Example:** * diff --git a/tiledb/common/CMakeLists.txt b/tiledb/common/CMakeLists.txt index a7736fd55ec0..3c8fbfb987b5 100644 --- a/tiledb/common/CMakeLists.txt +++ b/tiledb/common/CMakeLists.txt @@ -54,9 +54,20 @@ set(TILEDB_COMMON_SOURCES ${COMMON_SOURCES} PARENT_SCOPE) # `baseline` object library # commence(object_library baseline) - this_target_sources(logger.cc governor/governor.cc - dynamic_memory/dynamic_memory.cc heap_profiler.cc heap_memory.cc - exception/exception.cc exception/status.cc) + this_target_sources( + logger.cc + dynamic_memory/dynamic_memory.cc + exception/exception.cc + exception/status.cc + governor/governor.cc + heap_profiler.cc + heap_memory.cc + memory_tracker.cc + pmr.cc + ) + if (TILEDB_USE_CPP17_PMR) + this_target_sources(polymorphic_allocator/polymorphic_allocator.cc) + endif() find_package(Spdlog_EP REQUIRED) target_link_libraries(baseline PUBLIC spdlog::spdlog) target_link_libraries(baseline PUBLIC common) @@ -69,4 +80,4 @@ commence(object_library stringx) this_target_sources(stdx_string.cc) conclude(object_library) -add_test_subdirectory() \ No newline at end of file +add_test_subdirectory() diff --git a/tiledb/common/indexed_list.h b/tiledb/common/indexed_list.h new file mode 100644 index 000000000000..e3e87302264f --- /dev/null +++ b/tiledb/common/indexed_list.h @@ -0,0 +1,189 @@ +/** + * @file indexed_list.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2022 
TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines and implements the `IndexedList` class. + */ + +#ifndef TILEDB_INDEXED_LIST_H +#define TILEDB_INDEXED_LIST_H + +#include +#include + +namespace tiledb::common { + +/** + * Container class for data that cannot be moved but that we want to access by + * an index. + * + * @tparam T The type of the element used in the container + */ +template +class IndexedList { + public: + /* ********************************* */ + /* CONSTRUCTORS & DESTRUCTORS */ + /* ********************************* */ + + /** Default constructor. */ + IndexedList() { + } + + DISABLE_COPY_AND_COPY_ASSIGN(IndexedList); + DISABLE_MOVE_AND_MOVE_ASSIGN(IndexedList); + + /* ********************************* */ + /* API */ + /* ********************************* */ + + /** + * Emplace an item to the end of the container. 
+ * + * @param args Arguments forwarded to the initialization function. + * + */ + template + void emplace_back(Args&&... args) { + list_.emplace_back(std::forward(args)...); + vec_.emplace_back(&list_.back()); + } + + /** Returns an iterator to the beginning of the items. */ + typename std::list::iterator begin() { + return list_.begin(); + } + + /** Returns an iterator to the end of the items. */ + typename std::list::iterator end() { + return list_.end(); + } + + /** Returns whether the container is empty or not. */ + bool empty() const { + return list_.empty(); + } + + /** Clears the container. */ + void clear() { + list_.clear(); + vec_.clear(); + } + + /** Returns the number of items in the container. */ + size_t size() const { + return list_.size(); + } + + /** + * Reserve space for a number of items. + * + * @param num Number of items to reserve for. + */ + void reserve(size_t num) { + vec_.reserve(num); + } + + /** + * Resize the container with default constructed items. + * + * Note: Only allowed on an empty container. + * + * @param num Number of items to add. + */ + void resize(size_t num) { + if (list_.size() != 0 || vec_.size() != 0) { + throw std::logic_error( + "Resize should only be called on empty container."); + } + + vec_.reserve(num); + for (uint64_t n = 0; n < num; n++) { + emplace_back(); + } + } + + /** + * Returns a reference to the item at an index. + * + * Note: This API will not throw if an item out of bounds is asked for. + * + * @param index Index of the item to return. + * @return The item. + */ + T& operator[](size_t index) { + return *vec_[index]; + } + + /** + * Returns a reference to the item at an index. + * + * Note: This API will throw if an item out of bounds is asked for. + * + * @param index Index of the item to return. + * @return The item. + */ + T& at(size_t index) { + return *(vec_.at(index)); + } + + /** + * Returns a const reference to the item at an index.
+ * + * Note: This API will not throw if an item out of bounds is asked for. + * + * @param index Index of the item to return. + * @return The item. + */ + const T& operator[](size_t index) const { + return *vec_[index]; + } + + /** + * Returns a const reference to the item at an index. + * + * Note: This API will throw if an item out of bounds is asked for. + * + * @param index Index of the item to return. + * @return The item. + */ + const T& at(size_t index) const { + return *(vec_.at(index)); + } + + private: + /** List that contains all the elements. */ + std::list list_; + + /** Vector that contains a pointer to the elements allowing indexed access. */ + std::vector vec_; +}; + +} // namespace tiledb::common + +#endif // TILEDB_INDEXED_LIST_H diff --git a/tiledb/common/memory_tracker.cc b/tiledb/common/memory_tracker.cc new file mode 100644 index 000000000000..c7d9071bdc20 --- /dev/null +++ b/tiledb/common/memory_tracker.cc @@ -0,0 +1,417 @@ +/** + * @file memory_tracker.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file contains implementations for the PMR memory tracking classes. See + * the top level description in memory_tracker.h. + */ + +#include + +#include "external/include/nlohmann/json.hpp" + +#include "tiledb/common/exception/exception.h" +#include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" + +namespace tiledb::sm { + +class MemoryTrackerException : public common::StatusException { + public: + explicit MemoryTrackerException(const std::string& message) + : StatusException("MemoryTracker", message) { + } +}; + +std::string memory_type_to_str(MemoryType type) { + switch (type) { + case MemoryType::ENUMERATION: + return "Enumeration"; + case MemoryType::ENUMERATION_PATHS: + return "EnumerationPaths"; + case MemoryType::FOOTER: + return "Footer"; + case MemoryType::FILTERED_DATA: + return "FilteredData"; + case MemoryType::FILTERED_DATA_BLOCK: + return "FilteredDataBlock"; + case MemoryType::GENERIC_TILE_IO: + return "GenericTileIO"; + case MemoryType::RTREE: + return "RTree"; + case MemoryType::TILE_BITMAP: + return "TileBitmap"; + case MemoryType::TILE_DATA: + return "TileData"; + case MemoryType::TILE_HILBERT_VALUES: + return "TileHilbertValues"; + case MemoryType::TILE_OFFSETS: + return "TileOffsets"; + case MemoryType::TILE_MAX_VALS: + return "TileMaxVals"; + case MemoryType::TILE_MIN_VALS: + return "TileMinVals"; + case MemoryType::TILE_NULL_COUNTS: + return "TileNullCounts"; + case MemoryType::TILE_QUERY_CONDITIONS: + return "TileQueryConditions"; + case MemoryType::ATTRIBUTES: + return "Attributes"; + case MemoryType::DIMENSION_LABELS: + return "DimensionLabels"; + case MemoryType::DIMENSIONS: + return "Dimensions"; + case 
MemoryType::DOMAINS: + return "Domains"; + case MemoryType::TILE_SUMS: + return "TileSums"; + case MemoryType::TILE_WRITER_DATA: + return "TileWriterData"; + case MemoryType::METADATA: + return "Metadata"; + } + + auto val = std::to_string(static_cast(type)); + throw std::logic_error("Invalid memory type: " + val); +} + +std::string memory_tracker_type_to_str(MemoryTrackerType type) { + switch (type) { + case MemoryTrackerType::ANONYMOUS: + return "Anonymous"; + case MemoryTrackerType::ARRAY_CREATE: + return "ArrayCreate"; + case MemoryTrackerType::ARRAY_LOAD: + return "ArrayLoad"; + case MemoryTrackerType::ARRAY_READ: + return "ArrayRead"; + case MemoryTrackerType::ARRAY_WRITE: + return "ArrayWrite"; + case MemoryTrackerType::ENUMERATION_CREATE: + return "EnumerationCreate"; + case MemoryTrackerType::FRAGMENT_INFO_LOAD: + return "FragmentInfoLoad"; + case MemoryTrackerType::QUERY_READ: + return "QueryRead"; + case MemoryTrackerType::QUERY_WRITE: + return "QueryWrite"; + case MemoryTrackerType::CONSOLIDATOR: + return "Consolidator"; + case MemoryTrackerType::REST_CLIENT: + return "RestClient"; + case MemoryTrackerType::EPHEMERAL: + return "Ephemeral"; + case MemoryTrackerType::SCHEMA_EVOLUTION: + return "SchemaEvolution"; + case MemoryTrackerType::GROUP: + return "Group"; + } + + auto val = std::to_string(static_cast(type)); + throw std::logic_error("Invalid memory tracker type: " + val); +} + +uint64_t MemoryTrackerResource::get_count() { + return type_counter_.fetch_add(0, std::memory_order_relaxed); +} + +void* MemoryTrackerResource::do_allocate(size_t bytes, size_t alignment) { + total_counter_.fetch_add(bytes, std::memory_order_relaxed); + type_counter_.fetch_add(bytes, std::memory_order_relaxed); + return upstream_->allocate(bytes, alignment); +} + +void MemoryTrackerResource::do_deallocate( + void* ptr, size_t bytes, size_t alignment) { + upstream_->deallocate(ptr, bytes, alignment); + type_counter_.fetch_sub(bytes, std::memory_order_relaxed); + 
total_counter_.fetch_sub(bytes, std::memory_order_relaxed); +} + +bool MemoryTrackerResource::do_is_equal( + const tdb::pmr::memory_resource& other) const noexcept { + return this == &other; +} + +MemoryTracker::~MemoryTracker() { + assert( + total_counter_.fetch_add(0) == 0 && + "MemoryTracker destructed with outstanding allocations."); +} + +tdb::pmr::memory_resource* MemoryTracker::get_resource(MemoryType type) { + std::lock_guard lg(mutex_); + + // If we've already created an instance for this type, return it. + auto iter = resources_.find(type); + if (iter != resources_.end()) { + return iter->second.get(); + } + + // Add a new counter if it doesn't exist. + if (counters_.find(type) == counters_.end()) { + counters_.emplace(type, 0); + } else { + // There's no outstanding memory resource for this type, so it must be zero. + assert(counters_[type] == 0 && "Invalid memory tracking state."); + } + + // Create and track a shared_ptr to the new memory resource. + auto ret = make_shared( + HERE(), upstream_, total_counter_, counters_[type]); + resources_.emplace(type, ret); + + // Return the raw memory resource pointer for use by pmr containers. + return ret.get(); +} + +std::tuple> +MemoryTracker::get_counts() { + std::lock_guard lg(mutex_); + + auto total = total_counter_.fetch_add(0, std::memory_order_relaxed); + std::unordered_map by_type; + std::vector to_del; + for (auto& [mem_type, resource] : resources_) { + by_type[mem_type] = resource->get_count(); + } + + return {total, by_type}; +} + +uint64_t MemoryTracker::generate_id() { + static std::atomic curr_id{0}; + return curr_id.fetch_add(1); +} + +shared_ptr MemoryTrackerManager::create_tracker() { + /* + * The MemoryTracker class has a protected constructor to hopefully help + * self-document that instances should almost never be created directly + * except in test code. 
There exists a + * `tiledb::test::create_test_memory_tracker()` API that can be used in + * tests to create untracked instances of MemoryTracker. + * + * This just uses the standard private derived class to enable the use of + * `make_shared` to create instances in specific bits of code. + */ + class MemoryTrackerCreator : public MemoryTracker { + public: + /** + * Pass through to the protected MemoryTracker constructor for + * make_shared. + */ + MemoryTrackerCreator() + : MemoryTracker() { + } + }; + + std::lock_guard lg(mutex_); + + // Delete any expired weak_ptr instances + size_t idx = 0; + while (idx < trackers_.size()) { + if (trackers_[idx].expired()) { + trackers_.erase(trackers_.begin() + idx); + } else { + idx++; + } + } + + // Create a new tracker + auto ret = make_shared(HERE()); + trackers_.emplace(trackers_.begin(), ret); + + return ret; +} + +std::string MemoryTrackerManager::to_json() { + std::lock_guard lg(mutex_); + nlohmann::json rv; + + /* + * The reason for this being a while-loop instead of a for-loop is that we're + * modifying the trackers_ vector while iterating over it. The reference docs + * for std::vector::erase make it sound like a standard for-loop with + * iterators would work here, but the subtle key point in the docs is that + * the end() iterator used during iteration is invalidated after a call to + * erase. The end result of which is that it will lead to a subtle bug when + * every weak_ptr in the vector is expired (such as at shutdown) which leads + * to deleting random bits of memory. Thankfully The address sanitizer + * managed to point out the issue rather quickly. + */ + size_t idx = 0; + while (idx < trackers_.size()) { + auto ptr = trackers_[idx].lock(); + // If the weak_ptr is expired, we just remove it from trackers_ and + // carry on. 
+ if (!ptr) { + trackers_.erase(trackers_.begin() + idx); + continue; + } + + nlohmann::json val; + + // Set a distinguishing id + val["tracker_id"] = std::to_string(ptr->get_id()); + + // Mark the stats with the tracker type. + val["tracker_type"] = memory_tracker_type_to_str(ptr->get_type()); + + // Add memory stats + auto [total, by_type] = ptr->get_counts(); + val["total_memory"] = total; + val["by_type"] = nlohmann::json::object(); + for (auto& [type, count] : by_type) { + val["by_type"][memory_type_to_str(type)] = count; + } + rv.push_back(val); + + idx++; + } + + return rv.dump(); +} + +MemoryTrackerReporter::~MemoryTrackerReporter() { + if (!filename_.has_value()) { + return; + } + + // Scoped lock_guard so we don't hold the lock while waiting for threads + // to join. + { + std::lock_guard lg(mutex_); + stop_ = true; + cv_.notify_all(); + } + + // Wait for the background thread to quit so that we don't cause a segfault + // when we destruct our synchronization primitives. + try { + thread_.join(); + } catch (std::exception& exc) { + LOG_ERROR( + "Error stopping MemoryTrackerReporter thread: " + + std::string(exc.what())); + } +} + +void MemoryTrackerReporter::start() { + if (!filename_.has_value()) { + LOG_INFO("No filename set, not starting the MemoryTrackerReporter."); + return; + } + + { + // Scoped so we release this before the thread starts. Probably unnecessary + // but better safe than sorry. + std::lock_guard lg(mutex_); + if (stop_) { + throw std::runtime_error("MemoryTrackerReporters cannot be restarted."); + } + } + + // Thread start logic mirrored from the ThreadPool.
+ for (size_t i = 0; i < 3; i++) { + try { + thread_ = std::thread(&MemoryTrackerReporter::run, this); + return; + } catch (const std::system_error& e) { + if (e.code() == std::errc::resource_unavailable_try_again) { + continue; + } + + throw MemoryTrackerException( + "Error starting the MemoryTrackerReporter: " + std::string(e.what())); + } + } + + throw MemoryTrackerException( + "No threads avaiable to start the MemoryTrackerReporter."); +} + +void MemoryTrackerReporter::run() { + std::stringstream ss; + std::ofstream out; + + while (true) { + std::unique_lock lk(mutex_); + cv_.wait_for(lk, std::chrono::milliseconds(1000), [&] { return stop_; }); + + if (stop_) { + return; + } + + // Open the log file, possibly re-opening after encountering an error. Log + // any errors and continue trying in case whatever issue resolves itself. + if (!out.is_open()) { + // Clear any error state. + out.clear(); + out.open(filename_.value(), std::ios::app); + } + + // If we failed to open the file, log a message and try again on the next + // iteration of this loop. This logic is in a background thread so the + // only real other options would be to crash the entire program or exit + // the thread. Retrying to see if its an ephemeral error seems better and + // also informs users that something is wrong with their config while not + // causing excessive chaos. + if (!out) { + LOG_ERROR( + "Error opening MemoryTrackerReporter file: " + filename_.value()); + continue; + } + + // Generate a JSON report from our MemoryTrackerManager. + auto json = manager_->to_json(); + if (json == "null") { + // This happens if the manager doesn't have any trackers registered. + // Rather than log noise we just ignore it. + continue; + } + + // Append our report to the log. + ss.str(""); + ss.clear(); + ss << json << std::endl; + out << ss.str(); + + // If writing to the file fails, we make a note, close it and then attempt + // to re-open it on the next iteration. 
See the note above on open errors + // for more context. + if (!out) { + LOG_ERROR( + "Error writing to MemoryTrackeReporter file: " + filename_.value()); + out.close(); + continue; + } + } +} + +} // namespace tiledb::sm diff --git a/tiledb/common/memory_tracker.h b/tiledb/common/memory_tracker.h index 02d374474b9d..4bda450af2c1 100644 --- a/tiledb/common/memory_tracker.h +++ b/tiledb/common/memory_tracker.h @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2021 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. * @copyright Copyright (c) 2016 MIT and Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -28,39 +28,215 @@ * * @section DESCRIPTION * - * This file defines class MemoryTracker. + * This file contains the definitions for classes related to tracking memory + * using the polymorphic memory resources feature introduced in C++17. + * + * There are four main classes to be aware of: + * + * - MemoryTrackerResource + * - MemoryTracker + * - MemoryTrackerManager + * - MemoryTrackerReporter + * + * MemoryTrackerResource + * ===================== + * + * The MemoryTrackerResource class is responsible for actually tracking + * individual allocations. Each MemoryTrackerResource represents a single type + * of memory as enumerated in the MemoryType enum. To create instances of this + * class, users should use the MemoryTracker::get_resource API. + * + * MemoryTracker + * ============= + * + * The MemoryTracker class is responsible for managing instances of + * MemoryTrackerResource. A MemoryTracker represents some section or behavior + * inside the TileDB library as enumerated in the MemoryTrackerType enum. + * Instances of MemoryTracker should be created using the + * MemoryTrackerManager::create_tracker() API or via the helper method + * ContextResources::create_memory_tracker().
Generally speaking, there should + * be very few of these instances outside of test code and instead existing + * instances should be referenced. + * + * For instance, there is currently an existing MemoryTracker member variable + * on both Array and Query. Most code in the library should be using one of + * these two trackers. There are a few specialized instances like in the + * Consolidator or for things like deserializing GenericTileIO tiles. + * + * MemoryTrackerManager + * ==================== + * + * The MemoryTrackerManager is a member variable on the ContextResources + * class. Users should not need to interact with this class directly as it's + * just a container that holds references to all the MemoryTracker instances + * for a given context. It's used by the MemoryTrackerReporter when logging + * memory usage. + * + * MemoryTrackerReporter + * ===================== + * + * The MemoryTrackerReporter class is a member variable on the ContextResources + * class. Users should not need to interact with this class directly as it's + * just used to log memory statistics to a special log file when configured. + * + * Users wishing to run memory usage experiments should use the + * 'sm.memory.tracker.reporter.filename' configuration key to set a filename + * that will contain the logged memory statistics in JSONL format (i.e., JSON + * objects and arrays encoded one per line). At runtime the reporter appends + * a JSON blob once a second to this logfile that can then be analyzed using + * whatever scripts or software as appropriate. */ #ifndef TILEDB_MEMORY_TRACKER_H #define TILEDB_MEMORY_TRACKER_H +#include +#include + +#include "tiledb/common/pmr.h" #include "tiledb/common/status.h" +#include "tiledb/sm/config/config.h" -namespace tiledb { -namespace sm { +namespace tiledb::sm { -class MemoryTracker { +/** The type of memory to track.
*/ +enum class MemoryType { + ENUMERATION, + ENUMERATION_PATHS, + FOOTER, + FILTERED_DATA, + FILTERED_DATA_BLOCK, + GENERIC_TILE_IO, + RTREE, + TILE_BITMAP, + TILE_DATA, + TILE_HILBERT_VALUES, + TILE_OFFSETS, + TILE_MAX_VALS, + TILE_MIN_VALS, + TILE_NULL_COUNTS, + TILE_QUERY_CONDITIONS, + ATTRIBUTES, + DIMENSION_LABELS, + DIMENSIONS, + METADATA, + DOMAINS, + TILE_SUMS, + TILE_WRITER_DATA +}; + +/** + * Return a string representation of type + * + * @param type The MemoryType to convert. + * @return A string representation. + */ +std::string memory_type_to_str(MemoryType type); + +/** The type of MemoryTracker. */ +enum class MemoryTrackerType { + ANONYMOUS, + ARRAY_CREATE, + ARRAY_LOAD, + ARRAY_READ, + ARRAY_WRITE, + ENUMERATION_CREATE, + FRAGMENT_INFO_LOAD, + QUERY_READ, + QUERY_WRITE, + CONSOLIDATOR, + REST_CLIENT, + GROUP, + EPHEMERAL, + SCHEMA_EVOLUTION +}; + +/** + * Return a string representation of type + * + * @param type The MemoryTrackerType to convert. + * @return A string representation. + */ +std::string memory_tracker_type_to_str(MemoryTrackerType type); + +class MemoryTrackerResource : public tdb::pmr::memory_resource { public: - enum class MemoryType { - RTREE, - FOOTER, - TILE_OFFSETS, - MIN_MAX_SUM_NULL_COUNT, - ENUMERATION - }; + // Disable all default generated constructors. + MemoryTrackerResource() = delete; + DISABLE_COPY_AND_COPY_ASSIGN(MemoryTrackerResource); + DISABLE_MOVE_AND_MOVE_ASSIGN(MemoryTrackerResource); /** Constructor. */ - MemoryTracker() { - memory_usage_ = 0; - memory_budget_ = std::numeric_limits::max(); - }; + explicit MemoryTrackerResource( + tdb::pmr::memory_resource* upstream, + std::atomic& total_counter, + std::atomic& type_counter) + : upstream_(upstream) + , total_counter_(total_counter) + , type_counter_(type_counter) { + } + + /** The number of bytes tracked by this resource. */ + uint64_t get_count(); + + protected: + /** Perform an allocation, returning a pointer to the allocated memory. 
*/ + void* do_allocate(size_t bytes, size_t alignment) override; + + /** Deallocate a previously allocated chunk of memory. */ + void do_deallocate(void* p, size_t bytes, size_t alignment) override; + + /** Check if two memory trackers are equal. */ + bool do_is_equal( + const tdb::pmr::memory_resource& other) const noexcept override; + + private: + /** The upstream memory resource to use for the actual allocation. */ + tdb::pmr::memory_resource* upstream_; + /** A reference to a total counter for the MemoryTracker. */ + std::atomic& total_counter_; + + /** A reference to the memory type counter this resource is tracking. */ + std::atomic& type_counter_; +}; + +class MemoryTracker { + public: /** Destructor. */ - ~MemoryTracker() = default; + ~MemoryTracker(); DISABLE_COPY_AND_COPY_ASSIGN(MemoryTracker); DISABLE_MOVE_AND_MOVE_ASSIGN(MemoryTracker); + /** Get the id of this MemoryTracker instance. */ + inline uint64_t get_id() { + return id_; + } + + /** Get the type of this memory tracker. */ + inline MemoryTrackerType get_type() { + std::lock_guard lg(mutex_); + return type_; + } + + /** Set the type of this memory tracker. */ + void set_type(MemoryTrackerType type) { + std::lock_guard lg(mutex_); + type_ = type; + } + + /** + * Create a memory resource instance. + * + * @param type The type of memory that is being tracked. + * @return A memory resource derived from std::pmr::memory_resource. + */ + tdb::pmr::memory_resource* get_resource(MemoryType); + + /** Return the total and counts of this tracker. */ + std::tuple> get_counts(); + /** * Take memory from the budget. * @@ -139,8 +315,31 @@ class MemoryTracker { return memory_budget_; } + protected: + /** + * Constructor. + * + * This constructor is protected on purpose to discourage creating instances + * of this class that aren't connected to a ContextResources. 
When writing + * library code, you should almost always be using an existing instance of + * a MemoryTracker from the places those exist, i.e., on an Array, Query, + * or in the Consolidator. Occasionally, we'll need to create new instances + * for specific reasons. In those cases you need to have a reference to the + * ContextResources to call ContextResources::create_memory_tracker(). + * + * For tests that need to have a temporary MemoryTracker instance, there is + * a `create_test_memory_tracker()` API available in the test support library. + */ + MemoryTracker() + : memory_usage_(0) + , memory_budget_(std::numeric_limits::max()) + , id_(generate_id()) + , type_(MemoryTrackerType::ANONYMOUS) + , upstream_(tdb::pmr::get_default_resource()) + , total_counter_(0){}; + private: - /** Protects all member variables. */ + /** Protects all non-atomic member variables. */ std::mutex mutex_; /** Memory usage for tracked structures. */ @@ -151,9 +350,110 @@ class MemoryTracker { /** Memory usage by type. */ std::unordered_map memory_usage_by_type_; + + /** The id of this MemoryTracker. */ + uint64_t id_; + + /** The type of this MemoryTracker. */ + MemoryTrackerType type_; + + /** The upstream memory resource. */ + tdb::pmr::memory_resource* upstream_; + + /** MemoryTrackerResource by MemoryType. */ + std::unordered_map> + resources_; + + /** Memory counters by MemoryType. */ + std::unordered_map> counters_; + + /** The total memory usage of this MemoryTracker. */ + std::atomic total_counter_; + + /** Generate a unique id for this MemoryTracker. */ + static uint64_t generate_id(); +}; + +class MemoryTrackerManager { + public: + /** Constructor. */ + MemoryTrackerManager() = default; + + DISABLE_COPY_AND_COPY_ASSIGN(MemoryTrackerManager); + DISABLE_MOVE_AND_MOVE_ASSIGN(MemoryTrackerManager); + + /** + * Create a new memory tracker. + * + * @return The created MemoryTracker.
+ */ + shared_ptr create_tracker(); + + /** + * Generate a JSON string representing the current state of tracked memory. + * + * @return A string containing the JSON representation of tracked memory. + */ + std::string to_json(); + + private: + /** A mutex to protect our list of trackers. */ + std::mutex mutex_; + + /** A weak_ptr to the instances of MemoryTracker we create. */ + std::vector> trackers_; +}; + +class MemoryTrackerReporter { + public: + /** + * Constructor. + * + * @param cfg The Config instance for the parent context. + * @param manager The MemoryTrackerManager instance on the context resources. + */ + MemoryTrackerReporter( + const Config& cfg, shared_ptr manager) + : manager_(manager) + , filename_(cfg.get("sm.memory.tracker.reporter.filename")) + , stop_(false) { + } + + /** Destructor. */ + ~MemoryTrackerReporter(); + + DISABLE_COPY_AND_COPY_ASSIGN(MemoryTrackerReporter); + DISABLE_MOVE_AND_MOVE_ASSIGN(MemoryTrackerReporter); + + /** Start the background reporter thread if configured. */ + void start(); + + /** Stop the background reporter thread if started. */ + void stop(); + + /** The background reporter thread's main loop. */ + void run(); + + private: + /** The MemoryTrackerManager instance on the parent ContextResources. */ + shared_ptr manager_; + + /** A filename set in the config. */ + std::optional filename_; + + /** The background reporter thread. */ + std::thread thread_; + + /** A mutex for communication with the background thread. */ + std::mutex mutex_; + + /** A condition variable for signaling the background thread. */ + std::condition_variable cv_; + + /** A stop flag to signal shutdown to the background thread.
*/ + bool stop_; }; -} // namespace sm -} // namespace tiledb +} // namespace tiledb::sm #endif // TILEDB_OPEN_ARRAY_MEMORY_TRACKER_H diff --git a/tiledb/common/pmr.cc b/tiledb/common/pmr.cc new file mode 100644 index 000000000000..f37db73927a2 --- /dev/null +++ b/tiledb/common/pmr.cc @@ -0,0 +1,45 @@ +/** + * @file pmr.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * @section DESCRIPTION + * + * This file contains implementation of pmr functions + */ + +#include "pmr.h" + +namespace tiledb::common::pmr { + +memory_resource* get_default_resource() { +#ifdef USE_CPP17_PMR + return cpp17::pmr::get_default_resource(); +#else + return std::pmr::get_default_resource(); +#endif +} + +} // namespace tiledb::common::pmr diff --git a/tiledb/common/pmr.h b/tiledb/common/pmr.h new file mode 100644 index 000000000000..a5d15a6f0bba --- /dev/null +++ b/tiledb/common/pmr.h @@ -0,0 +1,512 @@ +/** + * @file tiledb/common/pmr.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * A centralized definition of the polymorphic resource types used by + * TileDB. 
+ */ + +#ifndef TILEDB_COMMON_PMR_H +#define TILEDB_COMMON_PMR_H + +#include +#include +#include +#include + +#ifdef USE_CPP17_PMR +#include "polymorphic_allocator/polymorphic_allocator.h" +#else +#include +#endif + +#include "common.h" + +namespace tiledb::common::pmr { + +#ifdef USE_CPP17_PMR + +using memory_resource = cpp17::pmr::memory_resource; + +template +using polymorphic_allocator = cpp17::pmr::polymorphic_allocator; + +#else + +using memory_resource = std::pmr::memory_resource; + +template +using polymorphic_allocator = std::pmr::polymorphic_allocator; +#endif + +memory_resource* get_default_resource(); + +/* ********************************* */ +/* PMR UNIQUE_PTR DECLARATION */ +/* ********************************* */ + +template +class unique_ptr_deleter { + public: + unique_ptr_deleter() = delete; + + unique_ptr_deleter(memory_resource* resource, size_t size, size_t alignment) + : resource_(resource) + , size_(size) + , alignment_(alignment) { + } + + void operator()(Tp* ptr) { + if (ptr != nullptr) { + resource_->deallocate(ptr, size_, alignment_); + } + } + + void set_size(size_t size) { + size_ = size; + } + + memory_resource* resource_; + size_t size_; + size_t alignment_; +}; + +template +using unique_ptr = std::unique_ptr>; + +template +unique_ptr make_unique( + memory_resource* resource, size_t size, size_t alignment) { + static_assert(std::is_arithmetic_v || std::is_same_v); + + auto alloc_size = size * sizeof(Tp); + Tp* data = static_cast(resource->allocate(alloc_size, alignment)); + + if (data == nullptr) { + throw std::bad_alloc(); + } + + auto deleter = unique_ptr_deleter(resource, alloc_size, alignment); + + return std::unique_ptr>(data, deleter); +} + +template +unique_ptr make_unique(memory_resource* resource, size_t size) { + return make_unique(resource, size, alignof(Tp)); +} + +/* ********************************* */ +/* PMR LIST DECLARATION */ +/* ********************************* */ +template +using pmr_list = std::list>; + 
+template +class list : public pmr_list { + public: + using value_type = typename pmr_list::value_type; + using allocator_type = typename pmr_list::allocator_type; + using size_type = typename pmr_list::size_type; + using difference_type = typename pmr_list::difference_type; + using reference = typename pmr_list::reference; + using const_reference = typename pmr_list::const_reference; + using pointer = typename pmr_list::pointer; + using const_pointer = typename pmr_list::const_pointer; + using iterator = typename pmr_list::iterator; + using const_iterator = typename pmr_list::const_iterator; + using reverse_iterator = typename pmr_list::reverse_iterator; + using const_reverse_iterator = typename pmr_list::const_reverse_iterator; + + // Delete all default constructors because they don't require an allocator + list() = delete; + list(const list& other) = delete; + list(list&& other) = delete; + + // Delete non-allocator aware copy and move assign. + list& operator=(const list& other) = delete; + list& operator=(list&& other) noexcept = delete; + + explicit list(const allocator_type& alloc) noexcept + : pmr_list(alloc) { + } + + explicit list(size_type count, const Tp& value, const allocator_type& alloc) + : pmr_list(count, value, alloc) { + } + + explicit list(size_type count, const allocator_type& alloc) + : pmr_list(count, alloc) { + } + + template + list(InputIt first, InputIt last, const allocator_type& alloc) + : pmr_list(first, last, alloc) { + } + + list(const list& other, const allocator_type& alloc) + : pmr_list(other, alloc) { + } + + list(list&& other, const allocator_type& alloc) + : pmr_list(other, alloc) { + } + + list(std::initializer_list init, const allocator_type& alloc) + : pmr_list(init, alloc) { + } +}; + +/* ********************************* */ +/* PMR VECTOR DECLARATION */ +/* ********************************* */ + +template +using pmr_vector = std::vector>; + +template +class vector : public pmr_vector { + public: + // This class exists to 
ensure that all uses of it are provided with a + // valid std::pmr based allocator. This is so that as we switch from + // std::vector to using this class we don't forget to provide the allocator + // which is quite easy to do. + // + // If these constructors look confusing, just know that all we're doing is + // copying the current definitions from cppreference and then adjusting types + // to require the PMR based allocator. + + // I have absolutely no idea if all of these aliases are required. The + // allocator_type is the important one. I've copied the others just in + // case since I do know that PMR aware containers at least require + // allocator_type. + using value_type = typename pmr_vector::value_type; + using allocator_type = typename pmr_vector::allocator_type; + using size_type = typename pmr_vector::size_type; + using difference_type = typename pmr_vector::difference_type; + using reference = typename pmr_vector::reference; + using const_reference = typename pmr_vector::const_reference; + using pointer = typename pmr_vector::pointer; + using const_pointer = typename pmr_vector::const_pointer; + using iterator = typename pmr_vector::iterator; + using const_iterator = typename pmr_vector::const_iterator; + using reverse_iterator = typename pmr_vector::reverse_iterator; + using const_reverse_iterator = + typename pmr_vector::const_reverse_iterator; + + // Delete all default constructors because they don't require an allocator + constexpr vector() noexcept(noexcept(allocator_type())) = delete; + constexpr vector(const vector& other) = delete; + constexpr vector(vector&& other) noexcept = delete; + + // Delete non-allocator aware copy and move assign. 
+ constexpr vector& operator=(const vector& other) = delete; + constexpr vector& operator=(vector&& other) noexcept = delete; + + constexpr explicit vector(const allocator_type& alloc) noexcept + : pmr_vector(alloc) { + } + + constexpr vector( + size_type count, const Tp& value, const allocator_type& alloc) + : pmr_vector(count, value, alloc) { + } + + constexpr explicit vector(size_type count, const allocator_type& alloc) + : pmr_vector(count, alloc) { + } + + template + constexpr vector(InputIt first, InputIt last, const allocator_type& alloc) + : pmr_vector(first, last, alloc) { + } + + constexpr vector(const vector& other, const allocator_type& alloc) + : pmr_vector(other, alloc) { + } + + constexpr vector(vector&& other, const allocator_type& alloc) + : pmr_vector(other, alloc) { + } + + constexpr vector(std::initializer_list init, const allocator_type& alloc) + : pmr_vector(init, alloc) { + } +}; + +/* ********************************* */ +/* PMR UNORDERED MAP DECLARATION */ +/* ********************************* */ + +template < + class Key, + class T, + class Hash = std::hash, + class KeyEqual = std::equal_to> +using pmr_unordered_map = std::unordered_map< + Key, + T, + Hash, + KeyEqual, + polymorphic_allocator>>; + +template < + class Key, + class T, + class Hash = std::hash, + class KeyEqual = std::equal_to> +class unordered_map : public pmr_unordered_map { + public: + // Type declarations. 
+ using key_type = typename pmr_unordered_map::key_type; + using mapped_type = + typename pmr_unordered_map::mapped_type; + using value_type = + typename pmr_unordered_map::value_type; + using size_type = + typename pmr_unordered_map::size_type; + using difference_type = + typename pmr_unordered_map::difference_type; + using hasher = typename pmr_unordered_map::hasher; + using key_equal = + typename pmr_unordered_map::key_equal; + using allocator_type = + typename pmr_unordered_map::allocator_type; + using reference = + typename pmr_unordered_map::reference; + using const_reference = + typename pmr_unordered_map::const_reference; + using pointer = typename pmr_unordered_map::pointer; + using const_pointer = + typename pmr_unordered_map::const_pointer; + using iterator = typename pmr_unordered_map::iterator; + using const_iterator = + typename pmr_unordered_map::const_iterator; + using local_iterator = + typename pmr_unordered_map::local_iterator; + using node_type = + typename pmr_unordered_map::node_type; + using insert_return_type = + typename pmr_unordered_map::insert_return_type; + + // Delete all default constructors because they don't require an allocator + constexpr unordered_map() = delete; + constexpr unordered_map(const unordered_map& other) = delete; + constexpr unordered_map(unordered_map&& other) = delete; + + // Delete non-allocator aware copy and move assign. 
+ constexpr unordered_map& operator=(const unordered_map& other) = delete; + constexpr unordered_map& operator=(unordered_map&& other) noexcept = delete; + + constexpr explicit unordered_map( + size_type bucket_count, + const Hash& hash, + const key_equal& equal, + const allocator_type& alloc) + : pmr_unordered_map( + bucket_count, hash, equal, alloc) { + } + + constexpr unordered_map(size_type bucket_count, const allocator_type& alloc) + : pmr_unordered_map( + bucket_count, Hash(), KeyEqual(), alloc) { + } + + constexpr unordered_map( + size_type bucket_count, const Hash& hash, const allocator_type& alloc) + : pmr_unordered_map( + bucket_count, hash, KeyEqual(), alloc) { + } + + constexpr explicit unordered_map(const allocator_type& alloc) + : pmr_unordered_map(alloc) { + } + + template + constexpr unordered_map( + InputIt first, + InputIt last, + size_type bucket_count, + const Hash& hash, + const key_equal& equal, + const allocator_type& alloc) + : pmr_unordered_map( + first, last, bucket_count, hash, equal, alloc) { + } + + template + constexpr unordered_map( + InputIt first, + InputIt last, + size_type bucket_count, + const allocator_type& alloc) + : pmr_unordered_map( + first, last, bucket_count, Hash(), KeyEqual(), alloc) { + } + + template + constexpr unordered_map( + InputIt first, + InputIt last, + size_type bucket_count, + const Hash& hash, + const allocator_type& alloc) + : pmr_unordered_map( + first, last, bucket_count, hash, KeyEqual(), alloc) { + } + + constexpr unordered_map( + const unordered_map& other, const allocator_type& alloc) + : pmr_unordered_map(other, alloc) { + } + + constexpr unordered_map(unordered_map&& other, const allocator_type& alloc) + : pmr_unordered_map(other, alloc) { + } + + constexpr unordered_map( + std::initializer_list init, + size_type bucket_count, + const Hash& hash, + const key_equal& equal, + const allocator_type& alloc) + : pmr_unordered_map( + init, bucket_count, hash, equal, alloc) { + } + + constexpr 
unordered_map( + std::initializer_list init, + size_type bucket_count, + const allocator_type& alloc) + : pmr_unordered_map( + init, bucket_count, Hash(), KeyEqual(), alloc) { + } + + constexpr unordered_map( + std::initializer_list init, + size_type bucket_count, + const Hash& hash, + const allocator_type& alloc) + : pmr_unordered_map( + init, bucket_count, hash, KeyEqual(), alloc) { + } +}; + +/* ********************************* */ +/* PMR MAP DECLARATION */ +/* ********************************* */ +template > +using pmr_map = + std::map>>; + +template > +class map : public pmr_map { + public: + // Type declarations. + using key_type = typename pmr_map::key_type; + using mapped_type = typename pmr_map::mapped_type; + using value_type = typename pmr_map::value_type; + using size_type = typename pmr_map::size_type; + using difference_type = typename pmr_map::difference_type; + using key_compare = typename pmr_map::key_compare; + using allocator_type = typename pmr_map::allocator_type; + using reference = typename pmr_map::reference; + using const_reference = typename pmr_map::const_reference; + using pointer = typename pmr_map::pointer; + using const_pointer = typename pmr_map::const_pointer; + using iterator = typename pmr_map::iterator; + using const_iterator = typename pmr_map::const_iterator; + using reverse_iterator = typename pmr_map::reverse_iterator; + using const_reverse_iterator = + typename pmr_map::const_reverse_iterator; + using node_type = typename pmr_map::node_type; + using insert_return_type = + typename pmr_map::insert_return_type; + + // Delete all default constructors because they don't require an allocator + constexpr map() = delete; + constexpr map(const map& other) = delete; + constexpr map(map&& other) = delete; + + // Delete non-allocator aware copy and move assign. 
+ constexpr map& operator=(const map& other) = delete; + constexpr map& operator=(map&& other) noexcept = delete; + + constexpr explicit map(const Compare& comp, const allocator_type& alloc) + : pmr_map(comp, alloc) { + } + + constexpr explicit map(const allocator_type& alloc) + : pmr_map(alloc) { + } + + template + constexpr map( + InputIt first, + InputIt last, + const Compare& comp, + const allocator_type& alloc) + : pmr_map(first, last, comp, alloc) { + } + + template + constexpr map(InputIt first, InputIt last, const allocator_type& alloc) + : pmr_map(first, last, Compare(), alloc) { + } + + constexpr map(const map& other, const allocator_type& alloc) + : pmr_map(other, alloc) { + } + + constexpr map(map&& other, const allocator_type& alloc) + : pmr_map(other, alloc) { + } + + constexpr map( + std::initializer_list init, + const Compare& comp, + const allocator_type& alloc) + : pmr_map(init, comp, alloc) { + } + + constexpr map( + std::initializer_list init, const allocator_type& alloc) + : pmr_map(init, Compare(), alloc) { + } + + // Declare member class value_compare. 
+ class value_compare : public pmr_map::value_compare { + public: + constexpr bool operator()( + const value_type& lhs, const value_type& rhs) const; + }; +}; + +} // namespace tiledb::common::pmr + +#endif // TILEDB_COMMON_PMR_H diff --git a/tiledb/common/polymorphic_allocator/README.md b/tiledb/common/polymorphic_allocator/README.md new file mode 100644 index 000000000000..510c0f207a31 --- /dev/null +++ b/tiledb/common/polymorphic_allocator/README.md @@ -0,0 +1,15 @@ +Polymorphic Allocator Fallback Implementation +=== + +This implementation of polymorphic_allocator was pulled from Pablo Halpern's +C++11 implementation available here: + +https://github.com/phalpern/CppCon2017Code/tree/d26e7f4f6c593fe135c6b454aee93486790726b7 + +I have a personal forked copy here in case that repository ever disappears: + +https://github.com/davisp/phalpern-CppCon2017Code/tree/d26e7f4f6c593fe135c6b454aee93486790726b7 + +The only changes from the original files in that repository are reformatting +using TileDB coding style and adding a handful of `std::` namespace qualifiers +to avoid symbol name clashes with internal TileDB symbols. diff --git a/tiledb/common/polymorphic_allocator/polymorphic_allocator.cc b/tiledb/common/polymorphic_allocator/polymorphic_allocator.cc new file mode 100644 index 000000000000..fe031cc482f1 --- /dev/null +++ b/tiledb/common/polymorphic_allocator/polymorphic_allocator.cc @@ -0,0 +1,24 @@ +/* polymorphic_allocator.cpp -*-C++-*- + * + * Copyright 2012 Pablo Halpern. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ + +#include "polymorphic_allocator.h" + +namespace cpp17 { + +atomic pmr::memory_resource::s_default_resource(nullptr); + +pmr::new_delete_resource* pmr::new_delete_resource_singleton() { + // TBD: I think the standard makes this exception-safe, otherwise, we need + // to use 'call_once()' in ''.
+ static new_delete_resource singleton; + return &singleton; +} + +} // namespace cpp17 + +// end polymorphic_allocator.cpp diff --git a/tiledb/common/polymorphic_allocator/polymorphic_allocator.h b/tiledb/common/polymorphic_allocator/polymorphic_allocator.h new file mode 100644 index 000000000000..654effacb1a2 --- /dev/null +++ b/tiledb/common/polymorphic_allocator/polymorphic_allocator.h @@ -0,0 +1,494 @@ +/* polymorphic_allocator.h -*-C++-*- + * + * Copyright 2017 Pablo Halpern. + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ + +#ifndef INCLUDED_POLYMORPHIC_ALLOCATOR_DOT_H +#define INCLUDED_POLYMORPHIC_ALLOCATOR_DOT_H + +#include +#include // For max_align_t +#include +#include +#include + +namespace cpp17 { + +using namespace std; + +// The `byte` type is defined exactly this way in C++17's `` (section +// [cstddef.syn]). It is defined here to allow use of +// `polymorphic_allocator` as a vocabulary type. +enum class byte : unsigned char {}; + +namespace pmr { + +// Abstract base class for allocator resources. +// Conforms to the C++17 standard, section [mem.res.class]. +class memory_resource { + static constexpr size_t max_align = alignof(max_align_t); + + static atomic s_default_resource; + + friend memory_resource* set_default_resource(memory_resource*); + friend memory_resource* get_default_resource(); + + public: + virtual ~memory_resource(); + + void* allocate(size_t bytes, size_t alignment = max_align) { + return do_allocate(bytes, alignment); + } + void deallocate(void* p, size_t bytes, size_t alignment = max_align) { + return do_deallocate(p, bytes, alignment); + } + + // `is_equal` is needed because polymorphic allocators are sometimes + // produced as a result of type erasure. 
In that case, two different + // instances of a polymorphic_memory_resource may actually represent + // the same underlying allocator and should compare equal, even though + // their addresses are different. + bool is_equal(const memory_resource& other) const noexcept { + return do_is_equal(other); + } + + protected: + virtual void* do_allocate(size_t bytes, size_t alignment) = 0; + virtual void do_deallocate(void* p, size_t bytes, size_t alignment) = 0; + virtual bool do_is_equal(const memory_resource& other) const noexcept = 0; +}; + +inline bool operator==(const memory_resource& a, const memory_resource& b) { + // Call `is_equal` rather than using address comparisons because some + // polymorphic allocators are produced as a result of type erasure. In + // that case, `a` and `b` may contain `memory_resource`s with different + // addresses which, nevertheless, should compare equal. + return &a == &b || a.is_equal(b); +} + +inline bool operator!=(const memory_resource& a, const memory_resource& b) { + return !(a == b); +} + +namespace __details { + +// STL allocator that holds a pointer to a polymorphic allocator resource. +// Used to implement `polymorphic_allocator`, which is a scoped allocator. +template +class polymorphic_allocator_imp { + memory_resource* m_resource; + + public: + using value_type = Tp; + + // These types are old-fashioned, pre-C++11 requirements, still needed by + // g++'s `basic_string` implementation. 
+ using size_type = size_t; + using difference_type = ptrdiff_t; + using reference = Tp&; + using const_reference = Tp const&; + using pointer = Tp*; + using const_pointer = Tp const*; + + polymorphic_allocator_imp(); + polymorphic_allocator_imp(memory_resource* r); + + template + polymorphic_allocator_imp(const polymorphic_allocator_imp& other); + + Tp* allocate(size_t n); + void deallocate(Tp* p, size_t n); + + // Return a default-constructed allocator + polymorphic_allocator_imp select_on_container_copy_construction() const; + + memory_resource* resource() const; +}; + +template +bool operator==( + const polymorphic_allocator_imp& a, + const polymorphic_allocator_imp& b); + +template +bool operator!=( + const polymorphic_allocator_imp& a, + const polymorphic_allocator_imp& b); + +template +struct aligned_chunk; + +template <> +struct aligned_chunk<1> { + char x; +}; +template <> +struct aligned_chunk<2> { + short x; +}; +template <> +struct aligned_chunk<4> { + int x; +}; +template <> +struct aligned_chunk<8> { + long long x; +}; +template <> +struct aligned_chunk<16> { + __attribute__((aligned(16))) char x; +}; +template <> +struct aligned_chunk<32> { + __attribute__((aligned(32))) char x; +}; +template <> +struct aligned_chunk<64> { + __attribute__((aligned(64))) char x; +}; + +// Adaptor to make a polymorphic allocator resource type from an STL allocator +// type. This is really a C++20 feature, but it's useful for implementing +// this component. 
+template +class resource_adaptor_imp : public memory_resource { + typename allocator_traits::template rebind_alloc + m_alloc; + + template + void* allocate_imp(size_t bytes); + + template + void deallocate_imp(void* p, size_t bytes); + + public: + typedef Allocator allocator_type; + + resource_adaptor_imp() = default; + + resource_adaptor_imp(const resource_adaptor_imp&) = default; + + template + resource_adaptor_imp( + Allocator2&& a2, + typename enable_if::value, int>:: + type = 0); + + protected: + void* do_allocate(size_t bytes, size_t alignment = 0) override; + void do_deallocate(void* p, size_t bytes, size_t alignment = 0) override; + + bool do_is_equal(const memory_resource& other) const noexcept override; + + allocator_type get_allocator() const { + return m_alloc; + } +}; + +} // end namespace __details + +// A resource_adaptor converts a traditional STL allocator to a polymorphic +// memory resource. Somehow, this didn't make it into C++17, but it should +// have, so here it is. +// This alias ensures that `resource_adaptor` and +// `resource_adaptor` are always the same type, whether or not +// `T` and `U` are the same type. +template +using resource_adaptor = __details::resource_adaptor_imp< + typename allocator_traits::template rebind_alloc>; + +// Memory resource that uses new and delete. +class new_delete_resource : public resource_adaptor> {}; + +// Return a pointer to a global instance of `new_delete_resource`. 
+new_delete_resource* new_delete_resource_singleton(); + +// Get the current default resource +memory_resource* get_default_resource(); + +// Set the default resource +memory_resource* set_default_resource(memory_resource* r); + +template +class polymorphic_allocator : public scoped_allocator_adaptor< + __details::polymorphic_allocator_imp> { + typedef __details::polymorphic_allocator_imp Imp; + typedef scoped_allocator_adaptor Base; + + public: + // g++-4.6.3 does not use allocator_traits in shared_ptr, so we have to + // provide an explicit rebind. + template + struct rebind { + typedef polymorphic_allocator other; + }; + + polymorphic_allocator() = default; + polymorphic_allocator(memory_resource* r) + : Base(Imp(r)) { + } + + template + polymorphic_allocator(const polymorphic_allocator& other) + : Base(Imp((other.resource()))) { + } + + template + polymorphic_allocator(const __details::polymorphic_allocator_imp& other) + : Base(other) { + } + + // Return a default-constructed allocator + polymorphic_allocator select_on_container_copy_construction() const { + return polymorphic_allocator(); + } + + memory_resource* resource() const { + return this->outer_allocator().resource(); + } +}; + +template +inline bool operator==( + const polymorphic_allocator& a, const polymorphic_allocator& b) { + return a.outer_allocator() == b.outer_allocator(); +} + +template +inline bool operator!=( + const polymorphic_allocator& a, const polymorphic_allocator& b) { + return !(a == b); +} + +} // end namespace pmr + +/////////////////////////////////////////////////////////////////////////////// +// INLINE AND TEMPLATE FUNCTION IMPLEMENTATIONS +/////////////////////////////////////////////////////////////////////////////// + +inline pmr::memory_resource::~memory_resource() { +} + +inline pmr::memory_resource* pmr::get_default_resource() { + memory_resource* ret = pmr::memory_resource::s_default_resource.load(); + if (nullptr == ret) + ret = new_delete_resource_singleton(); + return 
ret; +} + +inline pmr::memory_resource* pmr::set_default_resource( + pmr::memory_resource* r) { + if (nullptr == r) + r = new_delete_resource_singleton(); + + // TBD, should use an atomic swap + pmr::memory_resource* prev = get_default_resource(); + pmr::memory_resource::s_default_resource.store(r); + return prev; +} + +template +template +inline pmr::__details::resource_adaptor_imp::resource_adaptor_imp( + Allocator2&& a2, + typename enable_if::value, int>::type) + : m_alloc(forward(a2)) { +} + +template +template +void* pmr::__details::resource_adaptor_imp::allocate_imp( + size_t bytes) { + typedef __details::aligned_chunk chunk; + size_t chunks = (bytes + Align - 1) / Align; + + typedef typename allocator_traits::template rebind_traits + chunk_traits; + typename chunk_traits::allocator_type rebound(m_alloc); + return chunk_traits::allocate(rebound, chunks); +} + +template +template +void pmr::__details::resource_adaptor_imp::deallocate_imp( + void* p, size_t bytes) { + typedef __details::aligned_chunk chunk; + size_t chunks = (bytes + Align - 1) / Align; + + typedef typename allocator_traits::template rebind_traits + chunk_traits; + typename chunk_traits::allocator_type rebound(m_alloc); + return chunk_traits::deallocate(rebound, static_cast(p), chunks); +} + +template +void* pmr::__details::resource_adaptor_imp::do_allocate( + size_t bytes, size_t alignment) { + static const size_t max_natural_alignment = sizeof(max_align_t); + + if (0 == alignment) { + // Choose natural alignment for `bytes` + alignment = ((bytes ^ (bytes - 1)) >> 1) + 1; + if (alignment > max_natural_alignment) + alignment = max_natural_alignment; + } + + switch (alignment) { + case 1: + return allocate_imp<1>(bytes); + case 2: + return allocate_imp<2>(bytes); + case 4: + return allocate_imp<4>(bytes); + case 8: + return allocate_imp<8>(bytes); + case 16: + return allocate_imp<16>(bytes); + case 32: + return allocate_imp<32>(bytes); + case 64: + return allocate_imp<64>(bytes); + default: { + 
size_t chunks = (bytes + sizeof(void*) + alignment - 1) / 64; + size_t chunkbytes = chunks * 64; + void* original = allocate_imp<64>(chunkbytes); + + // Make room for original pointer storage + char* p = static_cast(original) + sizeof(void*); + + // Round up to nearest alignment boundary + p += alignment - 1; + p -= (size_t(p)) & (alignment - 1); + + // Store original pointer in word before allocated pointer + reinterpret_cast(p)[-1] = original; + + return p; + } + } +} + +template +void pmr::__details::resource_adaptor_imp::do_deallocate( + void* p, size_t bytes, size_t alignment) { + static const size_t max_natural_alignment = sizeof(max_align_t); + + if (0 == alignment) { + // Choose natural alignment for `bytes` + alignment = ((bytes ^ (bytes - 1)) >> 1) + 1; + if (alignment > max_natural_alignment) + alignment = max_natural_alignment; + } + + switch (alignment) { + case 1: + deallocate_imp<1>(p, bytes); + break; + case 2: + deallocate_imp<2>(p, bytes); + break; + case 4: + deallocate_imp<4>(p, bytes); + break; + case 8: + deallocate_imp<8>(p, bytes); + break; + case 16: + deallocate_imp<16>(p, bytes); + break; + case 32: + deallocate_imp<32>(p, bytes); + break; + case 64: + deallocate_imp<64>(p, bytes); + break; + default: { + size_t chunks = (bytes + sizeof(void*) + alignment - 1) / 64; + size_t chunkbytes = chunks * 64; + void* original = reinterpret_cast(p)[-1]; + + deallocate_imp<64>(original, chunkbytes); + } + } +} + +template +bool pmr::__details::resource_adaptor_imp::do_is_equal( + const memory_resource& other) const noexcept { + const resource_adaptor_imp* other_p = + dynamic_cast(&other); + + if (other_p) + return this->m_alloc == other_p->m_alloc; + else + return false; +} + +namespace __pmrd = pmr::__details; + +template +inline __pmrd::polymorphic_allocator_imp::polymorphic_allocator_imp() + : m_resource(get_default_resource()) { +} + +template +inline __pmrd::polymorphic_allocator_imp::polymorphic_allocator_imp( + pmr::memory_resource* r) + : 
m_resource(r ? r : get_default_resource()) { +} + +template +template +inline __pmrd::polymorphic_allocator_imp::polymorphic_allocator_imp( + const __pmrd::polymorphic_allocator_imp& other) + : m_resource(other.resource()) { +} + +template +inline Tp* __pmrd::polymorphic_allocator_imp::allocate(size_t n) { + return static_cast(m_resource->allocate(n * sizeof(Tp), alignof(Tp))); +} + +template +inline void __pmrd::polymorphic_allocator_imp::deallocate(Tp* p, size_t n) { + m_resource->deallocate(p, n * sizeof(Tp), alignof(Tp)); +} + +template +inline __pmrd::polymorphic_allocator_imp __pmrd::polymorphic_allocator_imp< + Tp>::select_on_container_copy_construction() const { + return __pmrd::polymorphic_allocator_imp(); +} + +template +inline pmr::memory_resource* __pmrd::polymorphic_allocator_imp::resource() + const { + return m_resource; +} + +template +inline bool __pmrd::operator==( + const __pmrd::polymorphic_allocator_imp& a, + const __pmrd::polymorphic_allocator_imp& b) { + // `operator==` for `memory_resource` first checks for equality of + // addresses and calls `is_equal` only if the addresses differ. The call + // `is_equal` because some polymorphic allocators are produced as a result + // of type erasure. In that case, `a` and `b` may contain + // `memory_resource`s with different addresses which, nevertheless, + // should compare equal. + return *a.resource() == *b.resource(); +} + +template +inline bool __pmrd::operator!=( + const __pmrd::polymorphic_allocator_imp& a, + const __pmrd::polymorphic_allocator_imp& b) { + return *a.resource() != *b.resource(); +} + +} // namespace cpp17 + +#endif // ! 
defined(INCLUDED_POLYMORPHIC_ALLOCATOR_DOT_H) diff --git a/tiledb/common/random/prng.cc b/tiledb/common/random/prng.cc index 842439b49b4b..e672e5e0b8aa 100644 --- a/tiledb/common/random/prng.cc +++ b/tiledb/common/random/prng.cc @@ -33,13 +33,75 @@ #include "tiledb/common/random/prng.h" namespace tiledb::common { +/** + * 64-bit mersenne twister engine for random number generation. + * + * This definition is duplicated to avoid having it defined as `public` in + * `class PRNG`. + */ +using prng_type = std::mt19937_64; + +/** + * Implementation of the random seed. + * + * This is a class template in order to use `if constexpr`. + * + * @tparam return_size_type The type of the seed to be returned + */ +template +return_size_type random_seed() { + static constexpr size_t rng_size = sizeof(std::random_device::result_type); + static constexpr size_t ret_size = sizeof(return_size_type); + std::random_device rng{}; + /* + * We will need 64 bits to adequately seed the PRNG (`ret_size`). We support + * cases where the result size of the RNG is 64 or 32 bits (`rng_size`). + */ + if constexpr (ret_size == rng_size) { + return rng(); + } else if constexpr (ret_size == 2 * rng_size) { + return (rng() << rng_size) + rng(); + } else { + throw std::runtime_error("Unsupported combination of RNG sizes"); + } +} + +/** + * The PRNG used within the random constructor. + */ +prng_type prng_random() { + return prng_type{random_seed()}; // RVO +} + +/** + * The PRNG used within the default constructor. + */ +prng_type prng_default() { + /* + * Retrieve optional seed, which may or may not have been set explicitly. + */ + auto seed{Seeder::get().seed()}; + /* + * Use the seed if it has been set. Otherwise use a random seed. 
+ */ + if (seed.has_value()) { + return prng_type{seed.value()}; // RVO + } else { + return prng_random(); // RVO + } +} /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ PRNG::PRNG() - : prng_(prng_initial()) + : prng_(prng_default()) + , mtx_{} { +} + +PRNG::PRNG(RandomSeedT) + : prng_(prng_random()) , mtx_{} { } @@ -61,16 +123,4 @@ uint64_t PRNG::operator()() { /* PRIVATE METHODS */ /* ********************************* */ -std::mt19937_64 PRNG::prng_initial() { - // Retrieve optional, potentially default-constructed seed. - auto seed{Seeder::get().seed()}; - - // If the seed has been set, set it on the RNG engine. - if (seed.has_value()) { - return std::mt19937_64{seed.value()}; // RVO - } else { - return {}; // RVO - } -} - } // namespace tiledb::common diff --git a/tiledb/common/random/prng.h b/tiledb/common/random/prng.h index d03f5ceacf11..849c89e63afa 100644 --- a/tiledb/common/random/prng.h +++ b/tiledb/common/random/prng.h @@ -39,6 +39,80 @@ #include "tiledb/common/random/seeder.h" namespace tiledb::common { + +/** + * Marker class for a test-only PRNG constructor + */ +class RandomSeedT {}; +/** + * Marker constant + */ +static constexpr RandomSeedT RandomSeed; + +/** + * A random number generator suitable for both production and testing. + * + * @section Requirements + * + * This PRNG must support two very different kinds of situations: + * + * 1. In production use (the ordinary case) the seed must be _actually_ random + * so that the random sequences in different processes are distinct. + * 2. During most testing the seed must be deterministic to ensure that + * different test runs execute the same sequence of operations. This ensures + * that test failures can be replicated for diagnosis and correction. + * a. In particular, the seed in Catch2 test runs should be deterministic. + * 3. Certain tests, however, require actual randomness. + * a. 
One such test verifies that actual randomness is available per (1). Such + * tests necessarily have the possibility of failures, i.e. of false + * positives, but the actual likelihood can be made extremely low. + * b. Stress tests execute a large number of test runs searching for + * defects. Such tests do not generate new information when run with + * previously-used PRNG sequences. + * + * This class satisfies these requirements with the following implementation + * choices: + * 1. If the user has not called `set_seed()` on the global seeder (from + * `Seeder::get`), then the seed is taken from `std::random_device`. + * 2. If the user has called `set_seed()` on the global seeder, that seed is + * used. + * 3. This class uses a global seeder in order to support Catch2. An event + * handler that executes at the start of the test run calls `set_seed()`. + * + * @section Maturity + * + * Apart from the test-only `RandomSeedT` constructor, this class only has a + * default constructor. It has no constructors that take seeds or seeders. Such + * constructors would be useful for replicating test runs, but would also be + * premature at present. There's further test infrastructure required to + * replicate a specific test in isolation. As that test infrastructure matures, + * so also should this class. In the interim, to replicate a specific test with + * a specific seed, the function `prng_default()` can be temporarily changed. + * + * This class uses a seeded PRNG to implement the random sequence. The + * requirement is that sequences in different processes be distinct, not that + * they be actually random. A randomly-seeded PRNG satisfies this requirement. + * The motivation for this implementation choice is as follows: + * 1. There is no standard hardware requirement for random number generation. + * While it's generally available, there are unknown variations in + * significant quality parameters such as the rate of random generation, + * duration of an RNG call, and randomness of generation (e.g.
n-gram + * entropies). + * 2. In order not to stress a potentially inadequate RNG, we only call it for + * seeding and not for every number. + * 3. Qualifying a potential RNG implementation requires engineering resources + * that have not been committed as yet. + * + * @section Caveat + * + * This class uses `std::random_device` to seed the PRNG if no explicit seed is + * set. The standard library does not require that this class use an actual RNG, + * i.e. RNG from hardware of some kind. Indeed, certain earlier implementations + * did not do so and were deterministic. In order to validate that this device + * is actually random, it's necessary to run a multiprocess test to observe + * initialization in different processes. The test suite does not contain such + * a validation test at present. + */ class PRNG { public: /* ********************************* */ @@ -46,14 +120,25 @@ class PRNG { /* ********************************* */ /** - * Constructor. + * Default constructor. * - * Constructs an mt19937 engine for random number generation. - * If Seeder has been seeded, the seed will be set on the engine. - * Otherwise, it is default-constructed. + * If `Seeder` has been seeded, the seed will be set on the engine. Otherwise, + * the generator is constructed with a random seed. */ PRNG(); + /** + * Constructor for random seeding. + * + * This constructor makes an object that is always constructed with a random + * seed. + * + * @warning This constructor is only for testing. It must not be used in + * production code, where it would thwart the ability to run tests + * deterministically. + */ + PRNG(RandomSeedT); + /** Copy constructor is deleted. */ PRNG(const PRNG&) = delete; @@ -89,13 +174,6 @@ class PRNG { /** Mutex which protects against simultaneous access to operator() body. 
*/ std::mutex mtx_; - - /* ********************************* */ - /* PRIVATE METHODS */ - /* ********************************* */ - - /** Default-constructs an mt19937 engine and optionally sets the seed. */ - std::mt19937_64 prng_initial(); }; } // namespace tiledb::common diff --git a/tiledb/common/random/seeder.h b/tiledb/common/random/seeder.h index e2807a146fb1..d24efebd4689 100644 --- a/tiledb/common/random/seeder.h +++ b/tiledb/common/random/seeder.h @@ -48,8 +48,10 @@ namespace tiledb::common { * default (set_seed) seed is set (seed) seed is used * but unused * - * Note that each transition may occur only once. - * i.e. A seed may only be set one time and may only be used one time. + * Note that each transition may occur only once, i.e. a seed may only be set + * one time and may only be used one time. This is an explicit design choice to + * ensure that a singleton PRNG is only initialized once, and to prevent the + * case where a seeming initialization is not the actual initialization. */ class Seeder { public: diff --git a/tiledb/common/random/test/unit_seedable_global_PRNG.cc b/tiledb/common/random/test/unit_seedable_global_PRNG.cc index b3d906207a84..3ec2c4153992 100644 --- a/tiledb/common/random/test/unit_seedable_global_PRNG.cc +++ b/tiledb/common/random/test/unit_seedable_global_PRNG.cc @@ -96,6 +96,8 @@ TEST_CASE( "SeedableGlobalPRNG: operator", "[SeedableGlobalPRNG][operator][multiple]") { PRNG& prng = PRNG::get(); + // Verify that a second call succeeds. + CHECK_NOTHROW(PRNG::get()); auto rand_num1 = prng(); CHECK(rand_num1 != 0); @@ -112,7 +114,11 @@ TEST_CASE( TEST_CASE( "SeedableGlobalPRNG: Seeder singleton, errors", "[SeedableGlobalPRNG][Seeder][singleton][errors]") { - // Note: these errors will occur because PRNG sets and uses the singleton. + /* + * Retrieve a PRNG object explicitly. This will cause the PRNG to use the + * singleton seeder, after which subsequent calls should fail. 
+ */ + [[maybe_unused]] auto& x{PRNG::get()}; Seeder& seeder_ = Seeder::get(); SECTION("try to set new seed after it's been set") { @@ -128,6 +134,16 @@ TEST_CASE( } } +/* + * Verify that randomly-seeded PRNG return different numbers. This is the best + * we can do within the ordinary way within a single-process test, the only kind + * readily available within Catch2. + */ +TEST_CASE("SeedableGlobalPRNG: Random seeding", "[SeedableGlobalPRNG]") { + PRNG x(RandomSeed), y(RandomSeed); + CHECK(x() != y()); +} + TEST_CASE("random_label", "[random_label]") { auto rand_label1 = random_label(); CHECK(rand_label1.length() == 32); diff --git a/tiledb/common/stdx_string.cc b/tiledb/common/stdx_string.cc index 147017c6ae3b..1ad50a473798 100644 --- a/tiledb/common/stdx_string.cc +++ b/tiledb/common/stdx_string.cc @@ -31,21 +31,20 @@ namespace tiledb::stdx::string { -bool starts_with(const std::string& value, const std::string& prefix) { +bool starts_with(std::string_view value, std::string_view prefix) { if (prefix.size() > value.size()) return false; return std::equal(prefix.begin(), prefix.end(), value.begin()); } -bool ends_with(const std::string& value, const std::string& suffix) { +bool ends_with(std::string_view value, std::string_view suffix) { if (suffix.size() > value.size()) return false; return value.compare(value.size() - suffix.size(), suffix.size(), suffix) == 0; } -size_t common_prefix_size( - const std::string_view& a, const std::string_view& b) { +size_t common_prefix_size(std::string_view a, std::string_view b) { size_t size = std::min(a.size(), b.size()); for (size_t i = 0; i < size; ++i) { if (a[i] != b[i]) diff --git a/tiledb/common/stdx_string.h b/tiledb/common/stdx_string.h index 2f2cfb9436ca..7a9f4d37e9f5 100644 --- a/tiledb/common/stdx_string.h +++ b/tiledb/common/stdx_string.h @@ -43,7 +43,7 @@ namespace tiledb::stdx::string { * @param prefix The prefix string to be tested. * @return `true` if `value` starts with `prefix`; `false` otherwise. 
*/ -bool starts_with(const std::string& value, const std::string& prefix); +bool starts_with(std::string_view value, std::string_view prefix); /** * Checks if a string ends with a certain suffix. @@ -55,7 +55,7 @@ bool starts_with(const std::string& value, const std::string& prefix); * @param suffix The suffix to be tested. * @return `true` if `value` ends with `suffix`; `false` otherwise. */ -bool ends_with(const std::string& value, const std::string& suffix); +bool ends_with(std::string_view value, std::string_view suffix); /** * Returns the size of the common prefix between `a` and `b`. @@ -64,7 +64,7 @@ bool ends_with(const std::string& value, const std::string& suffix); * a[0..n) == b[0..n) (These ranges are empty if n == 0.) * n == length of a or n == length of b or a[n] != b[n] */ -size_t common_prefix_size(const std::string_view& a, const std::string_view& b); +size_t common_prefix_size(std::string_view a, std::string_view b); } // namespace tiledb::stdx::string @@ -72,10 +72,10 @@ size_t common_prefix_size(const std::string_view& a, const std::string_view& b); * Functions forwarded into the legacy namespace. 
*/ namespace tiledb::sm::utils::parse { -inline bool starts_with(const std::string& value, const std::string& prefix) { +inline bool starts_with(std::string_view value, std::string_view prefix) { return tiledb::stdx::string::starts_with(value, prefix); } -inline bool ends_with(const std::string& value, const std::string& suffix) { +inline bool ends_with(std::string_view value, std::string_view suffix) { return tiledb::stdx::string::ends_with(value, suffix); } } // namespace tiledb::sm::utils::parse diff --git a/tiledb/common/test/CMakeLists.txt b/tiledb/common/test/CMakeLists.txt index 6e09d2fa3d55..552212e48a3d 100644 --- a/tiledb/common/test/CMakeLists.txt +++ b/tiledb/common/test/CMakeLists.txt @@ -35,3 +35,9 @@ commence(unit_test experimental) unit_is_not_experimental.cc> ) conclude(unit_test) + +commence(unit_test memory_tracker_types) + this_target_sources(main.cc unit_memory_tracker_types.cc) + this_target_object_libraries(baseline) +conclude(unit_test) + diff --git a/tiledb/common/test/unit_memory_tracker_types.cc b/tiledb/common/test/unit_memory_tracker_types.cc new file mode 100644 index 000000000000..e43a85d1952a --- /dev/null +++ b/tiledb/common/test/unit_memory_tracker_types.cc @@ -0,0 +1,72 @@ +/** + * @file unit_memory_tracker_types.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file tests the memory tracker to_str functions. + */ + +#include + +#include +#include "tiledb/common/memory_tracker.h" + +using namespace tiledb::common; + +TEST_CASE("memory_type_to_str") { + auto max = static_cast(tiledb::sm::MemoryType::TILE_WRITER_DATA); + size_t failures = 0; + for (int8_t i = 0; i < 127; i++) { + auto val = static_cast(i); + if (i <= max) { + REQUIRE_NOTHROW(tiledb::sm::memory_type_to_str(val)); + } else { + REQUIRE_THROWS(tiledb::sm::memory_type_to_str(val)); + failures += 1; + } + } + // Technically, we could eventually have more than 127 enumeration values + // and this test would pass when it shouldn't. + REQUIRE(failures > 0); +} + +TEST_CASE("memory_tracker_type_to_str") { + auto max = static_cast(tiledb::sm::MemoryTrackerType::SCHEMA_EVOLUTION); + size_t failures = 0; + for (int8_t i = 0; i < 127; i++) { + auto val = static_cast(i); + if (i <= max) { + REQUIRE_NOTHROW(tiledb::sm::memory_tracker_type_to_str(val)); + } else { + REQUIRE_THROWS(tiledb::sm::memory_tracker_type_to_str(val)); + failures += 1; + } + } + // Technically, we could eventually have more than 127 enumeration values + // and this test would pass when it shouldn't. + REQUIRE(failures > 0); +} diff --git a/tiledb/doxygen/source/conf.py b/tiledb/doxygen/source/conf.py index b75e2e30713b..1535c996190d 100644 --- a/tiledb/doxygen/source/conf.py +++ b/tiledb/doxygen/source/conf.py @@ -76,9 +76,9 @@ author = 'TileDB, Inc.' # The short X.Y version. 
-version = '2.21' +version = '2.22' # The full version, including alpha/beta/rc tags. -release = '2.21.0' +release = '2.22.0' # Breathe extension configuration. doxygen_xml_dir = os.path.join(TILEDB_BUILD_DIR, 'xml/') diff --git a/tiledb/sm/array/CMakeLists.txt b/tiledb/sm/array/CMakeLists.txt index 737f99cde7b7..5aaebf284da9 100644 --- a/tiledb/sm/array/CMakeLists.txt +++ b/tiledb/sm/array/CMakeLists.txt @@ -35,8 +35,10 @@ commence(object_library array) this_target_object_libraries( array_schema baseline + context_resources fragment generic_tile_io + uuid vfs ) if(TILEDB_STATS) diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index 58e99219032d..cf8e9bea8622 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -33,6 +33,7 @@ #include "tiledb/common/common.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/array_schema_evolution.h" @@ -59,8 +60,7 @@ using namespace tiledb::common; -namespace tiledb { -namespace sm { +namespace tiledb::sm { class ArrayException : public StatusException { public: @@ -96,6 +96,7 @@ Array::Array( , resources_(storage_manager_->resources()) , config_(resources_.config()) , remote_(array_uri.is_tiledb()) + , memory_tracker_(storage_manager->resources().create_memory_tracker()) , consistency_controller_(cc) , consistency_sentry_(nullopt) { } @@ -137,6 +138,7 @@ Status Array::open_without_fragments( opened_array_ = make_shared( HERE(), resources_, + memory_tracker_, array_uri_, encryption_type, encryption_key, @@ -148,6 +150,7 @@ Status 
Array::open_without_fragments( /* Note: query_type_ MUST be set before calling set_array_open() because it will be examined by the ConsistencyController. */ query_type_ = QueryType::READ; + memory_tracker_->set_type(MemoryTrackerType::ARRAY_READ); /* Note: the open status MUST be exception safe. If anything interrupts the * opening process, it will throw and the array will be set as closed. */ @@ -245,6 +248,11 @@ Status Array::open( } query_type_ = query_type; + if (query_type_ == QueryType::READ) { + memory_tracker_->set_type(MemoryTrackerType::ARRAY_READ); + } else { + memory_tracker_->set_type(MemoryTrackerType::ARRAY_WRITE); + } set_timestamps( timestamp_start, timestamp_end, query_type_ == QueryType::READ); @@ -309,6 +317,7 @@ Status Array::open( opened_array_ = make_shared( HERE(), resources_, + memory_tracker_, array_uri_, encryption_type, encryption_key, @@ -585,7 +594,8 @@ std::vector> Array::get_enumerations( array_dir_timestamp_start_, array_dir_timestamp_end_, this, - names_to_load); + names_to_load, + memory_tracker_); } else { // Create a vector of paths to be loaded. 
std::vector paths_to_load; @@ -823,6 +833,7 @@ Status Array::reopen(uint64_t timestamp_start, uint64_t timestamp_end) { opened_array_ = make_shared( HERE(), resources_, + memory_tracker_, array_uri_, key->encryption_type(), key->key().data(), @@ -1039,15 +1050,13 @@ Metadata* Array::unsafe_metadata() { return &opened_array_->metadata(); } -Status Array::metadata(Metadata** metadata) { +Metadata& Array::metadata() { // Load array metadata for array opened for reads, if not loaded yet if (query_type_ == QueryType::READ && !metadata_loaded()) { - RETURN_NOT_OK(load_metadata()); + throw_if_not_ok(load_metadata()); } - *metadata = &opened_array_->metadata(); - - return Status::Ok(); + return opened_array_->metadata(); } const NDRange Array::non_empty_domain() { @@ -1059,10 +1068,6 @@ const NDRange Array::non_empty_domain() { return loaded_non_empty_domain(); } -MemoryTracker* Array::memory_tracker() { - return &memory_tracker_; -} - bool Array::serialize_non_empty_domain() const { auto found = false; auto serialize_ned_array_open = false; @@ -1261,7 +1266,8 @@ Array::open_for_reads_without_fragments() { "array_open_read_without_fragments_load_schemas"); // Load array schemas - auto result = array_directory().load_array_schemas(*encryption_key()); + auto result = + array_directory().load_array_schemas(*encryption_key(), memory_tracker_); auto version = std::get<0>(result)->version(); ensure_supported_schema_version_for_read(version); @@ -1286,7 +1292,7 @@ Array::open_for_writes() { // Load array schemas auto&& [array_schema_latest, array_schemas_all] = - array_directory().load_array_schemas(*encryption_key()); + array_directory().load_array_schemas(*encryption_key(), memory_tracker_); // If building experimentally, this library should not be able to // write to newer-versioned or older-versioned arrays @@ -1350,7 +1356,7 @@ Status Array::compute_max_buffer_sizes(const void* subarray) { last_max_buffer_sizes_.clear(); // Get all attributes and coordinates - auto 
attributes = array_schema_latest().attributes(); + auto& attributes = array_schema_latest().attributes(); last_max_buffer_sizes_.clear(); for (const auto& attr : attributes) last_max_buffer_sizes_[attr->name()] = @@ -1475,9 +1481,8 @@ void Array::do_load_metadata() { parallel_for(&resources_.compute_tp(), 0, metadata_num, [&](size_t m) { const auto& uri = array_metadata_to_load[m].uri_; - auto&& tile = - GenericTileIO::load(resources_, uri, 0, *encryption_key()); - metadata_tiles[m] = tdb::make_shared(HERE(), std::move(tile)); + metadata_tiles[m] = GenericTileIO::load( + resources_, uri, 0, *encryption_key(), memory_tracker_); return Status::Ok(); })); @@ -1615,6 +1620,7 @@ void Array::set_serialized_array_open() { opened_array_ = make_shared( HERE(), resources_, + memory_tracker_, array_uri_, EncryptionType::NO_ENCRYPTION, nullptr, @@ -1624,6 +1630,15 @@ void Array::set_serialized_array_open() { array_uri_.is_tiledb()); } +void Array::set_query_type(QueryType query_type) { + query_type_ = query_type; + if (query_type_ == QueryType::READ) { + memory_tracker_->set_type(MemoryTrackerType::ARRAY_READ); + } else { + memory_tracker_->set_type(MemoryTrackerType::ARRAY_WRITE); + } +} + void Array::set_array_closed() { std::lock_guard lock(mtx_); @@ -1677,5 +1692,4 @@ void ensure_supported_schema_version_for_read(format_version_t version) { } } -} // namespace sm -} // namespace tiledb +} // namespace tiledb::sm diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index d9f0c8b6f48d..380deef7e9c7 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -38,7 +38,6 @@ #include #include "tiledb/common/common.h" -#include "tiledb/common/memory_tracker.h" #include "tiledb/common/status.h" #include "tiledb/sm/array/array_directory.h" #include "tiledb/sm/array/consistency.h" @@ -50,12 +49,12 @@ using namespace tiledb::common; -namespace tiledb { -namespace sm { +namespace tiledb::sm { class ArraySchema; class SchemaEvolution; class FragmentMetadata; +class MemoryTracker; enum class QueryType : uint8_t; /** @@ -81,6 +80,7 @@ class OpenedArray { * Construct a new Opened Array object. * * @param resources The context resources to use. + * @param memory_tracker The array's MemoryTracker. * @param array_uri The URI of the array. * @param encryption_type Encryption type. * @param key_bytes Encryption key data. @@ -91,6 +91,7 @@ class OpenedArray { */ OpenedArray( ContextResources& resources, + shared_ptr memory_tracker, const URI& array_uri, EncryptionType encryption_type, const void* key_bytes, @@ -100,7 +101,7 @@ class OpenedArray { bool is_remote) : array_dir_(ArrayDirectory(resources, array_uri)) , array_schema_latest_(nullptr) - , metadata_() + , metadata_(memory_tracker) , metadata_loaded_(false) , non_empty_domain_computed_(false) , encryption_key_(make_shared(HERE())) @@ -730,7 +731,7 @@ class Array { std::optional metadata_type(const char* key); /** Retrieves the array metadata object. */ - Status metadata(Metadata** metadata); + Metadata& metadata(); /** * Retrieves the array metadata object. @@ -764,6 +765,7 @@ class Array { * has not been computed or loaded it will be loaded first */ const NDRange non_empty_domain(); + /** * Retrieves the array metadata object that is already loaded. If it's not yet * loaded it will be empty. @@ -783,7 +785,9 @@ class Array { } /** Returns the memory tracker. 
*/ - MemoryTracker* memory_tracker(); + inline shared_ptr memory_tracker() { + return memory_tracker_; + } /** * Checks the config to see if non empty domain should be serialized on array @@ -820,9 +824,7 @@ class Array { void set_serialized_array_open(); /** Set the query type to open the array for. */ - inline void set_query_type(QueryType query_type) { - query_type_ = query_type; - } + void set_query_type(QueryType query_type); /** * Checks the array is open, in MODIFY_EXCLUSIVE mode, before deleting data. @@ -838,6 +840,11 @@ class Array { /** Load array directory for non-remote arrays */ const ArrayDirectory& load_array_directory(); + /* Get the REST client */ + [[nodiscard]] inline shared_ptr rest_client() const { + return resources_.rest_client(); + } + private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ @@ -925,7 +932,7 @@ class Array { bool remote_; /** Memory tracker for the array. */ - MemoryTracker memory_tracker_; + shared_ptr memory_tracker_; /** A reference to the object which controls the present Array instance. 
*/ ConsistencyController& consistency_controller_; @@ -1049,7 +1056,6 @@ class Array { void set_array_closed(); }; -} // namespace sm -} // namespace tiledb +} // namespace tiledb::sm #endif // TILEDB_ARRAY_H diff --git a/tiledb/sm/array/array_directory.cc b/tiledb/sm/array/array_directory.cc index 9710c5607f29..48c0a65e49e9 100644 --- a/tiledb/sm/array/array_directory.cc +++ b/tiledb/sm/array/array_directory.cc @@ -32,6 +32,7 @@ #include "tiledb/sm/array/array_directory.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/common/stdx_string.h" #include "tiledb/sm/array_schema/enumeration.h" #include "tiledb/sm/filesystem/vfs.h" @@ -91,22 +92,24 @@ ArrayDirectory::ArrayDirectory( shared_ptr ArrayDirectory::load_array_schema_from_uri( ContextResources& resources, const URI& schema_uri, - const EncryptionKey& encryption_key) { + const EncryptionKey& encryption_key, + shared_ptr memory_tracker) { auto timer_se = resources.stats().start_timer("sm_load_array_schema_from_uri"); - auto&& tile = GenericTileIO::load(resources, schema_uri, 0, encryption_key); + auto tile = GenericTileIO::load( + resources, schema_uri, 0, encryption_key, memory_tracker); - resources.stats().add_counter("read_array_schema_size", tile.size()); + resources.stats().add_counter("read_array_schema_size", tile->size()); // Deserialize - Deserializer deserializer(tile.data(), tile.size()); - return make_shared( - HERE(), ArraySchema::deserialize(deserializer, schema_uri)); + Deserializer deserializer(tile->data(), tile->size()); + return ArraySchema::deserialize(deserializer, schema_uri, memory_tracker); } shared_ptr ArrayDirectory::load_array_schema_latest( - const EncryptionKey& encryption_key) const { + const EncryptionKey& encryption_key, + shared_ptr memory_tracker) const { auto timer_se = resources_.get().stats().start_timer("sm_load_array_schema_latest"); @@ -117,8 +120,8 @@ shared_ptr ArrayDirectory::load_array_schema_latest( // Load schema from URI 
const URI& schema_uri = latest_array_schema_uri(); - auto&& array_schema = - load_array_schema_from_uri(resources_.get(), schema_uri, encryption_key); + auto&& array_schema = load_array_schema_from_uri( + resources_.get(), schema_uri, encryption_key, memory_tracker); array_schema->set_array_uri(uri_); @@ -128,9 +131,11 @@ shared_ptr ArrayDirectory::load_array_schema_latest( tuple< shared_ptr, std::unordered_map>> -ArrayDirectory::load_array_schemas(const EncryptionKey& encryption_key) const { +ArrayDirectory::load_array_schemas( + const EncryptionKey& encryption_key, + shared_ptr memory_tracker) const { // Load all array schemas - auto&& array_schemas = load_all_array_schemas(encryption_key); + auto&& array_schemas = load_all_array_schemas(encryption_key, memory_tracker); // Locate the latest array schema const auto& array_schema_latest_name = @@ -143,7 +148,8 @@ ArrayDirectory::load_array_schemas(const EncryptionKey& encryption_key) const { std::unordered_map> ArrayDirectory::load_all_array_schemas( - const EncryptionKey& encryption_key) const { + const EncryptionKey& encryption_key, + shared_ptr memory_tracker) const { auto timer_se = resources_.get().stats().start_timer("sm_load_all_array_schemas"); @@ -167,7 +173,7 @@ ArrayDirectory::load_all_array_schemas( auto& schema_uri = schema_uris[schema_ith]; try { auto&& array_schema = load_array_schema_from_uri( - resources_.get(), schema_uri, encryption_key); + resources_.get(), schema_uri, encryption_key, memory_tracker); array_schema->set_array_uri(uri_); schema_vector[schema_ith] = array_schema; } catch (std::exception& e) { @@ -190,7 +196,7 @@ std::vector> ArrayDirectory::load_enumerations_from_paths( const std::vector& enumeration_paths, const EncryptionKey& encryption_key, - MemoryTracker& memory_tracker) const { + shared_ptr memory_tracker) const { // This should never be called with an empty list of enumeration paths, but // there's no reason to not check an early return case here given that code // changes. 
@@ -1314,27 +1320,27 @@ bool ArrayDirectory::consolidation_with_timestamps_supported( shared_ptr ArrayDirectory::load_enumeration( const std::string& enumeration_path, const EncryptionKey& encryption_key, - MemoryTracker& memory_tracker) const { + shared_ptr memory_tracker) const { auto timer_se = resources_.get().stats().start_timer("sm_load_enumeration"); auto enmr_uri = uri_.join_path(constants::array_schema_dir_name) .join_path(constants::array_enumerations_dir_name) .join_path(enumeration_path); - auto&& tile = GenericTileIO::load(resources_, enmr_uri, 0, encryption_key); - resources_.get().stats().add_counter("read_enumeration_size", tile.size()); + auto tile = GenericTileIO::load( + resources_, enmr_uri, 0, encryption_key, memory_tracker); + resources_.get().stats().add_counter("read_enumeration_size", tile->size()); - if (!memory_tracker.take_memory( - tile.size(), MemoryTracker::MemoryType::ENUMERATION)) { + if (!memory_tracker->take_memory(tile->size(), MemoryType::ENUMERATION)) { throw ArrayDirectoryException( "Error loading enumeration; Insufficient memory budget; Needed " + - std::to_string(tile.size()) + " but only had " + - std::to_string(memory_tracker.get_memory_available()) + - " from budget " + std::to_string(memory_tracker.get_memory_budget())); + std::to_string(tile->size()) + " but only had " + + std::to_string(memory_tracker->get_memory_available()) + + " from budget " + std::to_string(memory_tracker->get_memory_budget())); } - Deserializer deserializer(tile.data(), tile.size()); - return Enumeration::deserialize(deserializer); + Deserializer deserializer(tile->data(), tile->size()); + return Enumeration::deserialize(deserializer, memory_tracker); } } // namespace tiledb::sm diff --git a/tiledb/sm/array/array_directory.h b/tiledb/sm/array/array_directory.h index ffaf399a7017..5509d1900d77 100644 --- a/tiledb/sm/array/array_directory.h +++ b/tiledb/sm/array/array_directory.h @@ -33,7 +33,6 @@ #ifndef TILEDB_ARRAY_DIRECTORY_H #define 
TILEDB_ARRAY_DIRECTORY_H -#include "tiledb/common/memory_tracker.h" #include "tiledb/common/status.h" #include "tiledb/common/thread_pool.h" #include "tiledb/sm/array_schema/array_schema.h" @@ -60,6 +59,7 @@ enum class ArrayDirectoryMode { }; // Forward declaration +class MemoryTracker; class WhiteboxArrayDirectory; /** @@ -328,7 +328,8 @@ class ArrayDirectory { static shared_ptr load_array_schema_from_uri( ContextResources& resources, const URI& array_schema_uri, - const EncryptionKey& encryption_key); + const EncryptionKey& encryption_key, + shared_ptr memory_tracker); /** * Get the full vac uri using the base URI and a vac uri that might be @@ -350,7 +351,8 @@ class ArrayDirectory { * @return Status, a new ArraySchema */ shared_ptr load_array_schema_latest( - const EncryptionKey& encryption_key) const; + const EncryptionKey& encryption_key, + shared_ptr memory_tracker) const; /** * It loads and returns the latest schema and all the array schemas @@ -367,7 +369,9 @@ class ArrayDirectory { tuple< shared_ptr, std::unordered_map>> - load_array_schemas(const EncryptionKey& encryption_key) const; + load_array_schemas( + const EncryptionKey& encryption_key, + shared_ptr memory_tracker) const; /** * Loads all schemas of an array from persistent storage into memory. @@ -379,7 +383,9 @@ class ArrayDirectory { * ArraySchemaMap Map of all array schemas found keyed by name */ std::unordered_map> - load_all_array_schemas(const EncryptionKey& encryption_key) const; + load_all_array_schemas( + const EncryptionKey& encryption_key, + shared_ptr memory_tracker) const; /** * Load the enumerations from the provided list of paths. @@ -391,7 +397,7 @@ class ArrayDirectory { std::vector> load_enumerations_from_paths( const std::vector& enumeration_paths, const EncryptionKey& encryption_key, - MemoryTracker& memory_tracker) const; + shared_ptr memory_tracker) const; /** Returns the array URI. 
*/ const URI& uri() const; @@ -824,7 +830,7 @@ class ArrayDirectory { shared_ptr load_enumeration( const std::string& enumeration_path, const EncryptionKey& encryption_key, - MemoryTracker& memory_tracker) const; + shared_ptr memory_tracker) const; }; } // namespace tiledb::sm diff --git a/tiledb/sm/array/test/CMakeLists.txt b/tiledb/sm/array/test/CMakeLists.txt index 94b34c77bf70..272083ff20ea 100644 --- a/tiledb/sm/array/test/CMakeLists.txt +++ b/tiledb/sm/array/test/CMakeLists.txt @@ -29,11 +29,10 @@ include(unit_test) commence(unit_test array) this_target_compile_definitions(-DTILEDB_TEST_INPUTS_DIR="${CMAKE_SOURCE_DIR}/test/inputs/") this_target_sources(main.cc unit_array_directory.cc) - this_target_link_libraries(array context_resources) + this_target_link_libraries(array) conclude(unit_test) commence(unit_test consistency) this_target_sources(main.cc unit_consistency.cc) - this_target_link_libraries(TILEDB_CORE_OBJECTS) - this_target_link_libraries(TILEDB_CORE_OBJECTS_ILIB) + this_target_link_libraries(tiledb_test_support_lib) conclude(unit_test) diff --git a/tiledb/sm/array/test/unit_consistency.h b/tiledb/sm/array/test/unit_consistency.h index 8e2aea539fe2..c0b56da2e643 100644 --- a/tiledb/sm/array/test/unit_consistency.h +++ b/tiledb/sm/array/test/unit_consistency.h @@ -36,9 +36,11 @@ #include #include +#include "test/support/src/mem_helpers.h" #include "../array.h" #include "../consistency.h" +#include "test/support/src/mem_helpers.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/enums/array_type.h" #include "tiledb/sm/enums/encryption_type.h" @@ -62,8 +64,13 @@ using array_entry = std::tuple; using entry_type = std::multimap::const_iterator; class WhiteboxConsistencyController : public ConsistencyController { + shared_ptr memory_tracker_; + public: - WhiteboxConsistencyController() = default; + WhiteboxConsistencyController() + : memory_tracker_(tiledb::test::get_test_memory_tracker()) { + } + ~WhiteboxConsistencyController() = 
default; entry_type register_array( @@ -96,18 +103,18 @@ class WhiteboxConsistencyController : public ConsistencyController { // Create Domain uint64_t dim_dom[2]{0, 1}; uint64_t tile_extent = 1; - shared_ptr dim = - make_shared(HERE(), std::string("dim"), Datatype::UINT64); + shared_ptr dim = make_shared( + HERE(), std::string("dim"), Datatype::UINT64, memory_tracker_); throw_if_not_ok(dim->set_domain(&dim_dom)); throw_if_not_ok(dim->set_tile_extent(&tile_extent)); std::vector> dims = {dim}; - shared_ptr domain = - make_shared(HERE(), Layout::ROW_MAJOR, dims, Layout::ROW_MAJOR); + shared_ptr domain = make_shared( + HERE(), Layout::ROW_MAJOR, dims, Layout::ROW_MAJOR, memory_tracker_); // Create the ArraySchema - shared_ptr schema = - make_shared(HERE(), ArrayType::DENSE); + shared_ptr schema = make_shared( + HERE(), ArrayType::DENSE, tiledb::test::create_test_memory_tracker()); throw_if_not_ok(schema->set_domain(domain)); throw_if_not_ok(schema->add_attribute( make_shared( diff --git a/tiledb/sm/array_schema/CMakeLists.txt b/tiledb/sm/array_schema/CMakeLists.txt index 602d9b9af264..171331ad3166 100644 --- a/tiledb/sm/array_schema/CMakeLists.txt +++ b/tiledb/sm/array_schema/CMakeLists.txt @@ -38,7 +38,8 @@ commence(object_library attribute) constants filter_pipeline range - stringx) + stringx + uuid) conclude(object_library) # @@ -62,7 +63,7 @@ conclude(object_library) # commence(object_library enumeration) this_target_sources(enumeration.cc) - this_target_object_libraries(buffer constants seedable_global_PRNG) + this_target_object_libraries(buffer constants uuid) conclude(object_library) # diff --git a/tiledb/sm/array_schema/array_schema.cc b/tiledb/sm/array_schema/array_schema.cc index 95fc59ace895..e5bee621663b 100644 --- a/tiledb/sm/array_schema/array_schema.cc +++ b/tiledb/sm/array_schema/array_schema.cc @@ -35,6 +35,7 @@ #include "tiledb/common/common.h" #include "tiledb/common/heap_memory.h" #include "tiledb/common/logger.h" +#include 
"tiledb/common/memory_tracker.h" #include "tiledb/sm/array_schema/attribute.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/array_schema/dimension_label.h" @@ -53,6 +54,7 @@ #include "tiledb/sm/misc/hilbert.h" #include "tiledb/sm/misc/integral_type_casts.h" #include "tiledb/sm/misc/tdb_time.h" +#include "tiledb/sm/storage_manager/context_resources.h" #include "tiledb/sm/tile/generic_tile_io.h" #include "tiledb/storage_format/uri/generate_uri.h" #include "tiledb/type/apply_with_type.h" @@ -79,12 +81,10 @@ class ArraySchemaException : public StatusException { /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ -ArraySchema::ArraySchema() - : ArraySchema(ArrayType::DENSE) { -} - -ArraySchema::ArraySchema(ArrayType array_type) - : uri_(URI()) +ArraySchema::ArraySchema( + ArrayType array_type, shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , uri_(URI()) , array_uri_(URI()) , version_(constants::format_version) , timestamp_range_(std::make_pair( @@ -93,9 +93,19 @@ ArraySchema::ArraySchema(ArrayType array_type) , array_type_(array_type) , allows_dups_(false) , domain_(nullptr) + , dim_map_(memory_tracker_->get_resource(MemoryType::DIMENSIONS)) , cell_order_(Layout::ROW_MAJOR) , tile_order_(Layout::ROW_MAJOR) - , capacity_(constants::capacity) { + , capacity_(constants::capacity) + , attributes_(memory_tracker_->get_resource(MemoryType::ATTRIBUTES)) + , attribute_map_(memory_tracker_->get_resource(MemoryType::ATTRIBUTES)) + , dimension_labels_( + memory_tracker_->get_resource(MemoryType::DIMENSION_LABELS)) + , dimension_label_map_( + memory_tracker_->get_resource(MemoryType::DIMENSION_LABELS)) + , enumeration_map_(memory_tracker_->get_resource(MemoryType::ENUMERATION)) + , enumeration_path_map_( + memory_tracker_->get_resource(MemoryType::ENUMERATION_PATHS)) { // Set up default filter pipelines for coords, offsets, and validity values. 
coords_filters_.add_filter(CompressionFilter( constants::coords_compression, @@ -131,23 +141,44 @@ ArraySchema::ArraySchema( std::unordered_map enumeration_path_map, FilterPipeline cell_var_offsets_filters, FilterPipeline cell_validity_filters, - FilterPipeline coords_filters) - : uri_(uri) + FilterPipeline coords_filters, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , uri_(uri) , version_(version) , timestamp_range_(timestamp_range) , name_(name) , array_type_(array_type) , allows_dups_(allows_dups) , domain_(domain) + , dim_map_(memory_tracker_->get_resource(MemoryType::DIMENSIONS)) , cell_order_(cell_order) , tile_order_(tile_order) , capacity_(capacity) - , attributes_(attributes) - , dimension_labels_(dim_label_refs) - , enumeration_path_map_(enumeration_path_map) + , attributes_(memory_tracker_->get_resource(MemoryType::ATTRIBUTES)) + , attribute_map_(memory_tracker_->get_resource(MemoryType::ATTRIBUTES)) + , dimension_labels_( + memory_tracker_->get_resource(MemoryType::DIMENSION_LABELS)) + , dimension_label_map_( + memory_tracker_->get_resource(MemoryType::DIMENSION_LABELS)) + , enumeration_map_(memory_tracker_->get_resource(MemoryType::ENUMERATION)) + , enumeration_path_map_( + memory_tracker_->get_resource(MemoryType::ENUMERATION_PATHS)) , cell_var_offsets_filters_(cell_var_offsets_filters) , cell_validity_filters_(cell_validity_filters) , coords_filters_(coords_filters) { + for (auto atr : attributes) { + attributes_.push_back(atr); + } + + for (auto dim_label : dim_label_refs) { + dimension_labels_.push_back(dim_label); + } + + for (auto& elem : enumeration_path_map) { + enumeration_path_map_.insert(elem); + } + // Create dimension map for (dimension_size_type d = 0; d < domain_->dim_num(); ++d) { auto dim{domain_->dimension_ptr(d)}; @@ -192,30 +223,37 @@ ArraySchema::ArraySchema( check_attribute_dimension_label_names(); } -/* - * Copy constructor manually initializes its map members, so we don't use the - * default copy 
constructor. At some point this may no longer hold and we can - * eliminate this code in favor of the default. - */ ArraySchema::ArraySchema(const ArraySchema& array_schema) - : uri_{array_schema.uri_} + : memory_tracker_{array_schema.memory_tracker_} + , uri_{array_schema.uri_} , array_uri_{array_schema.array_uri_} , version_{array_schema.version_} , timestamp_range_{array_schema.timestamp_range_} , name_{array_schema.name_} , array_type_{array_schema.array_type_} , allows_dups_{array_schema.allows_dups_} - , domain_{} // copied below by `set_domain` - , dim_map_{} // initialized in `set_domain` + , domain_{} // copied below by `set_domain` + , dim_map_(memory_tracker_->get_resource( + MemoryType::DIMENSIONS)) // initialized in `set_domain` , cell_order_{array_schema.cell_order_} , tile_order_{array_schema.tile_order_} , capacity_{array_schema.capacity_} - , attributes_{array_schema.attributes_} - , attribute_map_{array_schema.attribute_map_} - , dimension_labels_{} // copied in loop below - , dimension_label_map_{} // initialized below - , enumeration_map_{array_schema.enumeration_map_} - , enumeration_path_map_{array_schema.enumeration_path_map_} + , attributes_( + array_schema.attributes_, + memory_tracker_->get_resource(MemoryType::ATTRIBUTES)) + , attribute_map_( + array_schema.attribute_map_, + memory_tracker_->get_resource(MemoryType::ATTRIBUTES)) + , dimension_labels_(memory_tracker_->get_resource( + MemoryType::DIMENSION_LABELS)) // copied in loop below + , dimension_label_map_( + memory_tracker_->get_resource(MemoryType::DIMENSION_LABELS)) + , enumeration_map_( + array_schema.enumeration_map_, + memory_tracker_->get_resource(MemoryType::ENUMERATION)) + , enumeration_path_map_( + array_schema.enumeration_path_map_, + memory_tracker_->get_resource(MemoryType::ENUMERATION_PATHS)) , cell_var_offsets_filters_{array_schema.cell_var_offsets_filters_} , cell_validity_filters_{array_schema.cell_validity_filters_} , coords_filters_{array_schema.coords_filters_} @@ 
-273,7 +311,7 @@ shared_ptr ArraySchema::shared_attribute( return attributes_[it->second.index]; } -const std::vector>& ArraySchema::attributes() +const tdb::pmr::vector>& ArraySchema::attributes() const { return attributes_; } @@ -548,6 +586,12 @@ void ArraySchema::check_enumerations(const Config& cfg) const { uint64_t total_size = 0; for (const auto& pair : enumeration_map_) { + if (!pair.second) { + // We don't have an Array instance at this point so the best we can do + // is just avoid segfaulting when we attempt to check with unloaded + // enumerations. + continue; + } uint64_t size = pair.second->data().size() + pair.second->offsets().size(); if (size > max_size.value()) { throw ArraySchemaException( @@ -1023,7 +1067,14 @@ void ArraySchema::add_dimension_label( // Create the dimension label reference. auto dim_label_ref = make_shared( - HERE(), dim_id, name, uri, dim, label_order, label_type); + HERE(), + dim_id, + name, + uri, + dim, + label_order, + label_type, + memory_tracker_); dimension_labels_.emplace_back(dim_label_ref); dimension_label_map_[name] = dim_label_ref.get(); } catch (...) 
{ @@ -1250,8 +1301,10 @@ void ArraySchema::drop_enumeration(const std::string& enmr_name) { } // #TODO Add security validation on incoming URI -ArraySchema ArraySchema::deserialize( - Deserializer& deserializer, const URI& uri) { +shared_ptr ArraySchema::deserialize( + Deserializer& deserializer, + const URI& uri, + shared_ptr memory_tracker) { Status st; // Load version // #TODO Add security validation @@ -1325,7 +1378,12 @@ ArraySchema ArraySchema::deserialize( // Note: Security validation delegated to invoked API // #TODO Add security validation auto domain{Domain::deserialize( - deserializer, version, cell_order, tile_order, coords_filters)}; + deserializer, + version, + cell_order, + tile_order, + coords_filters, + memory_tracker)}; // Load attributes // Note: Security validation delegated to invoked API @@ -1392,7 +1450,8 @@ ArraySchema ArraySchema::deserialize( // Set schema name std::string name = uri.last_path_part(); - return ArraySchema( + return make_shared( + HERE(), uri, version, timestamp_range, @@ -1405,13 +1464,18 @@ ArraySchema ArraySchema::deserialize( capacity, attributes, dimension_labels, - {}, + std::vector>(), enumeration_path_map, cell_var_filters, cell_validity_filters, FilterPipeline( coords_filters, - version < 5 ? domain->dimension_ptr(0)->type() : Datatype::UINT64)); + version < 5 ? 
domain->dimension_ptr(0)->type() : Datatype::UINT64), + memory_tracker); +} + +shared_ptr ArraySchema::clone() const { + return make_shared(HERE(), *this); } Status ArraySchema::set_allows_dups(bool allows_dups) { diff --git a/tiledb/sm/array_schema/array_schema.h b/tiledb/sm/array_schema/array_schema.h index f41308eec537..8c360db43071 100644 --- a/tiledb/sm/array_schema/array_schema.h +++ b/tiledb/sm/array_schema/array_schema.h @@ -37,12 +37,12 @@ #include #include "tiledb/common/common.h" +#include "tiledb/common/pmr.h" #include "tiledb/common/status.h" #include "tiledb/sm/filesystem/uri.h" #include "tiledb/sm/filter/filter_pipeline.h" #include "tiledb/sm/misc/constants.h" #include "tiledb/sm/misc/hilbert.h" -#include "tiledb/sm/storage_manager/context_resources.h" using namespace tiledb::common; @@ -56,6 +56,7 @@ class Dimension; class DimensionLabel; class Domain; class Enumeration; +class MemoryTracker; enum class ArrayType : uint8_t; enum class Compressor : uint8_t; @@ -92,12 +93,15 @@ class ArraySchema { /* ********************************* */ /** Constructor. */ - ArraySchema(); - - /** Constructor. */ - ArraySchema(ArrayType array_type); + ArraySchema() = delete; /** Constructor. + * @param memory_tracker The memory tracker that provides the memory + * resources for this array schema's containers. + */ + ArraySchema(ArrayType array_type, shared_ptr memory_tracker); + + /** Constructor with std::vector attributes. * @param uri The URI of the array schema file. * @param version The format version of this array schema. * @param timestamp_range The timestamp the array schema was written. @@ -117,6 +121,8 @@ class ArraySchema { * @param cell_validity_filters * The filter pipeline run on validity tiles for nullable attributes. * @param coords_filters The filter pipeline run on coordinate tiles. + * @param memory_tracker The memory tracker that provides the memory + * resources for this array schema's containers.
**/ ArraySchema( URI uri, @@ -135,14 +141,18 @@ class ArraySchema { std::unordered_map enumeration_path_map, FilterPipeline cell_var_offsets_filters, FilterPipeline cell_validity_filters, - FilterPipeline coords_filters); + FilterPipeline coords_filters, + shared_ptr memory_tracker); /** - * Constructor. Clones the input. + * Copy constructor. Clones the input. * * @param array_schema The array schema to copy. */ - explicit ArraySchema(const ArraySchema& array_schema); + ArraySchema(const ArraySchema& array_schema); + + DISABLE_COPY_ASSIGN(ArraySchema); + DISABLE_MOVE_AND_MOVE_ASSIGN(ArraySchema); /** Destructor. */ ~ArraySchema() = default; @@ -202,7 +212,7 @@ class ArraySchema { } /** Returns the attributes. */ - const std::vector>& attributes() const; + const tdb::pmr::vector>& attributes() const; /** Returns the capacity. */ uint64_t capacity() const; @@ -466,9 +476,16 @@ class ArraySchema { * * @param deserializer The deserializer to deserialize from. * @param uri The uri of the Array. + * @param memory_tracker The memory tracker to use. * @return A new ArraySchema. */ - static ArraySchema deserialize(Deserializer& deserializer, const URI& uri); + static shared_ptr deserialize( + Deserializer& deserializer, + const URI& uri, + shared_ptr memory_tracker); + + /** Return a cloned copy of this array schema. */ + shared_ptr clone() const; /** Returns the array domain. */ inline const Domain& domain() const { @@ -574,6 +591,11 @@ class ArraySchema { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** + * The memory tracker of the ArraySchema. + */ + shared_ptr memory_tracker_; + /** The URI of the array schema file. */ URI uri_; @@ -607,7 +629,7 @@ class ArraySchema { shared_ptr domain_; /** It maps each dimension name to the corresponding dimension object. */ - std::unordered_map dim_map_; + tdb::pmr::unordered_map dim_map_; /** * The cell order. It can be one of the following: @@ -633,7 +655,7 @@ class ArraySchema { * within this array schema. 
Other member variables reference objects within * this container. */ - std::vector> attributes_; + tdb::pmr::vector> attributes_; /** * Type for the range of the map that is member `attribute_map_`. See the @@ -654,20 +676,21 @@ class ArraySchema { * Invariant: The number of entries in `attribute_map_` is the same as the * number of entries in `attributes_` */ - std::unordered_map attribute_map_; + tdb::pmr::unordered_map attribute_map_; /** The array dimension labels. */ - std::vector> dimension_labels_; + tdb::pmr::vector> dimension_labels_; /** A map from the dimension label names to the label schemas. */ - std::unordered_map dimension_label_map_; + tdb::pmr::unordered_map + dimension_label_map_; /** A map of Enumeration names to Enumeration pointers. */ - std::unordered_map> + tdb::pmr::unordered_map> enumeration_map_; /** A map of Enumeration names to Enumeration URIs */ - std::unordered_map enumeration_path_map_; + tdb::pmr::unordered_map enumeration_path_map_; /** The filter pipeline run on offset tiles for var-length attributes. 
*/ FilterPipeline cell_var_offsets_filters_; diff --git a/tiledb/sm/array_schema/array_schema_evolution.cc b/tiledb/sm/array_schema/array_schema_evolution.cc index 7b0c3d1e90bf..117dee9607b4 100644 --- a/tiledb/sm/array_schema/array_schema_evolution.cc +++ b/tiledb/sm/array_schema/array_schema_evolution.cc @@ -35,6 +35,7 @@ #include "tiledb/common/common.h" #include "tiledb/common/heap_memory.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/common/status.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/attribute.h" @@ -71,7 +72,15 @@ class ArraySchemaEvolutionException : public StatusException { /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ -ArraySchemaEvolution::ArraySchemaEvolution() { +ArraySchemaEvolution::ArraySchemaEvolution( + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , attributes_to_add_map_( + memory_tracker->get_resource(MemoryType::ATTRIBUTES)) + , enumerations_to_add_map_( + memory_tracker_->get_resource(MemoryType::ENUMERATION)) + , enumerations_to_extend_map_( + memory_tracker_->get_resource(MemoryType::ENUMERATION)) { } ArraySchemaEvolution::ArraySchemaEvolution( @@ -81,13 +90,29 @@ ArraySchemaEvolution::ArraySchemaEvolution( std::unordered_map> enmrs_to_extend, std::unordered_set enmrs_to_drop, - std::pair timestamp_range) - : attributes_to_add_map_(attrs_to_add) + std::pair timestamp_range, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , attributes_to_add_map_( + memory_tracker->get_resource(MemoryType::ATTRIBUTES)) , attributes_to_drop_(attrs_to_drop) - , enumerations_to_add_map_(enmrs_to_add) - , enumerations_to_extend_map_(enmrs_to_extend) + , enumerations_to_add_map_( + memory_tracker_->get_resource(MemoryType::ENUMERATION)) + , enumerations_to_extend_map_( + memory_tracker_->get_resource(MemoryType::ENUMERATION)) , enumerations_to_drop_(enmrs_to_drop) , timestamp_range_(timestamp_range) { + for 
(auto& elem : attrs_to_add) { + attributes_to_add_map_.insert(elem); + } + + for (auto& elem : enmrs_to_add) { + enumerations_to_add_map_.insert(elem); + } + + for (auto& elem : enmrs_to_extend) { + enumerations_to_extend_map_.insert(elem); + } } ArraySchemaEvolution::~ArraySchemaEvolution() { @@ -106,7 +131,7 @@ shared_ptr ArraySchemaEvolution::evolve_schema( "Cannot evolve schema; Input array schema is null"); } - auto schema = make_shared(HERE(), *(orig_schema.get())); + auto schema = orig_schema->clone(); // Add enumerations. Must be done before attributes so that any attributes // referencing enumerations won't fail to be added. diff --git a/tiledb/sm/array_schema/array_schema_evolution.h b/tiledb/sm/array_schema/array_schema_evolution.h index 24818c598ca4..bc2160698f14 100644 --- a/tiledb/sm/array_schema/array_schema_evolution.h +++ b/tiledb/sm/array_schema/array_schema_evolution.h @@ -36,8 +36,8 @@ #include #include - #include "tiledb/common/common.h" +#include "tiledb/common/pmr.h" #include "tiledb/sm/filesystem/uri.h" #include "tiledb/sm/filter/filter_pipeline.h" #include "tiledb/sm/misc/constants.h" @@ -53,6 +53,7 @@ class ConstBuffer; class Dimension; class Domain; class Enumeration; +class MemoryTracker; class ArraySchema; enum class ArrayType : uint8_t; @@ -68,13 +69,17 @@ class ArraySchemaEvolution { /* ********************************* */ /** Constructor. */ - ArraySchemaEvolution(); + ArraySchemaEvolution() = delete; + + /** Constructor with memory tracker. */ + ArraySchemaEvolution(shared_ptr memory_tracker); /** Constructor. * @param attrs_to_add Attributes to add to the schema. * @param enmrs_to_add Enumerations to add to the schema. * @param attrs_to_drop Attributes to remove from the schema. * @param timestamp_range Timestamp range to use for the new schema. + * @param memory_tracker Memory tracker to use for the new schema. 
*/ ArraySchemaEvolution( std::unordered_map> attrs_to_add, @@ -84,7 +89,11 @@ class ArraySchemaEvolution { std::unordered_map> enmrs_to_extend, std::unordered_set enmrs_to_drop, - std::pair timestamp_range); + std::pair timestamp_range, + shared_ptr memory_tracker); + + DISABLE_COPY_AND_COPY_ASSIGN(ArraySchemaEvolution); + DISABLE_MOVE_AND_MOVE_ASSIGN(ArraySchemaEvolution); /** Destructor. */ ~ArraySchemaEvolution(); @@ -184,19 +193,25 @@ class ArraySchemaEvolution { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** + * The memory tracker of the ArraySchemaEvolution. + */ + shared_ptr memory_tracker_; + /** The array attributes to be added. */ /** It maps each attribute name to the corresponding attribute object. */ - std::unordered_map> attributes_to_add_map_; + tdb::pmr::unordered_map> + attributes_to_add_map_; /** The names of array attributes to be dropped. */ std::unordered_set attributes_to_drop_; /** Enumerations to add with any attribute. */ - std::unordered_map> + tdb::pmr::unordered_map> enumerations_to_add_map_; /** Enumerations to extend. */ - std::unordered_map> + tdb::pmr::unordered_map> enumerations_to_extend_map_; /** The names of array enumerations to be dropped.
*/ diff --git a/tiledb/sm/array_schema/dimension.cc b/tiledb/sm/array_schema/dimension.cc index 947aaaca2476..61fc54fcf984 100644 --- a/tiledb/sm/array_schema/dimension.cc +++ b/tiledb/sm/array_schema/dimension.cc @@ -32,6 +32,7 @@ #include "dimension.h" #include "tiledb/common/logger_public.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/common/stdx_string.h" #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/enums/filter_type.h" @@ -46,10 +47,6 @@ using namespace tiledb::common; using namespace tiledb::type; -tiledb::common::blank::blank() - : tiledb::sm::Dimension{"", tiledb::sm::Datatype::INT32} { -} - namespace tiledb::sm { class DimensionException : public StatusException { @@ -59,35 +56,157 @@ class DimensionException : public StatusException { } }; +/* ************************ */ +/* DYNAMIC DISPATCH */ +/* ************************ */ + +/** + * Dispatches Dimension behavior based on the physical type + * of the Dimension + * + * Subclasses DimensionDispatchFixedSize and DimensionDispatchVarSize + * handle the few scenarios where the dispatch is based on + * something beyond just the type (namely, function arguments). 
+ */ +template +class DimensionDispatchTyped : public Dimension::DimensionDispatch { + public: + using DimensionDispatch::DimensionDispatch; + + protected: + void ceil_to_tile( + const Range& r, uint64_t tile_num, ByteVecValue* v) const override { + return Dimension::ceil_to_tile(&base, r, tile_num, v); + } + bool check_range(const Range& range, std::string* error) const override { + return Dimension::check_range(&base, range, error); + } + bool coincides_with_tiles(const Range& r) const override { + return Dimension::coincides_with_tiles(&base, r); + } + uint64_t domain_range(const Range& range) const override { + return Dimension::domain_range(range); + } + void expand_range(const Range& r1, Range* r2) const override { + return Dimension::expand_range(r1, r2); + } + void expand_range_v(const void* v, Range* r) const override { + return Dimension::expand_range_v(v, r); + } + void expand_to_tile(Range* range) const override { + return Dimension::expand_to_tile(&base, range); + } + bool oob(const void* coord, std::string* err_msg) const override { + return Dimension::oob(&base, coord, err_msg); + } + bool covered(const Range& r1, const Range& r2) const override { + static_assert(tiledb::type::TileDBFundamental); + return Dimension::covered(r1, r2); + } + bool overlap(const Range& r1, const Range& r2) const override { + static_assert(tiledb::type::TileDBFundamental); + return Dimension::overlap(r1, r2); + } + double overlap_ratio(const Range& r1, const Range& r2) const override { + static_assert(tiledb::type::TileDBFundamental); + return Dimension::overlap_ratio(r1, r2); + } + void relevant_ranges( + const NDRange& ranges, + const Range& mbr, + tdb::pmr::vector& relevant_ranges) const override { + static_assert(tiledb::type::TileDBFundamental); + return Dimension::relevant_ranges(ranges, mbr, relevant_ranges); + } + std::vector covered_vec( + const NDRange& ranges, + const Range& mbr, + const tdb::pmr::vector& relevant_ranges) const override { + 
static_assert(tiledb::type::TileDBFundamental); + return Dimension::covered_vec(ranges, mbr, relevant_ranges); + } + void split_range(const Range& r, const ByteVecValue& v, Range* r1, Range* r2) + const override { + static_assert(tiledb::type::TileDBFundamental); + return Dimension::split_range(r, v, r1, r2); + } + void splitting_value( + const Range& r, ByteVecValue* v, bool* unsplittable) const override { + static_assert(tiledb::type::TileDBFundamental); + return Dimension::splitting_value(r, v, unsplittable); + } + uint64_t tile_num(const Range& range) const override { + static_assert(tiledb::type::TileDBFundamental); + return Dimension::tile_num(&base, range); + } + uint64_t map_to_uint64( + const void* coord, + uint64_t coord_size, + int bits, + uint64_t max_bucket_val) const override { + static_assert(tiledb::type::TileDBFundamental); + return Dimension::map_to_uint64_2( + &base, coord, coord_size, bits, max_bucket_val); + } + ByteVecValue map_from_uint64( + uint64_t value, int bits, uint64_t max_bucket_val) const override { + static_assert(tiledb::type::TileDBFundamental); + return Dimension::map_from_uint64(&base, value, bits, max_bucket_val); + } + bool smaller_than( + const ByteVecValue& value, const Range& range) const override { + static_assert(tiledb::type::TileDBFundamental); + return Dimension::smaller_than(&base, value, range); + } +}; + +template +class DimensionFixedSize : public DimensionDispatchTyped { + public: + using DimensionDispatchTyped::DimensionDispatchTyped; + + protected: + Range compute_mbr(const WriterTile& tile) const override { + return Dimension::compute_mbr(tile); + } + + Range compute_mbr_var(const WriterTile&, const WriterTile&) const override { + throw std::logic_error( + "Fixed-length dimension has no offset tile, function " + + std::string(__func__) + " cannot be called"); + } +}; + +class DimensionVarSize : public DimensionDispatchTyped { + public: + using DimensionDispatchTyped::DimensionDispatchTyped; + + protected: + 
Range compute_mbr(const WriterTile&) const override { + throw std::logic_error( + "Variable-length dimension requires an offset tile, function " + + std::string(__func__) + " cannot be called"); + } + + /** note: definition at the bottom due to template specialization */ + Range compute_mbr_var( + const WriterTile& tile_off, const WriterTile& tile_val) const override; +}; + /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ -Dimension::Dimension(const std::string& name, Datatype type) - : name_(name) +Dimension::Dimension( + const std::string& name, + Datatype type, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , name_(name) , type_(type) { ensure_datatype_is_supported(type_); cell_val_num_ = (datatype_is_string(type)) ? constants::var_num : 1; - set_ceil_to_tile_func(); - set_coincides_with_tiles_func(); - set_compute_mbr_func(); - set_crop_range_func(); - set_domain_range_func(); - set_expand_range_func(); - set_expand_range_v_func(); - set_expand_to_tile_func(); - set_oob_func(); - set_covered_func(); - set_overlap_func(); - set_overlap_ratio_func(); - set_relevant_ranges_func(); - set_covered_vec_func(); - set_split_range_func(); - set_splitting_value_func(); - set_tile_num_func(); - set_map_to_uint64_2_func(); - set_map_from_uint64_func(); - set_smaller_than_func(); + set_dimension_dispatch(); } Dimension::Dimension( @@ -96,34 +215,17 @@ Dimension::Dimension( uint32_t cell_val_num, const Range& domain, const FilterPipeline& filter_pipeline, - const ByteVecValue& tile_extent) - : cell_val_num_(cell_val_num) + const ByteVecValue& tile_extent, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , cell_val_num_(cell_val_num) , domain_(domain) , filters_(filter_pipeline) , name_(name) , tile_extent_(tile_extent) , type_(type) { ensure_datatype_is_supported(type_); - set_ceil_to_tile_func(); - set_coincides_with_tiles_func(); - set_compute_mbr_func(); - 
set_crop_range_func(); - set_domain_range_func(); - set_expand_range_func(); - set_expand_range_v_func(); - set_expand_to_tile_func(); - set_oob_func(); - set_covered_func(); - set_overlap_func(); - set_overlap_ratio_func(); - set_relevant_ranges_func(); - set_covered_vec_func(); - set_split_range_func(); - set_splitting_value_func(); - set_tile_num_func(); - set_map_to_uint64_2_func(); - set_map_from_uint64_func(); - set_smaller_than_func(); + set_dimension_dispatch(); } /* ********************************* */ @@ -154,7 +256,8 @@ shared_ptr Dimension::deserialize( Deserializer& deserializer, uint32_t version, Datatype type, - FilterPipeline& coords_filters) { + FilterPipeline& coords_filters, + shared_ptr memory_tracker) { Status st; // Load dimension name auto dimension_name_size = deserializer.read(); @@ -212,7 +315,8 @@ shared_ptr Dimension::deserialize( cell_val_num, domain, filter_pipeline, - tile_extent); + tile_extent, + memory_tracker); } const Range& Dimension::domain() const { @@ -273,8 +377,7 @@ void Dimension::ceil_to_tile( void Dimension::ceil_to_tile( const Range& r, uint64_t tile_num, ByteVecValue* v) const { - assert(ceil_to_tile_func_ != nullptr); - ceil_to_tile_func_(this, r, tile_num, v); + dispatch_->ceil_to_tile(r, tile_num, v); } template @@ -293,8 +396,7 @@ bool Dimension::coincides_with_tiles(const Dimension* dim, const Range& r) { } bool Dimension::coincides_with_tiles(const Range& r) const { - assert(coincides_with_tiles_func_ != nullptr); - return coincides_with_tiles_func_(this, r); + return dispatch_->coincides_with_tiles(r); } template @@ -318,8 +420,7 @@ Range Dimension::compute_mbr(const WriterTile& tile) { } Range Dimension::compute_mbr(const WriterTile& tile) const { - assert(compute_mbr_func_ != nullptr); - return compute_mbr_func_(tile); + return dispatch_->compute_mbr(tile); } template <> @@ -351,23 +452,7 @@ Range Dimension::compute_mbr_var( Range Dimension::compute_mbr_var( const WriterTile& tile_off, const WriterTile& 
tile_val) const { - assert(compute_mbr_var_func_ != nullptr); - return compute_mbr_var_func_(tile_off, tile_val); -} - -template -void Dimension::crop_range(const Dimension* dim, Range* range) { - assert(dim != nullptr); - assert(!range->empty()); - auto dim_dom = (const T*)dim->domain().data(); - auto r = (const T*)range->data(); - T res[2] = {std::max(r[0], dim_dom[0]), std::min(r[1], dim_dom[1])}; - range->set_range(res, sizeof(res)); -} - -void Dimension::crop_range(Range* range) const { - assert(crop_range_func_ != nullptr); - crop_range_func_(this, range); + return dispatch_->compute_mbr_var(tile_off, tile_val); } template @@ -388,8 +473,7 @@ uint64_t Dimension::domain_range(const Range& range) { } uint64_t Dimension::domain_range(const Range& range) const { - assert(domain_range_func_ != nullptr); - return domain_range_func_(range); + return dispatch_->domain_range(range); } template @@ -404,8 +488,7 @@ void Dimension::expand_range_v(const void* v, Range* r) { } void Dimension::expand_range_v(const void* v, Range* r) const { - assert(expand_range_v_func_ != nullptr); - expand_range_v_func_(v, r); + return dispatch_->expand_range_v(v, r); } void Dimension::expand_range_var_v(const char* v, uint64_t v_size, Range* r) { @@ -431,8 +514,7 @@ void Dimension::expand_range(const Range& r1, Range* r2) { } void Dimension::expand_range(const Range& r1, Range* r2) const { - assert(expand_range_func_ != nullptr); - expand_range_func_(r1, r2); + return dispatch_->expand_range(r1, r2); } void Dimension::expand_range_var(const Range& r1, Range* r2) const { @@ -473,8 +555,7 @@ void Dimension::expand_to_tile(const Dimension* dim, Range* range) { } void Dimension::expand_to_tile(Range* range) const { - assert(expand_to_tile_func_ != nullptr); - expand_to_tile_func_(this, range); + return dispatch_->expand_to_tile(range); } template @@ -504,9 +585,8 @@ Status Dimension::oob(const void* coord) const { if (datatype_is_string(type_)) return Status::Ok(); - assert(oob_func_ != 
nullptr); std::string err_msg; - auto ret = oob_func_(this, coord, &err_msg); + auto ret = dispatch_->oob(coord, &err_msg); if (ret) return Status_DimensionError(err_msg); return Status::Ok(); @@ -536,8 +616,7 @@ bool Dimension::covered(const Range& r1, const Range& r2) { } bool Dimension::covered(const Range& r1, const Range& r2) const { - assert(covered_func_ != nullptr); - return covered_func_(r1, r2); + return dispatch_->covered(r1, r2); } template <> @@ -564,8 +643,7 @@ bool Dimension::overlap(const Range& r1, const Range& r2) { } bool Dimension::overlap(const Range& r1, const Range& r2) const { - assert(overlap_func_ != nullptr); - return overlap_func_(r1, r2); + return dispatch_->overlap(r1, r2); } template <> @@ -707,23 +785,21 @@ double Dimension::overlap_ratio(const Range& r1, const Range& r2) { } double Dimension::overlap_ratio(const Range& r1, const Range& r2) const { - assert(overlap_ratio_func_ != nullptr); - return overlap_ratio_func_(r1, r2); + return dispatch_->overlap_ratio(r1, r2); } void Dimension::relevant_ranges( const NDRange& ranges, const Range& mbr, - std::vector& relevant_ranges) const { - assert(relevant_ranges_func_ != nullptr); - return relevant_ranges_func_(ranges, mbr, relevant_ranges); + tdb::pmr::vector& relevant_ranges) const { + return dispatch_->relevant_ranges(ranges, mbr, relevant_ranges); } template <> void Dimension::relevant_ranges( const NDRange& ranges, const Range& mbr, - std::vector& relevant_ranges) { + tdb::pmr::vector& relevant_ranges) { const auto& mbr_start = mbr.start_str(); const auto& mbr_end = mbr.end_str(); @@ -772,7 +848,7 @@ template void Dimension::relevant_ranges( const NDRange& ranges, const Range& mbr, - std::vector& relevant_ranges) { + tdb::pmr::vector& relevant_ranges) { const auto mbr_data = (const T*)mbr.start_fixed(); const auto mbr_start = mbr_data[0]; const auto mbr_end = mbr_data[1]; @@ -816,21 +892,19 @@ void Dimension::relevant_ranges( std::vector Dimension::covered_vec( const NDRange& ranges, 
const Range& mbr, - const std::vector& relevant_ranges) const { - assert(covered_vec_func_ != nullptr); - return covered_vec_func_(ranges, mbr, relevant_ranges); + const tdb::pmr::vector& relevant_ranges) const { + return dispatch_->covered_vec(ranges, mbr, relevant_ranges); } template <> std::vector Dimension::covered_vec( const NDRange& ranges, const Range& mbr, - const std::vector& relevant_ranges) { + const tdb::pmr::vector& relevant_ranges) { const auto& range_start = mbr.start_str(); const auto& range_end = mbr.end_str(); - std::vector covered; - covered.resize(relevant_ranges.size()); + std::vector covered(relevant_ranges.size()); for (uint64_t i = 0; i < relevant_ranges.size(); i++) { auto r = relevant_ranges[i]; auto r2_start = ranges[r].start_str(); @@ -846,11 +920,10 @@ template std::vector Dimension::covered_vec( const NDRange& ranges, const Range& mbr, - const std::vector& relevant_ranges) { + const tdb::pmr::vector& relevant_ranges) { auto d1 = (const T*)mbr.start_fixed(); - std::vector covered; - covered.resize(relevant_ranges.size()); + std::vector covered(relevant_ranges.size()); for (uint64_t i = 0; i < relevant_ranges.size(); i++) { auto r = relevant_ranges[i]; auto d2 = (const T*)ranges[r].start_fixed(); @@ -942,8 +1015,7 @@ void Dimension::split_range( void Dimension::split_range( const Range& r, const ByteVecValue& v, Range* r1, Range* r2) const { - assert(split_range_func_ != nullptr); - split_range_func_(r, v, r1, r2); + dispatch_->split_range(r, v, r1, r2); } template <> @@ -1075,8 +1147,7 @@ void Dimension::splitting_value( void Dimension::splitting_value( const Range& r, ByteVecValue* v, bool* unsplittable) const { - assert(splitting_value_func_ != nullptr); - splitting_value_func_(r, v, unsplittable); + dispatch_->splitting_value(r, v, unsplittable); } template <> @@ -1103,8 +1174,7 @@ uint64_t Dimension::tile_num(const Dimension* dim, const Range& range) { } uint64_t Dimension::tile_num(const Range& range) const { - 
assert(tile_num_func_ != nullptr); - return tile_num_func_(this, range); + return dispatch_->tile_num(range); } uint64_t Dimension::map_to_uint64( @@ -1112,8 +1182,7 @@ uint64_t Dimension::map_to_uint64( uint64_t coord_size, int bits, uint64_t max_bucket_val) const { - assert(map_to_uint64_2_func_ != nullptr); - return map_to_uint64_2_func_(this, coord, coord_size, bits, max_bucket_val); + return dispatch_->map_to_uint64(coord, coord_size, bits, max_bucket_val); } template @@ -1162,8 +1231,7 @@ uint64_t Dimension::map_to_uint64_2( ByteVecValue Dimension::map_from_uint64( uint64_t value, int bits, uint64_t max_bucket_val) const { - assert(map_from_uint64_func_ != nullptr); - return map_from_uint64_func_(this, value, bits, max_bucket_val); + return dispatch_->map_from_uint64(value, bits, max_bucket_val); } template @@ -1224,8 +1292,7 @@ ByteVecValue Dimension::map_from_uint64( bool Dimension::smaller_than( const ByteVecValue& value, const Range& range) const { - assert(smaller_than_func_ != nullptr); - return smaller_than_func_(this, value, range); + return dispatch_->smaller_than(value, range); } template @@ -1591,209 +1658,24 @@ std::string Dimension::tile_extent_str() const { return apply_with_type(g, type_); } -void Dimension::set_crop_range_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBNumeric) { - crop_range_func_ = crop_range; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_domain_range_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - domain_range_func_ = domain_range; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_ceil_to_tile_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - ceil_to_tile_func_ = ceil_to_tile; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_coincides_with_tiles_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - coincides_with_tiles_func_ = coincides_with_tiles; 
- } - }; - apply_with_type(g, type_); -} - -void Dimension::set_compute_mbr_func() { - if (!var_size()) { // Fixed-sized - compute_mbr_var_func_ = nullptr; - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBNumeric) { - compute_mbr_func_ = compute_mbr; - } - }; - apply_with_type(g, type_); - } else { // Var-sized +void Dimension::set_dimension_dispatch() { + if (var_size()) { + dispatch_ = + tdb_unique_ptr(tdb_new(DimensionVarSize, *this)); assert(type_ == Datatype::STRING_ASCII); - compute_mbr_func_ = nullptr; - compute_mbr_var_func_ = compute_mbr_var; + } else { + // Fixed-sized + auto set = [&](auto T) { + this->dispatch_ = tdb_unique_ptr( + tdb_new(DimensionFixedSize, *this)); + }; + apply_with_type(set, type_); } } -void Dimension::set_expand_range_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - expand_range_func_ = expand_range; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_expand_range_v_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - expand_range_v_func_ = expand_range_v; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_expand_to_tile_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - expand_to_tile_func_ = expand_to_tile; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_oob_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - oob_func_ = oob; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_covered_func() { - auto g = [&](auto T) { - if constexpr (std::is_same_v) { - assert(var_size()); - } - if constexpr (tiledb::type::TileDBFundamental) { - covered_func_ = covered; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_overlap_func() { - auto g = [&](auto T) { - if constexpr (std::is_same_v) { - assert(var_size()); - } - if constexpr (tiledb::type::TileDBFundamental) { - overlap_func_ = overlap; - } - }; - apply_with_type(g, 
type_); -} - -void Dimension::set_overlap_ratio_func() { - auto g = [&](auto T) { - if constexpr (std::is_same_v) { - assert(var_size()); - } - if constexpr (tiledb::type::TileDBFundamental) { - overlap_ratio_func_ = overlap_ratio; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_relevant_ranges_func() { - auto g = [&](auto T) { - if constexpr (std::is_same_v) { - assert(var_size()); - } - if constexpr (tiledb::type::TileDBFundamental) { - relevant_ranges_func_ = relevant_ranges; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_covered_vec_func() { - auto g = [&](auto T) { - if constexpr (std::is_same_v) { - assert(var_size()); - } - if constexpr (tiledb::type::TileDBFundamental) { - covered_vec_func_ = covered_vec; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_split_range_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - split_range_func_ = split_range; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_splitting_value_func() { - auto g = [&](auto T) { - if constexpr (std::is_same_v) { - assert(var_size()); - } - if constexpr (tiledb::type::TileDBFundamental) { - splitting_value_func_ = splitting_value; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_tile_num_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - tile_num_func_ = tile_num; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_map_to_uint64_2_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - map_to_uint64_2_func_ = map_to_uint64_2; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_map_from_uint64_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - map_from_uint64_func_ = map_from_uint64; - } - }; - apply_with_type(g, type_); -} - -void Dimension::set_smaller_than_func() { - auto g = [&](auto T) { - if constexpr (tiledb::type::TileDBFundamental) { - 
smaller_than_func_ = smaller_than; - } - }; - apply_with_type(g, type_); +Range DimensionVarSize::compute_mbr_var( + const WriterTile& tile_off, const WriterTile& tile_val) const { + return Dimension::compute_mbr_var(tile_off, tile_val); } } // namespace tiledb::sm diff --git a/tiledb/sm/array_schema/dimension.h b/tiledb/sm/array_schema/dimension.h index 0e2a0014e377..45665f6e3ea7 100644 --- a/tiledb/sm/array_schema/dimension.h +++ b/tiledb/sm/array_schema/dimension.h @@ -37,12 +37,15 @@ #include #include #include +#include #include #include #include "tiledb/common/blank.h" #include "tiledb/common/common.h" #include "tiledb/common/logger_public.h" +#include "tiledb/common/macros.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/common/status.h" #include "tiledb/sm/enums/datatype.h" #include "tiledb/sm/misc/constants.h" @@ -65,6 +68,8 @@ class FilterPipeline; enum class Compressor : uint8_t; enum class Datatype : uint8_t; +class DimensionDispatch; + /** Manipulates a TileDB dimension. * * Note: as laid out in the Storage Format, @@ -89,8 +94,12 @@ class Dimension { * * @param name The name of the dimension. * @param type The type of the dimension. + * @param memory_tracker The memory tracker to use. */ - Dimension(const std::string& name, Datatype type); + Dimension( + const std::string& name, + Datatype type, + shared_ptr memory_tracker); /** * Constructor. @@ -101,6 +110,7 @@ class Dimension { * @param domain The range of the dimension range. * @param filter_pipeline The filters of the dimension. * @param tile_extent The tile extent of the dimension. + * @param memory_tracker The memory tracker to use. */ Dimension( const std::string& name, @@ -108,34 +118,15 @@ class Dimension { uint32_t cell_val_num, const Range& domain, const FilterPipeline& filter_pipeline, - const ByteVecValue& tile_extent); - - /** - * Copy constructor is deleted. - * - * `Dimension` objects are stored as `shared_ptr` within C API handles and - * within `Domain`. 
Instead of copying a `Dimension` one can copy a pointer. - */ - Dimension(const Dimension&) = delete; + const ByteVecValue& tile_extent, + shared_ptr memory_tracker); - /** - * Copy assignment is deleted. - */ - Dimension& operator=(const Dimension&) = delete; + DISABLE_COPY_AND_COPY_ASSIGN(Dimension); + DISABLE_MOVE_AND_MOVE_ASSIGN(Dimension); /** Destructor. */ ~Dimension() = default; - /** - * Move constructor is default - */ - Dimension(Dimension&&) = default; - - /** - * Move assignment is default - */ - Dimension& operator=(Dimension&&) = default; - /* ********************************* */ /* API */ /* ********************************* */ @@ -158,13 +149,15 @@ class Dimension { * @param version The array schema version. * @param type The type of the dimension. * @param coords_filters Coords filters to replace empty coords pipelines. + * @param memory_tracker The memory tracker to use. * @return Dimension */ static shared_ptr deserialize( Deserializer& deserializer, uint32_t version, Datatype type, - FilterPipeline& coords_filters); + FilterPipeline& coords_filters, + shared_ptr memory_tracker); /** Returns the domain. */ const Range& domain() const; @@ -487,19 +480,6 @@ class Dimension { static Range compute_mbr_var( const WriterTile& tile_off, const WriterTile& tile_val); - /** - * Crops the input 1D range such that it does not exceed the - * dimension domain. - */ - void crop_range(Range* range) const; - - /** - * Crops the input 1D range such that it does not exceed the - * dimension domain. - */ - template - static void crop_range(const Dimension* dim, Range* range); - /** * Returns the domain range (high - low + 1) of the input * 1D range. It returns 0 in case the dimension datatype @@ -597,27 +577,27 @@ class Dimension { void relevant_ranges( const NDRange& ranges, const Range& mbr, - std::vector& relevant_ranges) const; + tdb::pmr::vector& relevant_ranges) const; /** Compute relevant ranges for a set of ranges. 
*/ template static void relevant_ranges( const NDRange& ranges, const Range& mbr, - std::vector& relevant_ranges); + tdb::pmr::vector& relevant_ranges); /** Compute covered on a set of relevant ranges. */ std::vector covered_vec( const NDRange& ranges, const Range& mbr, - const std::vector& relevant_ranges) const; + const tdb::pmr::vector& relevant_ranges) const; /** Compute covered on a set of relevant ranges. */ template static std::vector covered_vec( const NDRange& ranges, const Range& mbr, - const std::vector& relevant_ranges); + const tdb::pmr::vector& relevant_ranges); /** Splits `r` at point `v`, producing 1D ranges `r1` and `r2`. */ void split_range( @@ -760,11 +740,71 @@ class Dimension { return cell_val_num_ == constants::var_num; } + class DimensionDispatch { + public: + DimensionDispatch(const Dimension& base) + : base(base) { + } + + virtual ~DimensionDispatch() { + } + + virtual void ceil_to_tile( + const Range& r, uint64_t tile_num, ByteVecValue* v) const = 0; + virtual bool check_range(const Range& range, std::string* error) const = 0; + virtual bool coincides_with_tiles(const Range& r) const = 0; + virtual Range compute_mbr(const WriterTile&) const = 0; + virtual Range compute_mbr_var( + const WriterTile&, const WriterTile&) const = 0; + virtual uint64_t domain_range(const Range& range) const = 0; + virtual void expand_range(const Range& r1, Range* r2) const = 0; + virtual void expand_range_v(const void* v, Range* r) const = 0; + virtual void expand_to_tile(Range* range) const = 0; + virtual bool oob(const void* coord, std::string* err_msg) const = 0; + virtual bool covered(const Range& r1, const Range& r2) const = 0; + virtual bool overlap(const Range& r1, const Range& r2) const = 0; + virtual double overlap_ratio(const Range& r1, const Range& r2) const = 0; + virtual void relevant_ranges( + const NDRange& ranges, + const Range& mbr, + tdb::pmr::vector& relevant_ranges) const = 0; + virtual std::vector covered_vec( + const NDRange& ranges, + 
const Range& mbr, + const tdb::pmr::vector& relevant_ranges) const = 0; + virtual void split_range( + const Range& r, const ByteVecValue& v, Range* r1, Range* r2) const = 0; + virtual void splitting_value( + const Range& r, ByteVecValue* v, bool* unsplittable) const = 0; + virtual uint64_t tile_num(const Range& range) const = 0; + virtual uint64_t map_to_uint64( + const void* coord, + uint64_t coord_size, + int bits, + uint64_t max_bucket_val) const = 0; + virtual ByteVecValue map_from_uint64( + uint64_t value, int bits, uint64_t max_bucket_val) const = 0; + virtual bool smaller_than( + const ByteVecValue& value, const Range& range) const = 0; + + protected: + const Dimension& base; + }; + friend class DimensionDispatch; + private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The memory tracker for the dimension. */ + shared_ptr memory_tracker_; + + /** + * Handles dynamic dispatch for functions which depend on Dimension type + */ + tdb_unique_ptr dispatch_; + /** The number of values per coordinate. */ unsigned cell_val_num_; @@ -783,152 +823,6 @@ class Dimension { /** The dimension type. */ Datatype type_; - /** - * Stores the appropriate templated ceil_to_tile() function based on the - * dimension datatype. - */ - std::function - ceil_to_tile_func_; - - /** - * Stores the appropriate templated check_range() function based on the - * dimension datatype. - */ - std::function - check_range_func_; - - /** - * Stores the appropriate templated coincides_with_tiles() function based on - * the dimension datatype. - */ - std::function - coincides_with_tiles_func_; - - /** - * Stores the appropriate templated compute_mbr() function based on the - * dimension datatype. - */ - std::function compute_mbr_func_; - - /** - * Stores the appropriate templated compute_mbr_var() function based on the - * dimension datatype. 
- */ - std::function - compute_mbr_var_func_; - - /** - * Stores the appropriate templated crop_range() function based on the - * dimension datatype. - */ - std::function crop_range_func_; - - /** - * Stores the appropriate templated crop_range() function based on the - * dimension datatype. - */ - std::function domain_range_func_; - - /** - * Stores the appropriate templated expand_range() function based on the - * dimension datatype. - */ - std::function expand_range_v_func_; - - /** - * Stores the appropriate templated expand_range() function based on the - * dimension datatype. - */ - std::function expand_range_func_; - - /** - * Stores the appropriate templated expand_to_tile() function based on the - * dimension datatype. - */ - std::function expand_to_tile_func_; - - /** - * Stores the appropriate templated oob() function based on the - * dimension datatype. - */ - std::function - oob_func_; - - /** - * Stores the appropriate templated covered() function based on the - * dimension datatype. - */ - std::function covered_func_; - - /** - * Stores the appropriate templated overlap() function based on the - * dimension datatype. - */ - std::function overlap_func_; - - /** - * Stores the appropriate templated overlap_ratio() function based on the - * dimension datatype. - */ - std::function overlap_ratio_func_; - - /** - * Stores the appropriate templated relevant_ranges() function based - * on the dimension datatype. - */ - std::function&)> - relevant_ranges_func_; - - /** - * Stores the appropriate templated covered_vec() function based on the - * dimension datatype. - */ - std::function( - const NDRange&, const Range&, const std::vector&)> - covered_vec_func_; - - /** - * Stores the appropriate templated split_range() function based on the - * dimension datatype. - */ - std::function - split_range_func_; - - /** - * Stores the appropriate templated splitting_value() function based on the - * dimension datatype. 
- */ - std::function - splitting_value_func_; - - /** - * Stores the appropriate templated tile_num() function based on the - * dimension datatype. - */ - std::function tile_num_func_; - - /** - * Stores the appropriate templated map_to_uint64_2() function based on - * the dimension datatype. - */ - std::function - map_to_uint64_2_func_; - - /** - * Stores the appropriate templated map_from_uint64() function based on - * the dimension datatype. - */ - std::function - map_from_uint64_func_; - - /** - * Stores the appropriate templated smaller_than() function based on - * the dimension datatype. - */ - std::function - smaller_than_func_; - /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ @@ -1020,77 +914,13 @@ class Dimension { /** Returns the tile extent in string format. */ std::string tile_extent_str() const; - /** Sets the templated ceil_to_tile() function. */ - void set_ceil_to_tile_func(); - - /** Sets the templated check_range() function. */ - void set_check_range_func(); - - /** Sets the templated coincides_with_tiles() function. */ - void set_coincides_with_tiles_func(); - - /** Sets the templated compute_mbr() function. */ - void set_compute_mbr_func(); - - /** Sets the templated crop_range() function. */ - void set_crop_range_func(); - - /** Sets the templated domain_range() function. */ - void set_domain_range_func(); - - /** Sets the templated expand_range() function. */ - void set_expand_range_func(); - - /** Sets the templated expand_range_v() function. */ - void set_expand_range_v_func(); - - /** Sets the templated expand_to_tile() function. */ - void set_expand_to_tile_func(); - - /** Sets the templated oob() function. */ - void set_oob_func(); - - /** Sets the templated covered() function. */ - void set_covered_func(); - - /** Sets the templated overlap() function. */ - void set_overlap_func(); - - /** Sets the templated overlap_ratio() function. 
*/ - void set_overlap_ratio_func(); - - /** Sets the templated relevant_ranges() function. */ - void set_relevant_ranges_func(); - - /** Sets the templated covered_vec() function. */ - void set_covered_vec_func(); - - /** Sets the templated split_range() function. */ - void set_split_range_func(); - - /** Sets the templated splitting_value() function. */ - void set_splitting_value_func(); - - /** Sets the templated tile_num() function. */ - void set_tile_num_func(); - - /** Sets the templated map_to_uint64_2() function. */ - void set_map_to_uint64_2_func(); - - /** Sets the templated map_from_uint64() function. */ - void set_map_from_uint64_func(); - - /** Sets the templated smaller_than() function. */ - void set_smaller_than_func(); + /** + * Sets the dimension dynamic dispatch implementation. + * Called in the constructor. + */ + void set_dimension_dispatch(); }; } // namespace tiledb::sm -namespace tiledb::common { -template <> -struct blank : public tiledb::sm::Dimension { - blank(); -}; -} // namespace tiledb::common - #endif // TILEDB_DIMENSION_H diff --git a/tiledb/sm/array_schema/dimension_label.cc b/tiledb/sm/array_schema/dimension_label.cc index 0d41a3ff200a..b9138099dd83 100644 --- a/tiledb/sm/array_schema/dimension_label.cc +++ b/tiledb/sm/array_schema/dimension_label.cc @@ -28,6 +28,7 @@ #include "tiledb/sm/array_schema/dimension_label.h" #include "tiledb/common/common.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/array_schema/domain.h" @@ -128,7 +129,8 @@ DimensionLabel::DimensionLabel( const URI& uri, const Dimension* dim, DataOrder label_order, - Datatype label_type) + Datatype label_type, + shared_ptr memory_tracker) : dim_id_(dim_id) , dim_label_name_(dim_label_name) , uri_(uri) @@ -139,8 +141,9 @@ DimensionLabel::DimensionLabel( label_type == Datatype::STRING_ASCII ? 
constants::var_num : 1) , schema_(make_shared( HERE(), - label_order == DataOrder::UNORDERED_DATA ? ArrayType::SPARSE : - ArrayType::DENSE)) + (label_order == DataOrder::UNORDERED_DATA ? ArrayType::SPARSE : + ArrayType::DENSE), + memory_tracker)) , is_external_(false) , relative_uri_(true) { auto index_type{dim->type()}; @@ -169,12 +172,16 @@ DimensionLabel::DimensionLabel( // Create and set dimension label domain. std::vector> index_dims{ - make_shared(HERE(), "index", index_type)}; + make_shared(HERE(), "index", index_type, memory_tracker)}; throw_if_not_ok(index_dims.back()->set_domain(dim->domain().data())); throw_if_not_ok( index_dims.back()->set_tile_extent(dim->tile_extent().data())); throw_if_not_ok(schema_->set_domain(make_shared( - HERE(), Layout::ROW_MAJOR, index_dims, Layout::ROW_MAJOR))); + HERE(), + Layout::ROW_MAJOR, + index_dims, + Layout::ROW_MAJOR, + memory_tracker))); // Create and set dimension label attribute. auto label_attr = make_shared( diff --git a/tiledb/sm/array_schema/dimension_label.h b/tiledb/sm/array_schema/dimension_label.h index 3d91c8ffeda7..6a69adc7c969 100644 --- a/tiledb/sm/array_schema/dimension_label.h +++ b/tiledb/sm/array_schema/dimension_label.h @@ -48,6 +48,7 @@ class ArraySchema; class Buffer; class ConstBuffer; class Dimension; +class MemoryTracker; enum class Datatype : uint8_t; enum class DataOrder : uint8_t; @@ -107,6 +108,7 @@ class DimensionLabel { /** * Constructor for an internally generated dimension label. * + * @param memory_tracker Memory tracker for the dimension label. * @param dim_id The index of the dimension the label is attached to. * @param dim_label_name The name of the dimension label. * @param uri The URI of an external dimension label. @@ -120,7 +122,8 @@ class DimensionLabel { const URI& uri, const Dimension* dim, DataOrder label_order, - Datatype label_type); + Datatype label_type, + shared_ptr memory_tracker); /** * Populates the object members from the data in the input binary buffer. 
diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index 139cca4ecac2..0aca71521d9c 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -37,10 +37,12 @@ #include "tiledb/common/blank.h" #include "tiledb/common/heap_memory.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/enums/datatype.h" #include "tiledb/sm/enums/layout.h" #include "tiledb/sm/misc/tdb_math.h" #include "tiledb/sm/misc/utils.h" +#include "tiledb/type/apply_with_type.h" #include "tiledb/type/range/range.h" #include @@ -57,7 +59,13 @@ namespace tiledb::sm { /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ -Domain::Domain() { +Domain::Domain(shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , dimensions_(memory_tracker_->get_resource(MemoryType::DIMENSIONS)) + , dimension_ptrs_(memory_tracker_->get_resource(MemoryType::DIMENSIONS)) + , cell_order_cmp_func_(memory_tracker_->get_resource(MemoryType::DOMAINS)) + , cell_order_cmp_func_2_(memory_tracker_->get_resource(MemoryType::DOMAINS)) + , tile_order_cmp_func_(memory_tracker_->get_resource(MemoryType::DOMAINS)) { cell_order_ = Layout::ROW_MAJOR; tile_order_ = Layout::ROW_MAJOR; dim_num_ = 0; @@ -67,11 +75,20 @@ Domain::Domain() { Domain::Domain( Layout cell_order, const std::vector> dimensions, - Layout tile_order) - : cell_order_(cell_order) - , dimensions_(dimensions) + Layout tile_order, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , cell_order_(cell_order) + , dimensions_( + dimensions.begin(), + dimensions.end(), + memory_tracker_->get_resource(MemoryType::DIMENSIONS)) + , dimension_ptrs_(memory_tracker_->get_resource(MemoryType::DIMENSIONS)) , dim_num_(static_cast(dimensions.size())) - , tile_order_(tile_order) { + , tile_order_(tile_order) + , cell_order_cmp_func_(memory_tracker_->get_resource(MemoryType::DOMAINS)) + , 
cell_order_cmp_func_2_(memory_tracker_->get_resource(MemoryType::DOMAINS)) + , tile_order_cmp_func_(memory_tracker_->get_resource(MemoryType::DOMAINS)) { /* * Verify that the input vector has no non-null elements in order to meet the * class invariant. Initialize the dimensions mirror. @@ -92,32 +109,6 @@ Domain::Domain( set_tile_cell_order_cmp_funcs(); } -Domain::Domain(Domain&& rhs) - : cell_num_per_tile_(rhs.cell_num_per_tile_) - , cell_order_(rhs.cell_order_) - , dimensions_(move(rhs.dimensions_)) - , dimension_ptrs_(move(rhs.dimension_ptrs_)) - , dim_num_(rhs.dim_num_) - , tile_order_(rhs.tile_order_) - , cell_order_cmp_func_(move(rhs.cell_order_cmp_func_)) - , cell_order_cmp_func_2_(move(rhs.cell_order_cmp_func_2_)) - , tile_order_cmp_func_(move(rhs.tile_order_cmp_func_)) { -} - -Domain& Domain::operator=(Domain&& rhs) { - cell_num_per_tile_ = rhs.cell_num_per_tile_; - cell_order_ = rhs.cell_order_; - dim_num_ = rhs.dim_num_; - cell_order_cmp_func_ = move(rhs.cell_order_cmp_func_); - tile_order_cmp_func_ = move(rhs.tile_order_cmp_func_); - dimensions_ = move(rhs.dimensions_); - dimension_ptrs_ = move(rhs.dimension_ptrs_); - tile_order_ = rhs.tile_order_; - cell_order_cmp_func_2_ = move(rhs.cell_order_cmp_func_2_); - - return *this; -} - /* ********************************* */ /* API */ /* ********************************* */ @@ -312,8 +303,19 @@ int Domain::cell_order_cmp( } void Domain::crop_ndrange(NDRange* ndrange) const { - for (unsigned d = 0; d < dim_num_; ++d) - dimension_ptrs_[d]->crop_range(&(*ndrange)[d]); + for (unsigned d = 0; d < dim_num_; ++d) { + auto type = dimension_ptrs_[d]->type(); + auto g = [&](auto T) { + if constexpr (tiledb::type::TileDBIntegral) { + tiledb::type::crop_range( + dimension_ptrs_[d]->domain(), (*ndrange)[d]); + } else { + throw std::invalid_argument( + "Unsupported dimension datatype " + datatype_str(type)); + } + }; + apply_with_type(g, type); + } } shared_ptr Domain::deserialize( @@ -321,7 +323,8 @@ shared_ptr 
Domain::deserialize( uint32_t version, Layout cell_order, Layout tile_order, - FilterPipeline& coords_filters) { + FilterPipeline& coords_filters, + shared_ptr memory_tracker) { Status st; // Load type Datatype type = Datatype::INT32; @@ -333,13 +336,13 @@ shared_ptr Domain::deserialize( std::vector> dimensions; auto dim_num = deserializer.read(); for (uint32_t i = 0; i < dim_num; ++i) { - auto dim{ - Dimension::deserialize(deserializer, version, type, coords_filters)}; + auto dim{Dimension::deserialize( + deserializer, version, type, coords_filters, memory_tracker)}; dimensions.emplace_back(std::move(dim)); } return tiledb::common::make_shared( - HERE(), cell_order, dimensions, tile_order); + HERE(), cell_order, dimensions, tile_order, memory_tracker); } const Range& Domain::domain(unsigned i) const { @@ -529,16 +532,14 @@ Status Domain::has_dimension(const std::string& name, bool* has_dim) const { return Status::Ok(); } -Status Domain::get_dimension_index( - const std::string& name, unsigned* dim_idx) const { +unsigned Domain::get_dimension_index(const std::string& name) const { for (unsigned d = 0; d < dim_num_; ++d) { if (dimension_ptrs_[d]->name() == name) { - *dim_idx = d; - return Status::Ok(); + return d; } } - return Status_DomainError( + throw std::invalid_argument( "Cannot get dimension index; Invalid dimension name"); } @@ -597,8 +598,9 @@ const ByteVecValue& Domain::tile_extent(unsigned i) const { std::vector Domain::tile_extents() const { std::vector ret(dim_num_); - for (unsigned d = 0; d < dim_num_; ++d) + for (unsigned d = 0; d < dim_num_; ++d) { ret[d] = tile_extent(d); + } return ret; } diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index 80e98dd0cbf6..856a97918727 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -35,6 +35,7 @@ #include "tiledb/common/common.h" #include "tiledb/common/macros.h" +#include "tiledb/common/pmr.h" #include "tiledb/common/status.h" #include 
"tiledb/common/types/dynamic_typed_datum.h" #include "tiledb/common/types/untyped_datum.h" @@ -58,6 +59,7 @@ class ConstBuffer; class Dimension; class DomainTypedDataView; class FilterPipeline; +class MemoryTracker; enum class Datatype : uint8_t; enum class Layout : uint8_t; @@ -74,20 +76,18 @@ class Domain { /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ - /** Empty constructor. */ - Domain(); + /** Deleted default constructor */ + Domain() = delete; + + /** Constructor. */ + Domain(shared_ptr memory_tracker); /** Constructor.*/ Domain( Layout cell_order, const std::vector> dimensions, - Layout tile_order); - - /** Copy constructor. */ - Domain(const Domain&) = default; - - /** Move constructor. */ - Domain(Domain&& rhs); + Layout tile_order, + shared_ptr memory_tracker); /** Destructor. */ ~Domain() = default; @@ -96,11 +96,8 @@ class Domain { /* OPERATORS */ /* ********************************* */ - /** Copy-assignment operator. */ - DISABLE_COPY_ASSIGN(Domain); - - /** Move-assignment operator. */ - Domain& operator=(Domain&& rhs); + DISABLE_COPY_AND_COPY_ASSIGN(Domain); + DISABLE_MOVE_AND_MOVE_ASSIGN(Domain); /* ********************************* */ /* API */ @@ -186,6 +183,7 @@ class Domain { * @param cell_order Cell order. * @param tile_order Tile order. * @param coords_filters Coords filters to replace empty coords pipelines. + * @param memory_tracker The memory tracker to use. * @return Status and Domain */ static shared_ptr deserialize( @@ -193,7 +191,8 @@ class Domain { uint32_t version, Layout cell_order, Layout tile_order, - FilterPipeline& coords_filters); + FilterPipeline& coords_filters, + shared_ptr memory_tracker); /** Returns the cell order. 
*/ Layout cell_order() const; @@ -404,10 +403,9 @@ class Domain { * Gets the index in the domain of a given dimension name * * @param name Name of dimension to check for - * @param dim_idx The index of this dimension in the domain - * @return Status + * @return Dimension index */ - Status get_dimension_index(const std::string& name, unsigned* dim_idx) const; + unsigned get_dimension_index(const std::string& name) const; /** Returns true if at least one dimension has null tile extent. */ bool null_tile_extents() const; @@ -505,6 +503,9 @@ class Domain { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The memory tracker for this Domain. */ + shared_ptr memory_tracker_; + /** The number of cells per tile. Meaningful only for the **dense** case. */ uint64_t cell_num_per_tile_; @@ -520,7 +521,7 @@ class Domain { * * @invariant All pointers in the vector are non-null. */ - std::vector> dimensions_; + tdb::pmr::vector> dimensions_; /** * Non-allocating mirror of the dimensions vector. @@ -532,7 +533,7 @@ class Domain { * * @invariant All pointers in the vector are non-null. */ - std::vector dimension_ptrs_; + tdb::pmr::vector dimension_ptrs_; /** The number of dimensions. */ unsigned dim_num_; @@ -548,7 +549,7 @@ class Domain { * - buff: The buffer that stores all coorinates; * - a, b: The positions of the two coordinates in the buffer to compare. */ - std::vector cell_order_cmp_func_; @@ -558,7 +559,7 @@ class Domain { * * - coord_a, coord_b: The two coordinates to compare. */ - std::vector + tdb::pmr::vector cell_order_cmp_func_2_; /** @@ -568,7 +569,7 @@ class Domain { * - dim: The dimension to compare on. * - coord_a, coord_b: The two coordinates to compare. 
*/ - std::vector tile_order_cmp_func_; diff --git a/tiledb/sm/array_schema/enumeration.cc b/tiledb/sm/array_schema/enumeration.cc index 3079e8b03d99..dabe1818a3a0 100644 --- a/tiledb/sm/array_schema/enumeration.cc +++ b/tiledb/sm/array_schema/enumeration.cc @@ -33,7 +33,8 @@ #include #include -#include "tiledb/common/random/random_label.h" +#include "tiledb/common/memory_tracker.h" +#include "tiledb/sm/misc/uuid.h" #include "enumeration.h" @@ -56,14 +57,17 @@ Enumeration::Enumeration( const void* data, uint64_t data_size, const void* offsets, - uint64_t offsets_size) - : name_(name) + uint64_t offsets_size, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , name_(name) , path_name_(path_name) , type_(type) , cell_val_num_(cell_val_num) , ordered_(ordered) , data_(data_size) - , offsets_(offsets_size) { + , offsets_(offsets_size) + , value_map_(memory_tracker_->get_resource(MemoryType::ENUMERATION)) { ensure_datatype_is_valid(type); if (name.empty()) { @@ -71,8 +75,10 @@ Enumeration::Enumeration( } if (path_name_.empty()) { - path_name_ = "__" + tiledb::common::random_label() + "_" + - std::to_string(constants::enumerations_version); + std::string tmp_uuid; + throw_if_not_ok(uuid::generate_uuid(&tmp_uuid, false)); + path_name_ = + "__" + tmp_uuid + "_" + std::to_string(constants::enumerations_version); } if (path_name.find("/") != std::string::npos) { @@ -176,7 +182,7 @@ Enumeration::Enumeration( } shared_ptr Enumeration::deserialize( - Deserializer& deserializer) { + Deserializer& deserializer, shared_ptr memory_tracker) { auto disk_version = deserializer.read(); if (disk_version > constants::enumerations_version) { throw EnumerationException( @@ -222,7 +228,8 @@ shared_ptr Enumeration::deserialize( data, data_size, offsets, - offsets_size); + offsets_size, + memory_tracker); } shared_ptr Enumeration::extend( @@ -302,7 +309,8 @@ shared_ptr Enumeration::extend( new_data.data(), new_data.size(), new_offsets_ptr, - new_offsets_size); + 
new_offsets_size, + memory_tracker_); } bool Enumeration::is_extension_of(shared_ptr other) const { diff --git a/tiledb/sm/array_schema/enumeration.h b/tiledb/sm/array_schema/enumeration.h index 82e37ce78159..3df183d65907 100644 --- a/tiledb/sm/array_schema/enumeration.h +++ b/tiledb/sm/array_schema/enumeration.h @@ -36,6 +36,7 @@ #include #include "tiledb/common/common.h" +#include "tiledb/common/pmr.h" #include "tiledb/common/types/untyped_datum.h" #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/enums/datatype.h" @@ -43,6 +44,8 @@ namespace tiledb::sm { +class MemoryTracker; + /** Defines an array enumeration */ class Enumeration { public: @@ -84,6 +87,7 @@ class Enumeration { * offsets buffer. Must be null if cell_var_num is not var_num. * @param offsets_size The size of the buffer pointed to by offsets. Must be * zero of cell_var_num is not var_num. + * @param memory_tracker The memory tracker associated with this Enumeration. * @return shared_ptr The created enumeration. */ static shared_ptr create( @@ -94,7 +98,8 @@ class Enumeration { const void* data, uint64_t data_size, const void* offsets, - uint64_t offsets_size) { + uint64_t offsets_size, + shared_ptr memory_tracker) { return create( name, "", @@ -104,7 +109,8 @@ class Enumeration { data, data_size, offsets, - offsets_size); + offsets_size, + memory_tracker); } /** Create a new Enumeration @@ -122,6 +128,7 @@ class Enumeration { * offsets buffer. Must be null if cell_var_num is not var_num. * @param offsets_size The size of the buffer pointed to by offsets. Must be * zero of cell_var_num is not var_num. + * @param memory_tracker The memory tracker associated with this Enumeration. * @return shared_ptr The created enumeration. 
*/ static shared_ptr create( @@ -133,7 +140,8 @@ class Enumeration { const void* data, uint64_t data_size, const void* offsets, - uint64_t offsets_size) { + uint64_t offsets_size, + shared_ptr memory_tracker) { struct EnableMakeShared : public Enumeration { EnableMakeShared( const std::string& name, @@ -144,7 +152,8 @@ class Enumeration { const void* data, uint64_t data_size, const void* offsets, - uint64_t offsets_size) + uint64_t offsets_size, + shared_ptr memory_tracker) : Enumeration( name, path_name, @@ -154,7 +163,8 @@ class Enumeration { data, data_size, offsets, - offsets_size) { + offsets_size, + memory_tracker) { } }; return make_shared( @@ -167,16 +177,19 @@ class Enumeration { data, data_size, offsets, - offsets_size); + offsets_size, + memory_tracker); } /** * Deserialize an enumeration * * @param deserializer The deserializer to deserialize from. + * @param memory_tracker The memory tracker associated with this Enumeration. * @return A new Enumeration. */ - static shared_ptr deserialize(Deserializer& deserializer); + static shared_ptr deserialize( + Deserializer& deserializer, shared_ptr memory_tracker); /** * Create a new enumeration by extending an existing enumeration's @@ -306,7 +319,8 @@ class Enumeration { size of the buffer pointed to. */ span data() const { - return {static_cast(data_.data()), data_.size()}; + return { + static_cast(data_.data()), static_cast(data_.size())}; } /** @@ -316,7 +330,9 @@ class Enumeration { * the size of the buffer pointed to. */ span offsets() const { - return {static_cast(offsets_.data()), offsets_.size()}; + return { + static_cast(offsets_.data()), + static_cast(offsets_.size())}; } /** @@ -356,6 +372,7 @@ class Enumeration { * offsets buffer. Must be null if cell_var_num is not var_num. * @param offsets_size The size of the buffer pointed to by offsets. Must be * zero of cell_var_num is not var_num. + * @param memory_tracker The memory tracker. 
*/ Enumeration( const std::string& name, @@ -366,12 +383,18 @@ class Enumeration { const void* data, uint64_t data_size, const void* offsets, - uint64_t offsets_size); + uint64_t offsets_size, + shared_ptr memory_tracker); /* ********************************* */ /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** + * The memory tracker of the Enumeration. + */ + shared_ptr memory_tracker_; + /** The name of this Enumeration stored in the enumerations directory. */ std::string name_; @@ -394,7 +417,7 @@ class Enumeration { Buffer offsets_; /** Map of values to indices */ - std::unordered_map value_map_; + tdb::pmr::unordered_map value_map_; /* ********************************* */ /* PRIVATE METHODS */ diff --git a/tiledb/sm/array_schema/test/CMakeLists.txt b/tiledb/sm/array_schema/test/CMakeLists.txt index e8a03ec32b94..efb004a35ee4 100644 --- a/tiledb/sm/array_schema/test/CMakeLists.txt +++ b/tiledb/sm/array_schema/test/CMakeLists.txt @@ -38,4 +38,5 @@ commence(unit_test array_schema) unit_domain_data.cc unit_tile_domain.cc ) + this_target_link_libraries(tiledb_test_support_lib) conclude(unit_test) diff --git a/tiledb/sm/array_schema/test/array_schema_test_support.h b/tiledb/sm/array_schema/test/array_schema_test_support.h index 5728a96b4923..2c751fe1bfd0 100644 --- a/tiledb/sm/array_schema/test/array_schema_test_support.h +++ b/tiledb/sm/array_schema/test/array_schema_test_support.h @@ -72,6 +72,7 @@ #include #include +#include "test/support/src/mem_helpers.h" #include "tiledb/common/common.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/attribute.h" @@ -180,6 +181,8 @@ class TestFilterPipeline {}; * Dimension wrapper */ class TestDimension { + shared_ptr memory_tracker_; + shared_ptr d_; public: @@ -188,15 +191,16 @@ class TestDimension { * or empty defaults for everything else about it. 
*/ TestDimension(const std::string& name, Datatype type) - : d_{make_shared( + : memory_tracker_(tiledb::test::create_test_memory_tracker()) + , d_{make_shared( HERE(), name, type, 1, // cell_val_num default_range(type), // domain FilterPipeline{}, - default_tile_extent(type) // fill value - )} {}; + default_tile_extent(type), // fill value + memory_tracker_)} {}; /** * Accessor copies the underlying object. @@ -235,6 +239,8 @@ class TestAttribute { * Array Schema wrapper */ class TestArraySchema { + shared_ptr memory_tracker_; + ArraySchema schema_; /** @@ -259,9 +265,14 @@ class TestArraySchema { static shared_ptr make_domain( std::initializer_list dimensions, Layout cell_order, - Layout tile_order) { + Layout tile_order, + shared_ptr memory_tracker) { return make_shared( - HERE(), cell_order, make_dimension_vector(dimensions), tile_order); + HERE(), + cell_order, + make_dimension_vector(dimensions), + tile_order, + memory_tracker); } /** @@ -303,7 +314,11 @@ class TestArraySchema { "", // name array_type, false, // allow duplicates - make_domain(dimensions, cell_order, tile_order), + make_domain( + dimensions, + cell_order, + tile_order, + tiledb::test::create_test_memory_tracker()), cell_order, tile_order, 10000, // capacity @@ -313,7 +328,8 @@ class TestArraySchema { {}, // the second enumeration thing FilterPipeline(), FilterPipeline(), - FilterPipeline()) { + FilterPipeline(), + tiledb::test::create_test_memory_tracker()) { } /** diff --git a/tiledb/sm/array_schema/test/compile_enumeration_main.cc b/tiledb/sm/array_schema/test/compile_enumeration_main.cc index d35fa1cca83c..e8ce6f2ae9c6 100644 --- a/tiledb/sm/array_schema/test/compile_enumeration_main.cc +++ b/tiledb/sm/array_schema/test/compile_enumeration_main.cc @@ -32,7 +32,15 @@ int main(int, char*[]) { try { tiledb::sm::Enumeration::create( - "foo", tiledb::sm::Datatype::INT32, 1, false, nullptr, 0, nullptr, 0); + "foo", + tiledb::sm::Datatype::INT32, + 1, + false, + nullptr, + 0, + nullptr, + 0, + 
nullptr); } catch (...) { } return 0; diff --git a/tiledb/sm/array_schema/test/unit_dimension.cc b/tiledb/sm/array_schema/test/unit_dimension.cc index 5ae3e3f12737..206f84168775 100644 --- a/tiledb/sm/array_schema/test/unit_dimension.cc +++ b/tiledb/sm/array_schema/test/unit_dimension.cc @@ -32,6 +32,7 @@ #include #include "../dimension.h" +#include "test/support/src/mem_helpers.h" #include "tiledb/sm/enums/datatype.h" using namespace tiledb; @@ -101,7 +102,8 @@ inline T& dim_buffer_offset(void* p) { } TEST_CASE("Dimension::Dimension") { - Dimension x{"", Datatype::UINT32}; + auto memory_tracker = tiledb::test::get_test_memory_tracker(); + Dimension x{"", Datatype::UINT32, memory_tracker}; } TEST_CASE("Dimension: Test deserialize,int32", "[dimension][deserialize]") { @@ -135,8 +137,10 @@ TEST_CASE("Dimension: Test deserialize,int32", "[dimension][deserialize]") { dim_buffer_offset(p) = tile_extent; Deserializer deserializer(&serialized_buffer, sizeof(serialized_buffer)); + auto memory_tracker = tiledb::test::get_test_memory_tracker(); FilterPipeline fp; - auto dim = Dimension::deserialize(deserializer, 10, Datatype::INT32, fp); + auto dim = Dimension::deserialize( + deserializer, 10, Datatype::INT32, fp, memory_tracker); // Check name CHECK(dim->name() == dimension_name); @@ -175,8 +179,10 @@ TEST_CASE("Dimension: Test deserialize,string", "[dimension][deserialize]") { dim_buffer_offset(p) = null_tile_extent; Deserializer deserializer(&serialized_buffer, sizeof(serialized_buffer)); + auto memory_tracker = tiledb::test::get_test_memory_tracker(); FilterPipeline fp; - auto dim = Dimension::deserialize(deserializer, 10, Datatype::INT32, fp); + auto dim = Dimension::deserialize( + deserializer, 10, Datatype::INT32, fp, memory_tracker); // Check name CHECK(dim->name() == dimension_name); // Check type @@ -187,6 +193,7 @@ TEST_CASE("Dimension: Test deserialize,string", "[dimension][deserialize]") { TEST_CASE("Dimension: Test datatypes", "[dimension][datatypes]") { 
std::string dim_name = "dim"; + auto memory_tracker = tiledb::test::get_test_memory_tracker(); SECTION("- valid and supported Datatypes") { std::vector valid_supported_datatypes = { @@ -210,7 +217,7 @@ TEST_CASE("Dimension: Test datatypes", "[dimension][datatypes]") { for (Datatype type : valid_supported_datatypes) { try { - Dimension dim{dim_name, type}; + Dimension dim{dim_name, type, memory_tracker}; } catch (...) { throw std::logic_error("Uncaught exception in Dimension constructor"); } @@ -233,7 +240,7 @@ TEST_CASE("Dimension: Test datatypes", "[dimension][datatypes]") { for (Datatype type : valid_unsupported_datatypes) { try { - Dimension dim{dim_name, type}; + Dimension dim{dim_name, type, memory_tracker}; } catch (std::exception& e) { CHECK( e.what() == "Datatype::" + datatype_str(type) + @@ -248,7 +255,7 @@ TEST_CASE("Dimension: Test datatypes", "[dimension][datatypes]") { for (auto type : invalid_datatypes) { try { - Dimension dim{dim_name, Datatype(type)}; + Dimension dim{dim_name, Datatype(type), memory_tracker}; } catch (std::exception& e) { CHECK( std::string(e.what()) == @@ -342,7 +349,8 @@ TEMPLATE_LIST_TEST_CASE( } void check_relevant_ranges( - std::vector& relevant_ranges, std::vector& expected) { + tdb::pmr::vector& relevant_ranges, + std::vector& expected) { CHECK(relevant_ranges.size() == expected.size()); for (uint64_t r = 0; r < expected.size(); r++) { CHECK(relevant_ranges[r] == expected[r]); @@ -363,7 +371,8 @@ TEMPLATE_LIST_TEST_CASE( "test relevant_ranges", "[dimension][relevant_ranges][fixed]", FixedTypes) { typedef TestType T; auto tiledb_type = type_to_datatype().datatype; - Dimension dim{"", tiledb_type}; + auto memory_tracker = tiledb::test::get_test_memory_tracker(); + Dimension dim{"", tiledb_type, memory_tracker}; std::vector range_data = { 1, 1, 1, 1, 2, 2, 3, 4, 5, 6, 5, 7, 8, 9, 50, 56}; @@ -380,14 +389,16 @@ TEMPLATE_LIST_TEST_CASE( for (uint64_t i = 0; i < mbr_data.size(); i++) { Range mbr(mbr_data[i].data(), 2 * sizeof(T)); 
- std::vector relevant_ranges; + tdb::pmr::vector relevant_ranges( + memory_tracker->get_resource(MemoryType::DIMENSIONS)); dim.relevant_ranges(ranges, mbr, relevant_ranges); check_relevant_ranges(relevant_ranges, expected[i]); } } TEST_CASE("test relevant_ranges", "[dimension][relevant_ranges][string]") { - Dimension dim{"", Datatype::STRING_ASCII}; + auto memory_tracker = tiledb::test::get_test_memory_tracker(); + Dimension dim{"", Datatype::STRING_ASCII, memory_tracker}; std::vector range_data = { 'a', @@ -420,14 +431,16 @@ TEST_CASE("test relevant_ranges", "[dimension][relevant_ranges][string]") { for (uint64_t i = 0; i < mbr_data.size(); i++) { Range mbr(mbr_data[i].data(), 2, 1); - std::vector relevant_ranges; + tdb::pmr::vector relevant_ranges( + memory_tracker->get_resource(MemoryType::DIMENSIONS)); dim.relevant_ranges(ranges, mbr, relevant_ranges); check_relevant_ranges(relevant_ranges, expected[i]); } } TEST_CASE("Dimension::oob format") { - Dimension d("X", Datatype::FLOAT64); + auto memory_tracker = tiledb::test::get_test_memory_tracker(); + Dimension d("X", Datatype::FLOAT64, memory_tracker); double d_dom[2]{-682.73999, 929.42999}; REQUIRE(d.set_domain(Range(&d_dom, sizeof(d_dom))).ok()); double x{-682.75}; diff --git a/tiledb/sm/array_schema/test/unit_domain_data.cc b/tiledb/sm/array_schema/test/unit_domain_data.cc index e5b9968b7034..b26174acefab 100644 --- a/tiledb/sm/array_schema/test/unit_domain_data.cc +++ b/tiledb/sm/array_schema/test/unit_domain_data.cc @@ -33,6 +33,8 @@ #include #include "../domain_typed_data_view.h" #include "../dynamic_array.h" +#include "src/mem_helpers.h" + /* * Instantiating the class `Domain` requires a full definition of `Dimension` so * that its destructor is visible. 
The need to include this header indicates @@ -109,10 +111,14 @@ struct TestNullInitializer { }; TEST_CASE("DomainTypedDataView::DomainTypedDataView, null initializer") { - Domain d{}; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + Domain d{memory_tracker}; // tiledb::sm::Dimension dim{"", tiledb::sm::Datatype::INT32}; auto dim{make_shared( - HERE(), "", tiledb::sm::Datatype::INT32)}; + HERE(), + "", + tiledb::sm::Datatype::INT32, + tiledb::test::get_test_memory_tracker())}; CHECK(d.add_dimension(dim).ok()); CHECK(d.add_dimension(dim).ok()); CHECK(d.add_dimension(dim).ok()); @@ -131,9 +137,13 @@ TEST_CASE("DomainTypedDataView::DomainTypedDataView, simple initializer") { } }; - Domain d{}; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + Domain d{memory_tracker}; auto dim{make_shared( - HERE(), "", tiledb::sm::Datatype::INT32)}; + HERE(), + "", + tiledb::sm::Datatype::INT32, + tiledb::test::get_test_memory_tracker())}; CHECK(d.add_dimension(dim).ok()); CHECK(d.add_dimension(dim).ok()); CHECK(d.add_dimension(dim).ok()); diff --git a/tiledb/sm/array_schema/test/unit_tile_domain.cc b/tiledb/sm/array_schema/test/unit_tile_domain.cc index 771c27726879..95676b338984 100644 --- a/tiledb/sm/array_schema/test/unit_tile_domain.cc +++ b/tiledb/sm/array_schema/test/unit_tile_domain.cc @@ -31,12 +31,14 @@ */ #include +#include "src/mem_helpers.h" #include "tiledb/sm/array_schema/tile_domain.h" using namespace tiledb::sm; TEST_CASE("TileDomain: Test 1D", "[TileDomain][1d]") { int32_t tile_extent_v = 10; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); std::vector tile_extents(1); tile_extents[0].assign_as(tile_extent_v); Layout layout = Layout::ROW_MAJOR; @@ -72,6 +74,7 @@ TEST_CASE( std::vector domain_vec = {1, 10, 1, 10}; std::vector domain_slice = {1, 10, 1, 10}; std::vector tile_extents_vec = {2, 5}; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); std::vector tile_extents(2); 
tile_extents[0].assign_as(tile_extents_vec[0]); tile_extents[1].assign_as(tile_extents_vec[1]); @@ -115,6 +118,7 @@ TEST_CASE( std::vector domain_vec = {1, 10, 1, 10}; std::vector domain_slice = {4, 10, 2, 8}; std::vector tile_extents_vec = {2, 5}; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); std::vector tile_extents(2); tile_extents[0].assign_as(tile_extents_vec[0]); tile_extents[1].assign_as(tile_extents_vec[1]); @@ -153,6 +157,7 @@ TEST_CASE( std::vector domain_vec = {1, 10, 1, 10}; std::vector domain_slice = {1, 10, 1, 10}; std::vector tile_extents_vec = {2, 5}; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); std::vector tile_extents(2); tile_extents[0].assign_as(tile_extents_vec[0]); tile_extents[1].assign_as(tile_extents_vec[1]); @@ -191,6 +196,7 @@ TEST_CASE( std::vector domain_vec = {1, 10, 1, 10}; std::vector domain_slice = {4, 10, 2, 8}; std::vector tile_extents_vec = {2, 5}; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); std::vector tile_extents(2); tile_extents[0].assign_as(tile_extents_vec[0]); tile_extents[1].assign_as(tile_extents_vec[1]); @@ -228,6 +234,7 @@ TEST_CASE( std::vector domain_vec = {1, 10, 11, 20}; std::vector domain_slice = {4, 10, 12, 18}; std::vector tile_extents_vec = {2, 5}; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); std::vector tile_extents(2); tile_extents[0].assign_as(tile_extents_vec[0]); tile_extents[1].assign_as(tile_extents_vec[1]); @@ -260,6 +267,7 @@ TEST_CASE( std::vector domain_vec = {1, 10, 11, 20}; std::vector domain_slice = {2, 10, 12, 18}; std::vector tile_extents_vec = {2, 5}; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); std::vector tile_extents(2); tile_extents[0].assign_as(tile_extents_vec[0]); tile_extents[1].assign_as(tile_extents_vec[1]); @@ -297,6 +305,7 @@ TEST_CASE( std::vector domain_vec = {1, 10, 11, 20}; std::vector domain_slice = {2, 10, 12, 18}; std::vector tile_extents_vec = {2, 5}; + auto 
memory_tracker = tiledb::test::create_test_memory_tracker(); std::vector tile_extents(2); tile_extents[0].assign_as(tile_extents_vec[0]); tile_extents[1].assign_as(tile_extents_vec[1]); @@ -323,6 +332,7 @@ TEST_CASE("TileDomain: Test 2D, covers", "[TileDomain][2d][covers]") { std::vector domain_slice_1 = {2, 6, 2, 8}; std::vector domain_slice_2 = {3, 6, 1, 7}; std::vector tile_extents_vec = {2, 5}; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); std::vector tile_extents(2); tile_extents[0].assign_as(tile_extents_vec[0]); tile_extents[1].assign_as(tile_extents_vec[1]); diff --git a/tiledb/sm/array_schema/tile_domain.h b/tiledb/sm/array_schema/tile_domain.h index a89e498e9af3..1cc96404a231 100644 --- a/tiledb/sm/array_schema/tile_domain.h +++ b/tiledb/sm/array_schema/tile_domain.h @@ -36,6 +36,7 @@ #include #include +#include "tiledb/common/pmr.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/enums/layout.h" #include "tiledb/sm/misc/types.h" @@ -81,13 +82,13 @@ class TileDomain { unsigned id, const NDRange& domain, const NDRange& domain_slice, - const std::vector tile_extents, + const std::vector& tile_extents, Layout layout) : id_(id) , dim_num_((unsigned)domain.size()) , domain_(domain) , domain_slice_(domain_slice) - , tile_extents_(tile_extents) + , tile_extents_(tile_extents.begin(), tile_extents.end()) , layout_(layout) { assert(layout == Layout::ROW_MAJOR || layout == Layout::COL_MAJOR); compute_tile_domain(domain, domain_slice, tile_extents); diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index 7bafd132752c..c8adf5ebd270 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. 
* @copyright Copyright (c) 2016 MIT and Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -52,6 +52,7 @@ #include "tiledb/common/dynamic_memory/dynamic_memory.h" #include "tiledb/common/heap_profiler.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/dimension_label.h" @@ -257,8 +258,10 @@ int32_t tiledb_array_schema_alloc( } // Create a new ArraySchema object + auto memory_tracker = ctx->context().resources().create_memory_tracker(); + memory_tracker->set_type(sm::MemoryTrackerType::ARRAY_CREATE); (*array_schema)->array_schema_ = make_shared( - HERE(), static_cast(array_type)); + HERE(), static_cast(array_type), memory_tracker); if ((*array_schema)->array_schema_ == nullptr) { auto st = Status_Error("Failed to allocate TileDB array schema object"); LOG_STATUS_NO_RETURN_VALUE(st); @@ -512,8 +515,11 @@ int32_t tiledb_array_schema_load( return TILEDB_ERR; } + auto tracker = storage_manager->resources().ephemeral_memory_tracker(); + // Load latest array schema - auto&& array_schema_latest = array_dir->load_array_schema_latest(key); + auto&& array_schema_latest = + array_dir->load_array_schema_latest(key, tracker); (*array_schema)->array_schema_ = array_schema_latest; } return TILEDB_OK; @@ -603,8 +609,11 @@ int32_t tiledb_array_schema_load_with_key( return TILEDB_ERR; } + auto tracker = storage_manager->resources().ephemeral_memory_tracker(); + // Load latest array schema - auto&& array_schema_latest = array_dir->load_array_schema_latest(key); + auto&& array_schema_latest = + array_dir->load_array_schema_latest(key, tracker); (*array_schema)->array_schema_ = array_schema_latest; } return TILEDB_OK; @@ -820,8 +829,10 @@ int32_t tiledb_array_schema_evolution_alloc( } // Create a new SchemaEvolution object + auto memory_tracker = 
ctx->context().resources().create_memory_tracker(); + memory_tracker->set_type(sm::MemoryTrackerType::SCHEMA_EVOLUTION); (*array_schema_evolution)->array_schema_evolution_ = - new (std::nothrow) tiledb::sm::ArraySchemaEvolution(); + new (std::nothrow) tiledb::sm::ArraySchemaEvolution(memory_tracker); if ((*array_schema_evolution)->array_schema_evolution_ == nullptr) { delete *array_schema_evolution; *array_schema_evolution = nullptr; @@ -3438,13 +3449,16 @@ int32_t tiledb_deserialize_array( return TILEDB_OOM; } + auto memory_tracker = ctx->context().resources().create_memory_tracker(); + memory_tracker->set_type(sm::MemoryTrackerType::ARRAY_LOAD); if (SAVE_ERROR_CATCH( ctx, tiledb::sm::serialization::array_deserialize( (*array)->array_.get(), (tiledb::sm::SerializationType)serialize_type, buffer->buffer(), - ctx->storage_manager()))) { + ctx->storage_manager(), + memory_tracker))) { delete *array; *array = nullptr; return TILEDB_ERR; @@ -3500,10 +3514,13 @@ int32_t tiledb_deserialize_array_schema( } try { - (*array_schema)->array_schema_ = make_shared( - HERE(), + auto memory_tracker = ctx->context().resources().create_memory_tracker(); + memory_tracker->set_type(sm::MemoryTrackerType::ARRAY_LOAD); + (*array_schema)->array_schema_ = tiledb::sm::serialization::array_schema_deserialize( - (tiledb::sm::SerializationType)serialize_type, buffer->buffer())); + (tiledb::sm::SerializationType)serialize_type, + buffer->buffer(), + memory_tracker); } catch (...) 
{ delete *array_schema; *array_schema = nullptr; @@ -3650,12 +3667,15 @@ int32_t tiledb_deserialize_array_schema_evolution( return TILEDB_OOM; } + auto memory_tracker = ctx->context().resources().create_memory_tracker(); + memory_tracker->set_type(sm::MemoryTrackerType::SCHEMA_EVOLUTION); if (SAVE_ERROR_CATCH( ctx, tiledb::sm::serialization::array_schema_evolution_deserialize( &((*array_schema_evolution)->array_schema_evolution_), (tiledb::sm::SerializationType)serialize_type, - buffer->buffer()))) { + buffer->buffer(), + memory_tracker))) { delete *array_schema_evolution; *array_schema_evolution = nullptr; return TILEDB_ERR; @@ -3766,11 +3786,14 @@ int32_t tiledb_deserialize_query_and_array( } // First deserialize the array included in the query + auto memory_tracker = ctx->resources().create_memory_tracker(); + memory_tracker->set_type(tiledb::sm::MemoryTrackerType::ARRAY_LOAD); throw_if_not_ok(tiledb::sm::serialization::array_from_query_deserialize( buffer->buffer(), (tiledb::sm::SerializationType)serialize_type, *(*array)->array_, - ctx->storage_manager())); + ctx->storage_manager(), + memory_tracker)); // Create query struct *query = new (std::nothrow) tiledb_query_t; @@ -4007,9 +4030,15 @@ int32_t tiledb_serialize_array_metadata( auto buf = tiledb_buffer_handle_t::make_handle(); // Get metadata to serialize, this will load it if it does not exist - tiledb::sm::Metadata* metadata; - if (SAVE_ERROR_CATCH(ctx, array->array_->metadata(&metadata))) { - tiledb_buffer_handle_t::break_handle(buf); + sm::Metadata* metadata = nullptr; + try { + metadata = &array->array_->metadata(); + } catch (StatusException& e) { + auto st = Status_Error(e.what()); + LOG_STATUS_NO_RETURN_VALUE(st); + save_error(ctx, st); + // Free the handle allocated above; it is not returned on error. + tiledb_buffer_handle_t::break_handle(buf); return TILEDB_ERR; } @@ -4271,13 +4300,16 @@ int32_t tiledb_deserialize_fragment_info( return TILEDB_ERR; } + auto memory_tracker = ctx->context().resources().create_memory_tracker(); + memory_tracker->set_type(sm::MemoryTrackerType::FRAGMENT_INFO_LOAD); if 
(SAVE_ERROR_CATCH( ctx, tiledb::sm::serialization::fragment_info_deserialize( fragment_info->fragment_info_, (tiledb::sm::SerializationType)serialize_type, uri, - buffer->buffer()))) { + buffer->buffer(), + memory_tracker))) { return TILEDB_ERR; } diff --git a/tiledb/sm/c_api/tiledb.h b/tiledb/sm/c_api/tiledb.h index 34b9cc66a1d9..39e5048a41bb 100644 --- a/tiledb/sm/c_api/tiledb.h +++ b/tiledb/sm/c_api/tiledb.h @@ -95,10 +95,22 @@ typedef enum { /** Query condition operator. */ typedef enum { -/** Helper macro for defining query condition operator enums. */ -#define TILEDB_QUERY_CONDITION_OP_ENUM(id) TILEDB_##id -#include "tiledb_enum.h" -#undef TILEDB_QUERY_CONDITION_OP_ENUM + /** Less-than operator */ + TILEDB_LT = 0, + /** Less-than-or-equal operator */ + TILEDB_LE = 1, + /** Greater-than operator */ + TILEDB_GT = 2, + /** Greater-than-or-equal operator */ + TILEDB_GE = 3, + /** Equal operator */ + TILEDB_EQ = 4, + /** Not-equal operator */ + TILEDB_NE = 5, + /** IN set membership operator. */ + TILEDB_IN = 6, + /** NOT IN set membership operator. */ + TILEDB_NOT_IN = 7, } tiledb_query_condition_op_t; /** Query condition combination operator. */ diff --git a/tiledb/sm/c_api/tiledb_enum.h b/tiledb/sm/c_api/tiledb_enum.h index a733847c8598..9fb10866c03e 100644 --- a/tiledb/sm/c_api/tiledb_enum.h +++ b/tiledb/sm/c_api/tiledb_enum.h @@ -82,6 +82,9 @@ TILEDB_QUERY_STATUS_DETAILS_ENUM(REASON_MEMORY_BUDGET) = 2, #endif +// This enumeration is special in that if you add enumeration entries here +// you have to manually add the new values in tiledb.h. This is to avoid +// exposing `TILEDB_ALWAYS_TRUE` and `TILEDB_ALWAYS_FALSE` in the public API. #ifdef TILEDB_QUERY_CONDITION_OP_ENUM /** Less-than operator */ TILEDB_QUERY_CONDITION_OP_ENUM(LT) = 0, @@ -99,6 +102,10 @@ TILEDB_QUERY_CONDITION_OP_ENUM(IN) = 6, /** NOT IN set membership operator. */ TILEDB_QUERY_CONDITION_OP_ENUM(NOT_IN) = 7, + /** ALWAYS TRUE operator. 
*/ + TILEDB_QUERY_CONDITION_OP_ENUM(ALWAYS_TRUE) = 253, + /** ALWAYS TRUE operator. */ + TILEDB_QUERY_CONDITION_OP_ENUM(ALWAYS_FALSE) = 254, #endif #ifdef TILEDB_QUERY_CONDITION_COMBINATION_OP_ENUM diff --git a/tiledb/sm/c_api/tiledb_filestore.cc b/tiledb/sm/c_api/tiledb_filestore.cc index 9c0a38342a51..f8c933659e01 100644 --- a/tiledb/sm/c_api/tiledb_filestore.cc +++ b/tiledb/sm/c_api/tiledb_filestore.cc @@ -36,6 +36,7 @@ #include "tiledb/api/c_api/dimension/dimension_api_internal.h" #include "tiledb/api/c_api_support/c_api_support.h" #include "tiledb/common/common.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/attribute.h" @@ -116,8 +117,10 @@ int32_t tiledb_filestore_schema_create( // All other calls for adding domains, attributes, etc // create copies of the underlying core objects from within // the cpp objects constructed here + auto memory_tracker = context.resources().create_memory_tracker(); + memory_tracker->set_type(sm::MemoryTrackerType::ARRAY_CREATE); (*array_schema)->array_schema_ = make_shared( - HERE(), tiledb::sm::ArrayType::DENSE); + HERE(), tiledb::sm::ArrayType::DENSE, memory_tracker); auto& schema = (*array_schema)->array_schema_; // Define the range of the dimension. 
@@ -136,9 +139,10 @@ int32_t tiledb_filestore_schema_create( 1, range_obj, tiledb::sm::FilterPipeline{}, - tiledb::sm::ByteVecValue(std::move(tile_extent_vec))); + tiledb::sm::ByteVecValue(std::move(tile_extent_vec)), + memory_tracker); - auto domain = make_shared(HERE()); + auto domain = make_shared(HERE(), memory_tracker); throw_if_not_ok(domain->add_dimension(dim)); auto attr = make_shared( diff --git a/tiledb/sm/c_api/tiledb_version.h b/tiledb/sm/c_api/tiledb_version.h index 91b07fb241d8..4b32ddb980b3 100644 --- a/tiledb/sm/c_api/tiledb_version.h +++ b/tiledb/sm/c_api/tiledb_version.h @@ -27,5 +27,5 @@ */ #define TILEDB_VERSION_MAJOR 2 -#define TILEDB_VERSION_MINOR 21 +#define TILEDB_VERSION_MINOR 22 #define TILEDB_VERSION_PATCH 0 diff --git a/tiledb/sm/compressors/util/tdb_gzip_embedded_data.cc b/tiledb/sm/compressors/util/tdb_gzip_embedded_data.cc index 6c15ad3d66a4..23ac215c97a8 100644 --- a/tiledb/sm/compressors/util/tdb_gzip_embedded_data.cc +++ b/tiledb/sm/compressors/util/tdb_gzip_embedded_data.cc @@ -75,14 +75,25 @@ int main(int argc, char* argv[]) { // output stream. if (argc > 1) { - outfile = fopen(argv[1], "w+b"); - if (!outfile) { + if (argc != 3) { + printf("Usage: tdb_gzip_embedded_data \n"); + printf( + "If neither nor are specified, they will be stdin " + "and stdout respectively.\n"); + } + infile = fopen(argv[1], "rb"); + if (!infile) { fprintf(stderr, "Unable to create file %s\n", argv[1]); exit(-2); } + outfile = fopen(argv[2], "wb"); + if (!outfile) { + fprintf(stderr, "Unable to create file %s\n", argv[2]); + exit(-2); + } } - auto closefile = [&]() { fclose(outfile); }; - tiledb::common::ScopedExecutor onexit1(closefile); + tiledb::common::ScopedExecutor onexit1([&]() { fclose(infile); }); + tiledb::common::ScopedExecutor onexit2([&]() { fclose(outfile); }); #ifdef _WIN32 // need to be sure in/out are in binay mode, windows default won't be!!! 
diff --git a/tiledb/sm/consolidation_plan/consolidation_plan.cc b/tiledb/sm/consolidation_plan/consolidation_plan.cc index 051306b5db90..63f439fca95a 100644 --- a/tiledb/sm/consolidation_plan/consolidation_plan.cc +++ b/tiledb/sm/consolidation_plan/consolidation_plan.cc @@ -33,6 +33,7 @@ #include "tiledb/sm/consolidation_plan/consolidation_plan.h" #include "tiledb/common/common.h" #include "tiledb/common/logger.h" +#include "tiledb/sm/rest/rest_client.h" using namespace tiledb::sm; using namespace tiledb::common; @@ -44,7 +45,20 @@ using namespace tiledb::common; ConsolidationPlan::ConsolidationPlan( shared_ptr array, uint64_t fragment_size) : desired_fragment_size_(fragment_size) { - generate(array); + if (array->is_remote()) { + auto rest_client = array->rest_client(); + if (!rest_client) { + throw std::runtime_error( + "Failed to create a consolidation plan; Remote array" + "with no REST client."); + } + // reach out to the REST client to populate class members + fragment_uris_per_node_ = rest_client->post_consolidation_plan_from_rest( + array->array_uri(), array->config(), fragment_size); + num_nodes_ = fragment_uris_per_node_.size(); + } else { + generate(array); + } } ConsolidationPlan::ConsolidationPlan( diff --git a/tiledb/sm/consolidator/array_meta_consolidator.cc b/tiledb/sm/consolidator/array_meta_consolidator.cc index 1677120d0835..cde5d10eb2c7 100644 --- a/tiledb/sm/consolidator/array_meta_consolidator.cc +++ b/tiledb/sm/consolidator/array_meta_consolidator.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -65,7 +65,6 @@ Status ArrayMetaConsolidator::consolidate( const void* encryption_key, uint32_t key_length) { auto timer_se = stats_->start_timer("consolidate_array_meta"); - check_array_uri(array_name); // Open array for reading @@ -86,30 +85,11 @@ Status ArrayMetaConsolidator::consolidate( QueryType::WRITE, encryption_type, encryption_key, key_length), throw_if_not_ok(array_for_reads.close())); - // Swap the in-memory metadata between the two arrays. - // After that, the array for writes will store the (consolidated by - // the way metadata loading works) metadata of the array for reads - Metadata* metadata_r; - auto st = array_for_reads.metadata(&metadata_r); - if (!st.ok()) { - throw_if_not_ok(array_for_reads.close()); - throw_if_not_ok(array_for_writes.close()); - return st; - } - Metadata* metadata_w; - st = array_for_writes.metadata(&metadata_w); - if (!st.ok()) { - throw_if_not_ok(array_for_reads.close()); - throw_if_not_ok(array_for_writes.close()); - return st; - } - metadata_r->swap(metadata_w); - - // Metadata uris to delete - const auto to_vacuum = metadata_w->loaded_metadata_uris(); - - // Get the new URI name for consolidated metadata - URI new_uri = metadata_w->get_uri(array_uri); + // Copy-assign the read metadata into the metadata of the array for writes + auto& metadata_r = array_for_reads.metadata(); + array_for_writes.opened_array()->metadata() = metadata_r; + URI new_uri = metadata_r.get_uri(array_uri); + const auto& to_vacuum = metadata_r.loaded_metadata_uris(); // Write vac files relative to the array URI. This was fixed for reads in // version 19 so only do this for arrays starting with version 19. 
@@ -119,20 +99,19 @@ Status ArrayMetaConsolidator::consolidate( base_uri_size = array_for_reads.array_uri().to_string().size(); } - // Close arrays - RETURN_NOT_OK_ELSE( - array_for_reads.close(), throw_if_not_ok(array_for_writes.close())); - throw_if_not_ok(array_for_writes.close()); - - // Write vacuum file + // Prepare vacuum file URI vac_uri = URI(new_uri.to_string() + constants::vacuum_file_suffix); - std::stringstream ss; for (const auto& uri : to_vacuum) { ss << uri.to_string().substr(base_uri_size) << "\n"; } - auto data = ss.str(); + + // Close arrays + throw_if_not_ok(array_for_reads.close()); + throw_if_not_ok(array_for_writes.close()); + + // Write vacuum file RETURN_NOT_OK( storage_manager_->vfs()->write(vac_uri, data.c_str(), data.size())); RETURN_NOT_OK(storage_manager_->vfs()->close_file(vac_uri)); diff --git a/tiledb/sm/consolidator/consolidator.cc b/tiledb/sm/consolidator/consolidator.cc index db2eba17e2c8..13186653aa09 100644 --- a/tiledb/sm/consolidator/consolidator.cc +++ b/tiledb/sm/consolidator/consolidator.cc @@ -32,6 +32,7 @@ #include "tiledb/sm/consolidator/consolidator.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/common/stdx_string.h" #include "tiledb/sm/consolidator/array_meta_consolidator.h" #include "tiledb/sm/consolidator/commits_consolidator.h" @@ -105,8 +106,11 @@ ConsolidationMode Consolidator::mode_from_config( Consolidator::Consolidator(StorageManager* storage_manager) : storage_manager_(storage_manager) + , consolidator_memory_tracker_( + storage_manager_->resources().create_memory_tracker()) , stats_(storage_manager_->stats()->create_child("Consolidator")) , logger_(storage_manager_->logger()->clone("Consolidator", ++logger_id_)) { + consolidator_memory_tracker_->set_type(MemoryTrackerType::CONSOLIDATOR); } Consolidator::~Consolidator() = default; diff --git a/tiledb/sm/consolidator/consolidator.h b/tiledb/sm/consolidator/consolidator.h index b11ca6120b35..ddc1b8aea649 100644 
--- a/tiledb/sm/consolidator/consolidator.h +++ b/tiledb/sm/consolidator/consolidator.h @@ -242,6 +242,9 @@ class Consolidator { /** The storage manager. */ StorageManager* storage_manager_; + /** The consolidator memory tracker. */ + shared_ptr consolidator_memory_tracker_; + /** The class stats. */ stats::Stats* stats_; diff --git a/tiledb/sm/consolidator/fragment_consolidator.cc b/tiledb/sm/consolidator/fragment_consolidator.cc index aa42ee99cfe5..6a37c8ee2856 100644 --- a/tiledb/sm/consolidator/fragment_consolidator.cc +++ b/tiledb/sm/consolidator/fragment_consolidator.cc @@ -841,7 +841,7 @@ void FragmentConsolidator::set_query_buffers( const auto& array_schema = query->array_schema(); auto dim_num = array_schema.dim_num(); auto dense = array_schema.dense(); - auto attributes = array_schema.attributes(); + auto& attributes = array_schema.attributes(); unsigned bid = 0; // Here the first buffer should always be the fixed buffer (either offsets diff --git a/tiledb/sm/consolidator/fragment_meta_consolidator.cc b/tiledb/sm/consolidator/fragment_meta_consolidator.cc index 8b22b37c40c5..4afa82423b23 100644 --- a/tiledb/sm/consolidator/fragment_meta_consolidator.cc +++ b/tiledb/sm/consolidator/fragment_meta_consolidator.cc @@ -117,14 +117,13 @@ Status FragmentMetaConsolidator::consolidate( } // Serialize all fragment metadata footers in parallel - std::vector> tiles(meta.size()); + std::vector> tiles(meta.size()); auto status = parallel_for( storage_manager_->compute_tp(), 0, tiles.size(), [&](size_t i) { SizeComputationSerializer size_computation_serializer; meta[i]->write_footer(size_computation_serializer); - tiles[i].reset(tdb_new( - WriterTile, - WriterTile::from_generic(size_computation_serializer.size()))); + tiles[i] = WriterTile::from_generic( + size_computation_serializer.size(), consolidator_memory_tracker_); Serializer serializer(tiles[i]->data(), tiles[i]->size()); meta[i]->write_footer(serializer); @@ -160,9 +159,10 @@ Status 
FragmentMetaConsolidator::consolidate( SizeComputationSerializer size_computation_serializer; serialize_data(size_computation_serializer, offset); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), consolidator_memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); serialize_data(serializer, offset); // Close array @@ -173,7 +173,7 @@ Status FragmentMetaConsolidator::consolidate( GenericTileIO tile_io(storage_manager_->resources(), uri); [[maybe_unused]] uint64_t nbytes = 0; - tile_io.write_generic(&tile, enc_key, &nbytes); + tile_io.write_generic(tile, enc_key, &nbytes); RETURN_NOT_OK(storage_manager_->vfs()->close_file(uri)); return Status::Ok(); diff --git a/tiledb/sm/consolidator/group_meta_consolidator.cc b/tiledb/sm/consolidator/group_meta_consolidator.cc index 589b067fe4c7..ba0a74d55882 100644 --- a/tiledb/sm/consolidator/group_meta_consolidator.cc +++ b/tiledb/sm/consolidator/group_meta_consolidator.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -64,7 +64,6 @@ GroupMetaConsolidator::GroupMetaConsolidator( Status GroupMetaConsolidator::consolidate( const char* group_name, EncryptionType, const void*, uint32_t) { auto timer_se = stats_->start_timer("consolidate_group_meta"); - check_array_uri(group_name); // Open group for reading @@ -81,44 +80,25 @@ Status GroupMetaConsolidator::consolidate( group_for_writes.open(QueryType::WRITE), throw_if_not_ok(group_for_reads.close())); - // Swap the in-memory metadata between the two groups. 
- // After that, the group for writes will store the (consolidated by - // the way metadata loading works) metadata of the group for reads - Metadata* metadata_r; - auto st = group_for_reads.metadata(&metadata_r); - if (!st.ok()) { - throw_if_not_ok(group_for_reads.close()); - throw_if_not_ok(group_for_writes.close()); - return st; - } - Metadata* metadata_w; - st = group_for_writes.metadata(&metadata_w); - if (!st.ok()) { - throw_if_not_ok(group_for_reads.close()); - throw_if_not_ok(group_for_writes.close()); - return st; - } - metadata_r->swap(metadata_w); - - // Metadata uris to delete - const auto to_vacuum = metadata_w->loaded_metadata_uris(); - - // Get the new URI name for consolidated metadata - URI new_uri = metadata_w->get_uri(group_uri); - - // Close groups - RETURN_NOT_OK_ELSE( - group_for_reads.close(), throw_if_not_ok(group_for_writes.close())); - RETURN_NOT_OK(group_for_writes.close()); + // Copy-assign the read metadata into the metadata of the group for writes + auto metadata_r = group_for_reads.metadata(); + *(group_for_writes.metadata()) = *metadata_r; + URI new_uri = metadata_r->get_uri(group_uri); + const auto& to_vacuum = metadata_r->loaded_metadata_uris(); - // Write vacuum file + // Prepare vacuum file URI vac_uri = URI(new_uri.to_string() + constants::vacuum_file_suffix); - std::stringstream ss; - for (const auto& uri : to_vacuum) + for (const auto& uri : to_vacuum) { ss << uri.to_string() << "\n"; - + } auto data = ss.str(); + + // Close groups + throw_if_not_ok(group_for_reads.close()); + throw_if_not_ok(group_for_writes.close()); + + // Write vacuum file RETURN_NOT_OK( storage_manager_->vfs()->write(vac_uri, data.c_str(), data.size())); RETURN_NOT_OK(storage_manager_->vfs()->close_file(vac_uri)); diff --git a/tiledb/sm/consolidator/test/CMakeLists.txt b/tiledb/sm/consolidator/test/CMakeLists.txt index 3d97a4d2095f..fdc64b244be3 100644 --- a/tiledb/sm/consolidator/test/CMakeLists.txt +++ b/tiledb/sm/consolidator/test/CMakeLists.txt @@ 
-29,5 +29,5 @@ commence(unit_test consolidator) this_target_sources(main.cc unit_fragment_consolidator.cc) # Not actually testing a unit yet, but some things that ought to be units - this_target_link_libraries(TILEDB_CORE_OBJECTS TILEDB_CORE_OBJECTS_ILIB) + this_target_link_libraries(tiledb_test_support_lib) conclude(unit_test) diff --git a/tiledb/sm/consolidator/test/unit_fragment_consolidator.cc b/tiledb/sm/consolidator/test/unit_fragment_consolidator.cc index 3e2f9d9e2fb1..b5e728fff267 100644 --- a/tiledb/sm/consolidator/test/unit_fragment_consolidator.cc +++ b/tiledb/sm/consolidator/test/unit_fragment_consolidator.cc @@ -32,6 +32,7 @@ #include #include "../fragment_consolidator.h" +#include "test/support/src/mem_helpers.h" #include "tiledb/common/common.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/enums/array_type.h" @@ -47,13 +48,19 @@ shared_ptr make_schema( const std::vector attr_nullable) { // Initialize the array schema. shared_ptr array_schema = make_shared( - HERE(), sparse ? ArrayType::SPARSE : ArrayType::DENSE); + HERE(), + sparse ? ArrayType::SPARSE : ArrayType::DENSE, + tiledb::test::create_test_memory_tracker()); // Create the domain/dimensions. - Domain domain; + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + auto domain{make_shared(HERE(), memory_tracker)}; for (uint64_t d = 0; d < dim_types.size(); d++) { auto dim{make_shared( - HERE(), "d" + std::to_string(d + 1), dim_types[d])}; + HERE(), + "d" + std::to_string(d + 1), + dim_types[d], + tiledb::test::get_test_memory_tracker())}; switch (dim_types[d]) { case Datatype::INT8: { @@ -129,9 +136,9 @@ shared_ptr make_schema( } } - REQUIRE(domain.add_dimension(dim).ok()); + REQUIRE(domain->add_dimension(dim).ok()); } - REQUIRE(array_schema->set_domain(make_shared(HERE(), domain)).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); // Create the attributes. 
for (uint64_t a = 0; a < attr_types.size(); a++) { diff --git a/tiledb/sm/enums/datatype.h b/tiledb/sm/enums/datatype.h index 8c21280b267b..7b84f00f18e9 100644 --- a/tiledb/sm/enums/datatype.h +++ b/tiledb/sm/enums/datatype.h @@ -235,100 +235,99 @@ inline const std::string& datatype_str(Datatype type) { } /** Returns the datatype given a string representation. */ -inline Status datatype_enum( - const std::string& datatype_str, Datatype* datatype) { +inline Datatype datatype_enum(const std::string& datatype_str) { if (datatype_str == constants::int32_str) - *datatype = Datatype::INT32; + return Datatype::INT32; else if (datatype_str == constants::int64_str) - *datatype = Datatype::INT64; + return Datatype::INT64; else if (datatype_str == constants::float32_str) - *datatype = Datatype::FLOAT32; + return Datatype::FLOAT32; else if (datatype_str == constants::float64_str) - *datatype = Datatype::FLOAT64; + return Datatype::FLOAT64; else if (datatype_str == constants::char_str) - *datatype = Datatype::CHAR; + return Datatype::CHAR; else if (datatype_str == constants::blob_str) - *datatype = Datatype::BLOB; + return Datatype::BLOB; else if (datatype_str == constants::geom_wkb_str) - *datatype = Datatype::GEOM_WKB; + return Datatype::GEOM_WKB; else if (datatype_str == constants::geom_wkt_str) - *datatype = Datatype::GEOM_WKT; + return Datatype::GEOM_WKT; else if (datatype_str == constants::bool_str) - *datatype = Datatype::BOOL; + return Datatype::BOOL; else if (datatype_str == constants::int8_str) - *datatype = Datatype::INT8; + return Datatype::INT8; else if (datatype_str == constants::uint8_str) - *datatype = Datatype::UINT8; + return Datatype::UINT8; else if (datatype_str == constants::int16_str) - *datatype = Datatype::INT16; + return Datatype::INT16; else if (datatype_str == constants::uint16_str) - *datatype = Datatype::UINT16; + return Datatype::UINT16; else if (datatype_str == constants::uint32_str) - *datatype = Datatype::UINT32; + return Datatype::UINT32; else 
if (datatype_str == constants::uint64_str) - *datatype = Datatype::UINT64; + return Datatype::UINT64; else if (datatype_str == constants::string_ascii_str) - *datatype = Datatype::STRING_ASCII; + return Datatype::STRING_ASCII; else if (datatype_str == constants::string_utf8_str) - *datatype = Datatype::STRING_UTF8; + return Datatype::STRING_UTF8; else if (datatype_str == constants::string_utf16_str) - *datatype = Datatype::STRING_UTF16; + return Datatype::STRING_UTF16; else if (datatype_str == constants::string_utf32_str) - *datatype = Datatype::STRING_UTF32; + return Datatype::STRING_UTF32; else if (datatype_str == constants::string_ucs2_str) - *datatype = Datatype::STRING_UCS2; + return Datatype::STRING_UCS2; else if (datatype_str == constants::string_ucs4_str) - *datatype = Datatype::STRING_UCS4; + return Datatype::STRING_UCS4; else if (datatype_str == constants::any_str) - *datatype = Datatype::ANY; + return Datatype::ANY; else if (datatype_str == constants::datetime_year_str) - *datatype = Datatype::DATETIME_YEAR; + return Datatype::DATETIME_YEAR; else if (datatype_str == constants::datetime_month_str) - *datatype = Datatype::DATETIME_MONTH; + return Datatype::DATETIME_MONTH; else if (datatype_str == constants::datetime_week_str) - *datatype = Datatype::DATETIME_WEEK; + return Datatype::DATETIME_WEEK; else if (datatype_str == constants::datetime_day_str) - *datatype = Datatype::DATETIME_DAY; + return Datatype::DATETIME_DAY; else if (datatype_str == constants::datetime_hr_str) - *datatype = Datatype::DATETIME_HR; + return Datatype::DATETIME_HR; else if (datatype_str == constants::datetime_min_str) - *datatype = Datatype::DATETIME_MIN; + return Datatype::DATETIME_MIN; else if (datatype_str == constants::datetime_sec_str) - *datatype = Datatype::DATETIME_SEC; + return Datatype::DATETIME_SEC; else if (datatype_str == constants::datetime_ms_str) - *datatype = Datatype::DATETIME_MS; + return Datatype::DATETIME_MS; else if (datatype_str == constants::datetime_us_str) 
- *datatype = Datatype::DATETIME_US; + return Datatype::DATETIME_US; else if (datatype_str == constants::datetime_ns_str) - *datatype = Datatype::DATETIME_NS; + return Datatype::DATETIME_NS; else if (datatype_str == constants::datetime_ps_str) - *datatype = Datatype::DATETIME_PS; + return Datatype::DATETIME_PS; else if (datatype_str == constants::datetime_fs_str) - *datatype = Datatype::DATETIME_FS; + return Datatype::DATETIME_FS; else if (datatype_str == constants::datetime_as_str) - *datatype = Datatype::DATETIME_AS; + return Datatype::DATETIME_AS; else if (datatype_str == constants::time_hr_str) - *datatype = Datatype::TIME_HR; + return Datatype::TIME_HR; else if (datatype_str == constants::time_min_str) - *datatype = Datatype::TIME_MIN; + return Datatype::TIME_MIN; else if (datatype_str == constants::time_sec_str) - *datatype = Datatype::TIME_SEC; + return Datatype::TIME_SEC; else if (datatype_str == constants::time_ms_str) - *datatype = Datatype::TIME_MS; + return Datatype::TIME_MS; else if (datatype_str == constants::time_us_str) - *datatype = Datatype::TIME_US; + return Datatype::TIME_US; else if (datatype_str == constants::time_ns_str) - *datatype = Datatype::TIME_NS; + return Datatype::TIME_NS; else if (datatype_str == constants::time_ps_str) - *datatype = Datatype::TIME_PS; + return Datatype::TIME_PS; else if (datatype_str == constants::time_fs_str) - *datatype = Datatype::TIME_FS; + return Datatype::TIME_FS; else if (datatype_str == constants::time_as_str) - *datatype = Datatype::TIME_AS; + return Datatype::TIME_AS; else { - return Status_Error("Invalid Datatype " + datatype_str); + throw std::runtime_error( + "Invalid Datatype string (\"" + datatype_str + "\")"); } - return Status::Ok(); } /** Returns true if the input datatype is a string type. */ @@ -440,12 +439,7 @@ inline void ensure_datatype_is_valid(Datatype type) { * the datatype string's enum is not between 0 and 43. 
**/ inline void ensure_datatype_is_valid(const std::string& datatype_str) { - Datatype datatype_type; - Status st{datatype_enum(datatype_str, &datatype_type)}; - if (!st.ok()) { - throw std::runtime_error( - "Invalid Datatype string (\"" + datatype_str + "\")"); - } + Datatype datatype_type = datatype_enum(datatype_str); ensure_datatype_is_valid(datatype_type); } diff --git a/tiledb/sm/enums/query_condition_op.h b/tiledb/sm/enums/query_condition_op.h index 919b65e03367..edfd40826e06 100644 --- a/tiledb/sm/enums/query_condition_op.h +++ b/tiledb/sm/enums/query_condition_op.h @@ -79,6 +79,10 @@ inline const std::string& query_condition_op_str( return constants::query_condition_op_in_str; case QueryConditionOp::NOT_IN: return constants::query_condition_op_not_in_str; + case QueryConditionOp::ALWAYS_TRUE: + return constants::query_condition_op_always_true_str; + case QueryConditionOp::ALWAYS_FALSE: + return constants::query_condition_op_always_false_str; default: return constants::empty_str; } @@ -105,6 +109,13 @@ inline Status query_condition_op_enum( } else if ( query_condition_op_str == constants::query_condition_op_not_in_str) { *query_condition_op = QueryConditionOp::NOT_IN; + } else if ( + query_condition_op_str == constants::query_condition_op_always_true_str) { + *query_condition_op = QueryConditionOp::ALWAYS_TRUE; + } else if ( + query_condition_op_str == + constants::query_condition_op_always_false_str) { + *query_condition_op = QueryConditionOp::ALWAYS_FALSE; } else { return Status_Error("Invalid QueryConditionOp " + query_condition_op_str); } @@ -113,7 +124,7 @@ inline Status query_condition_op_enum( inline void ensure_qc_op_is_valid(QueryConditionOp query_condition_op) { auto qc_op_enum{::stdx::to_underlying(query_condition_op)}; - if (qc_op_enum > 7) { + if (qc_op_enum > 7 && qc_op_enum != 253 && qc_op_enum != 254) { throw std::runtime_error( "Invalid Query Condition Op " + std::to_string(qc_op_enum)); } @@ -156,6 +167,16 @@ inline QueryConditionOp 
negate_query_condition_op(const QueryConditionOp op) { case QueryConditionOp::NOT_IN: return QueryConditionOp::IN; + // ALWAYS_TRUE and ALWAYS_FALSE are the result of QueryCondition rewriting + // which means they should not be available for negation. This saves us + // from having to have invertible and non-invertible versions of these + // operations. + case QueryConditionOp::ALWAYS_TRUE: + throw std::logic_error("Invalid negation of rewritten query."); + + case QueryConditionOp::ALWAYS_FALSE: + throw std::logic_error("Invalid negation of rewritten query."); + default: throw std::runtime_error("negate_query_condition_op: Invalid op."); } diff --git a/tiledb/sm/filesystem/CMakeLists.txt b/tiledb/sm/filesystem/CMakeLists.txt index fd0e1f34a40e..121829379347 100644 --- a/tiledb/sm/filesystem/CMakeLists.txt +++ b/tiledb/sm/filesystem/CMakeLists.txt @@ -38,7 +38,6 @@ commence(object_library vfs) uri.cc vfs.cc vfs_file_handle.cc - ls_scanner.cc win.cc filesystem_base.cc ../curl/curl_init.cc diff --git a/tiledb/sm/filesystem/azure.cc b/tiledb/sm/filesystem/azure.cc index 4c074e11f94b..3b950a33c075 100644 --- a/tiledb/sm/filesystem/azure.cc +++ b/tiledb/sm/filesystem/azure.cc @@ -44,13 +44,12 @@ #include "tiledb/common/stdx_string.h" #include "tiledb/platform/cert_file.h" #include "tiledb/sm/filesystem/azure.h" -#include "tiledb/sm/filesystem/ssl_config.h" #include "tiledb/sm/misc/parallel_functions.h" #include "tiledb/sm/misc/tdb_math.h" #include "tiledb/sm/misc/utils.h" static std::shared_ptr<::Azure::Core::Http::HttpTransport> create_transport( - tiledb::sm::SSLConfig& ssl_cfg); + const tiledb::sm::SSLConfig& ssl_cfg); using namespace tiledb::common; using tiledb::common::filesystem::directory_entry; @@ -62,102 +61,99 @@ namespace sm { /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ -Azure::Azure() - : write_cache_max_size_(0) - , max_parallel_ops_(1) - , block_list_block_size_(0) - , use_block_list_upload_(false) { +Azure::Azure() { } 
Azure::~Azure() { } -/* ********************************* */ -/* API */ -/* ********************************* */ - -Status Azure::init(const Config& config, ThreadPool* const thread_pool) { - if (thread_pool == nullptr) { - return LOG_STATUS( - Status_AzureError("Can't initialize with null thread pool.")); - } - - thread_pool_ = thread_pool; - - bool found; - char* tmp = nullptr; - - std::string account_name = - config.get("vfs.azure.storage_account_name", &found); - assert(found); - if (account_name.empty() && - ((tmp = getenv("AZURE_STORAGE_ACCOUNT")) != nullptr)) { - account_name = std::string(tmp); - } - - std::string account_key = config.get("vfs.azure.storage_account_key", &found); - assert(found); - if (account_key.empty() && ((tmp = getenv("AZURE_STORAGE_KEY")) != nullptr)) { - account_key = std::string(tmp); - } - - std::string sas_token = - config.get("vfs.azure.storage_sas_token", Config::must_find); - if (sas_token.empty() && - ((tmp = getenv("AZURE_STORAGE_SAS_TOKEN")) != nullptr)) { - sas_token = std::string(tmp); +std::string get_config_with_env_fallback( + const Config& config, const std::string& key, const char* env_name) { + std::string result = config.get(key, Config::must_find); + if (result.empty()) { + char* env = getenv(env_name); + if (env) { + result = getenv(env_name); + } } + return result; +} - std::string blob_endpoint = config.get("vfs.azure.blob_endpoint", &found); - assert(found); - if (blob_endpoint.empty() && - ((tmp = getenv("AZURE_BLOB_ENDPOINT")) != nullptr)) { - blob_endpoint = std::string(tmp); - } - if (blob_endpoint.empty()) { +std::string get_blob_endpoint( + const Config& config, + const std::string& account_name, + bool& has_sas_token) { + std::string sas_token = get_config_with_env_fallback( + config, "vfs.azure.storage_sas_token", "AZURE_STORAGE_SAS_TOKEN"); + has_sas_token = !sas_token.empty(); + + std::string result = get_config_with_env_fallback( + config, "vfs.azure.blob_endpoint", "AZURE_BLOB_ENDPOINT"); + if 
(result.empty()) { if (!account_name.empty()) { - blob_endpoint = "https://" + account_name + ".blob.core.windows.net"; + result = "https://" + account_name + ".blob.core.windows.net"; } else { LOG_WARN( "Neither the 'vfs.azure.storage_account_name' nor the " "'vfs.azure.blob_endpoint' options are specified."); } - } else if (!(utils::parse::starts_with(blob_endpoint, "http://") || - utils::parse::starts_with(blob_endpoint, "https://"))) { + } else if (!(utils::parse::starts_with(result, "http://") || + utils::parse::starts_with(result, "https://"))) { LOG_WARN( "The 'vfs.azure.blob_endpoint' option should include the scheme (HTTP " "or HTTPS)."); } - if (!blob_endpoint.empty() && !sas_token.empty()) { + if (!result.empty() && !sas_token.empty()) { // The question mark is not strictly part of the SAS token // (https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview#sas-token), // but in the Azure Portal the SAS token starts with one. If it does not, we // add the question mark ourselves. 
if (!utils::parse::starts_with(sas_token, "?")) { - blob_endpoint += '?'; + result += '?'; } - blob_endpoint += sas_token; + result += sas_token; } + return result; +} - RETURN_NOT_OK(config.get( - "vfs.azure.max_parallel_ops", &max_parallel_ops_, &found)); - assert(found); - RETURN_NOT_OK(config.get( - "vfs.azure.block_list_block_size", &block_list_block_size_, &found)); - assert(found); - RETURN_NOT_OK(config.get( - "vfs.azure.use_block_list_upload", &use_block_list_upload_, &found)); - assert(found); +AzureParameters::AzureParameters(const Config& config) + : max_parallel_ops_( + config.get("vfs.azure.max_parallel_ops", Config::must_find)) + , block_list_block_size_(config.get( + "vfs.azure.block_list_block_size", Config::must_find)) + , write_cache_max_size_(max_parallel_ops_ * block_list_block_size_) + , max_retries_( + config.get("vfs.azure.max_retries", Config::must_find)) + , retry_delay_(std::chrono::milliseconds( + config.get("vfs.azure.retry_delay_ms", Config::must_find))) + , max_retry_delay_(std::chrono::milliseconds(config.get( + "vfs.azure.max_retry_delay_ms", Config::must_find))) + , use_block_list_upload_(config.get( + "vfs.azure.use_block_list_upload", Config::must_find)) + , account_name_(get_config_with_env_fallback( + config, "vfs.azure.storage_account_name", "AZURE_STORAGE_ACCOUNT")) + , account_key_(get_config_with_env_fallback( + config, "vfs.azure.storage_account_key", "AZURE_STORAGE_KEY")) + , blob_endpoint_(get_blob_endpoint(config, account_name_, has_sas_token_)) + , ssl_cfg_(config) { +} - int max_retries = - config.get("vfs.azure.max_retries", Config::must_find); - retry_delay_ = std::chrono::milliseconds( - config.get("vfs.azure.retry_delay_ms", Config::must_find)); - std::chrono::milliseconds max_retry_delay{ - config.get("vfs.azure.retry_delay_ms", Config::must_find)}; +Status Azure::init(const Config& config, ThreadPool* const thread_pool) { + if (thread_pool == nullptr) { + return LOG_STATUS( + Status_AzureError("Can't initialize 
with null thread pool.")); + } + thread_pool_ = thread_pool; + azure_params_ = config; + return Status::Ok(); +} - write_cache_max_size_ = max_parallel_ops_ * block_list_block_size_; +/* ********************************* */ +/* API */ +/* ********************************* */ +const ::Azure::Storage::Blobs::BlobServiceClient& +Azure::AzureClientSingleton::get(const AzureParameters& params) { // Initialize logging from the Azure SDK. static std::once_flag azure_log_sentinel; std::call_once(azure_log_sentinel, []() { @@ -180,77 +176,82 @@ Status Azure::init(const Config& config, ThreadPool* const thread_pool) { }); }); - ::Azure::Storage::Blobs::BlobClientOptions options; - options.Retry.MaxRetries = max_retries; - options.Retry.RetryDelay = retry_delay_; - options.Retry.MaxRetryDelay = max_retry_delay; - - SSLConfig ssl_cfg = SSLConfig(config); - options.Transport.Transport = create_transport(ssl_cfg); - - // Construct the Azure SDK blob service client. - // We pass a shared key if it was specified. - if (!account_key.empty()) { - client_ = - tdb_unique_ptr<::Azure::Storage::Blobs::BlobServiceClient>(tdb_new( - ::Azure::Storage::Blobs::BlobServiceClient, - blob_endpoint, - make_shared<::Azure::Storage::StorageSharedKeyCredential>( - HERE(), account_name, account_key), - options)); - return Status::Ok(); - } - // Otherwise, if we did not specify an SAS token - // and we are connecting to an HTTPS endpoint, - // use ChainedTokenCredential to authenticate using Microsoft Entra ID. 
- else if ( - sas_token.empty() && - utils::parse::starts_with(blob_endpoint, "https://")) { - try { - ::Azure::Core::Credentials::TokenCredentialOptions cred_options; - cred_options.Retry = options.Retry; - cred_options.Transport = options.Transport; - auto credential = make_shared<::Azure::Identity::ChainedTokenCredential>( - HERE(), - std::vector< - std::shared_ptr<::Azure::Core::Credentials::TokenCredential>>{ - make_shared<::Azure::Identity::EnvironmentCredential>( - HERE(), cred_options), - make_shared<::Azure::Identity::AzureCliCredential>( - HERE(), cred_options), - make_shared<::Azure::Identity::ManagedIdentityCredential>( - HERE(), cred_options), - make_shared<::Azure::Identity::WorkloadIdentityCredential>( - HERE(), cred_options)}); - // If a token is not available we wouldn't know it until we make a request - // and it would be too late. Try getting a token, and if it fails fall - // back to anonymous authentication. - ::Azure::Core::Credentials::TokenRequestContext tokenContext; - // https://github.com/Azure/azure-sdk-for-cpp/blob/azure-storage-blobs_12.7.0/sdk/storage/azure-storage-blobs/src/blob_service_client.cpp#L84 - tokenContext.Scopes.emplace_back("https://storage.azure.com/.default"); - std::ignore = credential->GetToken(tokenContext, {}); + std::lock_guard lck(client_init_mtx_); + + if (!client_) { + ::Azure::Storage::Blobs::BlobClientOptions options; + options.Retry.MaxRetries = params.max_retries_; + options.Retry.RetryDelay = params.retry_delay_; + options.Retry.MaxRetryDelay = params.max_retry_delay_; + + options.Transport.Transport = create_transport(params.ssl_cfg_); + + // Construct the Azure SDK blob service client. + // We pass a shared key if it was specified. 
+ if (!params.account_key_.empty()) { client_ = tdb_unique_ptr<::Azure::Storage::Blobs::BlobServiceClient>(tdb_new( ::Azure::Storage::Blobs::BlobServiceClient, - blob_endpoint, - credential, + params.blob_endpoint_, + make_shared<::Azure::Storage::StorageSharedKeyCredential>( + HERE(), params.account_name_, params.account_key_), + options)); + } + // Otherwise, if we did not specify an SAS token + // and we are connecting to an HTTPS endpoint, + // use ChainedTokenCredential to authenticate using Microsoft Entra ID. + else if ( + !params.has_sas_token_ && + utils::parse::starts_with(params.blob_endpoint_, "https://")) { + try { + ::Azure::Core::Credentials::TokenCredentialOptions cred_options; + cred_options.Retry = options.Retry; + cred_options.Transport = options.Transport; + auto credential = + make_shared<::Azure::Identity::ChainedTokenCredential>( + HERE(), + std::vector>{ + make_shared<::Azure::Identity::EnvironmentCredential>( + HERE(), cred_options), + make_shared<::Azure::Identity::AzureCliCredential>( + HERE(), cred_options), + make_shared<::Azure::Identity::ManagedIdentityCredential>( + HERE(), cred_options), + make_shared<::Azure::Identity::WorkloadIdentityCredential>( + HERE(), cred_options)}); + // If a token is not available we wouldn't know it until we make a + // request and it would be too late. Try getting a token, and if it + // fails fall back to anonymous authentication. + ::Azure::Core::Credentials::TokenRequestContext tokenContext; + // https://github.com/Azure/azure-sdk-for-cpp/blob/azure-storage-blobs_12.7.0/sdk/storage/azure-storage-blobs/src/blob_service_client.cpp#L84 + tokenContext.Scopes.emplace_back("https://storage.azure.com/.default"); + std::ignore = credential->GetToken(tokenContext, {}); + client_ = + tdb_unique_ptr<::Azure::Storage::Blobs::BlobServiceClient>(tdb_new( + ::Azure::Storage::Blobs::BlobServiceClient, + params.blob_endpoint_, + credential, + options)); + } catch (...) 
{ + LOG_INFO( + "Failed to get Microsoft Entra ID token, falling back to anonymous " + "authentication"); + } + } else { + client_ = + tdb_unique_ptr<::Azure::Storage::Blobs::BlobServiceClient>(tdb_new( + ::Azure::Storage::Blobs::BlobServiceClient, + params.blob_endpoint_, options)); - return Status::Ok(); - } catch (...) { - LOG_INFO( - "Failed to get Microsoft Entra ID token, falling back to anonymous " - "authentication"); } - } - client_ = tdb_unique_ptr<::Azure::Storage::Blobs::BlobServiceClient>(tdb_new( - ::Azure::Storage::Blobs::BlobServiceClient, blob_endpoint, options)); - return Status::Ok(); + return *client_; + } } Status Azure::create_container(const URI& uri) const { - assert(client_); - + const auto& c = client(); if (!uri.is_azure()) { return LOG_STATUS(Status_AzureError( std::string("URI is not an Azure URI: " + uri.to_string()))); @@ -262,8 +263,7 @@ Status Azure::create_container(const URI& uri) const { bool created; std::string error_message = ""; try { - created = - client_->GetBlobContainerClient(container_name).Create().Value.Created; + created = c.GetBlobContainerClient(container_name).Create().Value.Created; } catch (const ::Azure::Storage::StorageException& e) { created = false; error_message = "; " + e.Message; @@ -278,15 +278,14 @@ Status Azure::create_container(const URI& uri) const { } Status Azure::empty_container(const URI& container) const { - assert(client_); - return remove_dir(container); } Status Azure::flush_blob(const URI& uri) { - assert(client_); + assert(azure_params_); + const auto& c = client(); - if (!use_block_list_upload_) { + if (!azure_params_->use_block_list_upload_) { return flush_blob_direct(uri); } @@ -348,7 +347,7 @@ Status Azure::flush_blob(const URI& uri) { finish_block_list_upload(uri); try { - client_->GetBlobContainerClient(container_name) + c.GetBlobContainerClient(container_name) .GetBlockBlobClient(blob_path) .CommitBlockList(std::vector(block_ids.begin(), block_ids.end())); } catch (const 
::Azure::Storage::StorageException& e) { @@ -374,6 +373,7 @@ void Azure::finish_block_list_upload(const URI& uri) { } Status Azure::flush_blob_direct(const URI& uri) { + auto& c = client(); if (!uri.is_azure()) { return LOG_STATUS(Status_AzureError( std::string("URI is not an Azure URI: " + uri.to_string()))); @@ -390,7 +390,7 @@ Status Azure::flush_blob_direct(const URI& uri) { RETURN_NOT_OK(parse_azure_uri(uri, &container_name, &blob_path)); try { - client_->GetBlobContainerClient(container_name) + c.GetBlobContainerClient(container_name) .GetBlockBlobClient(blob_path) .UploadFrom( static_cast(write_cache_buffer->data()), @@ -410,7 +410,7 @@ Status Azure::flush_blob_direct(const URI& uri) { } Status Azure::is_empty_container(const URI& uri, bool* is_empty) const { - assert(client_); + const auto& c = client(); assert(is_empty); if (!uri.is_azure()) { @@ -424,7 +424,7 @@ Status Azure::is_empty_container(const URI& uri, bool* is_empty) const { ::Azure::Storage::Blobs::ListBlobsOptions options; options.PageSizeHint = 1; try { - *is_empty = client_->GetBlobContainerClient(container_name) + *is_empty = c.GetBlobContainerClient(container_name) .ListBlobs(options) .Blobs.empty(); } catch (const ::Azure::Storage::StorageException& e) { @@ -451,11 +451,11 @@ Status Azure::is_container(const URI& uri, bool* const is_container) const { Status Azure::is_container( const std::string& container_name, bool* const is_container) const { - assert(client_); + const auto& c = client(); assert(is_container); try { - client_->GetBlobContainerClient(container_name).GetProperties(); + c.GetBlobContainerClient(container_name).GetProperties(); } catch (const ::Azure::Storage::StorageException& e) { if (e.StatusCode == ::Azure::Core::Http::HttpStatusCode::NotFound) { *is_container = false; @@ -471,7 +471,6 @@ Status Azure::is_container( } Status Azure::is_dir(const URI& uri, bool* const exists) const { - assert(client_); assert(exists); std::vector paths; @@ -494,11 +493,11 @@ Status 
Azure::is_blob( const std::string& container_name, const std::string& blob_path, bool* const is_blob) const { - assert(client_); + const auto& c = client(); assert(is_blob); try { - client_->GetBlobContainerClient(container_name) + c.GetBlobContainerClient(container_name) .GetBlobClient(blob_path) .GetProperties(); } catch (const ::Azure::Storage::StorageException& e) { @@ -543,7 +542,6 @@ Status Azure::ls( std::vector* paths, const std::string& delimiter, const int max_paths) const { - assert(client_); assert(paths); auto&& [st, entries] = ls_with_sizes(uri, delimiter, max_paths); @@ -558,7 +556,7 @@ Status Azure::ls( tuple>> Azure::ls_with_sizes( const URI& uri, const std::string& delimiter, int max_paths) const { - assert(client_); + const auto& c = client(); const URI uri_dir = uri.add_trailing_slash(); @@ -573,7 +571,7 @@ tuple>> Azure::ls_with_sizes( RETURN_NOT_OK_TUPLE( parse_azure_uri(uri_dir, &container_name, &blob_path), nullopt); - auto container_client = client_->GetBlobContainerClient(container_name); + auto container_client = c.GetBlobContainerClient(container_name); std::vector entries; ::Azure::Storage::Blobs::ListBlobsOptions options; @@ -613,14 +611,14 @@ tuple>> Azure::ls_with_sizes( } Status Azure::move_object(const URI& old_uri, const URI& new_uri) { - assert(client_); RETURN_NOT_OK(copy_blob(old_uri, new_uri)); RETURN_NOT_OK(remove_blob(old_uri)); return Status::Ok(); } Status Azure::copy_blob(const URI& old_uri, const URI& new_uri) { - assert(client_); + assert(azure_params_); + auto& c = client(); if (!old_uri.is_azure()) { return LOG_STATUS(Status_AzureError( @@ -635,7 +633,7 @@ Status Azure::copy_blob(const URI& old_uri, const URI& new_uri) { std::string old_container_name; std::string old_blob_path; RETURN_NOT_OK(parse_azure_uri(old_uri, &old_container_name, &old_blob_path)); - std::string source_uri = client_->GetBlobContainerClient(old_container_name) + std::string source_uri = c.GetBlobContainerClient(old_container_name) 
.GetBlobClient(old_blob_path) .GetUrl(); @@ -644,10 +642,10 @@ Status Azure::copy_blob(const URI& old_uri, const URI& new_uri) { RETURN_NOT_OK(parse_azure_uri(new_uri, &new_container_name, &new_blob_path)); try { - client_->GetBlobContainerClient(new_container_name) + c.GetBlobContainerClient(new_container_name) .GetBlobClient(new_blob_path) .StartCopyFromUri(source_uri) - .PollUntilDone(retry_delay_); + .PollUntilDone(azure_params_->retry_delay_); } catch (const ::Azure::Storage::StorageException& e) { return LOG_STATUS(Status_AzureError( "Copy blob failed on: " + old_uri.to_string() + "; " + e.Message)); @@ -657,8 +655,6 @@ Status Azure::copy_blob(const URI& old_uri, const URI& new_uri) { } Status Azure::move_dir(const URI& old_uri, const URI& new_uri) { - assert(client_); - std::vector paths; RETURN_NOT_OK(ls(old_uri, &paths, "")); for (const auto& path : paths) { @@ -670,7 +666,7 @@ Status Azure::move_dir(const URI& old_uri, const URI& new_uri) { } Status Azure::blob_size(const URI& uri, uint64_t* const nbytes) const { - assert(client_); + auto& c = client(); assert(nbytes); if (!uri.is_azure()) { @@ -689,8 +685,7 @@ Status Azure::blob_size(const URI& uri, uint64_t* const nbytes) const { options.Prefix = blob_path; options.PageSizeHint = 1; - auto response = - client_->GetBlobContainerClient(container_name).ListBlobs(options); + auto response = c.GetBlobContainerClient(container_name).ListBlobs(options); if (response.Blobs.empty()) { error_message = "Blob does not exist."; @@ -716,7 +711,7 @@ Status Azure::read( const uint64_t length, const uint64_t read_ahead_length, uint64_t* const length_returned) const { - assert(client_); + const auto& c = client(); if (!uri.is_azure()) { return LOG_STATUS(Status_AzureError( @@ -736,7 +731,7 @@ Status Azure::read( ::Azure::Storage::Blobs::Models::DownloadBlobResult result; try { - result = client_->GetBlobContainerClient(container_name) + result = c.GetBlobContainerClient(container_name) .GetBlobClient(blob_path) 
.Download(options) .Value; @@ -757,7 +752,7 @@ Status Azure::read( } Status Azure::remove_container(const URI& uri) const { - assert(client_); + auto& c = client(); // Empty container RETURN_NOT_OK(empty_container(uri)); @@ -768,7 +763,7 @@ Status Azure::remove_container(const URI& uri) const { bool deleted; std::string error_message = ""; try { - deleted = client_->DeleteBlobContainer(container_name).Value.Deleted; + deleted = c.DeleteBlobContainer(container_name).Value.Deleted; } catch (const ::Azure::Storage::StorageException& e) { deleted = false; error_message = "; " + e.Message; @@ -783,7 +778,7 @@ Status Azure::remove_container(const URI& uri) const { } Status Azure::remove_blob(const URI& uri) const { - assert(client_); + auto& c = client(); std::string container_name; std::string blob_path; @@ -792,7 +787,7 @@ Status Azure::remove_blob(const URI& uri) const { bool deleted; std::string error_message = ""; try { - deleted = client_->GetBlobContainerClient(container_name) + deleted = c.GetBlobContainerClient(container_name) .DeleteBlob(blob_path) .Value.Deleted; } catch (const ::Azure::Storage::StorageException& e) { @@ -809,8 +804,6 @@ Status Azure::remove_blob(const URI& uri) const { } Status Azure::remove_dir(const URI& uri) const { - assert(client_); - std::vector paths; RETURN_NOT_OK(ls(uri, &paths, "")); auto status = parallel_for(thread_pool_, 0, paths.size(), [&](size_t i) { @@ -823,7 +816,7 @@ Status Azure::remove_dir(const URI& uri) const { } Status Azure::touch(const URI& uri) const { - assert(client_); + auto& c = client(); if (!uri.is_azure()) { return LOG_STATUS(Status_AzureError( @@ -846,7 +839,7 @@ Status Azure::touch(const URI& uri) const { RETURN_NOT_OK(parse_azure_uri(uri, &container_name, &blob_path)); try { - client_->GetBlobContainerClient(container_name) + c.GetBlobContainerClient(container_name) .GetBlockBlobClient(blob_path) .UploadFrom(nullptr, 0); } catch (const ::Azure::Storage::StorageException& e) { @@ -859,6 +852,8 @@ Status 
Azure::touch(const URI& uri) const { Status Azure::write( const URI& uri, const void* const buffer, const uint64_t length) { + assert(azure_params_); + auto write_cache_max_size = azure_params_->write_cache_max_size_; if (!uri.is_azure()) { return LOG_STATUS(Status_AzureError( std::string("URI is not an Azure URI: " + uri.to_string()))); @@ -870,7 +865,7 @@ Status Azure::write( RETURN_NOT_OK( fill_write_cache(write_cache_buffer, buffer, length, &nbytes_filled)); - if (!use_block_list_upload_) { + if (!azure_params_->use_block_list_upload_) { if (nbytes_filled != length) { std::stringstream errmsg; errmsg << "Direct write failed! " << nbytes_filled @@ -881,21 +876,21 @@ Status Azure::write( } } - if (write_cache_buffer->size() == write_cache_max_size_) { + if (write_cache_buffer->size() == write_cache_max_size) { RETURN_NOT_OK(flush_write_cache(uri, write_cache_buffer, false)); } uint64_t new_length = length - nbytes_filled; uint64_t offset = nbytes_filled; while (new_length > 0) { - if (new_length >= write_cache_max_size_) { + if (new_length >= write_cache_max_size) { RETURN_NOT_OK(write_blocks( uri, static_cast(buffer) + offset, - write_cache_max_size_, + write_cache_max_size, false)); - offset += write_cache_max_size_; - new_length -= write_cache_max_size_; + offset += write_cache_max_size; + new_length -= write_cache_max_size; } else { RETURN_NOT_OK(fill_write_cache( write_cache_buffer, @@ -926,12 +921,14 @@ Status Azure::fill_write_cache( const void* const buffer, const uint64_t length, uint64_t* const nbytes_filled) { + assert(azure_params_); assert(write_cache_buffer); assert(buffer); assert(nbytes_filled); - *nbytes_filled = - std::min(write_cache_max_size_ - write_cache_buffer->size(), length); + *nbytes_filled = std::min( + azure_params_->write_cache_max_size_ - write_cache_buffer->size(), + length); if (*nbytes_filled > 0) { RETURN_NOT_OK(write_cache_buffer->write(buffer, *nbytes_filled)); @@ -962,6 +959,8 @@ Status Azure::write_blocks( const void* const 
buffer, const uint64_t length, const bool last_block) { + assert(azure_params_); + auto block_list_block_size = azure_params_->block_list_block_size_; if (!uri.is_azure()) { return LOG_STATUS(Status_AzureError( std::string("URI is not an Azure URI: " + uri.to_string()))); @@ -973,11 +972,12 @@ Status Azure::write_blocks( // configured max number. Length must be evenly divisible by // block_list_block_size_ unless this is the last block. uint64_t num_ops = last_block ? - utils::math::ceil(length, block_list_block_size_) : - (length / block_list_block_size_); - num_ops = std::min(std::max(num_ops, uint64_t(1)), max_parallel_ops_); + utils::math::ceil(length, block_list_block_size) : + (length / block_list_block_size); + num_ops = std::min( + std::max(num_ops, uint64_t(1)), azure_params_->max_parallel_ops_); - if (!last_block && length % block_list_block_size_ != 0) { + if (!last_block && length % block_list_block_size != 0) { return LOG_STATUS( Status_AzureError("Length not evenly divisible by block size")); } @@ -1010,8 +1010,8 @@ Status Azure::write_blocks( } state = &state_iter->second; - // We're done reading and writing from 'block_list_upload_states_'. Mutating - // the 'state' element does not affect the thread-safety of + // We're done reading and writing from 'block_list_upload_states_'. + // Mutating the 'state' element does not affect the thread-safety of // 'block_list_upload_states_'. 
} @@ -1030,9 +1030,9 @@ Status Azure::write_blocks( std::vector tasks; tasks.reserve(num_ops); for (uint64_t i = 0; i < num_ops; i++) { - const uint64_t begin = i * block_list_block_size_; + const uint64_t begin = i * block_list_block_size; const uint64_t end = - std::min((i + 1) * block_list_block_size_ - 1, length - 1); + std::min((i + 1) * block_list_block_size - 1, length - 1); const char* const thread_buffer = reinterpret_cast(buffer) + begin; const uint64_t thread_buffer_len = end - begin + 1; @@ -1064,10 +1064,11 @@ Status Azure::upload_block( const void* const buffer, const uint64_t length, const std::string& block_id) { + const auto& c = client(); ::Azure::Core::IO::MemoryBodyStream stream( static_cast(buffer), static_cast(length)); try { - client_->GetBlobContainerClient(container_name) + c.GetBlobContainerClient(container_name) .GetBlockBlobClient(blob_path) .StageBlock(block_id, stream); } catch (const ::Azure::Storage::StorageException& e) { @@ -1165,7 +1166,7 @@ std::string Azure::BlockListUploadState::next_block_id() { #if defined(_WIN32) #include std::shared_ptr<::Azure::Core::Http::HttpTransport> create_transport( - tiledb::sm::SSLConfig& ssl_cfg) { + const tiledb::sm::SSLConfig& ssl_cfg) { ::Azure::Core::Http::WinHttpTransportOptions transport_opts; if (!ssl_cfg.ca_file().empty()) { @@ -1186,7 +1187,7 @@ std::shared_ptr<::Azure::Core::Http::HttpTransport> create_transport( #else #include std::shared_ptr<::Azure::Core::Http::HttpTransport> create_transport( - tiledb::sm::SSLConfig& ssl_cfg) { + const tiledb::sm::SSLConfig& ssl_cfg) { ::Azure::Core::Http::CurlTransportOptions transport_opts; if (!ssl_cfg.ca_file().empty()) { diff --git a/tiledb/sm/filesystem/azure.h b/tiledb/sm/filesystem/azure.h index 70792379434d..34ff1d2148d2 100644 --- a/tiledb/sm/filesystem/azure.h +++ b/tiledb/sm/filesystem/azure.h @@ -39,6 +39,7 @@ #include "tiledb/common/thread_pool.h" #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/config/config.h" +#include 
"tiledb/sm/filesystem/ssl_config.h" #include "tiledb/sm/misc/constants.h" #include "uri.h" @@ -63,6 +64,51 @@ class directory_entry; namespace sm { +/** + * The Azure-specific configuration parameters. + */ +struct AzureParameters { + AzureParameters() = delete; + + AzureParameters(const Config& config); + + /** The maximum number of parallel requests. */ + uint64_t max_parallel_ops_; + + /** The target block size in a block list upload */ + uint64_t block_list_block_size_; + + /** The maximum size of each value-element in 'write_cache_map_'. */ + uint64_t write_cache_max_size_; + + /** The maximum number of retries. */ + int max_retries_; + + /** The minimum time to wait between retries. */ + std::chrono::milliseconds retry_delay_; + + /** The maximum time to wait between retries. */ + std::chrono::milliseconds max_retry_delay_; + + /** Whether or not to use block list upload. */ + bool use_block_list_upload_; + + /** The Blob Storage account name. */ + std::string account_name_; + + /** The Blob Storage account key. */ + std::string account_key_; + + /** Whether the config specifies a SAS token. */ + bool has_sas_token_; + + /** The Blob Storage endpoint to connect to. */ + std::string blob_endpoint_; + + /** SSL configuration. */ + SSLConfig ssl_cfg_; +}; + class Azure { public: /* ********************************* */ @@ -310,13 +356,14 @@ class Azure { Status write(const URI& uri, const void* buffer, uint64_t length); /** - * Returns a reference to the Azure blob service client. + * Initializes the Azure blob service client and returns a reference to it. * - * Used for testing. Calling code should include the Azure SDK headers to make + * Calling code should include the Azure SDK headers to make * use of the BlobServiceClient. 
*/ const ::Azure::Storage::Blobs::BlobServiceClient& client() const { - return *client_; + assert(azure_params_); + return client_singleton_.get(*azure_params_); } private: @@ -364,6 +411,32 @@ class Azure { Status st_; }; + /** + * Encapsulates access to an Azure BlobServiceClient. + * + * This class ensures that: + * * Callers access the client in an initialized state. + * * The client gets initialized only once even for concurrent accesses. + */ + class AzureClientSingleton { + public: + /** + * Gets a reference to the Azure BlobServiceClient, and initializes it if it + * is not initialized. + * + * @param params The parameters to initialize the client with. + */ + const ::Azure::Storage::Blobs::BlobServiceClient& get( + const AzureParameters& params); + + private: + /** The Azure blob service client. */ + tdb_unique_ptr<::Azure::Storage::Blobs::BlobServiceClient> client_; + + /** Protects from creating the client many times. */ + std::mutex client_init_mtx_; + }; + /* ********************************* */ /* PRIVATE ATTRIBUTES */ /* ********************************* */ @@ -371,8 +444,8 @@ class Azure { /** The VFS thread pool. */ ThreadPool* thread_pool_; - /** The Azure blob service client. */ - tdb_unique_ptr<::Azure::Storage::Blobs::BlobServiceClient> client_; + /** A holder for the Azure blob service client. */ + mutable AzureClientSingleton client_singleton_; /** Maps a blob URI to a write cache buffer. */ std::unordered_map write_cache_map_; @@ -380,20 +453,11 @@ class Azure { /** Protects 'write_cache_map_'. */ std::mutex write_cache_map_lock_; - /** The maximum size of each value-element in 'write_cache_map_'. */ - uint64_t write_cache_max_size_; - - /** The maximum number of parallel requests. */ - uint64_t max_parallel_ops_; - - /** The target block size in a block list upload */ - uint64_t block_list_block_size_; - - /** The minimum time to wait between retries. 
*/ - std::chrono::milliseconds retry_delay_; - - /** Whether or not to use block list upload. */ - bool use_block_list_upload_; + /** + * Contains options to configure connection to Azure. + * After the class becomes C.41 compliant, remove the std::optional. + */ + std::optional azure_params_; /** Maps a blob URI to its block list upload state. */ std::unordered_map @@ -467,7 +531,6 @@ class Azure { * @param length The length of `buffer`. * @param block_id A base64-encoded string that is unique to this block * within the blob. - * @param result The returned future to fetch the async upload result from. * @return Status */ Status upload_block( diff --git a/tiledb/sm/filesystem/gcs.cc b/tiledb/sm/filesystem/gcs.cc index 52a076acdc3a..b8b145f7c934 100644 --- a/tiledb/sm/filesystem/gcs.cc +++ b/tiledb/sm/filesystem/gcs.cc @@ -35,6 +35,17 @@ #include #include +#if defined(_MSC_VER) +#pragma warning(push) +// One abseil file has a warning that fails on Windows when compiling with +// warnings as errors. 
+#pragma warning(disable : 4127) // conditional expression is constant +#endif +#include +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + #include "tiledb/common/common.h" #include "tiledb/common/filesystem/directory_entry.h" #include "tiledb/common/logger.h" @@ -171,15 +182,11 @@ Status GCS::init_client() const { if (!endpoint_.empty()) { client_options.set_endpoint(endpoint_); } - auto client = google::cloud::storage::Client( + client_ = tdb_unique_ptr(tdb_new( + google::cloud::storage::Client, client_options, google::cloud::storage::LimitedTimeRetryPolicy( - std::chrono::milliseconds(request_timeout_ms_))); - client_ = google::cloud::StatusOr(client); - if (!client_) { - return LOG_STATUS(Status_GCSError( - "Failed to initialize GCS Client; " + client_.status().message())); - } + std::chrono::milliseconds(request_timeout_ms_)))); } catch (const std::exception& e) { return LOG_STATUS( Status_GCSError("Failed to initialize GCS: " + std::string(e.what()))); @@ -452,7 +459,8 @@ tuple>> GCS::ls_with_sizes( } auto& results = object_metadata.value(); - const std::string gcs_prefix = uri_dir.is_gcs() ? "gcs://" : "gs://"; + const std::string gcs_prefix = + uri_dir.backend_name() == "gcs" ? "gcs://" : "gs://"; if (absl::holds_alternative( results)) { diff --git a/tiledb/sm/filesystem/gcs.h b/tiledb/sm/filesystem/gcs.h index 9d57abecb25a..30cdc4268a44 100644 --- a/tiledb/sm/filesystem/gcs.h +++ b/tiledb/sm/filesystem/gcs.h @@ -35,16 +35,7 @@ #ifdef HAVE_GCS -#if defined(_MSC_VER) -#pragma warning(push) -// One abseil file has a warning that fails on Windows when compiling with -// warnings as errors. 
-#pragma warning(disable : 4127) // conditional expression is constant -#endif -#include -#if defined(_MSC_VER) -#pragma warning(pop) -#endif +#include #include "tiledb/common/rwlock.h" #include "tiledb/common/status.h" @@ -58,6 +49,12 @@ using namespace tiledb::common; +namespace google::cloud::storage { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +class Client; +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace google::cloud::storage + namespace tiledb { namespace common::filesystem { @@ -430,7 +427,7 @@ class GCS { std::string project_id_; // The GCS REST client. - mutable google::cloud::StatusOr client_; + mutable tdb_unique_ptr client_; /** Maps a object URI to an write cache buffer. */ std::unordered_map write_cache_map_; diff --git a/tiledb/sm/filesystem/hdfs_filesystem.cc b/tiledb/sm/filesystem/hdfs_filesystem.cc index 75eee420fa08..a4ce1dcde676 100644 --- a/tiledb/sm/filesystem/hdfs_filesystem.cc +++ b/tiledb/sm/filesystem/hdfs_filesystem.cc @@ -49,6 +49,8 @@ #include "tiledb/sm/misc/utils.h" #include "uri.h" +#include "hadoop/hdfs.h" + #include #include #include diff --git a/tiledb/sm/filesystem/hdfs_filesystem.h b/tiledb/sm/filesystem/hdfs_filesystem.h index 44ca198e4acf..899fbd884afa 100644 --- a/tiledb/sm/filesystem/hdfs_filesystem.h +++ b/tiledb/sm/filesystem/hdfs_filesystem.h @@ -41,10 +41,13 @@ #include "tiledb/common/status.h" -#include "hadoop/hdfs.h" - using namespace tiledb::common; +// Declarations copied from hadoop/hdfs.h +// We do not include it here to avoid leaking it to consuming code. 
+struct hdfs_internal; +typedef struct hdfs_internal* hdfsFS; + namespace tiledb { namespace common::filesystem { diff --git a/tiledb/sm/filesystem/ls_scanner.h b/tiledb/sm/filesystem/ls_scanner.h index 4f925c55aa0b..92bc435bd9e7 100644 --- a/tiledb/sm/filesystem/ls_scanner.h +++ b/tiledb/sm/filesystem/ls_scanner.h @@ -37,6 +37,7 @@ #include "tiledb/sm/filesystem/uri.h" #include +#include #include #include @@ -73,6 +74,10 @@ class LsStopTraversal : public LsScanException { }; using FileFilter = std::function; +[[maybe_unused]] static bool accept_all_files( + const std::string_view&, uint64_t) { + return true; +} using DirectoryFilter = std::function; /** Static DirectoryFilter used as default argument. */ @@ -350,6 +355,58 @@ class CallbackWrapperCPP { LsCallback cb_; }; +/** + * Implements the `ls_filtered` function for `std::filesystem` which can be used + * for Posix and Win32 + */ +template +LsObjects std_filesystem_ls_filtered( + const URI& parent, F file_filter, D directory_filter, bool recursive) { + /* + * The input URI was useful to the top-level VFS to identify this is a + * regular filesystem path, but we don't need the "file://" qualifier + * anymore and can reason with unqualified strings for the rest of the + * function. 
+ */ + const auto parentstr = parent.to_path(); + + LsObjects qualifyingPaths; + + // awkward way of iterating, avoids bug in OSX + auto begin = std::filesystem::recursive_directory_iterator(parentstr); + auto end = std::filesystem::recursive_directory_iterator(); + + for (auto iter = begin; iter != end; ++iter) { + auto& entry = *iter; + const auto abspath = entry.path().string(); + const auto absuri = URI(abspath); + if (entry.is_directory()) { + if (file_filter(absuri, 0) || directory_filter(absuri)) { + qualifyingPaths.push_back( + std::make_pair(tiledb::sm::URI(abspath).to_string(), 0)); + if (!recursive) { + iter.disable_recursion_pending(); + } + } else { + /* do not descend into directories which don't qualify */ + iter.disable_recursion_pending(); + } + } else { + /* + * A leaf of the filesystem + * (or symbolic link - split to a separate case if we want to descend into + * them) + */ + if (file_filter(absuri, entry.file_size())) { + qualifyingPaths.push_back( + std::make_pair(absuri.to_string(), entry.file_size())); + } + } + } + + return qualifyingPaths; +} + } // namespace tiledb::sm #endif // TILEDB_LS_SCANNER_H diff --git a/tiledb/sm/filesystem/path_win.cc b/tiledb/sm/filesystem/path_win.cc index ded0bcb25bb8..49ee7c4adb15 100644 --- a/tiledb/sm/filesystem/path_win.cc +++ b/tiledb/sm/filesystem/path_win.cc @@ -69,11 +69,13 @@ std::string uri_from_path(const std::string& path) { return str_uri; } -std::string path_from_uri(const std::string& uri) { - if (uri.length() == 0) { +std::string path_from_uri(std::string_view uri_view) { + if (uri_view.length() == 0) { return ""; } + std::string uri(uri_view); + std::string uri_with_scheme = (stdx::string::starts_with(uri, "file://") || // also accept 'file:/x...' 
diff --git a/tiledb/sm/filesystem/path_win.h b/tiledb/sm/filesystem/path_win.h index 1ea8a1e5e9a5..b7f5c429d5db 100644 --- a/tiledb/sm/filesystem/path_win.h +++ b/tiledb/sm/filesystem/path_win.h @@ -61,7 +61,7 @@ std::string uri_from_path(const std::string& path); * @param path The URI to convert. * @status A Windows path. */ -std::string path_from_uri(const std::string& uri); +std::string path_from_uri(std::string_view uri); /** * Converts any '/' to '\\' (single-backslash) and returns the diff --git a/tiledb/sm/filesystem/posix.cc b/tiledb/sm/filesystem/posix.cc index 70d4e8320bcd..58b4f0ab1d5f 100644 --- a/tiledb/sm/filesystem/posix.cc +++ b/tiledb/sm/filesystem/posix.cc @@ -355,7 +355,7 @@ Status Posix::ls( return Status::Ok(); } -std::string Posix::abs_path(const std::string& path) { +std::string Posix::abs_path(std::string_view path) { std::string resolved_path = abs_path_internal(path); // Ensure the returned has the same postfix slash as 'path'. @@ -392,7 +392,7 @@ bool Posix::both_slashes(char a, char b) { return a == '/' && b == '/'; } -std::string Posix::abs_path_internal(const std::string& path) { +std::string Posix::abs_path_internal(std::string_view path) { // Initialize current, home and root std::string current = current_dir(); auto env_home_ptr = getenv("HOME"); @@ -411,15 +411,17 @@ std::string Posix::abs_path_internal(const std::string& path) { // Other cases std::string ret_dir; if (utils::parse::starts_with(path, posix_prefix)) - return path; + return std::string(path); else if (utils::parse::starts_with(path, "/")) - ret_dir = posix_prefix + path; + ret_dir = posix_prefix + std::string(path); else if (utils::parse::starts_with(path, "~/")) - ret_dir = posix_prefix + home + path.substr(1, path.size() - 1); + ret_dir = + posix_prefix + home + std::string(path.substr(1, path.size() - 1)); else if (utils::parse::starts_with(path, "./")) - ret_dir = posix_prefix + current + path.substr(1, path.size() - 1); + ret_dir = + posix_prefix + current + 
std::string(path.substr(1, path.size() - 1)); else - ret_dir = posix_prefix + current + "/" + path; + ret_dir = posix_prefix + current + "/" + std::string(path); adjacent_slashes_dedup(&ret_dir); purge_dots_from_path(&ret_dir); diff --git a/tiledb/sm/filesystem/posix.h b/tiledb/sm/filesystem/posix.h index 958a4df51a1d..1c397e8e21ce 100644 --- a/tiledb/sm/filesystem/posix.h +++ b/tiledb/sm/filesystem/posix.h @@ -35,16 +35,21 @@ #ifndef _WIN32 +#include #include #include +#include +#include #include #include #include +#include "tiledb/common/logger.h" #include "tiledb/common/status.h" #include "tiledb/sm/config/config.h" #include "tiledb/sm/filesystem/filesystem_base.h" +#include "tiledb/sm/filesystem/ls_scanner.h" using namespace tiledb::common; @@ -259,6 +264,29 @@ class Posix : public FilesystemBase { tuple>> ls_with_sizes(const URI& uri) const override; + /** + * Lists objects and object information that start with `prefix`, invoking + * the FilePredicate on each entry collected and the DirectoryPredicate on + * common prefixes for pruning. + * + * @param parent The parent prefix to list sub-paths. + * @param f The FilePredicate to invoke on each object for filtering. + * @param d The DirectoryPredicate to invoke on each common prefix for + * pruning. This is currently unused, but is kept here for future support. + * @param recursive Whether to recursively list subdirectories. + * + * Note: the return type LsObjects does not match the other "ls" methods so as + * to match the S3 equivalent API. + */ + template + LsObjects ls_filtered( + const URI& parent, + F f, + D d = accept_all_dirs, + bool recursive = false) const { + return std_filesystem_ls_filtered(parent, f, d, recursive); + } + /** * Lists files one level deep under a given path. 
* @@ -272,7 +300,7 @@ class Posix : public FilesystemBase { * Returns the absolute posix (string) path of the input in the * form "file://" */ - static std::string abs_path(const std::string& path); + static std::string abs_path(std::string_view path); /** * Returns the directory where the program is executed. @@ -289,7 +317,7 @@ class Posix : public FilesystemBase { static bool both_slashes(char a, char b); // Internal logic for 'abs_path()'. - static std::string abs_path_internal(const std::string& path); + static std::string abs_path_internal(std::string_view path); /** * It takes as input an **absolute** path, and returns it in its canonicalized diff --git a/tiledb/sm/filesystem/s3.cc b/tiledb/sm/filesystem/s3.cc index b0433752cf57..d1d13b5e72d8 100644 --- a/tiledb/sm/filesystem/s3.cc +++ b/tiledb/sm/filesystem/s3.cc @@ -443,6 +443,33 @@ void S3::remove_dir(const URI& uri) const { std::vector paths; throw_if_not_ok(ls(uri, &paths, "")); + + // Bail early if we don't have anything to delete. + if (paths.empty()) { + return; + } + + throw_if_not_ok( + parallel_for(vfs_thread_pool_, 0, paths.size(), [&](size_t i) { + throw_if_not_ok(remove_object(URI(paths[i]))); + return Status::Ok(); + })); + + // Minio changed their delete behavior when an object masks another object + // with the same prefix. Previously, minio would delete any object with + // a matching prefix. The new behavior is to only delete the object masking + // the "directory" of objects below. To handle this we just run a second + // ls to see if we still have paths to remove, and remove them if so. + + paths.clear(); + throw_if_not_ok(ls(uri, &paths, "")); + + // We got everything on the first pass. + if (paths.empty()) { + return; + } + + // Delete the uncovered object prefixes. 
throw_if_not_ok( parallel_for(vfs_thread_pool_, 0, paths.size(), [&](size_t i) { throw_if_not_ok(remove_object(URI(paths[i]))); diff --git a/tiledb/sm/filesystem/test/unit_ls_filtered.cc b/tiledb/sm/filesystem/test/unit_ls_filtered.cc index 8af2ec8f225a..b8a2cd8b9a05 100644 --- a/tiledb/sm/filesystem/test/unit_ls_filtered.cc +++ b/tiledb/sm/filesystem/test/unit_ls_filtered.cc @@ -35,6 +35,18 @@ #include "tiledb/sm/config/config.h" #include "tiledb/sm/filesystem/vfs.h" +namespace tiledb::sm { +/** + * @return true if the URI represents a regular file, false if not + */ +[[maybe_unused]] static bool accept_only_regular_files( + const std::string_view& uri, uint64_t) { + const std::string path = URI(uri).to_path(); + return std::filesystem::is_regular_file(path); +} + +} // namespace tiledb::sm + class VFSTest { public: /** @@ -52,7 +64,8 @@ class VFSTest { , vfs_(&stats_, &io_, &compute_, tiledb::sm::Config()) , test_tree_(test_tree) , prefix_(prefix) - , temp_dir_(prefix_) { + , temp_dir_(prefix_) + , init_open_files_(count_open_files()) { } virtual ~VFSTest() { @@ -63,9 +76,38 @@ class VFSTest { } } - /** FilePredicate for passing to ls_filtered that accepts all files. 
*/ - static bool accept_all_files(const std::string_view&, uint64_t) { - return true; + Status mkdir() const { + return vfs_.create_dir(temp_dir_); + } + +#ifdef __windows__ + std::optional count_open_files() const { + return std::nullopt; + } +#else + std::optional count_open_files() const { + const std::string fddir = "/proc/" + std::to_string(getpid()) + "/fd"; + + std::vector ls; + const auto st = vfs_.ls(tiledb::sm::URI(fddir), &ls); + REQUIRE(st.ok()); + return ls.size(); + } +#endif + + /** + * @return true if the number of open files is the same + * as it was when we started the test + */ + bool check_open_files() const { + const auto maybe_updated_open_files = count_open_files(); + if (maybe_updated_open_files) { + const uint64_t updated_open_files = *maybe_updated_open_files; + return (updated_open_files == init_open_files_); + } else { + /* not enough information to say otherwise */ + return true; + } } /** Resources needed to construct VFS */ @@ -77,40 +119,573 @@ class VFSTest { std::string prefix_; tiledb::sm::URI temp_dir_; + std::optional init_open_files_; + private: tiledb::sm::LsObjects expected_results_; }; -// TODO: Disable shouldfail when file:// or mem:// support is added. -TEST_CASE( - "VFS: Throwing FileFilter ls_recursive", - "[vfs][ls_recursive][!shouldfail]") { - std::string prefix = GENERATE("file://", "mem://"); +/** + * Represents a path used in the test. + * Encapsulates absolute and relative paths, and can be extended for URI + * if we determine that `ls_recursive` should output that instead. 
+ */ +struct TestPath { + VFSTest& vfs_test; + std::filesystem::path relpath; + std::filesystem::path abspath; + uint64_t size; + + TestPath(VFSTest& vfs_test, std::string_view relpath, uint64_t size = 0) + : vfs_test(vfs_test) + , relpath(relpath) + , abspath( + std::filesystem::path(vfs_test.temp_dir_.to_path()).append(relpath)) + , size(size) { + } + + TestPath(const TestPath& copy) + : vfs_test(copy.vfs_test) + , relpath(copy.relpath) + , abspath(copy.abspath) + , size(copy.size) { + } + + /** + * Create a file at the test path. + * @param mkdirs if true, then also create each parent directory in the path + */ + void touch(bool mkdirs = false) { + if (mkdirs) { + std::vector parents; + + tiledb::sm::URI absuri(abspath.string()); + do { + absuri = absuri.parent_path(); + parents.push_back(absuri); + } while (absuri != vfs_test.temp_dir_); + + parents.pop_back(); /* temp_dir_ */ + while (!parents.empty()) { + REQUIRE(vfs_test.vfs_.create_dir(parents.back()).ok()); + parents.pop_back(); + } + } + REQUIRE(vfs_test.vfs_.touch(tiledb::sm::URI(abspath.string())).ok()); + std::filesystem::resize_file(abspath, size); + } + + void mkdir() { + REQUIRE(vfs_test.vfs_.create_dir(tiledb::sm::URI(abspath.string())).ok()); + } + + /** + * @return a string containing the way this is expected + * to appear in the ls_recursive output + */ + std::string lsresult() const { + return tiledb::sm::URI(abspath.string()).to_string(); + } + + bool matches(const std::pair& lsout) const { + return (lsresult() == lsout.first && size == lsout.second); + } +}; + +tiledb::sm::LsObjects sort_by_name(const tiledb::sm::LsObjects& in_objs) { + tiledb::sm::LsObjects out_objs(in_objs); + std::sort( + out_objs.begin(), out_objs.end(), [](const auto& f1, const auto& f2) { + return f1.first < f2.first; + }); + return out_objs; +} + +TEST_CASE("VFS: ls_recursive unfiltered", "[vfs][ls_recursive]") { + std::string prefix = GENERATE("file://"); + prefix += std::filesystem::current_path().string() + 
"/ls_recursive_test/"; + + VFSTest vfs_test({0}, prefix); + const auto mkst = vfs_test.mkdir(); + REQUIRE(mkst.ok()); + + std::vector testpaths = { + TestPath(vfs_test, "a1.txt", 30), + TestPath(vfs_test, "a2.txt", 40), + TestPath(vfs_test, "f1.txt", 10), + TestPath(vfs_test, "f2.txt", 20), + TestPath(vfs_test, "d1/f1.txt", 45), + TestPath(vfs_test, "d1/c1.txt", 55), + TestPath(vfs_test, "d1/d1sub1/d1sub1sub1/g1.txt", 33), + TestPath(vfs_test, "d1/d1sub1/d1sub1sub1/d1sub1sub1sub1/b1.txt", 12), + TestPath(vfs_test, "d1/d1sub1/d1sub1sub1/d1sub1sub1sub1/h1.txt", 33), + }; + + SECTION("Empty directory") { + const auto ls = sort_by_name(vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_all_files, + tiledb::sm::accept_all_dirs)); + CHECK(ls.empty()); + } + + SECTION("Files only") { + testpaths[0].touch(); + testpaths[1].touch(); + testpaths[2].touch(); + testpaths[3].touch(); + + const auto ls = sort_by_name(vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_all_files, + tiledb::sm::accept_all_dirs)); + REQUIRE(ls.size() == 4); + CHECK(testpaths[0].matches(ls[0])); + CHECK(testpaths[1].matches(ls[1])); + CHECK(testpaths[2].matches(ls[2])); + CHECK(testpaths[3].matches(ls[3])); + } + + SECTION("Empty subdirectory") { + auto d1 = TestPath(vfs_test, "d1"); + d1.mkdir(); + + const auto ls = sort_by_name(vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_all_files, + tiledb::sm::accept_all_dirs)); + + CHECK(ls.size() == 1); + if (ls.size() >= 1) { + CHECK(d1.matches(ls[0])); + } + } + + SECTION("Empty subdirectory and files") { + testpaths[0].touch(); + testpaths[1].touch(); + auto d1 = TestPath(vfs_test, "d1"); + d1.mkdir(); + testpaths[2].touch(); + testpaths[3].touch(); + + const auto ls = sort_by_name(vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_all_files, + tiledb::sm::accept_all_dirs)); + CHECK(ls.size() == 5); + if (ls.size() >= 1) { + CHECK(testpaths[0].matches(ls[0])); + } + if 
(ls.size() >= 2) { + CHECK(testpaths[1].matches(ls[1])); + } + if (ls.size() >= 3) { + CHECK(d1.matches(ls[2])); + } + if (ls.size() >= 4) { + CHECK(testpaths[2].matches(ls[3])); + } + if (ls.size() >= 5) { + CHECK(testpaths[3].matches(ls[4])); + } + } + + SECTION("Empty sub-subdirectory") { + auto d1 = TestPath(vfs_test, "d1"); + auto d1sub1 = TestPath(vfs_test, "d1/d1sub1"); + d1.mkdir(); + d1sub1.mkdir(); + + const auto ls = sort_by_name(vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_all_files, + tiledb::sm::accept_all_dirs)); + + CHECK(ls.size() == 2); + if (ls.size() >= 1) { + CHECK(d1.matches(ls[0])); + } + if (ls.size() >= 2) { + CHECK(d1sub1.matches(ls[1])); + } + } + + SECTION("Deeply-nested files") { + auto d1 = TestPath(vfs_test, "d1"); + auto d1sub1 = TestPath(vfs_test, "d1/d1sub1"); + auto d1sub1sub1 = TestPath(vfs_test, "d1/d1sub1/d1sub1sub1"); + auto d1sub1sub1sub1 = + TestPath(vfs_test, "d1/d1sub1/d1sub1sub1/d1sub1sub1sub1"); + d1.mkdir(); + d1sub1.mkdir(); + d1sub1sub1.mkdir(); + d1sub1sub1sub1.mkdir(); + testpaths[7].touch(); + + const auto ls = sort_by_name(vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_all_files, + tiledb::sm::accept_all_dirs)); + + CHECK(ls.size() == 5); + if (ls.size() >= 1) { + CHECK(d1.matches(ls[0])); + } + if (ls.size() >= 2) { + CHECK(d1sub1.matches(ls[1])); + } + if (ls.size() >= 3) { + CHECK(d1sub1sub1.matches(ls[2])); + } + if (ls.size() >= 4) { + CHECK(d1sub1sub1sub1.matches(ls[3])); + } + if (ls.size() >= 5) { + CHECK(testpaths[7].matches(ls[4])); + } + } + + SECTION("Recursion") { + auto d1 = TestPath(vfs_test, "d1"); + auto d1sub1 = TestPath(vfs_test, "d1/d1sub1"); + auto d1sub1sub1 = TestPath(vfs_test, "d1/d1sub1/d1sub1sub1"); + auto d1sub1sub1sub1 = + TestPath(vfs_test, "d1/d1sub1/d1sub1sub1/d1sub1sub1sub1"); + d1.mkdir(); + d1sub1.mkdir(); + d1sub1sub1.mkdir(); + d1sub1sub1sub1.mkdir(); + for (unsigned i = 0; i < testpaths.size(); i++) { + testpaths[i].touch(); + } 
+ + const auto ls = sort_by_name(vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_all_files, + tiledb::sm::accept_all_dirs)); + CHECK(ls.size() == testpaths.size() + 4); + + if (ls.size() >= 1) { + CHECK(testpaths[0].matches(ls[0])); + } + if (ls.size() >= 2) { + CHECK(testpaths[1].matches(ls[1])); + } + if (ls.size() >= 3) { + CHECK(d1.matches(ls[2])); + } + if (ls.size() >= 4) { + CHECK(testpaths[5].matches(ls[3])); + } + if (ls.size() >= 5) { + CHECK(d1sub1.matches(ls[4])); + } + if (ls.size() >= 6) { + CHECK(d1sub1sub1.matches(ls[5])); + } + if (ls.size() >= 7) { + CHECK(d1sub1sub1sub1.matches(ls[6])); + } + if (ls.size() >= 8) { + CHECK(testpaths[7].matches(ls[7])); + } + if (ls.size() >= 9) { + CHECK(testpaths[8].matches(ls[8])); + } + if (ls.size() >= 10) { + CHECK(testpaths[6].matches(ls[9])); + } + if (ls.size() >= 11) { + CHECK(testpaths[4].matches(ls[10])); + } + if (ls.size() >= 12) { + CHECK(testpaths[2].matches(ls[11])); + } + if (ls.size() >= 13) { + CHECK(testpaths[3].matches(ls[12])); + } + } + + /* all tests must close all the files that they opened, in normal use of the + * API */ + REQUIRE(vfs_test.check_open_files()); +} + +TEST_CASE("VFS: ls_recursive file filter", "[vfs][ls_recursive]") { + std::string prefix = GENERATE("file://"); + prefix += std::filesystem::current_path().string() + "/ls_recursive_test/"; + + VFSTest vfs_test({0}, prefix); + const auto mkst = vfs_test.mkdir(); + REQUIRE(mkst.ok()); + + std::vector testpaths = { + TestPath(vfs_test, "year=2021/month=8/day=27/log1.txt", 30), + TestPath(vfs_test, "year=2021/month=8/day=27/log2.txt", 31), + TestPath(vfs_test, "year=2021/month=8/day=28/log1.txt", 40), + TestPath(vfs_test, "year=2021/month=8/day=28/log2.txt", 41), + TestPath(vfs_test, "year=2021/month=9/day=27/log1.txt", 50), + TestPath(vfs_test, "year=2021/month=9/day=27/log2.txt", 51), + TestPath(vfs_test, "year=2021/month=9/day=28/log1.txt", 60), + TestPath(vfs_test, "year=2021/month=9/day=28/log2.txt", 
61), + TestPath(vfs_test, "year=2022/month=8/day=27/log1.txt", 70), + TestPath(vfs_test, "year=2022/month=8/day=27/log2.txt", 71), + TestPath(vfs_test, "year=2022/month=8/day=28/log1.txt", 80), + TestPath(vfs_test, "year=2022/month=8/day=28/log2.txt", 81), + TestPath(vfs_test, "year=2022/month=9/day=27/log1.txt", 90), + TestPath(vfs_test, "year=2022/month=9/day=27/log2.txt", 91), + TestPath(vfs_test, "year=2022/month=9/day=28/log1.txt", 20), + TestPath(vfs_test, "year=2022/month=9/day=28/log2.txt", 21), + }; + + SECTION("File predicate returning false is discarded from results") { + for (auto& testpath : testpaths) { + testpath.touch(true); + } + + /* + * This also shows us that the file filter is only called on the leaves, + * since "log1.txt" only appears in the basename component of the test + * paths. + */ + auto log_is_1 = [](const std::string_view& path, uint64_t) -> bool { + return (path.find("log1.txt") != std::string::npos); + }; + + auto ls = vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, log_is_1, tiledb::sm::accept_all_dirs); + + /* directories appear in the result set, we aren't interested in those, + * and the callback doesn't (yet?) 
have a way to descend into a directory + * without also including it in the result set */ + std::erase_if(ls, [](const auto& obj) { return obj.second == 0; }); + + CHECK(ls.size() == testpaths.size() / 2); + + ls = sort_by_name(ls); /* ensure order matches the testpaths order */ + + for (uint64_t i = 0; i < testpaths.size(); i += 2) { + CHECK(testpaths[i].matches(ls[i / 2])); + } + } +} + +TEST_CASE("VFS: ls_recursive directory filter", "[vfs][ls_recursive]") { + std::string prefix = GENERATE("file://"); + prefix += std::filesystem::current_path().string() + "/ls_recursive_test/"; + + VFSTest vfs_test({0}, prefix); + const auto mkst = vfs_test.mkdir(); + REQUIRE(mkst.ok()); + + std::vector testpaths = { + TestPath(vfs_test, "year=2021/month=8/day=27/log1.txt", 30), + TestPath(vfs_test, "year=2021/month=8/day=27/log2.txt", 31), + TestPath(vfs_test, "year=2021/month=8/day=28/log1.txt", 40), + TestPath(vfs_test, "year=2021/month=8/day=28/log2.txt", 41), + TestPath(vfs_test, "year=2021/month=9/day=28/log1.txt", 50), + TestPath(vfs_test, "year=2021/month=9/day=28/log2.txt", 51), + TestPath(vfs_test, "year=2021/month=9/day=29/log1.txt", 60), + TestPath(vfs_test, "year=2021/month=9/day=29/log2.txt", 61), + TestPath(vfs_test, "year=2022/month=8/day=27/log1.txt", 70), + TestPath(vfs_test, "year=2022/month=8/day=27/log2.txt", 71), + TestPath(vfs_test, "year=2022/month=8/day=28/log1.txt", 80), + TestPath(vfs_test, "year=2022/month=8/day=28/log2.txt", 81), + TestPath(vfs_test, "year=2022/month=9/day=28/log1.txt", 90), + TestPath(vfs_test, "year=2022/month=9/day=28/log2.txt", 91), + TestPath(vfs_test, "year=2022/month=9/day=29/log1.txt", 20), + TestPath(vfs_test, "year=2022/month=9/day=29/log2.txt", 21), + }; + + /* create all files and dirs */ + for (auto& testpath : testpaths) { + testpath.touch(true); + } + + SECTION("Directory predicate returning true is filtered from results") { + auto month_is_august = [](std::string_view dirname) -> bool { + if (dirname.find("month") == 
std::string::npos) { + /* haven't descended far enough yet */ + return true; + } else if (dirname.find("month=8") == std::string::npos) { + /* not august */ + return false; + } else { + return true; + } + }; + + auto ls = vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_only_regular_files, + month_is_august); + + /* directories appear in the result set, we aren't interested in those, + * and the callback doesn't (yet?) have a way to descend into a directory + * without also including it in the result set */ + std::erase_if(ls, [](const auto& obj) { return obj.second == 0; }); + + CHECK(ls.size() == testpaths.size() / 2); + + ls = sort_by_name(ls); /* ensure order matches the testpaths order */ + + CHECK(ls.size() == 8); + if (ls.size() >= 1) { + testpaths[0].matches(ls[0]); + } + if (ls.size() >= 2) { + testpaths[1].matches(ls[1]); + } + if (ls.size() >= 3) { + testpaths[2].matches(ls[2]); + } + if (ls.size() >= 4) { + testpaths[3].matches(ls[3]); + } + if (ls.size() >= 5) { + testpaths[8].matches(ls[4]); + } + if (ls.size() >= 6) { + testpaths[9].matches(ls[5]); + } + if (ls.size() >= 7) { + testpaths[10].matches(ls[6]); + } + if (ls.size() >= 8) { + testpaths[11].matches(ls[7]); + } + } + + /* note: this should be true for POSIX but is not for S3 without hierarchical + * list API */ + SECTION( + "Directory predicate returning true does not descend into directory") { + /* + * In the test data we only find "day=29" beneath "month=9", + * so the `ls` should throw with this directory filter if and only if + * we descend into directories with "month=9". 
+ */ + std::string monthstr = "month=9"; + auto throw_if_day_is_29 = [&monthstr](std::string_view dirname) -> bool { + if (dirname.find("month") == std::string::npos) { + /* haven't descended far enough yet */ + return true; + } else if (dirname.find(monthstr) == std::string::npos) { + /* not august */ + return false; + } else if (dirname.find("day=29") == std::string::npos) { + /* not the 29th */ + return true; + } else { + /* it is the 29th, throw */ + throw std::logic_error("Throwing FileFilter: day=29"); + } + }; + + CHECK_THROWS_AS( + vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_only_regular_files, + throw_if_day_is_29), + std::logic_error); + CHECK_THROWS_WITH( + vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_only_regular_files, + throw_if_day_is_29), + Catch::Matchers::ContainsSubstring("Throwing FileFilter: day=29")); + + monthstr = "month=8"; + + /* now the result should be the same as the first section */ + auto ls = vfs_test.vfs_.ls_recursive( + vfs_test.temp_dir_, + tiledb::sm::accept_only_regular_files, + throw_if_day_is_29); + + /* directories appear in the result set, we aren't interested in those, + * and the callback doesn't (yet?) 
have a way to descend into a directory + * without also including it in the result set */ + std::erase_if(ls, [](const auto& obj) { return obj.second == 0; }); + + CHECK(ls.size() == testpaths.size() / 2); + + ls = sort_by_name(ls); /* ensure order matches the testpaths order */ + + CHECK(ls.size() == 8); + if (ls.size() >= 1) { + testpaths[0].matches(ls[0]); + } + if (ls.size() >= 2) { + testpaths[1].matches(ls[1]); + } + if (ls.size() >= 3) { + testpaths[2].matches(ls[2]); + } + if (ls.size() >= 4) { + testpaths[3].matches(ls[3]); + } + if (ls.size() >= 5) { + testpaths[8].matches(ls[4]); + } + if (ls.size() >= 6) { + testpaths[9].matches(ls[5]); + } + if (ls.size() >= 7) { + testpaths[10].matches(ls[6]); + } + if (ls.size() >= 8) { + testpaths[11].matches(ls[7]); + } + } + + /* + * Note that since we throw in the previous section, this check + * demonstrates that all directories are closed whether or not we return + * from ls_recursive normally + */ + REQUIRE(vfs_test.check_open_files()); +} + +TEST_CASE("VFS: Throwing FileFilter ls_recursive", "[vfs][ls_recursive]") { + std::string prefix = GENERATE("file://"); prefix += std::filesystem::current_path().string() + "/ls_filtered_test"; VFSTest vfs_test({0}, prefix); - auto file_filter = [](const std::string_view&, uint64_t) -> bool { + const auto mkst = vfs_test.mkdir(); + REQUIRE(mkst.ok()); + + auto always_throw_filter = [](const std::string_view&, uint64_t) -> bool { throw std::logic_error("Throwing FileFilter"); }; SECTION("Throwing FileFilter with 0 objects should not throw") { CHECK_NOTHROW(vfs_test.vfs_.ls_recursive( - vfs_test.temp_dir_, file_filter, tiledb::sm::accept_all_dirs)); + vfs_test.temp_dir_, always_throw_filter, tiledb::sm::accept_all_dirs)); + } + SECTION( + "Throwing FileFilter will not throw if ls_recursive only visits " + "directories") { } SECTION("Throwing FileFilter with N objects should throw") { vfs_test.vfs_.touch(vfs_test.temp_dir_.join_path("file")).ok(); CHECK_THROWS_AS( - 
vfs_test.vfs_.ls_recursive(vfs_test.temp_dir_, file_filter), + vfs_test.vfs_.ls_recursive(vfs_test.temp_dir_, always_throw_filter), std::logic_error); CHECK_THROWS_WITH( - vfs_test.vfs_.ls_recursive(vfs_test.temp_dir_, file_filter), + vfs_test.vfs_.ls_recursive(vfs_test.temp_dir_, always_throw_filter), Catch::Matchers::ContainsSubstring("Throwing FileFilter")); } + + /* all tests must close all the files that they opened, regardless of + * exception behavior */ + REQUIRE(vfs_test.check_open_files()); } TEST_CASE( "VFS: ls_recursive throws for unsupported filesystems", "[vfs][ls_recursive]") { - std::string prefix = GENERATE("file://", "mem://"); + std::string prefix = GENERATE("mem://"); prefix += std::filesystem::current_path().string() + "/ls_filtered_test"; VFSTest vfs_test({1}, prefix); @@ -118,7 +693,9 @@ TEST_CASE( DYNAMIC_SECTION(backend << " unsupported backend should throw") { CHECK_THROWS_WITH( vfs_test.vfs_.ls_recursive( - vfs_test.temp_dir_, VFSTest::accept_all_files), + vfs_test.temp_dir_, tiledb::sm::accept_all_files), Catch::Matchers::ContainsSubstring("storage backend is not supported")); } + + REQUIRE(vfs_test.check_open_files()); } diff --git a/tiledb/sm/filesystem/uri.cc b/tiledb/sm/filesystem/uri.cc index 0c7823387850..a0efc94aff0f 100644 --- a/tiledb/sm/filesystem/uri.cc +++ b/tiledb/sm/filesystem/uri.cc @@ -63,7 +63,7 @@ URI::URI(const char* path) : URI((path == nullptr) ? 
std::string("") : std::string(path)) { } -URI::URI(const std::string& path) { +URI::URI(std::string_view path) { if (path.empty()) uri_ = ""; else if (URI::is_file(path)) @@ -76,7 +76,7 @@ URI::URI(const std::string& path) { uri_ = ""; } -URI::URI(const std::string& path, const bool& get_abs) { +URI::URI(std::string_view path, const bool& get_abs) { if (path.empty()) { uri_ = ""; } else if (URI::is_file(path)) { @@ -132,7 +132,7 @@ bool URI::is_invalid() const { return uri_.empty(); } -bool URI::is_file(const std::string& path) { +bool URI::is_file(std::string_view path) { #ifdef _WIN32 return utils::parse::starts_with(path, "file://") || path.find("://") == std::string::npos; @@ -142,7 +142,7 @@ bool URI::is_file(const std::string& path) { #endif } -bool URI::contains(const std::string_view& str) const { +bool URI::contains(std::string_view str) const { return uri_.find(str, 0) != std::string::npos; } @@ -151,13 +151,13 @@ bool URI::is_file() const { return is_file(uri_); #else // Observed: semantics here differ from sibling - // is_file(const std::string& path), here is missing + // is_file(std::string_view path), here is missing // additional check using "://". 
return utils::parse::starts_with(uri_, "file:///"); #endif } -bool URI::is_hdfs(const std::string& path) { +bool URI::is_hdfs(std::string_view path) { return utils::parse::starts_with(path, "hdfs://"); } @@ -165,7 +165,7 @@ bool URI::is_hdfs() const { return utils::parse::starts_with(uri_, "hdfs://"); } -bool URI::is_s3(const std::string& path) { +bool URI::is_s3(std::string_view path) { return utils::parse::starts_with(path, "s3://") || utils::parse::starts_with(path, "http://") || utils::parse::starts_with(path, "https://"); @@ -177,7 +177,7 @@ bool URI::is_s3() const { utils::parse::starts_with(uri_, "https://"); } -bool URI::is_azure(const std::string& path) { +bool URI::is_azure(std::string_view path) { return utils::parse::starts_with(path, "azure://"); } @@ -185,7 +185,7 @@ bool URI::is_azure() const { return utils::parse::starts_with(uri_, "azure://"); } -bool URI::is_gcs(const std::string& path) { +bool URI::is_gcs(std::string_view path) { return utils::parse::starts_with(path, "gcs://") || utils::parse::starts_with(path, "gs://"); } @@ -195,7 +195,7 @@ bool URI::is_gcs() const { utils::parse::starts_with(uri_, "gs://"); } -bool URI::is_memfs(const std::string& path) { +bool URI::is_memfs(std::string_view path) { return utils::parse::starts_with(path, "mem://"); } @@ -203,7 +203,7 @@ bool URI::is_memfs() const { return utils::parse::starts_with(uri_, "mem://"); } -bool URI::is_tiledb(const std::string& path) { +bool URI::is_tiledb(std::string_view path) { return utils::parse::starts_with(path, "tiledb://"); } @@ -341,6 +341,10 @@ std::string URI::to_string() const { return uri_; } +URI::operator std::string_view() const noexcept { + return std::string_view(uri_); +} + bool URI::operator==(const URI& uri) const { return uri_ == uri.uri_; } diff --git a/tiledb/sm/filesystem/uri.h b/tiledb/sm/filesystem/uri.h index 35974859d18c..a9e2e6e8af98 100644 --- a/tiledb/sm/filesystem/uri.h +++ b/tiledb/sm/filesystem/uri.h @@ -79,7 +79,7 @@ class URI { * @param path 
String that gets converted into an absolute path and stored * as a URI. */ - explicit URI(const std::string& path); + explicit URI(std::string_view path); /** * Constructor. @@ -87,7 +87,7 @@ class URI { * @param path * @param get_abs should local files become absolute */ - explicit URI(const std::string& path, const bool& get_abs); + explicit URI(std::string_view path, const bool& get_abs); /** Destructor. */ ~URI(); @@ -125,7 +125,7 @@ class URI { * @param path The path to be checked. * @return The result of the check. */ - static bool is_file(const std::string& path); + static bool is_file(std::string_view path); /** * Checks if the URI is file. @@ -140,7 +140,7 @@ class URI { * @param str the string to search for in the URI * @return The result of the check. */ - bool contains(const std::string_view& str) const; + bool contains(std::string_view str) const; /** * Checks if the input path is HDFS. @@ -148,7 +148,7 @@ class URI { * @param path The path to be checked. * @return The result of the check. */ - static bool is_hdfs(const std::string& path); + static bool is_hdfs(std::string_view path); /** * Checks if the URI is HDFS. @@ -163,7 +163,7 @@ class URI { * @param path The path to be checked. * @return The result of the check. */ - static bool is_s3(const std::string& path); + static bool is_s3(std::string_view path); /** * Checks if the URI is S3. @@ -178,7 +178,7 @@ class URI { * @param path The path to be checked. * @return The result of the check. */ - static bool is_azure(const std::string& path); + static bool is_azure(std::string_view path); /** * Checks if the URI is Azure. @@ -193,7 +193,7 @@ class URI { * @param path The path to be checked. * @return The result of the check. */ - static bool is_gcs(const std::string& path); + static bool is_gcs(std::string_view path); /** * Checks if the URI is gcs. @@ -208,7 +208,7 @@ class URI { * @param path The path to be checked. * @return The result of the check. 
*/ - static bool is_memfs(const std::string& path); + static bool is_memfs(std::string_view path); /** * Checks if the URI is mem. @@ -223,7 +223,7 @@ class URI { * @param path The path to be checked. * @return The result of the check. */ - static bool is_tiledb(const std::string& path); + static bool is_tiledb(std::string_view path); /** * Checks if the URI is TileDB. @@ -317,6 +317,8 @@ class URI { /** For comparing URIs alphanumerically. */ bool operator>(const URI& uri) const; + operator std::string_view() const noexcept; + private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ diff --git a/tiledb/sm/filesystem/vfs.cc b/tiledb/sm/filesystem/vfs.cc index 4d24aad32f20..3b3295731922 100644 --- a/tiledb/sm/filesystem/vfs.cc +++ b/tiledb/sm/filesystem/vfs.cc @@ -118,12 +118,13 @@ VFS::VFS( /* API */ /* ********************************* */ -std::string VFS::abs_path(const std::string& path) { +std::string VFS::abs_path(std::string_view path) { // workaround for older clang (llvm 3.5) compilers (issue #828) - std::string path_copy = path; + std::string path_copy(path); #ifdef _WIN32 { - std::string norm_sep_path = path_win::slashes_to_backslashes(path); + std::string norm_sep_path = + path_win::slashes_to_backslashes(std::string(path)); if (path_win::is_win_path(norm_sep_path)) return path_win::uri_from_path(Win::abs_path(norm_sep_path)); else if (URI::is_file(path)) diff --git a/tiledb/sm/filesystem/vfs.h b/tiledb/sm/filesystem/vfs.h index 08798df4c1ce..1281ad82aead 100644 --- a/tiledb/sm/filesystem/vfs.h +++ b/tiledb/sm/filesystem/vfs.h @@ -332,7 +332,7 @@ class VFS : private VFSBase, protected S3_within_VFS { * @param path The input path. * @return The string with the absolute path. */ - static std::string abs_path(const std::string& path); + static std::string abs_path(std::string_view path); /** * Return a config object containing the VFS parameters. 
All other non-VFS @@ -514,7 +514,7 @@ class VFS : private VFSBase, protected S3_within_VFS { * the FilePredicate on each entry collected and the DirectoryPredicate on * common prefixes for pruning. * - * Currently only S3 is supported for ls_recursive. + * Currently this API is only supported for Posix and S3. * * @param parent The parent prefix to list sub-paths. * @param f The FilePredicate to invoke on each object for filtering. @@ -530,11 +530,42 @@ class VFS : private VFSBase, protected S3_within_VFS { [[maybe_unused]] D d = accept_all_dirs) const { LsObjects results; try { - if (parent.is_s3()) { + if (parent.is_file()) { +#ifdef _WIN32 + results = win_.ls_filtered(parent, f, d, true); +#else + results = posix_.ls_filtered(parent, f, d, true); +#endif + } else if (parent.is_s3()) { #ifdef HAVE_S3 results = s3().ls_filtered(parent, f, d, true); #else throw filesystem::VFSException("TileDB was built without S3 support"); +#endif + } else if (parent.is_gcs()) { +#ifdef HAVE_GCS + throw filesystem::VFSException( + "Recursive ls over " + parent.backend_name() + + " storage backend is not supported."); +#else + throw filesystem::VFSException("TileDB was built without GCS support"); +#endif + } else if (parent.is_azure()) { +#ifdef HAVE_AZURE + throw filesystem::VFSException( + "Recursive ls over " + parent.backend_name() + + " storage backend is not supported."); +#else + throw filesystem::VFSException( + "TileDB was built without Azure support"); +#endif + } else if (parent.is_hdfs()) { +#ifdef HAVE_HDFS + throw filesystem::VFSException( + "Recursive ls over " + parent.backend_name() + + " storage backend is not supported."); +#else + throw filesystem::VFSException("TileDB was built without HDFS support"); #endif } else { throw filesystem::VFSException( diff --git a/tiledb/sm/filesystem/win.h b/tiledb/sm/filesystem/win.h index f4732d304db8..e5d3ded22069 100644 --- a/tiledb/sm/filesystem/win.h +++ b/tiledb/sm/filesystem/win.h @@ -42,6 +42,7 @@ #include 
"tiledb/common/status.h" #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/config/config.h" +#include "tiledb/sm/filesystem/ls_scanner.h" using namespace tiledb::common; @@ -163,6 +164,29 @@ class Win { tuple>> ls_with_sizes(const URI& path) const; + /** + * Lists objects and object information that start with `prefix`, invoking + * the FilePredicate on each entry collected and the DirectoryPredicate on + * common prefixes for pruning. + * + * @param parent The parent prefix to list sub-paths. + * @param f The FilePredicate to invoke on each object for filtering. + * @param d The DirectoryPredicate to invoke on each common prefix for + * pruning. This is currently unused, but is kept here for future support. + * @param recursive Whether to recursively list subdirectories. + * + * Note: the return type LsObjects does not match the other "ls" methods so as + * to match the S3 equivalent API. + */ + template + LsObjects ls_filtered( + const URI& parent, + F f, + D d = accept_all_dirs, + bool recursive = false) const { + return std_filesystem_ls_filtered(parent, f, d, recursive); + } + /** * Move a given filesystem path. 
* diff --git a/tiledb/sm/filter/test/CMakeLists.txt b/tiledb/sm/filter/test/CMakeLists.txt index b1fc76cc3d65..71844c964ea8 100644 --- a/tiledb/sm/filter/test/CMakeLists.txt +++ b/tiledb/sm/filter/test/CMakeLists.txt @@ -1,5 +1,5 @@ # -# tiledb/sm/filter/CMakeLists.txt +# tiledb/sm/filter/test/CMakeLists.txt # # The MIT License # @@ -37,7 +37,7 @@ commence(unit_test filter_pipeline) conclude(unit_test) commence(unit_test run_filter_pipeline) - this_target_object_libraries(filter_pipeline) + this_target_object_libraries(filter_pipeline mem_helpers) this_target_sources( main.cc add_1_in_place_filter.cc diff --git a/tiledb/sm/filter/test/tile_data_generator.h b/tiledb/sm/filter/test/tile_data_generator.h index 2b88491e16ae..1caecc356830 100644 --- a/tiledb/sm/filter/test/tile_data_generator.h +++ b/tiledb/sm/filter/test/tile_data_generator.h @@ -35,6 +35,7 @@ #include #include +#include #include #include "tiledb/sm/tile/tile.h" @@ -59,12 +60,14 @@ class TileDataGenerator { /** * Returns an empty writer tile with enough room for the input data. */ - WriterTile create_empty_writer_tile() const { - return WriterTile( + shared_ptr create_empty_writer_tile( + shared_ptr memory_tracker) const { + return make_shared( constants::format_version, datatype(), cell_size(), - original_tile_size()); + original_tile_size(), + memory_tracker); } /** @@ -76,14 +79,17 @@ class TileDataGenerator { * test data and the writer offsets tile with the (optional) input offsets * data. */ - virtual std::tuple> - create_writer_tiles() const = 0; + virtual std:: + tuple, std::optional>> + create_writer_tiles(shared_ptr memory_tracker) const = 0; /** * Returns a tile with the data from the filtered buffer and enough room * for the original tile data. 
**/ - Tile create_filtered_buffer_tile(FilteredBuffer& filtered_buffer) const { + Tile create_filtered_buffer_tile( + FilteredBuffer& filtered_buffer, + shared_ptr memory_tracker) const { return Tile( constants::format_version, datatype(), @@ -91,7 +97,8 @@ class TileDataGenerator { 0, original_tile_size(), filtered_buffer.data(), - filtered_buffer.size()); + filtered_buffer.size(), + memory_tracker); } /** Returns the size of the original unfiltered data. */ @@ -148,13 +155,13 @@ class IncrementTileDataGenerator : public TileDataGenerator { } } - std::tuple> create_writer_tiles() - const override { + tuple, std::optional>> + create_writer_tiles(shared_ptr memory_tracker) const override { // Writer tile. - auto tile = create_empty_writer_tile(); + auto tile = create_empty_writer_tile(memory_tracker); T value{}; for (uint64_t index = 0; index < num_elements_; ++index) { - CHECK_NOTHROW(tile.write(&value, index * sizeof(T), sizeof(T))); + CHECK_NOTHROW(tile->write(&value, index * sizeof(T), sizeof(T))); ++value; } @@ -174,19 +181,20 @@ class IncrementTileDataGenerator : public TileDataGenerator { offsets.pop_back(); // Write the offsets tile. 
- WriterTile offsets_tile( + auto offsets_tile = make_shared( constants::format_version, Datatype::UINT64, constants::cell_var_offset_size, - offsets.size() * constants::cell_var_offset_size); + offsets.size() * constants::cell_var_offset_size, + memory_tracker); for (uint64_t index = 0; index < offsets.size(); ++index) { - CHECK_NOTHROW(offsets_tile.write( + CHECK_NOTHROW(offsets_tile->write( &offsets[index], index * constants::cell_var_offset_size, constants::cell_var_offset_size)); } - return {std::move(tile), std::move(offsets_tile)}; + return {tile, offsets_tile}; } Datatype datatype() const override { diff --git a/tiledb/sm/filter/test/unit_run_filter_pipeline.cc b/tiledb/sm/filter/test/unit_run_filter_pipeline.cc index c068d61c8334..2feedb2d0ee5 100644 --- a/tiledb/sm/filter/test/unit_run_filter_pipeline.cc +++ b/tiledb/sm/filter/test/unit_run_filter_pipeline.cc @@ -62,6 +62,7 @@ #include #include +#include #include #include "../bit_width_reduction_filter.h" @@ -165,30 +166,31 @@ class SimpleVariableTestData { void check_run_pipeline_full( Config& config, ThreadPool& tp, - WriterTile& tile, - std::optional& offsets_tile, + shared_ptr& tile, + std::optional>& offsets_tile, FilterPipeline& pipeline, const TileDataGenerator* test_data, - const FilteredTileChecker& filtered_buffer_checker) { + const FilteredTileChecker& filtered_buffer_checker, + shared_ptr memory_tracker) { // Run the pipeline forward. CHECK(pipeline .run_forward( &dummy_stats, - &tile, - offsets_tile.has_value() ? &offsets_tile.value() : nullptr, + tile.get(), + offsets_tile.has_value() ? offsets_tile.value().get() : nullptr, &tp) .ok()); // Check the original unfiltered data was removed. - CHECK(tile.size() == 0); + CHECK(tile->size() == 0); // Check the filtered buffer has the expected data. - auto filtered_buffer = tile.filtered_buffer(); + auto filtered_buffer = tile->filtered_buffer(); filtered_buffer_checker.check(filtered_buffer); // Run the data in reverse. 
auto unfiltered_tile = - test_data->create_filtered_buffer_tile(filtered_buffer); + test_data->create_filtered_buffer_tile(filtered_buffer, memory_tracker); ChunkData chunk_data; unfiltered_tile.load_chunk_data(chunk_data); CHECK(pipeline @@ -217,25 +219,26 @@ void check_run_pipeline_full( void check_run_pipeline_roundtrip( Config& config, ThreadPool& tp, - WriterTile& tile, - std::optional& offsets_tile, + shared_ptr tile, + std::optional>& offsets_tile, FilterPipeline& pipeline, - TileDataGenerator* test_data) { + const TileDataGenerator* test_data, + shared_ptr memory_tracker) { // Run the pipeline forward. CHECK(pipeline .run_forward( &dummy_stats, - &tile, - offsets_tile.has_value() ? &offsets_tile.value() : nullptr, + tile.get(), + offsets_tile.has_value() ? offsets_tile.value().get() : nullptr, &tp) .ok()); // Check the original unfiltered data was removed. - CHECK(tile.size() == 0); + CHECK(tile->size() == 0); // Run the data in reverse. - auto unfiltered_tile = - test_data->create_filtered_buffer_tile(tile.filtered_buffer()); + auto unfiltered_tile = test_data->create_filtered_buffer_tile( + tile->filtered_buffer(), memory_tracker); ChunkData chunk_data; unfiltered_tile.load_chunk_data(chunk_data); CHECK(pipeline @@ -259,10 +262,13 @@ TEST_CASE("Filter: Test empty pipeline", "[filter][empty-pipeline]") { Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. IncrementTileDataGenerator tile_data_generator( 100); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); std::vector elements_per_chunk{100}; // Create pipeline. 
@@ -281,7 +287,8 @@ TEST_CASE("Filter: Test empty pipeline", "[filter][empty-pipeline]") { offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } TEST_CASE( @@ -290,10 +297,13 @@ TEST_CASE( Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. IncrementTileDataGenerator tile_data_generator( 100); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); std::vector elements_per_chunk{100}; // Create pipeline. @@ -312,7 +322,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } TEST_CASE( @@ -321,10 +332,13 @@ TEST_CASE( Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. SimpleVariableTestData test_data{}; const auto& tile_data_generator = test_data.tile_data_generator(); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); const auto& elements_per_chunk = test_data.elements_per_chunk(); // Create pipeline to test and expected filtered data checker. @@ -341,7 +355,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } TEST_CASE( @@ -350,10 +365,13 @@ TEST_CASE( Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. 
IncrementTileDataGenerator tile_data_generator( 100); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); std::vector elements_per_chunk{100}; FilterPipeline pipeline; @@ -373,7 +391,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } SECTION("- Multi-stage") { @@ -394,7 +413,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } } @@ -405,10 +425,13 @@ TEST_CASE( Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. SimpleVariableTestData test_data{}; const auto& tile_data_generator = test_data.tile_data_generator(); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); const auto& elements_per_chunk = test_data.elements_per_chunk(); FilterPipeline pipeline; @@ -428,7 +451,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } SECTION("- Multi-stage") { @@ -450,7 +474,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } } @@ -461,10 +486,13 @@ TEST_CASE( Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. IncrementTileDataGenerator tile_data_generator( 100); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); std::vector elements_per_chunk{100}; // Create pipeline to test. 
@@ -484,7 +512,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } SECTION("- Multi-stage") { @@ -504,7 +533,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } } @@ -515,10 +545,13 @@ TEST_CASE( Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. SimpleVariableTestData test_data{}; const auto& tile_data_generator = test_data.tile_data_generator(); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); const auto& elements_per_chunk = test_data.elements_per_chunk(); FilterPipeline pipeline; @@ -538,7 +571,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } SECTION("- Multi-stage") { @@ -558,7 +592,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } } @@ -569,10 +604,13 @@ TEST_CASE( Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. IncrementTileDataGenerator tile_data_generator( 100); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); std::vector elements_per_chunk{100}; // Create filter pipeline. @@ -595,7 +633,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } TEST_CASE( @@ -605,10 +644,13 @@ TEST_CASE( Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. 
SimpleVariableTestData test_data{}; const auto& tile_data_generator = test_data.tile_data_generator(); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); const auto& elements_per_chunk = test_data.elements_per_chunk(); FilterPipeline pipeline; @@ -630,7 +672,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } TEST_CASE("Filter: Test pseudo-checksum", "[filter][pseudo-checksum]") { @@ -638,10 +681,13 @@ TEST_CASE("Filter: Test pseudo-checksum", "[filter][pseudo-checksum]") { Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. IncrementTileDataGenerator tile_data_generator( 100); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); std::vector elements_per_chunk{100}; // Create filter pipeline. @@ -663,7 +709,8 @@ TEST_CASE("Filter: Test pseudo-checksum", "[filter][pseudo-checksum]") { offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } SECTION("- Multi-stage") { @@ -693,7 +740,8 @@ TEST_CASE("Filter: Test pseudo-checksum", "[filter][pseudo-checksum]") { offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } } @@ -703,10 +751,13 @@ TEST_CASE( Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. 
SimpleVariableTestData test_data{}; const auto& tile_data_generator = test_data.tile_data_generator(); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); const auto& elements_per_chunk = test_data.elements_per_chunk(); // Create filter pipeline. @@ -729,7 +780,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } SECTION("- Multi-stage") { @@ -761,7 +813,8 @@ TEST_CASE( offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } } @@ -770,10 +823,13 @@ TEST_CASE("Filter: Test pipeline modify filter", "[filter][modify]") { Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. IncrementTileDataGenerator tile_data_generator( 100); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); std::vector elements_per_chunk{100}; // Create filter pipeline. @@ -804,7 +860,8 @@ TEST_CASE("Filter: Test pipeline modify filter", "[filter][modify]") { offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } TEST_CASE("Filter: Test pipeline modify filter var", "[filter][modify][var]") { @@ -812,10 +869,12 @@ TEST_CASE("Filter: Test pipeline modify filter var", "[filter][modify][var]") { Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. 
SimpleVariableTestData test_data{}; auto&& [tile, offsets_tile] = - test_data.tile_data_generator().create_writer_tiles(); + test_data.tile_data_generator().create_writer_tiles(tracker); const auto& elements_per_chunk = test_data.elements_per_chunk(); FilterPipeline pipeline; @@ -845,7 +904,8 @@ TEST_CASE("Filter: Test pipeline modify filter var", "[filter][modify][var]") { offsets_tile, pipeline, &test_data.tile_data_generator(), - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } TEST_CASE("Filter: Test pipeline copy", "[filter][copy]") { @@ -853,10 +913,13 @@ TEST_CASE("Filter: Test pipeline copy", "[filter][copy]") { Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Set-up test data. IncrementTileDataGenerator tile_data_generator( 100); - auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); std::vector elements_per_chunk{100}; const uint64_t expected_checksum = 5350; @@ -894,7 +957,8 @@ TEST_CASE("Filter: Test pipeline copy", "[filter][copy]") { offsets_tile, pipeline, &tile_data_generator, - filtered_buffer_checker); + filtered_buffer_checker, + tracker); } TEST_CASE("Filter: Test random pipeline", "[filter][random]") { @@ -902,6 +966,8 @@ TEST_CASE("Filter: Test random pipeline", "[filter][random]") { Config config; ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + // Create an encryption key. EncryptionKey encryption_key; REQUIRE(encryption_key @@ -947,7 +1013,8 @@ TEST_CASE("Filter: Test random pipeline", "[filter][random]") { 100); for (int i = 0; i < 100; i++) { // Create fresh input tiles. 
- auto&& [tile, offsets_tile] = tile_data_generator.create_writer_tiles(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); // Construct a random pipeline FilterPipeline pipeline; @@ -980,6 +1047,149 @@ TEST_CASE("Filter: Test random pipeline", "[filter][random]") { // input data. // Run the pipeline tests. check_run_pipeline_roundtrip( - config, tp, tile, offsets_tile, pipeline, &tile_data_generator); + config, + tp, + tile, + offsets_tile, + pipeline, + &tile_data_generator, + tracker); + } +} + +TEST_CASE("Filter: Test compression", "[filter][compression]") { + // Create resources for running pipeline tests. + Config config; + ThreadPool tp(4); + FilterPipeline pipeline; + auto tracker = tiledb::test::create_test_memory_tracker(); + + // Set-up test data. + IncrementTileDataGenerator tile_data_generator( + 100); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); + + SECTION("- Simple") { + pipeline.add_filter(Add1InPlace(Datatype::UINT64)); + pipeline.add_filter(Add1OutOfPlace(Datatype::UINT64)); + pipeline.add_filter( + CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); + + // Check the pipelines run forward and backward without error and returns + // the input data. + check_run_pipeline_roundtrip( + config, + tp, + tile, + offsets_tile, + pipeline, + &tile_data_generator, + tracker); + } + + SECTION("- With checksum stage") { + pipeline.add_filter(PseudoChecksumFilter(Datatype::UINT64)); + pipeline.add_filter( + CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); + + // Check the pipelines run forward and backward without error and returns + // the input data. 
+ check_run_pipeline_roundtrip( + config, + tp, + tile, + offsets_tile, + pipeline, + &tile_data_generator, + tracker); + } + + SECTION("- With multiple stages") { + pipeline.add_filter(Add1InPlace(Datatype::UINT64)); + pipeline.add_filter(PseudoChecksumFilter(Datatype::UINT64)); + pipeline.add_filter(Add1OutOfPlace(Datatype::UINT64)); + pipeline.add_filter( + CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); + + // Check the pipelines run forward and backward without error and returns + // the input data. + check_run_pipeline_roundtrip( + config, + tp, + tile, + offsets_tile, + pipeline, + &tile_data_generator, + tracker); + } +} + +TEST_CASE("Filter: Test compression var", "[filter][compression][var]") { + // Create TileDB resources for running the filter pipeline. + Config config; + ThreadPool tp(4); + auto tracker = tiledb::test::create_test_memory_tracker(); + + // Set-up test data. + SimpleVariableTestData test_data{}; + const auto& tile_data_generator = test_data.tile_data_generator(); + auto&& [tile, offsets_tile] = + tile_data_generator.create_writer_tiles(tracker); + + FilterPipeline pipeline; + + SECTION("- Simple") { + pipeline.add_filter(Add1InPlace(Datatype::UINT64)); + pipeline.add_filter(Add1OutOfPlace(Datatype::UINT64)); + pipeline.add_filter( + CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); + + // Check the pipelines run forward and backward without error and returns + // the input data. + check_run_pipeline_roundtrip( + config, + tp, + tile, + offsets_tile, + pipeline, + &tile_data_generator, + tracker); + } + + SECTION("- With checksum stage") { + pipeline.add_filter(PseudoChecksumFilter(Datatype::UINT64)); + pipeline.add_filter( + CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); + + // Check the pipelines run forward and backward without error and returns + // the input data. 
+ check_run_pipeline_roundtrip( + config, + tp, + tile, + offsets_tile, + pipeline, + &tile_data_generator, + tracker); + } + + SECTION("- With multiple stages") { + pipeline.add_filter(Add1InPlace(Datatype::UINT64)); + pipeline.add_filter(PseudoChecksumFilter(Datatype::UINT64)); + pipeline.add_filter(Add1OutOfPlace(Datatype::UINT64)); + pipeline.add_filter( + CompressionFilter(tiledb::sm::Compressor::LZ4, 5, Datatype::UINT64)); + + // Check the pipelines run forward and backward without error and returns + // the input data. + check_run_pipeline_roundtrip( + config, + tp, + tile, + offsets_tile, + pipeline, + &tile_data_generator, + tracker); } } diff --git a/tiledb/sm/filter/webp_filter.h b/tiledb/sm/filter/webp_filter.h index 5842f6351bec..cd6648a06cb4 100644 --- a/tiledb/sm/filter/webp_filter.h +++ b/tiledb/sm/filter/webp_filter.h @@ -40,6 +40,7 @@ constexpr bool webp_filter_exists = false; #endif // TILEDB_WEBP #include "tiledb/common/common.h" +#include "tiledb/common/pmr.h" #include "tiledb/sm/enums/filter_option.h" #include "tiledb/sm/enums/filter_type.h" #include "tiledb/sm/filter/filter.h" diff --git a/tiledb/sm/fragment/fragment_info.cc b/tiledb/sm/fragment/fragment_info.cc index ec6e3988273a..e83d42687a20 100644 --- a/tiledb/sm/fragment/fragment_info.cc +++ b/tiledb/sm/fragment/fragment_info.cc @@ -33,6 +33,7 @@ #include "tiledb/sm/fragment/fragment_info.h" #include "tiledb/common/common.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array/array_directory.h" #include "tiledb/sm/array_schema/dimension.h" @@ -750,8 +751,9 @@ shared_ptr FragmentInfo::get_array_schema(uint32_t fid) { } EncryptionKey encryption_key; + auto tracker = resources_->ephemeral_memory_tracker(); return ArrayDirectory::load_array_schema_from_uri( - *resources_, schema_uri, encryption_key); + *resources_, schema_uri, encryption_key, tracker); } Status FragmentInfo::get_array_schema_name( @@ 
-856,10 +858,11 @@ Status FragmentInfo::load(const ArrayDirectory& array_dir) { } // Get the array schemas and fragment metadata. + auto memory_tracker = resources_->create_memory_tracker(); std::vector> fragment_metadata; std::tie(array_schema_latest_, array_schemas_all_, fragment_metadata) = load_array_schemas_and_fragment_metadata( - *resources_, array_dir, nullptr, enc_key_); + *resources_, array_dir, memory_tracker, enc_key_); auto fragment_num = (uint32_t)fragment_metadata.size(); // Get fragment sizes @@ -951,9 +954,12 @@ Status FragmentInfo::load_and_replace( return Status::Ok(); } -tuple>> +tuple, std::vector>> load_consolidated_fragment_meta( - ContextResources& resources, const URI& uri, const EncryptionKey& enc_key) { + ContextResources& resources, + const URI& uri, + const EncryptionKey& enc_key, + shared_ptr memory_tracker) { auto timer_se = resources.stats().start_timer("sm_read_load_consolidated_frag_meta"); @@ -962,12 +968,12 @@ load_consolidated_fragment_meta( throw StatusException(Status_FragmentInfoError( "Cannot load consolidated fragment metadata; URI is empty.")); - auto&& tile = GenericTileIO::load(resources, uri, 0, enc_key); + auto tile = GenericTileIO::load(resources, uri, 0, enc_key, memory_tracker); - resources.stats().add_counter("consolidated_frag_meta_size", tile.size()); + resources.stats().add_counter("consolidated_frag_meta_size", tile->size()); uint32_t fragment_num; - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); fragment_num = deserializer.read(); uint64_t name_size, offset; @@ -982,7 +988,7 @@ load_consolidated_fragment_meta( ret.emplace_back(name, offset); } - return {std::move(tile), std::move(ret)}; + return {tile, std::move(ret)}; } std::tuple< @@ -992,17 +998,18 @@ std::tuple< FragmentInfo::load_array_schemas_and_fragment_metadata( ContextResources& resources, const ArrayDirectory& array_dir, - MemoryTracker* memory_tracker, + shared_ptr memory_tracker, const 
EncryptionKey& enc_key) { auto timer_se = resources.stats().start_timer( "sm_load_array_schemas_and_fragment_metadata"); // Load array schemas + auto tracker = resources.ephemeral_memory_tracker(); std::shared_ptr array_schema_latest; std::unordered_map> array_schemas_all; std::tie(array_schema_latest, array_schemas_all) = - array_dir.load_array_schemas(enc_key); + array_dir.load_array_schemas(enc_key, tracker); const auto filtered_fragment_uris = [&]() { auto timer_se = @@ -1018,10 +1025,9 @@ FragmentInfo::load_array_schemas_and_fragment_metadata( meta_uris.size()); throw_if_not_ok( parallel_for(&resources.compute_tp(), 0, meta_uris.size(), [&](size_t i) { - auto&& [tile_opt, offsets] = - load_consolidated_fragment_meta(resources, meta_uris[i], enc_key); - fragment_metadata_tiles[i] = - make_shared(HERE(), std::move(tile_opt)); + auto&& [tile_opt, offsets] = load_consolidated_fragment_meta( + resources, meta_uris[i], enc_key, memory_tracker); + fragment_metadata_tiles[i] = tile_opt; offsets_vectors[i] = std::move(offsets); return Status::Ok(); })); @@ -1122,10 +1128,10 @@ tuple> FragmentInfo::load( auto meta = make_shared( HERE(), resources_, - nullptr, array_schema_latest, new_fragment_uri, timestamp_range, + resources_->create_memory_tracker(), !sparse); meta->load(enc_key_, nullptr, 0, array_schemas_all_); diff --git a/tiledb/sm/fragment/fragment_info.h b/tiledb/sm/fragment/fragment_info.h index eb06049543ec..5f5b3bc5dbd1 100644 --- a/tiledb/sm/fragment/fragment_info.h +++ b/tiledb/sm/fragment/fragment_info.h @@ -313,7 +313,7 @@ class FragmentInfo { load_array_schemas_and_fragment_metadata( ContextResources& resources, const ArrayDirectory& array_dir, - MemoryTracker* memory_tracker, + shared_ptr memory_tracker, const EncryptionKey& enc_key); /** Returns the vector with the info about individual fragments. */ @@ -349,6 +349,11 @@ class FragmentInfo { return config_; } + /** Returns the context resources. 
*/ + inline ContextResources* resources() const { + return resources_; + } + // Accessors /** Returns array schemas latest. */ diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index b337e23bc4dc..fa2f2691911c 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -77,15 +77,33 @@ class FragmentMetadataStatusException : public StatusException { /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ -FragmentMetadata::FragmentMetadata() { +FragmentMetadata::FragmentMetadata( + ContextResources* resources, shared_ptr memory_tracker) + : resources_(resources) + , memory_tracker_(memory_tracker) + , rtree_(RTree(nullptr, constants::rtree_fanout, memory_tracker_)) + , tile_offsets_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) + , tile_var_offsets_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) + , tile_var_sizes_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) + , tile_validity_offsets_( + memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) + , tile_min_buffer_(memory_tracker_->get_resource(MemoryType::TILE_MIN_VALS)) + , tile_min_var_buffer_( + memory_tracker_->get_resource(MemoryType::TILE_MIN_VALS)) + , tile_max_buffer_(memory_tracker_->get_resource(MemoryType::TILE_MAX_VALS)) + , tile_max_var_buffer_( + memory_tracker_->get_resource(MemoryType::TILE_MAX_VALS)) + , tile_sums_(memory_tracker_->get_resource(MemoryType::TILE_SUMS)) + , tile_null_counts_( + memory_tracker_->get_resource(MemoryType::TILE_NULL_COUNTS)) { } FragmentMetadata::FragmentMetadata( ContextResources* resources, - MemoryTracker* memory_tracker, const shared_ptr& array_schema, const URI& fragment_uri, const std::pair& timestamp_range, + shared_ptr memory_tracker, bool dense, bool has_timestamps, bool has_deletes_meta) @@ -102,8 +120,23 @@ FragmentMetadata::FragmentMetadata( , has_delete_meta_(has_deletes_meta) , sparse_tile_num_(0) , meta_file_size_(0) - , 
rtree_(RTree(&array_schema_->domain(), constants::rtree_fanout)) + , rtree_(RTree( + &array_schema_->domain(), constants::rtree_fanout, memory_tracker_)) , tile_index_base_(0) + , tile_offsets_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) + , tile_var_offsets_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) + , tile_var_sizes_(memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) + , tile_validity_offsets_( + memory_tracker_->get_resource(MemoryType::TILE_OFFSETS)) + , tile_min_buffer_(memory_tracker_->get_resource(MemoryType::TILE_MIN_VALS)) + , tile_min_var_buffer_( + memory_tracker_->get_resource(MemoryType::TILE_MIN_VALS)) + , tile_max_buffer_(memory_tracker_->get_resource(MemoryType::TILE_MAX_VALS)) + , tile_max_var_buffer_( + memory_tracker_->get_resource(MemoryType::TILE_MAX_VALS)) + , tile_sums_(memory_tracker_->get_resource(MemoryType::TILE_SUMS)) + , tile_null_counts_( + memory_tracker_->get_resource(MemoryType::TILE_NULL_COUNTS)) , version_(array_schema_->write_version()) , timestamp_range_(timestamp_range) , array_uri_(array_schema_->array_uri()) { @@ -732,7 +765,7 @@ void FragmentMetadata::init(const NDRange& non_empty_domain) { std::vector> FragmentMetadata::load( ContextResources& resources, - MemoryTracker* memory_tracker, + shared_ptr memory_tracker, const shared_ptr array_schema_latest, const std::unordered_map>& array_schemas_all, @@ -763,20 +796,20 @@ std::vector> FragmentMetadata::load( metadata = make_shared( HERE(), &resources, - memory_tracker, array_schema_latest, sf.uri_, sf.timestamp_range_, + memory_tracker, !sparse); } else { // Fragment format version > 2 metadata = make_shared( HERE(), &resources, - memory_tracker, array_schema_latest, sf.uri_, - sf.timestamp_range_); + sf.timestamp_range_, + memory_tracker); } // Potentially find the basic fragment metadata in the consolidated @@ -1283,7 +1316,7 @@ std::string FragmentMetadata::encode_name(const std::string& name) const { const unsigned idx = iter->second; - 
auto attributes = array_schema_->attributes(); + auto& attributes = array_schema_->attributes(); for (unsigned i = 0; i < attributes.size(); ++i) { const std::string attr_name = attributes[i]->name(); if (attr_name == name) { @@ -1471,9 +1504,9 @@ void FragmentMetadata::load_fragment_min_max_sum_null_count( auto tile = read_generic_tile_from_file( encryption_key, gt_offsets_.fragment_min_max_sum_null_count_offset_); resources_->stats().add_counter( - "read_fragment_min_max_sum_null_count_size", tile.size()); + "read_fragment_min_max_sum_null_count_size", tile->size()); - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); load_fragment_min_max_sum_null_count(deserializer); loaded_metadata_.fragment_min_max_sum_null_count_ = true; @@ -1494,9 +1527,9 @@ void FragmentMetadata::load_processed_conditions( auto tile = read_generic_tile_from_file( encryption_key, gt_offsets_.processed_conditions_offsets_); resources_->stats().add_counter( - "read_processed_conditions_size", tile.size()); + "read_processed_conditions_size", tile->size()); - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); load_processed_conditions(deserializer); loaded_metadata_.processed_conditions_ = true; @@ -1545,7 +1578,7 @@ const NDRange& FragmentMetadata::mbr(uint64_t tile_idx) const { return rtree_.leaf(tile_idx); } -const std::vector& FragmentMetadata::mbrs() const { +const tdb::pmr::vector& FragmentMetadata::mbrs() const { return rtree_.leaves(); } @@ -1938,8 +1971,7 @@ TileMetadata FragmentMetadata::get_tile_metadata( unsigned dim_idx = 0; const NDRange* mbr = nullptr; if (is_dim) { - throw_if_not_ok( - array_schema_->domain().get_dimension_index(name, &dim_idx)); + dim_idx = array_schema_->domain().get_dimension_index(name); mbr = &rtree_.leaf(tile_idx); } @@ -2048,20 +2080,19 @@ void FragmentMetadata::load_rtree(const EncryptionKey& encryption_key) { } auto tile = 
read_generic_tile_from_file(encryption_key, gt_offsets_.rtree_); - resources_->stats().add_counter("read_rtree_size", tile.size()); + resources_->stats().add_counter("read_rtree_size", tile->size()); // Use the serialized buffer size to approximate memory usage of the rtree. if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - tile.size(), MemoryTracker::MemoryType::RTREE)) { + !memory_tracker_->take_memory(tile->size(), MemoryType::RTREE)) { throw FragmentMetadataStatusException( "Cannot load R-tree; Insufficient memory budget; Needed " + - std::to_string(tile.size()) + " but only had " + + std::to_string(tile->size()) + " but only had " + std::to_string(memory_tracker_->get_memory_available()) + " from budget " + std::to_string(memory_tracker_->get_memory_budget())); } - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); rtree_.deserialize(deserializer, &array_schema_->domain(), version_); loaded_metadata_.rtree_ = true; @@ -2070,7 +2101,7 @@ void FragmentMetadata::load_rtree(const EncryptionKey& encryption_key) { void FragmentMetadata::free_rtree() { auto freed = rtree_.free_memory(); if (memory_tracker_ != nullptr) { - memory_tracker_->release_memory(freed, MemoryTracker::MemoryType::RTREE); + memory_tracker_->release_memory(freed, MemoryType::RTREE); } loaded_metadata_.rtree_ = false; } @@ -2080,8 +2111,7 @@ void FragmentMetadata::free_tile_offsets() { std::lock_guard lock(tile_offsets_mtx_[i]); if (memory_tracker_ != nullptr) { memory_tracker_->release_memory( - tile_offsets_[i].size() * sizeof(uint64_t), - MemoryTracker::MemoryType::TILE_OFFSETS); + tile_offsets_[i].size() * sizeof(uint64_t), MemoryType::TILE_OFFSETS); } tile_offsets_[i].clear(); loaded_metadata_.tile_offsets_[i] = false; @@ -2092,7 +2122,7 @@ void FragmentMetadata::free_tile_offsets() { if (memory_tracker_ != nullptr) { memory_tracker_->release_memory( tile_var_offsets_[i].size() * sizeof(uint64_t), - 
MemoryTracker::MemoryType::TILE_OFFSETS); + MemoryType::TILE_OFFSETS); } tile_var_offsets_[i].clear(); loaded_metadata_.tile_var_offsets_[i] = false; @@ -2102,8 +2132,7 @@ void FragmentMetadata::free_tile_offsets() { std::lock_guard lock(tile_offsets_mtx_[i]); if (memory_tracker_ != nullptr) { memory_tracker_->release_memory( - tile_offsets_[i].size() * sizeof(uint64_t), - MemoryTracker::MemoryType::TILE_OFFSETS); + tile_offsets_[i].size() * sizeof(uint64_t), MemoryType::TILE_OFFSETS); } tile_offsets_[i].clear(); loaded_metadata_.tile_offsets_[i] = false; @@ -2114,7 +2143,7 @@ void FragmentMetadata::free_tile_offsets() { if (memory_tracker_ != nullptr) { memory_tracker_->release_memory( tile_validity_offsets_[i].size() * sizeof(uint64_t), - MemoryTracker::MemoryType::TILE_OFFSETS); + MemoryType::TILE_OFFSETS); } tile_validity_offsets_[i].clear(); loaded_metadata_.tile_validity_offsets_[i] = false; @@ -2125,7 +2154,7 @@ void FragmentMetadata::free_tile_offsets() { if (memory_tracker_ != nullptr) { memory_tracker_->release_memory( tile_var_sizes_[i].size() * sizeof(uint64_t), - MemoryTracker::MemoryType::TILE_OFFSETS); + MemoryType::TILE_OFFSETS); } tile_var_sizes_[i].clear(); loaded_metadata_.tile_var_sizes_[i] = false; @@ -2445,9 +2474,9 @@ void FragmentMetadata::load_tile_offsets( auto tile = read_generic_tile_from_file( encryption_key, gt_offsets_.tile_offsets_[idx]); - resources_->stats().add_counter("read_tile_offsets_size", tile.size()); + resources_->stats().add_counter("read_tile_offsets_size", tile->size()); - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); load_tile_offsets(idx, deserializer); loaded_metadata_.tile_offsets_[idx] = true; @@ -2472,9 +2501,9 @@ void FragmentMetadata::load_tile_var_offsets( auto tile = read_generic_tile_from_file( encryption_key, gt_offsets_.tile_var_offsets_[idx]); - resources_->stats().add_counter("read_tile_var_offsets_size", tile.size()); + 
resources_->stats().add_counter("read_tile_var_offsets_size", tile->size()); - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); load_tile_var_offsets(idx, deserializer); loaded_metadata_.tile_var_offsets_[idx] = true; @@ -2494,9 +2523,9 @@ void FragmentMetadata::load_tile_var_sizes( auto tile = read_generic_tile_from_file( encryption_key, gt_offsets_.tile_var_sizes_[idx]); - resources_->stats().add_counter("read_tile_var_sizes_size", tile.size()); + resources_->stats().add_counter("read_tile_var_sizes_size", tile->size()); - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); load_tile_var_sizes(idx, deserializer); loaded_metadata_.tile_var_sizes_[idx] = true; @@ -2517,9 +2546,9 @@ void FragmentMetadata::load_tile_validity_offsets( auto tile = read_generic_tile_from_file( encryption_key, gt_offsets_.tile_validity_offsets_[idx]); resources_->stats().add_counter( - "read_tile_validity_offsets_size", tile.size()); + "read_tile_validity_offsets_size", tile->size()); - ConstBuffer cbuff(tile.data(), tile.size()); + ConstBuffer cbuff(tile->data(), tile->size()); load_tile_validity_offsets(idx, &cbuff); loaded_metadata_.tile_validity_offsets_[idx] = true; @@ -2539,9 +2568,9 @@ void FragmentMetadata::load_tile_min_values( auto tile = read_generic_tile_from_file( encryption_key, gt_offsets_.tile_min_offsets_[idx]); - resources_->stats().add_counter("read_tile_min_size", tile.size()); + resources_->stats().add_counter("read_tile_min_size", tile->size()); - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); load_tile_min_values(idx, deserializer); loaded_metadata_.tile_min_[idx] = true; @@ -2561,9 +2590,9 @@ void FragmentMetadata::load_tile_max_values( auto tile = read_generic_tile_from_file( encryption_key, gt_offsets_.tile_max_offsets_[idx]); - resources_->stats().add_counter("read_tile_max_size", 
tile.size()); + resources_->stats().add_counter("read_tile_max_size", tile->size()); - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); load_tile_max_values(idx, deserializer); loaded_metadata_.tile_max_[idx] = true; @@ -2583,9 +2612,9 @@ void FragmentMetadata::load_tile_sum_values( auto tile = read_generic_tile_from_file( encryption_key, gt_offsets_.tile_sum_offsets_[idx]); - resources_->stats().add_counter("read_tile_sum_size", tile.size()); + resources_->stats().add_counter("read_tile_sum_size", tile->size()); - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); load_tile_sum_values(idx, deserializer); loaded_metadata_.tile_sum_[idx] = true; @@ -2605,9 +2634,9 @@ void FragmentMetadata::load_tile_null_count_values( auto tile = read_generic_tile_from_file( encryption_key, gt_offsets_.tile_null_count_offsets_[idx]); - resources_->stats().add_counter("read_tile_null_count_size", tile.size()); + resources_->stats().add_counter("read_tile_null_count_size", tile->size()); - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); load_tile_null_count_values(idx, deserializer); loaded_metadata_.tile_null_count_[idx] = true; @@ -2891,8 +2920,7 @@ void FragmentMetadata::load_tile_offsets(Deserializer& deserializer) { auto size = tile_offsets_num * sizeof(uint64_t); if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - size, MemoryTracker::MemoryType::TILE_OFFSETS)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_OFFSETS)) { throw FragmentMetadataStatusException( "Cannot load tile offsets; Insufficient memory budget; Needed " + std::to_string(size) + " but only had " + @@ -2921,8 +2949,7 @@ void FragmentMetadata::load_tile_offsets( if (tile_offsets_num != 0) { auto size = tile_offsets_num * sizeof(uint64_t); if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - 
size, MemoryTracker::MemoryType::TILE_OFFSETS)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_OFFSETS)) { throw FragmentMetadataStatusException( "Cannot load tile offsets; Insufficient memory budget; Needed " + std::to_string(size) + " but only had " + @@ -2963,8 +2990,7 @@ void FragmentMetadata::load_tile_var_offsets(Deserializer& deserializer) { auto size = tile_var_offsets_num * sizeof(uint64_t); if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - size, MemoryTracker::MemoryType::TILE_OFFSETS)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_OFFSETS)) { throw FragmentMetadataStatusException( "Cannot load tile var offsets; Insufficient memory budget; " "Needed " + @@ -2994,8 +3020,7 @@ void FragmentMetadata::load_tile_var_offsets( if (tile_var_offsets_num != 0) { auto size = tile_var_offsets_num * sizeof(uint64_t); if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - size, MemoryTracker::MemoryType::TILE_OFFSETS)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_OFFSETS)) { throw FragmentMetadataStatusException( "Cannot load tile var offsets; Insufficient memory budget; " "Needed " + @@ -3034,8 +3059,7 @@ void FragmentMetadata::load_tile_var_sizes(Deserializer& deserializer) { auto size = tile_var_sizes_num * sizeof(uint64_t); if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - size, MemoryTracker::MemoryType::TILE_OFFSETS)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_OFFSETS)) { throw FragmentMetadataStatusException( "Cannot load tile var sizes; Insufficient memory budget; " "Needed " + @@ -3064,8 +3088,7 @@ void FragmentMetadata::load_tile_var_sizes( if (tile_var_sizes_num != 0) { auto size = tile_var_sizes_num * sizeof(uint64_t); if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - size, MemoryTracker::MemoryType::TILE_OFFSETS)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_OFFSETS)) { throw FragmentMetadataStatusException( "Cannot load tile 
var sizes; Insufficient memory budget; " "Needed " + @@ -3095,8 +3118,7 @@ void FragmentMetadata::load_tile_validity_offsets( if (tile_validity_offsets_num != 0) { auto size = tile_validity_offsets_num * sizeof(uint64_t); if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - size, MemoryTracker::MemoryType::TILE_OFFSETS)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_OFFSETS)) { throw FragmentMetadataStatusException( "Cannot load tile validity offsets; Insufficient memory budget; " "Needed " + @@ -3140,8 +3162,7 @@ void FragmentMetadata::load_tile_min_values( if (buffer_size != 0) { auto size = buffer_size + var_buffer_size; if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - size, MemoryTracker::MemoryType::MIN_MAX_SUM_NULL_COUNT)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_MIN_VALS)) { throw FragmentMetadataStatusException( "Cannot load min values; Insufficient memory budget; Needed " + std::to_string(size) + " but only had " + @@ -3185,8 +3206,7 @@ void FragmentMetadata::load_tile_max_values( if (buffer_size != 0) { auto size = buffer_size + var_buffer_size; if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - size, MemoryTracker::MemoryType::MIN_MAX_SUM_NULL_COUNT)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_MAX_VALS)) { throw FragmentMetadataStatusException( "Cannot load max values; Insufficient memory budget; Needed " + std::to_string(size) + " but only had " + @@ -3224,8 +3244,7 @@ void FragmentMetadata::load_tile_sum_values( if (tile_sum_num != 0) { auto size = tile_sum_num * sizeof(uint64_t); if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - size, MemoryTracker::MemoryType::MIN_MAX_SUM_NULL_COUNT)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_SUMS)) { throw FragmentMetadataStatusException( "Cannot load sum values; Insufficient memory budget; Needed " + std::to_string(size) + " but only had " + @@ -3257,8 +3276,7 @@ void 
FragmentMetadata::load_tile_null_count_values( if (tile_null_count_num != 0) { auto size = tile_null_count_num * sizeof(uint64_t); if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - size, MemoryTracker::MemoryType::MIN_MAX_SUM_NULL_COUNT)) { + !memory_tracker_->take_memory(size, MemoryType::TILE_NULL_COUNTS)) { throw FragmentMetadataStatusException( "Cannot load null count values; Insufficient memory budget; " "Needed " + @@ -3586,9 +3604,10 @@ void FragmentMetadata::load_v1_v2( std::string(constants::fragment_metadata_filename)); // Read metadata GenericTileIO tile_io(*resources_, fragment_metadata_uri); - auto tile = tile_io.read_generic(0, encryption_key, resources_->config()); + auto tile = tile_io.read_generic( + 0, encryption_key, resources_->config(), memory_tracker_); - resources_->stats().add_counter("read_frag_meta_size", tile.size()); + resources_->stats().add_counter("read_frag_meta_size", tile->size()); // Pre-v10 format fragments we need to set the schema and schema name to // the "old" schema. 
This way "old" fragments are still loaded fine @@ -3604,7 +3623,7 @@ void FragmentMetadata::load_v1_v2( } // Deserialize - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); load_version(deserializer); load_non_empty_domain(deserializer); load_mbrs(deserializer); @@ -3865,14 +3884,15 @@ void FragmentMetadata::store_rtree( resources_->stats().add_counter("write_rtree_size", *nbytes); } -WriterTile FragmentMetadata::write_rtree() { +shared_ptr FragmentMetadata::write_rtree() { rtree_.build_tree(); SizeComputationSerializer size_computation_serializer; rtree_.serialize(size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); rtree_.serialize(serializer); return tile; @@ -3919,14 +3939,15 @@ void FragmentMetadata::write_non_empty_domain(Serializer& serializer) const { } } -Tile FragmentMetadata::read_generic_tile_from_file( +shared_ptr FragmentMetadata::read_generic_tile_from_file( const EncryptionKey& encryption_key, uint64_t offset) const { URI fragment_metadata_uri = fragment_uri_.join_path( std::string(constants::fragment_metadata_filename)); // Read metadata GenericTileIO tile_io(*resources_, fragment_metadata_uri); - return tile_io.read_generic(offset, encryption_key, resources_->config()); + return tile_io.read_generic( + offset, encryption_key, resources_->config(), memory_tracker_); } void FragmentMetadata::read_file_footer( @@ -3939,13 +3960,12 @@ void FragmentMetadata::read_file_footer( // Get footer offset get_footer_offset_and_size(footer_offset, footer_size); - tile = make_shared(HERE(), Tile::from_generic(*footer_size)); + tile = Tile::from_generic(*footer_size, memory_tracker_); resources_->stats().add_counter("read_frag_meta_size", *footer_size); 
if (memory_tracker_ != nullptr && - !memory_tracker_->take_memory( - *footer_size, MemoryTracker::MemoryType::FOOTER)) { + !memory_tracker_->take_memory(*footer_size, MemoryType::FOOTER)) { throw FragmentMetadataStatusException( "Cannot load file footer; Insufficient memory budget; Needed " + std::to_string(*footer_size) + " but only had " + @@ -3963,22 +3983,22 @@ void FragmentMetadata::read_file_footer( void FragmentMetadata::write_generic_tile_to_file( const EncryptionKey& encryption_key, - WriterTile& tile, + shared_ptr tile, uint64_t* nbytes) const { URI fragment_metadata_uri = fragment_uri_.join_path( std::string(constants::fragment_metadata_filename)); GenericTileIO tile_io(*resources_, fragment_metadata_uri); - tile_io.write_generic(&tile, encryption_key, nbytes); + tile_io.write_generic(tile, encryption_key, nbytes); } -void FragmentMetadata::write_footer_to_file(WriterTile& tile) const { +void FragmentMetadata::write_footer_to_file(shared_ptr tile) const { URI fragment_metadata_uri = fragment_uri_.join_path( std::string(constants::fragment_metadata_filename)); - uint64_t size = tile.size(); - throw_if_not_ok( - resources_->vfs().write(fragment_metadata_uri, tile.data(), tile.size())); + uint64_t size = tile->size(); + throw_if_not_ok(resources_->vfs().write( + fragment_metadata_uri, tile->data(), tile->size())); // Write the size in the end if there is at least one var-sized dimension if (!array_schema_->domain().all_dims_fixed() || version_ >= 10) { @@ -3992,9 +4012,10 @@ void FragmentMetadata::store_tile_offsets( SizeComputationSerializer size_computation_serializer; write_tile_offsets(idx, size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); write_tile_offsets(idx, serializer); 
write_generic_tile_to_file(encryption_key, tile, nbytes); @@ -4019,9 +4040,10 @@ void FragmentMetadata::store_tile_var_offsets( SizeComputationSerializer size_computation_serializer; write_tile_var_offsets(idx, size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); write_tile_var_offsets(idx, serializer); write_generic_tile_to_file(encryption_key, tile, nbytes); @@ -4047,9 +4069,10 @@ void FragmentMetadata::store_tile_var_sizes( SizeComputationSerializer size_computation_serializer; write_tile_var_sizes(idx, size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); write_tile_var_sizes(idx, serializer); write_generic_tile_to_file(encryption_key, tile, nbytes); @@ -4074,9 +4097,10 @@ void FragmentMetadata::store_tile_validity_offsets( SizeComputationSerializer size_computation_serializer; write_tile_validity_offsets(idx, size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); write_tile_validity_offsets(idx, serializer); write_generic_tile_to_file(encryption_key, tile, nbytes); @@ -4102,9 +4126,10 @@ void FragmentMetadata::store_tile_mins( SizeComputationSerializer size_computation_serializer; write_tile_mins(idx, size_computation_serializer); - WriterTile 
tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); write_tile_mins(idx, serializer); write_generic_tile_to_file(encryption_key, tile, nbytes); @@ -4136,9 +4161,10 @@ void FragmentMetadata::store_tile_maxs( SizeComputationSerializer size_computation_serializer; write_tile_maxs(idx, size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); write_tile_maxs(idx, serializer); write_generic_tile_to_file(encryption_key, tile, nbytes); @@ -4170,9 +4196,10 @@ void FragmentMetadata::store_tile_sums( SizeComputationSerializer size_computation_serializer; write_tile_sums(idx, size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); write_tile_sums(idx, serializer); write_generic_tile_to_file(encryption_key, tile, nbytes); @@ -4195,9 +4222,10 @@ void FragmentMetadata::store_tile_null_counts( SizeComputationSerializer size_computation_serializer; write_tile_null_counts(idx, size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); write_tile_null_counts(idx, serializer); write_generic_tile_to_file(encryption_key, tile, 
nbytes); @@ -4247,9 +4275,10 @@ void FragmentMetadata::store_fragment_min_max_sum_null_count( SizeComputationSerializer size_computation_serializer; serialize_data(size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); serialize_data(serializer); write_generic_tile_to_file(encryption_key, tile, nbytes); @@ -4273,9 +4302,10 @@ void FragmentMetadata::store_processed_conditions( SizeComputationSerializer size_computation_serializer; serialize_processed_conditions(size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); serialize_processed_conditions(serializer); write_generic_tile_to_file(encryption_key, tile, nbytes); @@ -4602,13 +4632,14 @@ void FragmentMetadata::write_has_delete_meta(Serializer& serializer) const { void FragmentMetadata::store_footer(const EncryptionKey&) { SizeComputationSerializer size_computation_serializer; write_footer(size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), memory_tracker_)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); write_footer(serializer); write_footer_to_file(tile); - resources_->stats().add_counter("write_frag_meta_footer_size", tile.size()); + resources_->stats().add_counter("write_frag_meta_footer_size", tile->size()); } void FragmentMetadata::resize_tile_offsets_vectors(uint64_t size) { @@ -4643,7 +4674,7 @@ const shared_ptr& 
FragmentMetadata::array_schema() const { void FragmentMetadata::build_idx_map() { idx_map_.clear(); - auto attributes = array_schema_->attributes(); + auto& attributes = array_schema_->attributes(); for (unsigned i = 0; i < attributes.size(); ++i) { auto attr_name = attributes[i]->name(); idx_map_[attr_name] = i; diff --git a/tiledb/sm/fragment/fragment_metadata.h b/tiledb/sm/fragment/fragment_metadata.h index 2ef0ab30d416..f23dd1950623 100644 --- a/tiledb/sm/fragment/fragment_metadata.h +++ b/tiledb/sm/fragment/fragment_metadata.h @@ -40,6 +40,7 @@ #include #include "tiledb/common/common.h" +#include "tiledb/common/pmr.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/filesystem/uri.h" #include "tiledb/sm/misc/types.h" @@ -69,8 +70,15 @@ class FragmentMetadata { /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ - /** Constructor. */ - FragmentMetadata(); + /** + * Constructor. + * + * @param resources A context resources instance. + * @param memory_tracker The memory tracker of the array this fragment + * metadata corresponds to. + */ + FragmentMetadata( + ContextResources* resources, shared_ptr memory_tracker); /** * Constructor. @@ -83,16 +91,17 @@ class FragmentMetadata { * @param timestamp_range The timestamp range of the fragment. * In TileDB, timestamps are in ms elapsed since * 1970-01-01 00:00:00 +0000 (UTC). + * @param memory_tracker Memory tracker for the fragment metadata. * @param dense Indicates whether the fragment is dense or sparse. * @param has_timestamps Does the fragment contains timestamps. * @param has_delete_meta Does the fragment contains delete metadata. 
*/ FragmentMetadata( ContextResources* resources, - MemoryTracker* memory_tracker, const shared_ptr& array_schema, const URI& fragment_uri, const std::pair& timestamp_range, + shared_ptr memory_tracker, bool dense = true, bool has_timestamps = false, bool has_delete_mata = false); @@ -318,53 +327,61 @@ class FragmentMetadata { } /** Returns the tile offsets. */ - inline const std::vector>& tile_offsets() const { + inline const tdb::pmr::vector>& tile_offsets() + const { return tile_offsets_; } /** Returns the variable tile offsets. */ - inline const std::vector>& tile_var_offsets() const { + inline const tdb::pmr::vector>& tile_var_offsets() + const { return tile_var_offsets_; } /** Returns the sizes of the uncompressed variable tiles. */ - inline const std::vector>& tile_var_sizes() const { + inline const tdb::pmr::vector>& tile_var_sizes() + const { return tile_var_sizes_; } /** Returns the validity tile offsets. */ - inline const std::vector>& tile_validity_offsets() - const { + inline const tdb::pmr::vector>& + tile_validity_offsets() const { return tile_validity_offsets_; } /** Returns the tile min buffers. */ - inline const std::vector>& tile_min_buffer() const { + inline const tdb::pmr::vector>& tile_min_buffer() + const { return tile_min_buffer_; } /** Returns the tile min buffers variable length data. */ - inline const std::vector>& tile_min_var_buffer() const { + inline const tdb::pmr::vector>& tile_min_var_buffer() + const { return tile_min_var_buffer_; } /** Returns the tile max buffers. */ - inline const std::vector>& tile_max_buffer() const { + inline const tdb::pmr::vector>& tile_max_buffer() + const { return tile_max_buffer_; } /** Returns the tile max buffers variable length data. */ - inline const std::vector>& tile_max_var_buffer() const { + inline const tdb::pmr::vector>& tile_max_var_buffer() + const { return tile_max_var_buffer_; } /** Returns the tile sum values for fixed sized data. 
*/ - inline const std::vector>& tile_sums() const { + inline const tdb::pmr::vector>& tile_sums() const { return tile_sums_; } /** Returns the tile null count values for attributes/dimensions. */ - inline const std::vector>& tile_null_counts() const { + inline const tdb::pmr::vector>& tile_null_counts() + const { return tile_null_counts_; } @@ -480,7 +497,7 @@ class FragmentMetadata { */ static std::vector> load( ContextResources& resources, - MemoryTracker* memory_tracker, + shared_ptr memory_tracker, const shared_ptr array_schema, const std::unordered_map>& array_schemas_all, @@ -767,7 +784,7 @@ class FragmentMetadata { const NDRange& mbr(uint64_t tile_idx) const; /** Returns all the MBRs of all tiles in the fragment. */ - const std::vector& mbrs() const; + const tdb::pmr::vector& mbrs() const; /** * Retrieves the size of the tile when it is persisted (e.g. the size of the @@ -1104,7 +1121,7 @@ class FragmentMetadata { } /** tile_offsets accessor */ - std::vector>& tile_offsets() { + tdb::pmr::vector>& tile_offsets() { return tile_offsets_; } @@ -1114,7 +1131,7 @@ class FragmentMetadata { } /** tile_var_offsets accessor */ - std::vector>& tile_var_offsets() { + tdb::pmr::vector>& tile_var_offsets() { return tile_var_offsets_; } @@ -1124,42 +1141,42 @@ class FragmentMetadata { } /** tile_var_sizes accessor */ - std::vector>& tile_var_sizes() { + tdb::pmr::vector>& tile_var_sizes() { return tile_var_sizes_; } /** tile_validity_offsets accessor */ - std::vector>& tile_validity_offsets() { + tdb::pmr::vector>& tile_validity_offsets() { return tile_validity_offsets_; } /** tile_min_buffer accessor */ - std::vector>& tile_min_buffer() { + tdb::pmr::vector>& tile_min_buffer() { return tile_min_buffer_; } /** tile_min_var_buffer accessor */ - std::vector>& tile_min_var_buffer() { + tdb::pmr::vector>& tile_min_var_buffer() { return tile_min_var_buffer_; } /** tile_max_buffer accessor */ - std::vector>& tile_max_buffer() { + tdb::pmr::vector>& tile_max_buffer() { return 
tile_max_buffer_; } /** tile_max_var_buffer accessor */ - std::vector>& tile_max_var_buffer() { + tdb::pmr::vector>& tile_max_var_buffer() { return tile_max_var_buffer_; } /** tile_sums accessor */ - std::vector>& tile_sums() { + tdb::pmr::vector>& tile_sums() { return tile_sums_; } /** tile_null_counts accessor */ - std::vector>& tile_null_counts() { + tdb::pmr::vector>& tile_null_counts() { return tile_null_counts_; } @@ -1218,11 +1235,6 @@ class FragmentMetadata { resources_ = cr; } - /** set the memory tracker pointer during deserialization*/ - void set_memory_tracker(MemoryTracker* memory_tracker) { - memory_tracker_ = memory_tracker; - } - /** loaded_metadata_.rtree_ accessor */ void set_rtree_loaded() { loaded_metadata_.rtree_ = true; @@ -1264,7 +1276,7 @@ class FragmentMetadata { /** * The memory tracker of the array this fragment metadata corresponds to. */ - MemoryTracker* memory_tracker_; + shared_ptr memory_tracker_; /** The array schema */ shared_ptr array_schema_; @@ -1357,57 +1369,57 @@ class FragmentMetadata { * The tile offsets in their corresponding attribute files. Meaningful only * when there is compression. */ - std::vector> tile_offsets_; + tdb::pmr::vector> tile_offsets_; /** * The variable tile offsets in their corresponding attribute files. * Meaningful only for variable-sized tiles. */ - std::vector> tile_var_offsets_; + tdb::pmr::vector> tile_var_offsets_; /** * The sizes of the uncompressed variable tiles. * Meaningful only when there is compression for variable tiles. */ - std::vector> tile_var_sizes_; + tdb::pmr::vector> tile_var_sizes_; /** * The validity tile offsets in their corresponding attribute files. * Meaningful only when there is compression. */ - std::vector> tile_validity_offsets_; + tdb::pmr::vector> tile_validity_offsets_; /** * The tile min buffers, for variable attributes/dimensions, this will store * offsets. 
*/ - std::vector> tile_min_buffer_; + tdb::pmr::vector> tile_min_buffer_; /** * The tile min buffers variable length data. */ - std::vector> tile_min_var_buffer_; + tdb::pmr::vector> tile_min_var_buffer_; /** * The tile max buffers, for variable attributes/dimensions, this will store * offsets. */ - std::vector> tile_max_buffer_; + tdb::pmr::vector> tile_max_buffer_; /** * The tile max buffers variable length data. */ - std::vector> tile_max_var_buffer_; + tdb::pmr::vector> tile_max_var_buffer_; /** * The tile sum values, ignored for var sized attributes/dimensions. */ - std::vector> tile_sums_; + tdb::pmr::vector> tile_sums_; /** * The tile null count values for attributes/dimensions. */ - std::vector> tile_null_counts_; + tdb::pmr::vector> tile_null_counts_; /** * Fragment min values. @@ -1838,7 +1850,7 @@ class FragmentMetadata { void store_footer(const EncryptionKey& encryption_key); /** Writes the R-tree to a tile. */ - WriterTile write_rtree(); + shared_ptr write_rtree(); /** Writes the non-empty domain to the input buffer. */ void write_non_empty_domain(Serializer& serializer) const; @@ -2026,7 +2038,7 @@ class FragmentMetadata { * Reads the contents of a generic tile starting at the input offset, * and returns a tile. */ - Tile read_generic_tile_from_file( + shared_ptr read_generic_tile_from_file( const EncryptionKey& encryption_key, uint64_t offset) const; /** @@ -2048,7 +2060,7 @@ class FragmentMetadata { */ void write_generic_tile_to_file( const EncryptionKey& encryption_key, - WriterTile& tile, + shared_ptr tile, uint64_t* nbytes) const; /** @@ -2057,7 +2069,7 @@ class FragmentMetadata { * retrieval upon reading (as its size is predictable based on the * number of attributes). */ - void write_footer_to_file(WriterTile&) const; + void write_footer_to_file(shared_ptr) const; /** * Simple clean up function called in the case of error. 
It removes the diff --git a/tiledb/sm/group/group.cc b/tiledb/sm/group/group.cc index aa5a10edb51f..4aea136fb6ac 100644 --- a/tiledb/sm/group/group.cc +++ b/tiledb/sm/group/group.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2023 TileDB, Inc. + * @copyright Copyright (c) 2023-2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -33,6 +33,8 @@ #include "tiledb/sm/group/group.h" #include "tiledb/common/common.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" +#include "tiledb/common/stdx_string.h" #include "tiledb/sm/enums/datatype.h" #include "tiledb/sm/enums/encryption_type.h" #include "tiledb/sm/enums/query_type.h" @@ -64,10 +66,12 @@ Group::Group( ContextResources& resources, const URI& group_uri, StorageManager* storage_manager) - : group_uri_(group_uri) + : memory_tracker_(resources.create_memory_tracker()) + , group_uri_(group_uri) , storage_manager_(storage_manager) , config_(storage_manager_->config()) , remote_(group_uri.is_tiledb()) + , metadata_(memory_tracker_) , metadata_loaded_(false) , is_open_(false) , query_type_(QueryType::READ) @@ -75,6 +79,7 @@ Group::Group( , timestamp_end_(UINT64_MAX) , encryption_key_(tdb::make_shared(HERE())) , resources_(resources) { + memory_tracker_->set_type(MemoryTrackerType::GROUP); } Status Group::open( @@ -470,22 +475,16 @@ std::optional Group::metadata_type(const char* key) { return metadata_.metadata_type(key); } -Metadata* Group::unsafe_metadata() { - return &metadata_; -} - -const Metadata* Group::metadata() const { - return &metadata_; -} - -Status Group::metadata(Metadata** metadata) { +Metadata* Group::metadata() { // Load group metadata, if not loaded yet if (!metadata_loaded_) load_metadata(); - *metadata = &metadata_; + return &metadata_; +} - return Status::Ok(); +Metadata* Group::unsafe_metadata() { + return &metadata_; } void 
Group::set_metadata_loaded(const bool metadata_loaded) { @@ -712,23 +711,17 @@ void Group::load_metadata() { rest_client->post_group_metadata_from_rest(group_uri_, this)); } else { assert(group_dir_->loaded()); - load_metadata_from_storage(group_dir_, *encryption_key_, &metadata_); + load_metadata_from_storage(group_dir_, *encryption_key_); } metadata_loaded_ = true; } void Group::load_metadata_from_storage( const shared_ptr& group_dir, - const EncryptionKey& encryption_key, - Metadata* metadata) { + const EncryptionKey& encryption_key) { [[maybe_unused]] auto timer_se = resources_.stats().start_timer("group_load_metadata_from_storage"); - // Special case - if (metadata == nullptr) { - return; - } - // Determine which group metadata to load const auto& group_metadata_to_load = group_dir->group_meta_uris(); @@ -739,8 +732,12 @@ void Group::load_metadata_from_storage( parallel_for(&resources_.compute_tp(), 0, metadata_num, [&](size_t m) { const auto& uri = group_metadata_to_load[m].uri_; - auto&& tile = GenericTileIO::load(resources_, uri, 0, encryption_key); - metadata_tiles[m] = tdb::make_shared(HERE(), std::move(tile)); + metadata_tiles[m] = GenericTileIO::load( + resources_, + uri, + 0, + encryption_key, + storage_manager_->resources().ephemeral_memory_tracker()); return Status::Ok(); })); @@ -753,8 +750,8 @@ void Group::load_metadata_from_storage( resources_.stats().add_counter("group_read_group_meta_size", meta_size); // Copy the deserialized metadata into the original Metadata object - *metadata = Metadata::deserialize(metadata_tiles); - metadata->set_loaded_metadata_uris(group_metadata_to_load); + metadata_ = Metadata::deserialize(metadata_tiles); + metadata_.set_loaded_metadata_uris(group_metadata_to_load); } void Group::group_open_for_reads() { @@ -775,8 +772,12 @@ void Group::load_group_details() { // V1 groups did not have the version appended so only have 4 "_" // (____) + // Since 2.19, V1 groups also have the version appended so we have + // to check 
for that as well auto part = latest_group_uri.last_path_part(); - if (std::count(part.begin(), part.end(), '_') == 4) { + auto underscoreCount = std::count(part.begin(), part.end(), '_'); + if (underscoreCount == 4 || + (underscoreCount == 5 && utils::parse::ends_with(part, "_1"))) { load_group_from_uri(latest_group_uri); return; } @@ -790,12 +791,17 @@ void Group::load_group_from_uri(const URI& uri) { [[maybe_unused]] auto timer_se = resources_.stats().start_timer("load_group_from_uri"); - auto&& tile = GenericTileIO::load(resources_, uri, 0, *encryption_key()); + auto tile = GenericTileIO::load( + resources_, + uri, + 0, + *encryption_key(), + storage_manager_->resources().ephemeral_memory_tracker()); - resources_.stats().add_counter("read_group_size", tile.size()); + resources_.stats().add_counter("read_group_size", tile->size()); // Deserialize - Deserializer deserializer(tile.data(), tile.size()); + Deserializer deserializer(tile->data(), tile->size()); auto opt_group = GroupDetails::deserialize(deserializer, group_directory()->uri()); @@ -810,14 +816,18 @@ void Group::load_group_from_all_uris(const std::vector& uris) { std::vector> deserializers; for (auto& uri : uris) { - auto&& tile = - GenericTileIO::load(resources_, uri.uri_, 0, *encryption_key()); + auto tile = GenericTileIO::load( + resources_, + uri.uri_, + 0, + *encryption_key(), + storage_manager_->resources().ephemeral_memory_tracker()); - resources_.stats().add_counter("read_group_size", tile.size()); + resources_.stats().add_counter("read_group_size", tile->size()); // Deserialize shared_ptr deserializer = - tdb::make_shared(HERE(), std::move(tile)); + tdb::make_shared(HERE(), tile); deserializers.emplace_back(deserializer); } diff --git a/tiledb/sm/group/group.h b/tiledb/sm/group/group.h index 0cb415405372..2dfc5aab22fa 100644 --- a/tiledb/sm/group/group.h +++ b/tiledb/sm/group/group.h @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2023 TileDB, Inc. 
+ * @copyright Copyright (c) 2023-2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -195,7 +195,7 @@ class Group { std::optional metadata_type(const char* key); /** Retrieves the group metadata object. */ - Status metadata(Metadata** metadata); + Metadata* metadata(); /** * Retrieves the group metadata object. @@ -208,7 +208,6 @@ class Group { * REST. A lock should already by taken before load_metadata is called. */ Metadata* unsafe_metadata(); - const Metadata* metadata() const; /** * Set metadata loaded @@ -386,6 +385,9 @@ class Group { /* ********************************* */ /* PROTECTED ATTRIBUTES */ /* ********************************* */ + /** Memory tracker for the group. */ + shared_ptr memory_tracker_; + /** The group URI. */ URI group_uri_; @@ -460,8 +462,7 @@ class Group { */ void load_metadata_from_storage( const shared_ptr& group_dir, - const EncryptionKey& encryption_key, - Metadata* metadata); + const EncryptionKey& encryption_key); /** Opens an group for reads. */ void group_open_for_reads(); diff --git a/tiledb/sm/metadata/metadata.cc b/tiledb/sm/metadata/metadata.cc index fbce14b864de..d5b585147e6a 100644 --- a/tiledb/sm/metadata/metadata.cc +++ b/tiledb/sm/metadata/metadata.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -33,6 +33,7 @@ #include "tiledb/sm/metadata/metadata.h" #include "tiledb/common/exception/exception.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/enums/datatype.h" #include "tiledb/sm/misc/tdb_time.h" @@ -55,43 +56,51 @@ class MetadataException : public StatusException { /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ - -Metadata::Metadata() - : Metadata(std::map()) { -} - -Metadata::Metadata(const std::map& metadata_map) - : metadata_map_(metadata_map) +Metadata::Metadata(shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , metadata_map_(memory_tracker_->get_resource(MemoryType::METADATA)) + , metadata_index_(memory_tracker_->get_resource(MemoryType::METADATA)) , timestamp_range_([]() -> std::pair { auto t = utils::time::timestamp_now_ms(); return std::make_pair(t, t); - }()) { + }()) + , loaded_metadata_uris_( + memory_tracker_->get_resource(MemoryType::METADATA)) { build_metadata_index(); } -Metadata::Metadata(const Metadata& rhs) - : metadata_map_(rhs.metadata_map_) - , timestamp_range_(rhs.timestamp_range_) - , loaded_metadata_uris_(rhs.loaded_metadata_uris_) - , uri_(rhs.uri_) { - if (!rhs.metadata_index_.empty()) - build_metadata_index(); -} +/* ********************************* */ +/* API */ +/* ********************************* */ + +Metadata& Metadata::operator=(Metadata& other) { + clear(); + for (auto& [k, v] : other.metadata_map_) { + metadata_map_.emplace(k, v); + } -Metadata& Metadata::operator=(const Metadata& other) { - metadata_map_ = other.metadata_map_; timestamp_range_ = other.timestamp_range_; - loaded_metadata_uris_ = other.loaded_metadata_uris_; - uri_ = other.uri_; + + for (auto& uri : 
other.loaded_metadata_uris_) { + loaded_metadata_uris_.emplace_back(uri); + } + build_metadata_index(); + return *this; } -Metadata::~Metadata() = default; +Metadata& Metadata::operator=( + std::map&& md_map) { + clear(); + for (auto& [k, v] : md_map) { + metadata_map_.emplace(k, v); + } -/* ********************************* */ -/* API */ -/* ********************************* */ + build_metadata_index(); + + return *this; +} void Metadata::clear() { metadata_map_.clear(); @@ -115,14 +124,13 @@ void Metadata::generate_uri(const URI& array_uri) { .join_path(ts_name); } -Metadata Metadata::deserialize( +std::map Metadata::deserialize( const std::vector>& metadata_tiles) { if (metadata_tiles.empty()) { - return Metadata(); + return {}; } - std::map metadata_map; - Status st; + std::map metadata_map; uint32_t key_len; char del; size_t value_len; @@ -157,7 +165,7 @@ Metadata Metadata::deserialize( } } - return Metadata(metadata_map); + return metadata_map; } void Metadata::serialize(Serializer& serializer) const { @@ -313,17 +321,10 @@ void Metadata::set_loaded_metadata_uris( timestamp_range_.second = loaded_metadata_uris.back().timestamp_range_.second; } -const std::vector& Metadata::loaded_metadata_uris() const { +const tdb::pmr::vector& Metadata::loaded_metadata_uris() const { return loaded_metadata_uris_; } -void Metadata::swap(Metadata* metadata) { - std::swap(metadata_map_, metadata->metadata_map_); - std::swap(metadata_index_, metadata->metadata_index_); - std::swap(timestamp_range_, metadata->timestamp_range_); - std::swap(loaded_metadata_uris_, metadata->loaded_metadata_uris_); -} - void Metadata::reset(uint64_t timestamp) { clear(); timestamp = (timestamp != 0) ? 
timestamp : utils::time::timestamp_now_ms(); diff --git a/tiledb/sm/metadata/metadata.h b/tiledb/sm/metadata/metadata.h index 7041223234a5..2e45614eaea8 100644 --- a/tiledb/sm/metadata/metadata.h +++ b/tiledb/sm/metadata/metadata.h @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2023 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -40,6 +40,7 @@ #include "tiledb/common/common.h" #include "tiledb/common/heap_memory.h" +#include "tiledb/common/pmr.h" #include "tiledb/sm/filesystem/uri.h" #include "tiledb/sm/tile/tile.h" #include "tiledb/storage_format/serialization/serializers.h" @@ -50,6 +51,7 @@ namespace tiledb::sm { class Buffer; class ConstBuffer; +class MemoryTracker; enum class Datatype : uint8_t; /** @@ -81,31 +83,39 @@ class Metadata { }; /** Iterator type for iterating over metadata values. */ - typedef std::map::const_iterator iterator; + typedef tdb::pmr::map::const_iterator iterator; /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ - /** Constructor. */ - explicit Metadata(); + /** Default constructor is deleted. */ + Metadata() = delete; /** Constructor. */ - Metadata(const std::map& metadata_map); - - /** Copy constructor. */ - Metadata(const Metadata& rhs); + Metadata(shared_ptr memory_tracker); - /** Copy assignment. */ - Metadata& operator=(const Metadata& other); + DISABLE_COPY(Metadata); + DISABLE_MOVE_AND_MOVE_ASSIGN(Metadata); /** Destructor. */ - ~Metadata(); + ~Metadata() = default; /* ********************************* */ /* API */ /* ********************************* */ + /** Copy assignment. */ + Metadata& operator=(Metadata& other); + + /** Assignment via std::map. */ + Metadata& operator=(std::map&& md_map); + + /** Returns the memory tracker. 
*/ + inline shared_ptr memory_tracker() { + return memory_tracker_; + } + /** Clears the metadata. */ void clear(); @@ -120,7 +130,7 @@ class Metadata { * assumed to be sorted on time. The function will take care of any * deleted or overwritten metadata items considering the order. */ - static Metadata deserialize( + static std::map deserialize( const std::vector>& metadata_tiles); /** Serializes all key-value metadata items into the input buffer. */ @@ -204,10 +214,7 @@ class Metadata { * Returns the URIs of the metadata files that have been loaded * to this object. */ - const std::vector& loaded_metadata_uris() const; - - /** Swaps the contents between the object and the input. */ - void swap(Metadata* metadata); + const tdb::pmr::vector& loaded_metadata_uris() const; /** * Clears the metadata and assigns the input timestamp to @@ -233,15 +240,19 @@ class Metadata { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The memory tracker. */ + shared_ptr memory_tracker_; + /** A map from metadata key to metadata value. */ - std::map metadata_map_; + tdb::pmr::map metadata_map_; /** * A vector pointing to all the values in `metadata_map_`. It facilitates * searching metadata from index. Used only for reading metadata (inapplicable * when writing metadata). */ - std::vector> metadata_index_; + tdb::pmr::vector> + metadata_index_; /** Mutex for thread-safety. */ mutable std::mutex mtx_; @@ -256,7 +267,7 @@ class Metadata { * The URIs of the metadata files that have been loaded to this object. * This is needed to know which files to delete upon consolidation. */ - std::vector loaded_metadata_uris_; + tdb::pmr::vector loaded_metadata_uris_; /** The URI of the array metadata file. 
*/ URI uri_; diff --git a/tiledb/sm/metadata/test/CMakeLists.txt b/tiledb/sm/metadata/test/CMakeLists.txt index a9c4c368edd0..326e66e796a3 100644 --- a/tiledb/sm/metadata/test/CMakeLists.txt +++ b/tiledb/sm/metadata/test/CMakeLists.txt @@ -3,7 +3,7 @@ # # The MIT License # -# Copyright (c) 2022 TileDB, Inc. +# Copyright (c) 2022-2024 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -26,8 +26,9 @@ include(unit_test) commence(unit_test metadata) - this_target_object_libraries(metadata) - # The dependency on the `tile` object library is suspect, but here for now. + this_target_object_libraries(metadata mem_helpers) + # The dependency on the `tile` object library is suspect, but here for now. this_target_object_libraries(tile) + this_target_link_libraries(tiledb_test_support_lib) this_target_sources(main.cc unit_metadata.cc) conclude(unit_test) diff --git a/tiledb/sm/metadata/test/unit_metadata.cc b/tiledb/sm/metadata/test/unit_metadata.cc index 982902e779f6..35c3ed06806b 100644 --- a/tiledb/sm/metadata/test/unit_metadata.cc +++ b/tiledb/sm/metadata/test/unit_metadata.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2022-2023 TileDB, Inc. + * @copyright Copyright (c) 2022-2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -30,8 +30,10 @@ * This file defines a test `main()` */ +#include #include #include "../metadata.h" +#include "test/support/src/mem_helpers.h" #include "tiledb/common/common.h" #include "tiledb/common/dynamic_memory/dynamic_memory.h" #include "tiledb/sm/buffer/buffer.h" @@ -42,18 +44,28 @@ using namespace tiledb; using namespace tiledb::common; using namespace tiledb::sm; +using tiledb::test::create_test_memory_tracker; template inline T& buffer_metadata(void* p) { return *static_cast(static_cast(static_cast(p) + n)); } +TEST_CASE("Metadata: Constructor validation", "[metadata][constructor]") { + auto tracker = create_test_memory_tracker(); + + SECTION("memory_tracker") { + REQUIRE_NOTHROW(Metadata(tracker)); + } +} + TEST_CASE( "Metadata: Test metadata deserialization", "[metadata][deserialization]") { + auto tracker = create_test_memory_tracker(); std::vector> metadata_tiles; - Metadata metadata_to_serialize1, metadata_to_serialize2, - metadata_to_serialize3; + Metadata metadata_to_serialize1(tracker), metadata_to_serialize2(tracker), + metadata_to_serialize3(tracker), meta(tracker); // key_1:a, value_1:100,200 std::string key_1 = "key1"; @@ -74,20 +86,20 @@ TEST_CASE( SizeComputationSerializer size_computation_serializer1; metadata_to_serialize1.serialize(size_computation_serializer1); - WriterTile tile1{ - WriterTile::from_generic(size_computation_serializer1.size())}; + auto tile1{ + WriterTile::from_generic(size_computation_serializer1.size(), tracker)}; - Serializer serializer1(tile1.data(), tile1.size()); + Serializer serializer1(tile1->data(), tile1->size()); metadata_to_serialize1.serialize(serializer1); metadata_to_serialize2.put(key_2.c_str(), Datatype::FLOAT64, 1, &value_2); SizeComputationSerializer size_computation_serializer2; metadata_to_serialize2.serialize(size_computation_serializer2); 
- WriterTile tile2{ - WriterTile::from_generic(size_computation_serializer2.size())}; + auto tile2{ + WriterTile::from_generic(size_computation_serializer2.size(), tracker)}; - Serializer serializer2(tile2.data(), tile2.size()); + Serializer serializer2(tile2->data(), tile2->size()); metadata_to_serialize2.serialize(serializer2); metadata_to_serialize3.put( @@ -95,48 +107,50 @@ TEST_CASE( SizeComputationSerializer size_computation_serializer3; metadata_to_serialize3.serialize(size_computation_serializer3); - WriterTile tile3{ - WriterTile::from_generic(size_computation_serializer3.size())}; + auto tile3{ + WriterTile::from_generic(size_computation_serializer3.size(), tracker)}; - Serializer serializer3(tile3.data(), tile3.size()); + Serializer serializer3(tile3->data(), tile3->size()); metadata_to_serialize3.serialize(serializer3); metadata_tiles.resize(3); metadata_tiles[0] = tdb::make_shared( HERE(), - tile1.format_version(), - tile1.type(), - tile1.cell_size(), + tile1->format_version(), + tile1->type(), + tile1->cell_size(), 0, - tile1.size(), - tile1.filtered_buffer().data(), - tile1.filtered_buffer().size()); - memcpy(metadata_tiles[0]->data(), tile1.data(), tile1.size()); + tile1->size(), + tile1->filtered_buffer().data(), + tile1->filtered_buffer().size(), + tracker); + memcpy(metadata_tiles[0]->data(), tile1->data(), tile1->size()); metadata_tiles[1] = tdb::make_shared( HERE(), - tile2.format_version(), - tile2.type(), - tile2.cell_size(), + tile2->format_version(), + tile2->type(), + tile2->cell_size(), 0, - tile2.size(), - tile2.filtered_buffer().data(), - tile2.filtered_buffer().size()); - memcpy(metadata_tiles[1]->data(), tile2.data(), tile2.size()); + tile2->size(), + tile2->filtered_buffer().data(), + tile2->filtered_buffer().size(), + tracker); + memcpy(metadata_tiles[1]->data(), tile2->data(), tile2->size()); metadata_tiles[2] = tdb::make_shared( HERE(), - tile3.format_version(), - tile3.type(), - tile3.cell_size(), + tile3->format_version(), + 
tile3->type(), + tile3->cell_size(), 0, - tile3.size(), - tile3.filtered_buffer().data(), - tile3.filtered_buffer().size()); - memcpy(metadata_tiles[2]->data(), tile3.data(), tile3.size()); - - auto meta{Metadata::deserialize(metadata_tiles)}; + tile3->size(), + tile3->filtered_buffer().data(), + tile3->filtered_buffer().size(), + tracker); + memcpy(metadata_tiles[2]->data(), tile3->data(), tile3->size()); + meta = Metadata::deserialize(metadata_tiles); Datatype type; uint32_t v_num; diff --git a/tiledb/sm/misc/CMakeLists.txt b/tiledb/sm/misc/CMakeLists.txt index 3c8733c69d56..f6b033b82741 100644 --- a/tiledb/sm/misc/CMakeLists.txt +++ b/tiledb/sm/misc/CMakeLists.txt @@ -71,6 +71,22 @@ commence(object_library time) this_target_sources(tdb_time.cc) conclude(object_library) +# +# `uuid` object library +# +commence(object_library uuid) + this_target_sources(uuid.cc) + this_target_object_libraries(baseline) + if(WIN32) + this_target_link_libraries(rpcrt4) + else() + find_package(OpenSSL_EP REQUIRED) + this_target_link_libraries(OpenSSL::Crypto) + endif() +conclude(object_library) + +add_test_subdirectory() + # # `mgc_dict.*` tests are declared in this directory for the moment. 
# diff --git a/tiledb/sm/misc/constants.cc b/tiledb/sm/misc/constants.cc index cadde0492afe..7c575f77b532 100644 --- a/tiledb/sm/misc/constants.cc +++ b/tiledb/sm/misc/constants.cc @@ -337,6 +337,12 @@ const std::string query_status_initialized_str = "INITIALIZED"; /** TILEDB_UNINITIALIZED Query String **/ const std::string query_status_uninitialized_str = "UNINITIALIZED"; +/** TILEDB_ALWAYS_TRUE Query Condition Op String **/ +const std::string query_condition_op_always_true_str = "ALWAYS_TRUE"; + +/** TILEDB_ALWAYS_FALSE Query Condition Op String **/ +const std::string query_condition_op_always_false_str = "ALWAYS_FALSE"; + /** TILEDB_LT Query Condition Op String **/ const std::string query_condition_op_lt_str = "LT"; diff --git a/tiledb/sm/misc/constants.h b/tiledb/sm/misc/constants.h index 0cccd9f49987..4e23f45eabaf 100644 --- a/tiledb/sm/misc/constants.h +++ b/tiledb/sm/misc/constants.h @@ -333,6 +333,12 @@ extern const std::string query_status_initialized_str; /** TILEDB_UNINITIALIZED Query String **/ extern const std::string query_status_uninitialized_str; +/** TILEDB_ALWAYS_TRUE Query Condition Op String **/ +extern const std::string query_condition_op_always_true_str; + +/** TILEDB_ALWAYS_FALSE Query Condition Op String **/ +extern const std::string query_condition_op_always_false_str; + /** TILEDB_LT Query Condition Op String **/ extern const std::string query_condition_op_lt_str; diff --git a/tiledb/sm/misc/magic_mgc_gzipped.bin.tar.bz2 b/tiledb/sm/misc/magic_mgc_gzipped.bin.tar.bz2 index d23e06bc82ac..9c821f153b82 100644 Binary files a/tiledb/sm/misc/magic_mgc_gzipped.bin.tar.bz2 and b/tiledb/sm/misc/magic_mgc_gzipped.bin.tar.bz2 differ diff --git a/tiledb/sm/misc/test/CMakeLists.txt b/tiledb/sm/misc/test/CMakeLists.txt index be13e19e3d31..c61fe26da6fc 100644 --- a/tiledb/sm/misc/test/CMakeLists.txt +++ b/tiledb/sm/misc/test/CMakeLists.txt @@ -26,7 +26,7 @@ include(unit_test) commence(unit_test misc) - this_target_object_libraries(math) + 
this_target_object_libraries(math uuid) this_target_link_libraries(tiledb_test_support_lib) # change to `this_target_include_directories` when available target_include_directories(unit_misc PRIVATE "${CMAKE_SOURCE_DIR}") @@ -36,5 +36,6 @@ commence(unit_test misc) unit_hilbert.cc unit_integral_type_casts.cc unit_math.cc + unit_uuid.cc ) conclude(unit_test) diff --git a/tiledb/sm/misc/test/compile_uuid_main.cc b/tiledb/sm/misc/test/compile_uuid_main.cc new file mode 100644 index 000000000000..2db30c1d6e52 --- /dev/null +++ b/tiledb/sm/misc/test/compile_uuid_main.cc @@ -0,0 +1,34 @@ +/** + * @file compile_uuid_main.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2021 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "../uuid.h" + +int main() { + (void)tiledb::sm::uuid::generate_uuid(nullptr, false); + return 0; +} diff --git a/tiledb/sm/misc/test/unit_uuid.cc b/tiledb/sm/misc/test/unit_uuid.cc new file mode 100644 index 000000000000..8dbf830b29cd --- /dev/null +++ b/tiledb/sm/misc/test/unit_uuid.cc @@ -0,0 +1,87 @@ +/** + * @file unit_uuid.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2018-2022 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * Tests the UUID utility functions. + */ + +#include +#include +#include +#include + +#include "tiledb/sm/global_state/global_state.h" +#include "tiledb/sm/misc/uuid.h" + +using namespace tiledb::sm; + +std::mutex catch2_macro_mutex; + +// A thread-safe variant of the REQUIRE macro. 
+#define REQUIRE_SAFE(a) \ + { \ + std::lock_guard lock(catch2_macro_mutex); \ + REQUIRE(a); \ + } + +void cancel_all_tasks(StorageManager*) { +} + +TEST_CASE("UUID: Test generate", "[uuid]") { + SECTION("- Serial") { + std::string uuid0, uuid1, uuid2; + REQUIRE(uuid::generate_uuid(&uuid0).ok()); + REQUIRE(uuid0.length() == 36); + REQUIRE(uuid::generate_uuid(&uuid1).ok()); + REQUIRE(uuid1.length() == 36); + REQUIRE(uuid0 != uuid1); + + REQUIRE(uuid::generate_uuid(&uuid2, false).ok()); + REQUIRE(uuid2.length() == 32); + } + + SECTION("- Threaded") { + const unsigned nthreads = 20; + std::vector uuids(nthreads); + std::vector threads; + for (unsigned i = 0; i < nthreads; i++) { + threads.emplace_back([&uuids, i]() { + std::string& uuid = uuids[i]; + REQUIRE_SAFE(uuid::generate_uuid(&uuid).ok()); + REQUIRE_SAFE(uuid.length() == 36); + }); + } + for (auto& t : threads) { + t.join(); + } + // Check uniqueness + std::set uuid_set; + uuid_set.insert(uuids.begin(), uuids.end()); + REQUIRE(uuid_set.size() == uuids.size()); + } +} diff --git a/tiledb/sm/misc/uuid.cc b/tiledb/sm/misc/uuid.cc new file mode 100644 index 000000000000..052cce47bdcd --- /dev/null +++ b/tiledb/sm/misc/uuid.cc @@ -0,0 +1,174 @@ +/** + * @file uuid.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2018-2023 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines a platform-independent UUID generator. + */ + +#include +#include + +#include "tiledb/sm/misc/uuid.h" + +#ifdef _WIN32 +#include +#else +#include +#include +#include +#endif + +using namespace tiledb::common; + +namespace tiledb::sm::uuid { + +/** Mutex to guard UUID generation. */ +static std::mutex uuid_mtx; + +#ifdef _WIN32 + +/** + * Generate a UUID using Win32 RPC API. + */ +Status generate_uuid_win32(std::string* uuid_str) { + if (uuid_str == nullptr) + return Status_UtilsError("Null UUID string argument"); + + UUID uuid; + RPC_STATUS rc = UuidCreate(&uuid); + if (rc != RPC_S_OK) + return Status_UtilsError("Unable to generate Win32 UUID: creation error"); + + char* buf = nullptr; + rc = UuidToStringA(&uuid, reinterpret_cast(&buf)); + if (rc != RPC_S_OK) + return Status_UtilsError( + "Unable to generate Win32 UUID: string conversion error"); + + *uuid_str = std::string(buf); + + rc = RpcStringFreeA(reinterpret_cast(&buf)); + if (rc != RPC_S_OK) + return Status_UtilsError("Unable to generate Win32 UUID: free error"); + + return Status::Ok(); +} + +#else + +/** + * Generate a UUID using OpenSSL. 
+ * + * Initially from: https://gist.github.com/kvelakur/9069c9896577c3040030 + * "Generating a Version 4 UUID using OpenSSL" + */ +Status generate_uuid_openssl(std::string* uuid_str) { + if (uuid_str == nullptr) + return Status_UtilsError("Null UUID string argument"); + + union { + struct { + uint32_t time_low; + uint16_t time_mid; + uint16_t time_hi_and_version; + uint8_t clk_seq_hi_res; + uint8_t clk_seq_low; + uint8_t node[6]; + }; + uint8_t __rnd[16]; + } uuid; + + int rc = RAND_bytes(uuid.__rnd, sizeof(uuid)); + if (rc < 1) { + char err_msg[256]; + ERR_error_string_n(ERR_get_error(), err_msg, sizeof(err_msg)); + return Status_UtilsError( + "Cannot generate random bytes with OpenSSL: " + std::string(err_msg)); + } + + // Refer Section 4.2 of RFC-4122 + // https://tools.ietf.org/html/rfc4122#section-4.2 + uuid.clk_seq_hi_res = (uint8_t)((uuid.clk_seq_hi_res & 0x3F) | 0x80); + uuid.time_hi_and_version = + (uint16_t)((uuid.time_hi_and_version & 0x0FFF) | 0x4000); + + // Format the UUID as a string. + char buf[128]; + rc = snprintf( + buf, + sizeof(buf), + "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid.time_low, + uuid.time_mid, + uuid.time_hi_and_version, + uuid.clk_seq_hi_res, + uuid.clk_seq_low, + uuid.node[0], + uuid.node[1], + uuid.node[2], + uuid.node[3], + uuid.node[4], + uuid.node[5]); + + if (rc < 0) + return Status_UtilsError("Error formatting UUID string"); + + *uuid_str = std::string(buf); + + return Status::Ok(); +} + +#endif + +Status generate_uuid(std::string* uuid, bool hyphenate) { + if (uuid == nullptr) + return Status_UtilsError("Null UUID string argument"); + + std::string uuid_str; + { + // OpenSSL is not threadsafe, so grab a lock here. We are locking in the + // Windows case as well just to be careful. 
+ std::unique_lock lck(uuid_mtx); +#ifdef _WIN32 + RETURN_NOT_OK(generate_uuid_win32(&uuid_str)); +#else + RETURN_NOT_OK(generate_uuid_openssl(&uuid_str)); +#endif + } + + uuid->clear(); + for (unsigned i = 0; i < uuid_str.length(); i++) { + if (uuid_str[i] == '-' && !hyphenate) + continue; + uuid->push_back(uuid_str[i]); + } + + return Status::Ok(); +} + +} // namespace tiledb::sm::uuid diff --git a/tiledb/sm/misc/uuid.h b/tiledb/sm/misc/uuid.h new file mode 100644 index 000000000000..5999007f8b59 --- /dev/null +++ b/tiledb/sm/misc/uuid.h @@ -0,0 +1,59 @@ +/** + * @file uuid.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2018-2021 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file declares a platform-independent UUID generator. 
+ */ + +#ifndef TILEDB_UUID_H +#define TILEDB_UUID_H + +#include "tiledb/common/status.h" + +using namespace tiledb::common; + +namespace tiledb { +namespace sm { +namespace uuid { + +/** + * Generates a 128-bit UUID. The string is formatted with hyphens like: + * 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' where 'x' is a hexadecimal digit. + * Note: this function internally acquires a lock. + * + * @param uuid Output parameter which will store the UUID in string format. + * @param hyphenate If false, the UUID string will not be hyphenated. + * @return Status + */ +Status generate_uuid(std::string* uuid, bool hyphenate = true); + +} // namespace uuid +} // namespace sm +} // namespace tiledb + +#endif diff --git a/tiledb/sm/query/ast/query_ast.cc b/tiledb/sm/query/ast/query_ast.cc index 94403e57aaa9..16dfad7c9550 100644 --- a/tiledb/sm/query/ast/query_ast.cc +++ b/tiledb/sm/query/ast/query_ast.cc @@ -199,12 +199,17 @@ void ASTNodeVal::rewrite_enumeration_conditions( if (op_ != QueryConditionOp::IN && op_ != QueryConditionOp::NOT_IN) { auto idx = enumeration->index_of(get_value_ptr(), get_value_size()); if (idx == constants::enumeration_missing_value) { - throw std::invalid_argument( - "Enumeration value not found for field '" + attr->name() + "'"); + if (op_ == QueryConditionOp::NE) { + op_ = QueryConditionOp::ALWAYS_TRUE; + } else { + op_ = QueryConditionOp::ALWAYS_FALSE; + } + data_ = ByteVecValue(val_size); + utils::safe_integral_cast_to_datatype(0, attr->type(), data_); + } else { + data_ = ByteVecValue(val_size); + utils::safe_integral_cast_to_datatype(idx, attr->type(), data_); } - - data_ = ByteVecValue(val_size); - utils::safe_integral_cast_to_datatype(idx, attr->type(), data_); } else { // Buffers and writers for the new data/offsets memory std::vector data_buffer(val_size * members_.size()); @@ -215,25 +220,29 @@ void ASTNodeVal::rewrite_enumeration_conditions( ByteVecValue curr_data(val_size); uint64_t curr_offset = 0; + uint64_t num_offsets = 0; for (auto& 
member : members_) { auto idx = enumeration->index_of(member.data(), member.size()); if (idx == constants::enumeration_missing_value) { - throw std::invalid_argument( - "Enumeration value not found for field '" + attr->name() + "'"); + continue; } utils::safe_integral_cast_to_datatype(idx, attr->type(), curr_data); data_writer.write(curr_data.data(), curr_data.size()); offsets_writer.write(curr_offset); curr_offset += val_size; + num_offsets += 1; } - data_ = ByteVecValue(data_buffer.size()); - std::memcpy(data_.data(), data_buffer.data(), data_buffer.size()); + auto total_data_size = curr_offset; + auto total_offsets_size = num_offsets * constants::cell_var_offset_size; + + data_ = ByteVecValue(total_data_size); + std::memcpy(data_.data(), data_buffer.data(), total_data_size); - offsets_ = ByteVecValue(offsets_buffer.size()); - std::memcpy(offsets_.data(), offsets_buffer.data(), offsets_buffer.size()); + offsets_ = ByteVecValue(total_offsets_size); + std::memcpy(offsets_.data(), offsets_buffer.data(), total_offsets_size); generate_members(); } diff --git a/tiledb/sm/query/ast/test/CMakeLists.txt b/tiledb/sm/query/ast/test/CMakeLists.txt index 9597f820076e..44bd07de4d76 100644 --- a/tiledb/sm/query/ast/test/CMakeLists.txt +++ b/tiledb/sm/query/ast/test/CMakeLists.txt @@ -40,7 +40,7 @@ add_library(ast_test_support_lib STATIC EXCLUDE_FROM_ALL ${AST_TEST_SUPPORT_SOUR # We want tests to continue as normal even as the API is changing, # so don't warn for deprecations, since they'll be escalated to errors. 
if (NOT MSVC) - target_compile_options(ast_test_support_lib PRIVATE -Wno-deprecated-declarations) + target_compile_options(ast_test_support_lib PRIVATE -Wno-deprecated-declarations) endif() ################################################################ # diff --git a/tiledb/sm/query/deletes_and_updates/deletes_and_updates.cc b/tiledb/sm/query/deletes_and_updates/deletes_and_updates.cc index e871dd2b2a21..29e5c4a095ba 100644 --- a/tiledb/sm/query/deletes_and_updates/deletes_and_updates.cc +++ b/tiledb/sm/query/deletes_and_updates/deletes_and_updates.cc @@ -36,6 +36,7 @@ #include "tiledb/sm/fragment/fragment_identifier.h" #include "tiledb/sm/query/deletes_and_updates/serialization.h" #include "tiledb/sm/storage_manager/storage_manager.h" +#include "tiledb/sm/tile/generic_tile_io.h" #include "tiledb/storage_format/uri/generate_uri.h" using namespace tiledb; @@ -150,19 +151,24 @@ Status DeletesAndUpdates::dowork() { // Serialize the negated condition (aud update values if they are not empty) // and write to disk. - WriterTile serialized_condition = + auto serialized_condition = update_values_.empty() ? tiledb::sm::deletes_and_updates::serialization::serialize_condition( - condition_->negated_condition()) : + condition_->negated_condition(), query_memory_tracker_) : tiledb::sm::deletes_and_updates::serialization:: serialize_update_condition_and_values( - condition_->negated_condition(), update_values_); + condition_->negated_condition(), + update_values_, + query_memory_tracker_); new_fragment_str += update_values_.empty() ? 
constants::delete_file_suffix : constants::update_file_suffix; auto uri = commit_uri.join_path(new_fragment_str); - RETURN_NOT_OK(storage_manager_->store_data_to_generic_tile( - serialized_condition, uri, *array_->encryption_key())); + GenericTileIO::store_data( + storage_manager_->resources(), + uri, + serialized_condition, + *array_->encryption_key()); return Status::Ok(); } diff --git a/tiledb/sm/query/deletes_and_updates/serialization.cc b/tiledb/sm/query/deletes_and_updates/serialization.cc index ad775b3bdd98..8a3a7467b025 100644 --- a/tiledb/sm/query/deletes_and_updates/serialization.cc +++ b/tiledb/sm/query/deletes_and_updates/serialization.cc @@ -100,11 +100,13 @@ storage_size_t get_serialized_condition_size( return size_computation_serializer.size(); } -WriterTile serialize_condition(const QueryCondition& query_condition) { - WriterTile tile{WriterTile::from_generic( - get_serialized_condition_size(query_condition.ast()))}; +shared_ptr serialize_condition( + const QueryCondition& query_condition, + shared_ptr memory_tracker) { + auto tile{WriterTile::from_generic( + get_serialized_condition_size(query_condition.ast()), memory_tracker)}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); serialize_condition_impl(query_condition.ast(), serializer); return tile; @@ -194,14 +196,16 @@ storage_size_t get_serialized_update_condition_and_values_size( return size_computation_serializer.size(); } -WriterTile serialize_update_condition_and_values( +shared_ptr serialize_update_condition_and_values( const QueryCondition& query_condition, - const std::vector& update_values) { - WriterTile tile{ - WriterTile::from_generic(get_serialized_update_condition_and_values_size( - query_condition.ast(), update_values))}; - - Serializer serializer(tile.data(), tile.size()); + const std::vector& update_values, + shared_ptr memory_tracker) { + auto tile{WriterTile::from_generic( + get_serialized_update_condition_and_values_size( 
+ query_condition.ast(), update_values), + memory_tracker)}; + + Serializer serializer(tile->data(), tile->size()); serialize_condition_impl(query_condition.ast(), serializer); serialize_update_values_impl(update_values, serializer); diff --git a/tiledb/sm/query/deletes_and_updates/serialization.h b/tiledb/sm/query/deletes_and_updates/serialization.h index 8f2d71846121..dfd5c1791292 100644 --- a/tiledb/sm/query/deletes_and_updates/serialization.h +++ b/tiledb/sm/query/deletes_and_updates/serialization.h @@ -48,7 +48,9 @@ enum class NodeType : uint8_t { EXPRESSION = 0, VALUE }; * @param query_condition Query condition to serialize. * @return Serialized query condition tile. */ -WriterTile serialize_condition(const QueryCondition& query_condition); +shared_ptr serialize_condition( + const QueryCondition& query_condition, + shared_ptr memory_tracker); /** * Deserializes the condition. @@ -73,9 +75,10 @@ QueryCondition deserialize_condition( * @param update_values Update values to serialize. * @return Serialized condition and update values tile. */ -WriterTile serialize_update_condition_and_values( +shared_ptr serialize_update_condition_and_values( const QueryCondition& query_condition, - const std::vector& update_values); + const std::vector& update_values, + shared_ptr memory_tracker); /** * Deserializes a condition and update values. 
diff --git a/tiledb/sm/query/deletes_and_updates/test/CMakeLists.txt b/tiledb/sm/query/deletes_and_updates/test/CMakeLists.txt index 70367c688b25..2b6b3e3662a1 100644 --- a/tiledb/sm/query/deletes_and_updates/test/CMakeLists.txt +++ b/tiledb/sm/query/deletes_and_updates/test/CMakeLists.txt @@ -26,9 +26,8 @@ include(unit_test) commence(unit_test delete_update_condition) - this_target_link_libraries(ast_test_support_lib) this_target_sources(main.cc unit_delete_condition.cc unit_update_condition.cc) # The dependencies can't yet be factored into separate object libraries - target_link_libraries(unit_delete_update_condition PUBLIC TILEDB_CORE_OBJECTS) - target_link_libraries(unit_delete_update_condition PUBLIC TILEDB_CORE_OBJECTS_ILIB) + this_target_link_libraries(ast_test_support_lib) + this_target_link_libraries(tiledb_test_support_lib) conclude(unit_test) diff --git a/tiledb/sm/query/deletes_and_updates/test/unit_delete_condition.cc b/tiledb/sm/query/deletes_and_updates/test/unit_delete_condition.cc index 60383922098e..693b82344a80 100644 --- a/tiledb/sm/query/deletes_and_updates/test/unit_delete_condition.cc +++ b/tiledb/sm/query/deletes_and_updates/test/unit_delete_condition.cc @@ -31,6 +31,7 @@ */ #include "test/support/src/ast_helpers.h" +#include "test/support/src/mem_helpers.h" #include "tiledb/sm/query/deletes_and_updates/serialization.h" #include "tiledb/sm/storage_manager/context.h" @@ -46,9 +47,10 @@ using namespace tiledb::sm::deletes_and_updates::serialization; * @param query_condition Condition to check. 
*/ void serialize_deserialize_check(QueryCondition& query_condition) { - auto serialized = serialize_condition(query_condition); + auto tracker = tiledb::test::create_test_memory_tracker(); + auto serialized = serialize_condition(query_condition, tracker); auto deserialized = - deserialize_condition(0, "", serialized.data(), serialized.size()); + deserialize_condition(0, "", serialized->data(), serialized->size()); CHECK(tiledb::test::ast_equal(query_condition.ast(), deserialized.ast())); } diff --git a/tiledb/sm/query/deletes_and_updates/test/unit_update_condition.cc b/tiledb/sm/query/deletes_and_updates/test/unit_update_condition.cc index 8603780215db..2204f99456b8 100644 --- a/tiledb/sm/query/deletes_and_updates/test/unit_update_condition.cc +++ b/tiledb/sm/query/deletes_and_updates/test/unit_update_condition.cc @@ -31,6 +31,7 @@ */ #include "test/support/src/ast_helpers.h" +#include "test/support/src/mem_helpers.h" #include "tiledb/sm/query/deletes_and_updates/serialization.h" #include "tiledb/sm/storage_manager/context.h" @@ -48,11 +49,12 @@ using namespace tiledb::sm::deletes_and_updates::serialization; */ void serialize_deserialize_check( QueryCondition& query_condition, std::vector& update_values) { - auto serialized = - serialize_update_condition_and_values(query_condition, update_values); + auto tracker = tiledb::test::create_test_memory_tracker(); + auto serialized = serialize_update_condition_and_values( + query_condition, update_values, tracker); auto&& [deserialized_condition, deserialized_update_values] = deserialize_update_condition_and_values( - 0, "", serialized.data(), serialized.size()); + 0, "", serialized->data(), serialized->size()); CHECK(tiledb::test::ast_equal( query_condition.ast(), deserialized_condition.ast())); diff --git a/tiledb/sm/query/legacy/reader.cc b/tiledb/sm/query/legacy/reader.cc index 075c50259930..0cf5015261a3 100644 --- a/tiledb/sm/query/legacy/reader.cc +++ b/tiledb/sm/query/legacy/reader.cc @@ -453,7 +453,9 @@ Status 
Reader::compute_range_result_coords( result_coords.emplace_back(tile, pos); } } else { // Sparse - std::vector result_bitmap(coords_num, 1); + auto resource = + query_memory_tracker_->get_resource(MemoryType::TILE_BITMAP); + tdb::pmr::vector result_bitmap(coords_num, 1, resource); // Compute result and overwritten bitmap per dimension for (unsigned d = 0; d < dim_num; ++d) { @@ -492,7 +494,7 @@ Status Reader::compute_range_result_coords( Subarray& subarray, const std::vector& single_fragment, const std::map, size_t>& result_tile_map, - std::vector& result_tiles, + IndexedList& result_tiles, std::vector>& range_result_coords) { auto timer_se = stats_->start_timer("compute_range_result_coords"); @@ -547,7 +549,7 @@ Status Reader::compute_range_result_coords( uint64_t range_idx, uint32_t fragment_idx, const std::map, size_t>& result_tile_map, - std::vector& result_tiles, + IndexedList& result_tiles, std::vector& range_result_coords) { // Skip dense fragments if (fragment_metadata_[fragment_idx]->dense()) @@ -568,12 +570,14 @@ Status Reader::compute_range_result_coords( auto tile_it = result_tile_map.find(pair); assert(tile_it != result_tile_map.end()); auto tile_idx = tile_it->second; - auto& tile = result_tiles[tile_idx]; + + auto tile = result_tiles.begin(); + std::advance(tile, tile_idx); // Add results only if the sparse tile MBR is not fully // covered by a more recent fragment's non-empty domain if (!sparse_tile_overwritten(fragment_idx, i)) - RETURN_NOT_OK(get_all_result_coords(&tile, range_result_coords)); + RETURN_NOT_OK(get_all_result_coords(&*tile, range_result_coords)); } ++tr; } else { @@ -582,15 +586,16 @@ Status Reader::compute_range_result_coords( auto tile_it = result_tile_map.find(pair); assert(tile_it != result_tile_map.end()); auto tile_idx = tile_it->second; - auto& tile = result_tiles[tile_idx]; + auto tile = result_tiles.begin(); + std::advance(tile, tile_idx); if (t->second == 1.0) { // Full overlap // Add results only if the sparse tile MBR is 
not fully // covered by a more recent fragment's non-empty domain if (!sparse_tile_overwritten(fragment_idx, t->first)) - RETURN_NOT_OK(get_all_result_coords(&tile, range_result_coords)); + RETURN_NOT_OK(get_all_result_coords(&*tile, range_result_coords)); } else { // Partial overlap RETURN_NOT_OK(compute_range_result_coords( - subarray, fragment_idx, &tile, range_idx, range_result_coords)); + subarray, fragment_idx, &*tile, range_idx, range_result_coords)); } ++t; } @@ -603,7 +608,7 @@ Status Reader::compute_range_result_coords( Subarray& subarray, uint64_t range_idx, const std::map, size_t>& result_tile_map, - std::vector& result_tiles, + IndexedList& result_tiles, std::vector& range_result_coords) { // Gather result range coordinates per fragment auto fragment_num = fragment_metadata_.size(); @@ -678,7 +683,7 @@ Status Reader::compute_subarray_coords( } Status Reader::compute_sparse_result_tiles( - std::vector& result_tiles, + IndexedList& result_tiles, std::map, size_t>* result_tile_map, std::vector* single_fragment) { auto timer_se = stats_->start_timer("compute_sparse_result_tiles"); @@ -711,7 +716,8 @@ Status Reader::compute_sparse_result_tiles( auto pair = std::pair(f, t); // Add tile only if it does not already exist if (result_tile_map->find(pair) == result_tile_map->end()) { - result_tiles.emplace_back(f, t, *fragment_metadata_[f].get()); + result_tiles.emplace_back( + f, t, *fragment_metadata_[f].get(), query_memory_tracker_); (*result_tile_map)[pair] = result_tiles.size() - 1; } // Always check range for multiple fragments or fragments with @@ -730,7 +736,8 @@ Status Reader::compute_sparse_result_tiles( auto pair = std::pair(f, t); // Add tile only if it does not already exist if (result_tile_map->find(pair) == result_tile_map->end()) { - result_tiles.emplace_back(f, t, *fragment_metadata_[f].get()); + result_tiles.emplace_back( + f, t, *fragment_metadata_[f].get(), query_memory_tracker_); (*result_tile_map)[pair] = result_tiles.size() - 1; } // Always 
check range for multiple fragments or fragments with @@ -1580,7 +1587,7 @@ Status Reader::compute_result_cell_slabs_global( } Status Reader::compute_result_coords( - std::vector& result_tiles, + IndexedList& result_tiles, std::vector& result_coords) { auto timer_se = stats_->start_timer("compute_result_coords"); @@ -1719,7 +1726,7 @@ Status Reader::dense_read() { // `sparse_result_tiles` will hold all the relevant result tiles of // sparse fragments std::vector result_coords; - std::vector sparse_result_tiles; + IndexedList sparse_result_tiles; RETURN_NOT_OK(compute_result_coords(sparse_result_tiles, result_coords)); // Compute result cell slabs. @@ -1780,7 +1787,9 @@ Status Reader::get_all_result_coords( array_->timestamp_start(), array_->timestamp_end_opened_at()); if (fragment_metadata_[tile->frag_idx()]->has_timestamps() && partial_overlap) { - std::vector result_bitmap(coords_num, 1); + auto resource = + query_memory_tracker_->get_resource(MemoryType::TILE_BITMAP); + tdb::pmr::vector result_bitmap(coords_num, 1, resource); RETURN_NOT_OK(partial_overlap_condition_.apply_sparse( *(frag_meta->array_schema().get()), *tile, result_bitmap)); @@ -1975,7 +1984,7 @@ Status Reader::sparse_read() { // `sparse_result_tiles` will hold all the relevant result tiles of // sparse fragments std::vector result_coords; - std::vector sparse_result_tiles; + IndexedList sparse_result_tiles; RETURN_NOT_OK(compute_result_coords(sparse_result_tiles, result_coords)); std::vector result_tiles; @@ -2057,7 +2066,7 @@ bool Reader::sparse_tile_overwritten( return false; } -void Reader::erase_coord_tiles(std::vector& result_tiles) const { +void Reader::erase_coord_tiles(IndexedList& result_tiles) const { for (auto& tile : result_tiles) { auto dim_num = array_schema_.dim_num(); for (unsigned d = 0; d < dim_num; ++d) diff --git a/tiledb/sm/query/legacy/reader.h b/tiledb/sm/query/legacy/reader.h index 057841d10a47..d544edee0332 100644 --- a/tiledb/sm/query/legacy/reader.h +++ 
b/tiledb/sm/query/legacy/reader.h @@ -36,6 +36,7 @@ #include #include "tiledb/common/common.h" +#include "tiledb/common/indexed_list.h" #include "tiledb/common/logger_public.h" #include "tiledb/common/status.h" #include "tiledb/sm/array_schema/dimension.h" @@ -305,7 +306,7 @@ class Reader : public ReaderBase, public IQueryStrategy { Subarray& subarray, const std::vector& single_fragment, const std::map, size_t>& result_tile_map, - std::vector& result_tiles, + IndexedList& result_tiles, std::vector>& range_result_coords); /** @@ -325,7 +326,7 @@ class Reader : public ReaderBase, public IQueryStrategy { Subarray& subarray, uint64_t range_idx, const std::map, size_t>& result_tile_map, - std::vector& result_tiles, + IndexedList& result_tiles, std::vector& range_result_coords); /** @@ -347,7 +348,7 @@ class Reader : public ReaderBase, public IQueryStrategy { uint64_t range_idx, uint32_t fragment_idx, const std::map, size_t>& result_tile_map, - std::vector& result_tiles, + IndexedList& result_tiles, std::vector& range_result_coords); /** @@ -382,7 +383,7 @@ class Reader : public ReaderBase, public IQueryStrategy { * @return Status */ Status compute_sparse_result_tiles( - std::vector& result_tiles, + IndexedList& result_tiles, std::map, size_t>* result_tile_map, std::vector* single_fragment); @@ -574,7 +575,7 @@ class Reader : public ReaderBase, public IQueryStrategy { * @param result_coords This will store the result coordinates. */ Status compute_result_coords( - std::vector& result_tiles, + IndexedList& result_tiles, std::vector& result_coords); /** @@ -651,7 +652,7 @@ class Reader : public ReaderBase, public IQueryStrategy { * Erases the coordinate tiles (zipped or separate) from the input result * tiles. */ - void erase_coord_tiles(std::vector& result_tiles) const; + void erase_coord_tiles(IndexedList& result_tiles) const; /** Gets statistics about the result cells. 
*/ void get_result_cell_stats( diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 32bcd7199b64..94c7e15575e5 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -35,6 +35,7 @@ #include "tiledb/common/heap_memory.h" #include "tiledb/common/logger.h" #include "tiledb/common/memory.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/dimension_label.h" #include "tiledb/sm/enums/query_status.h" @@ -66,14 +67,6 @@ using namespace tiledb::sm::stats; namespace tiledb { namespace sm { -/** Class for query status exceptions. */ -class QueryStatusException : public StatusException { - public: - explicit QueryStatusException(const std::string& msg) - : StatusException("Query", msg) { - } -}; - /* ****************************** */ /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ @@ -82,7 +75,9 @@ Query::Query( StorageManager* storage_manager, shared_ptr array, optional fragment_name) - : array_shared_(array) + : query_memory_tracker_( + storage_manager->resources().create_memory_tracker()) + , array_shared_(array) , array_(array_shared_.get()) , opened_array_(array->opened_array()) , array_schema_(array->array_schema_latest_ptr()) @@ -110,9 +105,16 @@ Query::Query( , is_dimension_label_ordered_read_(false) , dimension_label_increasing_(true) , fragment_size_(std::numeric_limits::max()) - , query_remote_buffer_storage_(std::nullopt) { + , query_remote_buffer_storage_(std::nullopt) + , default_channel_{make_shared(HERE(), *this, 0)} { assert(array->is_open()); + if (array->get_query_type() == QueryType::READ) { + query_memory_tracker_->set_type(MemoryTrackerType::QUERY_READ); + } else { + query_memory_tracker_->set_type(MemoryTrackerType::QUERY_WRITE); + } + subarray_ = Subarray(array_, layout_, stats_, logger_); fragment_metadata_ = array->fragment_metadata(); @@ -827,6 +829,7 @@ Status Query::process() { if (!only_dim_label_query()) { if (strategy_ != 
nullptr) { + // The strategy destructor should reset its own Stats object here dynamic_cast(strategy_.get())->stats()->reset(); strategy_ = nullptr; } @@ -917,6 +920,7 @@ Status Query::reset_strategy_with_layout( Layout layout, bool force_legacy_reader) { force_legacy_reader_ = force_legacy_reader; if (strategy_ != nullptr) { + // The strategy destructor should reset its own Stats object here dynamic_cast(strategy_.get())->stats()->reset(); strategy_ = nullptr; } @@ -1663,6 +1667,10 @@ stats::Stats* Query::stats() const { return stats_; } +void Query::set_stats(const stats::StatsData& data) { + stats_->populate_with_data(data); +} + shared_ptr Query::rest_scratch() const { return rest_scratch_; } @@ -1791,6 +1799,8 @@ bool Query::is_aggregate(std::string output_field_name) const { Status Query::create_strategy(bool skip_checks_serialization) { auto params = StrategyParams( + array_->memory_tracker(), + query_memory_tracker_, storage_manager_, opened_array_, config_, @@ -1800,8 +1810,7 @@ Status Query::create_strategy(bool skip_checks_serialization) { layout_, condition_, default_channel_aggregates_, - skip_checks_serialization, - array_->memory_tracker()); + skip_checks_serialization); if (type_ == QueryType::WRITE || type_ == QueryType::MODIFY_EXCLUSIVE) { if (layout_ == Layout::COL_MAJOR || layout_ == Layout::ROW_MAJOR) { if (!array_schema_->dense()) { @@ -2138,5 +2147,9 @@ void Query::copy_aggregates_data_to_user_buffer() { } } +RestClient* Query::rest_client() const { + return storage_manager_->rest_client(); +} + } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index c5c8ecba2dba..c02cea963581 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -54,6 +54,7 @@ #include "tiledb/sm/query/readers/aggregators/query_channel.h" #include "tiledb/sm/query/update_value.h" #include "tiledb/sm/query/validity_vector.h" +#include "tiledb/sm/rest/rest_client.h" #include 
"tiledb/sm/storage_manager/storage_manager_declaration.h" #include "tiledb/sm/subarray/subarray.h" @@ -62,6 +63,14 @@ using namespace tiledb::common; namespace tiledb { namespace sm { +/** Class for query status exceptions. */ +class QueryStatusException : public StatusException { + public: + explicit QueryStatusException(const std::string& msg) + : StatusException("Query", msg) { + } +}; + class Array; class ArrayDimensionLabelQueries; @@ -655,6 +664,14 @@ class Query { /** Returns the internal stats object. */ stats::Stats* stats() const; + /** + * Populate the owned stats instance with data. + * To be removed when the class will get a C41 constructor. + * + * @param data Data to populate the stats with. + */ + void set_stats(const stats::StatsData& data); + /** Returns the scratch space used for REST requests. */ shared_ptr rest_scratch() const; @@ -732,6 +749,21 @@ class Query { /** Returns true if the output field is an aggregate. */ bool is_aggregate(std::string output_field_name) const; + private: + /** + * Create the aggregate channel object. This is split out because it's not + * at construction time, but on demand, and in two different situations. + */ + void create_aggregate_channel() { + /* + * Because we have an extremely simple way of choosing channel identifiers, + * we can get away with hard-coding `1` here as the identifier for the + * aggregate channel. + */ + aggregate_channel_ = make_shared(HERE(), *this, 1); + } + + public: /** * Adds an aggregator to the default channel. * @@ -740,6 +772,15 @@ class Query { */ void add_aggregator_to_default_channel( std::string output_field_name, shared_ptr aggregator) { + if (default_channel_aggregates_.empty()) { + /* + * Assert: this is the first aggregate added. + * + * We create the aggregate channel on demand, and this is when we need to + * do it. 
+ */ + create_aggregate_channel(); + } default_channel_aggregates_.emplace(output_field_name, aggregator); } @@ -750,20 +791,25 @@ class Query { /** * Get a list of all channels and their aggregates */ - std::vector get_channels() { + std::vector get_channels() { // Currently only the default channel is supported - return {QueryChannel(true, default_channel_aggregates_)}; + return { + LegacyQueryAggregatesOverDefault(true, default_channel_aggregates_)}; } /** - * Add a channel to the query + * Add a channel to the query. Used only by capnp serialization to initialize + * the aggregates list. */ - void add_channel(const QueryChannel& channel) { + void add_channel(const LegacyQueryAggregatesOverDefault& channel) { if (channel.is_default()) { default_channel_aggregates_ = channel.aggregates(); + if (!default_channel_aggregates_.empty()) { + create_aggregate_channel(); + } return; } - throw std::logic_error( + throw QueryStatusException( "We currently only support a default channel for queries"); } @@ -772,14 +818,51 @@ class Query { */ bool has_aggregates() { // We only need to check the default channel for now - return default_channel_aggregates_.empty(); + return !default_channel_aggregates_.empty(); + } + + /** + * Returns the number of channels. + * + * Responsibility for choosing channel identifiers is the responsibility of + * this class. At the present time the policy is very simple, since all + * queries only draw from a single array. + * - Channel 0: All rows from the query. Always non-segmented, that is, + * without any grouping. + * - Channel 1: (optional) Simple aggregates, if any exist. + */ + inline size_t number_of_channels() { + return has_aggregates() ? 1 : 0; + }; + + /** + * The default channel is initialized at construction and always exists. 
+ */ + inline std::shared_ptr default_channel() { + return default_channel_; } + inline std::shared_ptr aggegate_channel() { + if (!has_aggregates()) { + throw QueryStatusException("Aggregate channel does not exist"); + } + return aggregate_channel_; + } + + /** + * Returns the REST client configured in the storage manager associated to + * this query + */ + RestClient* rest_client() const; + private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The query memory tracker. */ + shared_ptr query_memory_tracker_; + /** A smart pointer to the array the query is associated with. * Ensures that the Array object exists as long as the Query object exists. */ shared_ptr array_shared_; @@ -957,6 +1040,25 @@ class Query { std::unordered_map> default_channel_aggregates_; + /* + * Handles to channels use shared pointers, so the channels are allocated here + * for ease of implementation. + * + * At present there's only one possible non-default channel, so we keep track + * of it in its own variable. A fully C.41 class might simply store these in + * a constant vector. + */ + /** + * The default channel is allocated in the constructor for simplicity. + */ + std::shared_ptr default_channel_; + + /** + * The aggregate channel is optional, so we initialize it as empty with its + * default constructor. 
+ */ + std::shared_ptr aggregate_channel_{}; + /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ diff --git a/tiledb/sm/query/query_condition.cc b/tiledb/sm/query/query_condition.cc index 242babd69049..5c2c8854a869 100644 --- a/tiledb/sm/query/query_condition.cc +++ b/tiledb/sm/query/query_condition.cc @@ -126,6 +126,12 @@ Status QueryCondition::init( return Status_QueryConditionError("Cannot reinitialize query condition"); } + if (op == QueryConditionOp::ALWAYS_TRUE || + op == QueryConditionOp::ALWAYS_FALSE) { + auto op_str = query_condition_op_str(op); + throw std::runtime_error("Invalid use of internal operation: " + op_str); + } + // AST Construction. tree_ = tdb_unique_ptr(tdb_new( ASTNodeVal, field_name, condition_value, condition_value_size, op)); @@ -482,6 +488,22 @@ struct QueryCondition::BinaryCmpNullChecks { } }; +/** Partial template specialization for `QueryConditionOp::ALWAYS_TRUE`. */ +template +struct QueryCondition::BinaryCmpNullChecks { + static inline bool cmp(const void* lhs, uint64_t, const void*, uint64_t) { + return lhs != nullptr; + } +}; + +/** Partial template specialization for `QueryConditionOp::ALWAYS_FALSE`. */ +template +struct QueryCondition::BinaryCmpNullChecks { + static inline bool cmp(const void*, uint64_t, const void*, uint64_t) { + return false; + } +}; + /** Partial template specialization for `QueryConditionOp::LT`. 
*/ template struct QueryCondition::BinaryCmpNullChecks { @@ -748,6 +770,30 @@ void QueryCondition::apply_ast_node( CombinationOp combination_op, std::vector& result_cell_bitmap) const { switch (node->get_op()) { + case QueryConditionOp::ALWAYS_TRUE: + apply_ast_node( + node, + fragment_metadata, + stride, + var_size, + nullable, + fill_value, + result_cell_slabs, + combination_op, + result_cell_bitmap); + break; + case QueryConditionOp::ALWAYS_FALSE: + apply_ast_node( + node, + fragment_metadata, + stride, + var_size, + nullable, + fill_value, + result_cell_slabs, + combination_op, + result_cell_bitmap); + break; case QueryConditionOp::LT: apply_ast_node( node, @@ -1440,6 +1486,34 @@ void QueryCondition::apply_ast_node_dense( const void* cell_slab_coords, span result_buffer) const { switch (node->get_op()) { + case QueryConditionOp::ALWAYS_TRUE: + apply_ast_node_dense( + node, + array_schema, + result_tile, + start, + src_cell, + stride, + var_size, + nullable, + combination_op, + cell_slab_coords, + result_buffer); + break; + case QueryConditionOp::ALWAYS_FALSE: + apply_ast_node_dense( + node, + array_schema, + result_tile, + start, + src_cell, + stride, + var_size, + nullable, + combination_op, + cell_slab_coords, + result_buffer); + break; case QueryConditionOp::LT: apply_ast_node_dense( node, @@ -2205,6 +2279,22 @@ struct QueryCondition::BinaryCmp { } }; +/** Partial template specialization for `QueryConditionOp::ALWAYS_TRUE`. */ +template +struct QueryCondition::BinaryCmp { + static inline bool cmp(const void*, uint64_t, const void*, uint64_t) { + return true; + } +}; + +/** Partial template specialization for `QueryConditionOp::ALWAYS_FALSE`. */ +template +struct QueryCondition::BinaryCmp { + static inline bool cmp(const void*, uint64_t, const void*, uint64_t) { + return false; + } +}; + /** Partial template specialization for `QueryConditionOp::LT`. 
*/ template struct QueryCondition::BinaryCmp { @@ -2295,7 +2385,7 @@ void QueryCondition::apply_ast_node_sparse( ResultTile& result_tile, const bool var_size, CombinationOp combination_op, - std::vector& result_bitmap) const { + tdb::pmr::vector& result_bitmap) const { const auto tile_tuple = result_tile.tile_tuple(node->get_field_name()); const void* condition_value_content = node->get_value_ptr(); const size_t condition_value_size = node->get_value_size(); @@ -2383,8 +2473,24 @@ void QueryCondition::apply_ast_node_sparse( ResultTile& result_tile, const bool var_size, CombinationOp combination_op, - std::vector& result_bitmap) const { + tdb::pmr::vector& result_bitmap) const { switch (node->get_op()) { + case QueryConditionOp::ALWAYS_TRUE: + apply_ast_node_sparse< + T, + QueryConditionOp::ALWAYS_TRUE, + BitmapType, + CombinationOp, + nullable>(node, result_tile, var_size, combination_op, result_bitmap); + break; + case QueryConditionOp::ALWAYS_FALSE: + apply_ast_node_sparse< + T, + QueryConditionOp::ALWAYS_FALSE, + BitmapType, + CombinationOp, + nullable>(node, result_tile, var_size, combination_op, result_bitmap); + break; case QueryConditionOp::LT: apply_ast_node_sparse< T, @@ -2463,7 +2569,7 @@ void QueryCondition::apply_ast_node_sparse( const bool var_size, const bool nullable, CombinationOp combination_op, - std::vector& result_bitmap) const { + tdb::pmr::vector& result_bitmap) const { if (nullable) { apply_ast_node_sparse( node, result_tile, var_size, combination_op, result_bitmap); @@ -2479,7 +2585,7 @@ void QueryCondition::apply_ast_node_sparse( const ArraySchema& array_schema, ResultTile& result_tile, CombinationOp combination_op, - std::vector& result_bitmap) const { + tdb::pmr::vector& result_bitmap) const { std::string node_field_name = node->get_field_name(); if (!array_schema.is_field(node_field_name)) { std::fill(result_bitmap.begin(), result_bitmap.end(), 0); @@ -2638,7 +2744,7 @@ void QueryCondition::apply_tree_sparse( const ArraySchema& 
array_schema, ResultTile& result_tile, CombinationOp combination_op, - std::vector& result_bitmap) const { + tdb::pmr::vector& result_bitmap) const { if (!node->is_expr()) { apply_ast_node_sparse( node, array_schema, result_tile, combination_op, result_bitmap); @@ -2677,7 +2783,9 @@ void QueryCondition::apply_tree_sparse( // Handle the cl'(q, a) case. // This cases on whether the combination op = OR. } else if constexpr (std::is_same_v>) { - std::vector combination_op_bitmap(result_bitmap_size, 1); + auto resource = result_bitmap.get_allocator().resource(); + tdb::pmr::vector combination_op_bitmap( + result_bitmap_size, 1, resource); for (const auto& child : node->get_children()) { apply_tree_sparse( @@ -2699,7 +2807,9 @@ void QueryCondition::apply_tree_sparse( * = a /\ (cl1'(q; cl2'(q; 0))) */ case QueryConditionCombinationOp::OR: { - std::vector combination_op_bitmap(result_bitmap_size, 0); + auto resource = result_bitmap.get_allocator().resource(); + tdb::pmr::vector combination_op_bitmap( + result_bitmap_size, 0, resource); for (const auto& child : node->get_children()) { apply_tree_sparse( @@ -2730,7 +2840,7 @@ template Status QueryCondition::apply_sparse( const ArraySchema& array_schema, ResultTile& result_tile, - std::vector& result_bitmap) { + tdb::pmr::vector& result_bitmap) { apply_tree_sparse( tree_, array_schema, @@ -2759,7 +2869,7 @@ uint64_t QueryCondition::condition_index() const { // Explicit template instantiations. 
template Status QueryCondition::apply_sparse( - const ArraySchema& array_schema, ResultTile&, std::vector&); + const ArraySchema& array_schema, ResultTile&, tdb::pmr::vector&); template Status QueryCondition::apply_sparse( - const ArraySchema& array_schema, ResultTile&, std::vector&); + const ArraySchema& array_schema, ResultTile&, tdb::pmr::vector&); } // namespace tiledb::sm diff --git a/tiledb/sm/query/query_condition.h b/tiledb/sm/query/query_condition.h index 327e48d551fd..33115e16f320 100644 --- a/tiledb/sm/query/query_condition.h +++ b/tiledb/sm/query/query_condition.h @@ -240,7 +240,7 @@ class QueryCondition { Status apply_sparse( const ArraySchema& array_schema, ResultTile& result_tile, - std::vector& result_bitmap); + tdb::pmr::vector& result_bitmap); /** * Reverse the query condition using De Morgan's law. @@ -561,7 +561,7 @@ class QueryCondition { ResultTile& result_tile, const bool var_size, CombinationOp combination_op, - std::vector& result_bitmap) const; + tdb::pmr::vector& result_bitmap) const; /** * Applies a value node on a sparse result tile, @@ -584,7 +584,7 @@ class QueryCondition { ResultTile& result_tile, const bool var_size, CombinationOp combination_op, - std::vector& result_bitmap) const; + tdb::pmr::vector& result_bitmap) const; /** * Applies a value node on a sparse result tile. @@ -603,7 +603,7 @@ class QueryCondition { const bool var_size, const bool nullable, CombinationOp combination_op, - std::vector& result_bitmap) const; + tdb::pmr::vector& result_bitmap) const; /** * Applies a value node to filter result cells from the input @@ -622,7 +622,7 @@ class QueryCondition { const ArraySchema& array_schema, ResultTile& result_tile, CombinationOp combination_op, - std::vector& result_bitmap) const; + tdb::pmr::vector& result_bitmap) const; /** * Applies the query condition represented with the AST to a set of cells. 
@@ -642,7 +642,7 @@ class QueryCondition { const ArraySchema& array_schema, ResultTile& result_tile, CombinationOp combination_op, - std::vector& result_bitmap) const; + tdb::pmr::vector& result_bitmap) const; }; } // namespace sm diff --git a/tiledb/sm/query/readers/aggregators/aggregate_with_count.h b/tiledb/sm/query/readers/aggregators/aggregate_with_count.h index 70ed41adccd5..cf7234fbc754 100644 --- a/tiledb/sm/query/readers/aggregators/aggregate_with_count.h +++ b/tiledb/sm/query/readers/aggregators/aggregate_with_count.h @@ -93,7 +93,17 @@ class AggregateWithCount { // nullable. The bitmap tells us which cells was already filtered out by // ranges or query conditions. if (input_data.has_bitmap()) { - if (field_info_.is_nullable_) { + if (field_info_.is_dense_dim_) { + // Process for dense dimension values with bitmap. + for (uint64_t c = 0; c < input_data.size(); c++) { + auto bitmap_val = input_data.bitmap_at(c); + auto value = dense_dim_value_at(input_data, c); + for (BITMAP_T i = 0; i < bitmap_val; i++) { + agg_policy.op(value, res, count); + count++; + } + } + } else if (field_info_.is_nullable_) { // Process for nullable values with bitmap. for (uint64_t c = 0; c < input_data.size(); c++) { auto bitmap_val = input_data.bitmap_at(c); @@ -117,7 +127,14 @@ class AggregateWithCount { } } } else { - if (field_info_.is_nullable_) { + if (field_info_.is_dense_dim_) { + // Process for dense dimension values with no bitmap. + for (uint64_t c = 0; c < input_data.size(); c++) { + auto value = dense_dim_value_at(input_data, c); + agg_policy.op(value, res, count); + count++; + } + } else if (field_info_.is_nullable_) { // Process for nullable values with no bitmap. for (uint64_t c = 0; c < input_data.size(); c++) { if (val_policy.op(input_data.validity_at(c))) { @@ -166,6 +183,24 @@ class AggregateWithCount { return AGG_T(); } + + /** + * Returns the dense dimension value at the specified cell if needed. + * + * @param input_data Input data. + * @param c Cell index. 
+ * @return Value. + */ + inline AGG_T dense_dim_value_at(AggregateBuffer& input_data, uint64_t c) { + typedef typename type_data::value_type VALUE_T; + if constexpr ( + !std::is_same::value && + !std::is_same::value) { + return input_data.value_at(0) + c * field_info_.is_slab_dim_; + } + + return AGG_T(); + } }; } // namespace tiledb::sm diff --git a/tiledb/sm/query/readers/aggregators/field_info.h b/tiledb/sm/query/readers/aggregators/field_info.h index 0a6d6071736c..df415c10659e 100644 --- a/tiledb/sm/query/readers/aggregators/field_info.h +++ b/tiledb/sm/query/readers/aggregators/field_info.h @@ -50,6 +50,8 @@ class FieldInfo { FieldInfo() : var_sized_(false) , is_nullable_(false) + , is_dense_dim_(false) + , is_slab_dim_(false) , cell_val_num_(1) , type_(Datatype::UINT8){}; @@ -71,6 +73,35 @@ class FieldInfo { : name_(name) , var_sized_(var_sized) , is_nullable_(is_nullable) + , is_dense_dim_(false) + , is_slab_dim_(false) + , cell_val_num_(cell_val_num) + , type_(type){}; + + /** + * Constructor. + * + * @param name Name of the field. + * @param var_sized Is the field var sized? + * @param is_nullable Is the field nullable? + * @param is_dense_dim Is the field a dense dimension? + * @param is_slab_dim Is the dense dimension the slab dimension? + * @param cell_val_num Cell val num. + * @param type Data type of the field + */ + FieldInfo( + const std::string name, + const bool var_sized, + const bool is_nullable, + const bool is_dense_dim, + const bool is_slab_dim, + const unsigned cell_val_num, + const Datatype type) + : name_(name) + , var_sized_(var_sized) + , is_nullable_(is_nullable) + , is_dense_dim_(is_dense_dim) + , is_slab_dim_(is_slab_dim) + , cell_val_num_(cell_val_num) + , type_(type){}; @@ -87,6 +118,12 @@ class FieldInfo { /** Is the field nullable? */ const bool is_nullable_; + /** Is the field a dense dimension? */ + const bool is_dense_dim_; + + /** Is the dense dimension the cell slab dimension? */ + const bool is_slab_dim_; + /** Cell val num. 
*/ const unsigned cell_val_num_; diff --git a/tiledb/sm/query/readers/aggregators/query_channel.h b/tiledb/sm/query/readers/aggregators/query_channel.h index 62d87c15395f..e619d75adc82 100644 --- a/tiledb/sm/query/readers/aggregators/query_channel.h +++ b/tiledb/sm/query/readers/aggregators/query_channel.h @@ -39,7 +39,11 @@ namespace tiledb::sm { -class QueryChannel { +/** + * Original class is only used for capnp (de)serialization. `class Query` uses + * its own container to hold aggregates. + */ +class LegacyQueryAggregatesOverDefault { public: using ChannelAggregates = std::unordered_map>; @@ -54,7 +58,8 @@ class QueryChannel { * @param is_default If true, this is the default query channel * @param aggregates A map of aggregators by output field name */ - QueryChannel(bool is_default, const ChannelAggregates& aggregates) + LegacyQueryAggregatesOverDefault( + bool is_default, const ChannelAggregates& aggregates) : default_(is_default) , aggregates_{aggregates} { } @@ -85,6 +90,58 @@ class QueryChannel { ChannelAggregates aggregates_; }; +/* forward declaration */ +class Query; + +/** + * Replacement for the current QueryChannel, which does not work for more than + * the initial case with a default channel and only simple aggregates. + * + * Responsibility for choosing channel identifiers is the responsibility of + * `class Query`; this class merely carries the resulting identifier. + */ +class QueryChannel { + std::reference_wrapper query_; + size_t id_; + + public: + /** + * Default constructor is deleted. A channel makes no sense without a query. + */ + QueryChannel() = delete; + /* + * Ordinary constructor. + */ + QueryChannel(Query& q, size_t id) + : query_(q) + , id_(id) { + (void)id_; + } + /** + * Copy constructor is the default. + */ + QueryChannel(const QueryChannel&) = default; + /** + * Move constructor is the default. + */ + QueryChannel(QueryChannel&&) = default; + /** + * Copy assignment is the default. 
+ */ + QueryChannel& operator=(const QueryChannel&) = default; + /** + * Move assignment is the default. + */ + QueryChannel& operator=(QueryChannel&&) = default; + + /** + * Accessor for query member + */ + inline Query& query() { + return query_; + } +}; + } // namespace tiledb::sm #endif // TILEDB_QUERY_CHANNEL_H diff --git a/tiledb/sm/query/readers/attribute_order_validator.h b/tiledb/sm/query/readers/attribute_order_validator.h index a0c6c4672c85..77fb6b1e5aab 100644 --- a/tiledb/sm/query/readers/attribute_order_validator.h +++ b/tiledb/sm/query/readers/attribute_order_validator.h @@ -81,8 +81,12 @@ class AttributeOrderValidator { * @param attribute_name Name of the attribute to validate. * @param num_frags Number of fragments. */ - AttributeOrderValidator(const std::string& attribute_name, uint64_t num_frags) - : attribute_name_(attribute_name) + AttributeOrderValidator( + const std::string& attribute_name, + uint64_t num_frags, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , attribute_name_(attribute_name) , result_tiles_to_load_(num_frags) , per_fragment_validation_data_(num_frags) { } @@ -526,6 +530,9 @@ class AttributeOrderValidator { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The memory tracker to use. */ + shared_ptr memory_tracker_; + /** Name of the attribute to validate. */ std::string attribute_name_; @@ -562,7 +569,10 @@ class AttributeOrderValidator { std::piecewise_construct, std::forward_as_tuple(t_to_compare), std::forward_as_tuple( - f_to_compare, t_to_compare, *fragment_metadata.get())); + f_to_compare, + t_to_compare, + *fragment_metadata.get(), + memory_tracker_)); } if (is_lower_bound) { @@ -579,8 +589,12 @@ class AttributeOrderValidator { * @return Tile to compare against. 
*/ inline ResultTile* min_tile_to_compare_against(unsigned f) { - return &result_tiles_to_load_[f][per_fragment_validation_data_[f] - .min_tile_to_compare_to_.value()]; + auto idx = per_fragment_validation_data_[f].min_tile_to_compare_to_.value(); + auto iter = result_tiles_to_load_[f].find(idx); + if (iter == result_tiles_to_load_[f].end()) { + throw std::runtime_error("Invalid minimum tile index."); + } + return &(iter->second); } /** @@ -590,8 +604,12 @@ class AttributeOrderValidator { * @return Tile to compare against. */ inline ResultTile* max_tile_to_compare_against(unsigned f) { - return &result_tiles_to_load_[f][per_fragment_validation_data_[f] - .max_tile_to_compare_to_.value()]; + auto idx = per_fragment_validation_data_[f].max_tile_to_compare_to_.value(); + auto iter = result_tiles_to_load_[f].find(idx); + if (iter == result_tiles_to_load_[f].end()) { + throw std::runtime_error("Invalid maximum tile index."); + } + return &(iter->second); } }; diff --git a/tiledb/sm/query/readers/dense_reader.cc b/tiledb/sm/query/readers/dense_reader.cc index 501cdd5b4ad3..60a3d5a18ba4 100644 --- a/tiledb/sm/query/readers/dense_reader.cc +++ b/tiledb/sm/query/readers/dense_reader.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2022 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -30,8 +30,9 @@ * This file implements class DenseReader. 
*/ +#include "tiledb/sm/query/readers/dense_reader.h" #include "tiledb/common/logger.h" - +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/dimension.h" @@ -40,7 +41,6 @@ #include "tiledb/sm/misc/utils.h" #include "tiledb/sm/query/legacy/cell_slab_iter.h" #include "tiledb/sm/query/query_macros.h" -#include "tiledb/sm/query/readers/dense_reader.h" #include "tiledb/sm/query/readers/filtered_data.h" #include "tiledb/sm/query/readers/result_tile.h" #include "tiledb/sm/stats/global_stats.h" @@ -54,8 +54,7 @@ using namespace tiledb; using namespace tiledb::common; using namespace tiledb::sm::stats; -namespace tiledb { -namespace sm { +namespace tiledb::sm { class DenseReaderStatusException : public StatusException { public: @@ -73,8 +72,7 @@ DenseReader::DenseReader( shared_ptr logger, StrategyParams& params, bool remote_query) - : ReaderBase(stats, logger->clone("DenseReader", ++logger_id_), params) - , array_memory_tracker_(params.memory_tracker()) { + : ReaderBase(stats, logger->clone("DenseReader", ++logger_id_), params) { elements_mode_ = false; // Sanity checks. @@ -420,27 +418,28 @@ Status DenseReader::dense_read() { // clear the memory. Also, a name in names might not be in the user buffers // so we might skip the copy but still clear the memory. for (auto& name : names) { - if (name == constants::coords || array_schema_.is_dim(name)) { - continue; - } - - // Get the tiles to load for this attribute. - auto result_tiles = result_tiles_to_load( - name, - qc_loaded_attr_names_set_, - subarray, - t_start, - t_end, - result_space_tiles, - tile_subarrays); - - std::vector filtered_data; - - // Read and unfilter tiles. 
+ shared_ptr> filtered_data; + std::vector result_tiles; bool validity_only = null_count_aggregate_only(name); - std::vector to_load; - to_load.emplace_back(name, validity_only); - filtered_data = std::move(read_attribute_tiles(to_load, result_tiles)); + bool dense_dim = name == constants::coords || array_schema_.is_dim(name); + + if (!dense_dim) { + // Get the tiles to load for this attribute. + result_tiles = result_tiles_to_load( + name, + qc_loaded_attr_names_set_, + subarray, + t_start, + t_end, + result_space_tiles, + tile_subarrays); + + // Read and unfilter tiles. + std::vector to_load; + to_load.emplace_back(name, validity_only); + filtered_data = make_shared>( + read_attribute_tiles(to_load, result_tiles)); + } if (compute_task.valid()) { RETURN_NOT_OK(storage_manager_->compute_tp()->wait(compute_task)); @@ -449,40 +448,43 @@ Status DenseReader::dense_read() { } } - compute_task = storage_manager_->compute_tp()->execute( - [&, - filtered_data = std::move(filtered_data), - name, - validity_only, - t_start, - t_end, - subarray_start_cell, - subarray_end_cell, - num_range_threads, - result_tiles]() { - // Unfilter tiles. - RETURN_NOT_OK(unfilter_tiles(name, validity_only, result_tiles)); - - // Only copy names that are present in the user buffers. - if (buffers_.count(name) != 0) { - // Copy attribute data to users buffers. - auto& var_buffer_size = var_buffer_sizes[name]; - status = copy_attribute( - name, - tile_extents, - subarray, - t_start, - t_end, - subarray_start_cell, - subarray_end_cell, - tile_subarrays, - tile_offsets, - var_buffer_size, - range_info, - result_space_tiles, - qc_result, - num_range_threads); - RETURN_CANCEL_OR_ERROR(status); + compute_task = + storage_manager_->compute_tp()->execute([&, + filtered_data, + dense_dim, + name, + validity_only, + t_start, + t_end, + subarray_start_cell, + subarray_end_cell, + num_range_threads, + result_tiles]() { + if (!dense_dim) { + // Unfilter tiles. 
+ RETURN_NOT_OK(unfilter_tiles(name, validity_only, result_tiles)); + + // Only copy names that are present in the user buffers. + if (buffers_.count(name) != 0) { + // Copy attribute data to users buffers. + auto& var_buffer_size = var_buffer_sizes[name]; + status = copy_attribute( + name, + tile_extents, + subarray, + t_start, + t_end, + subarray_start_cell, + subarray_end_cell, + tile_subarrays, + tile_offsets, + var_buffer_size, + range_info, + result_space_tiles, + qc_result, + num_range_threads); + RETURN_CANCEL_OR_ERROR(status); + } } if (aggregates_.count(name) != 0) { @@ -501,7 +503,9 @@ Status DenseReader::dense_read() { RETURN_CANCEL_OR_ERROR(status); } - clear_tiles(name, result_tiles); + if (!dense_dim) { + clear_tiles(name, result_tiles); + } return Status::Ok(); }); @@ -509,9 +513,12 @@ Status DenseReader::dense_read() { // Process count aggregates. if (aggregates_.count(constants::count_of_rows) != 0) { - auto buff{make_aggregate_buffer( + DimType unused = 0; + auto buff{make_aggregate_buffer( false, false, + false, + unused, 0, subarray_start_cell, subarray_end_cell, @@ -912,139 +919,138 @@ Status DenseReader::apply_query_condition( tile_subarrays); // Read and unfilter query condition attributes. - std::vector filtered_data = read_attribute_tiles( - NameToLoad::from_string_vec(qc_names), result_tiles); + shared_ptr> filtered_data = + make_shared>(read_attribute_tiles( + NameToLoad::from_string_vec(qc_names), result_tiles)); if (compute_task.valid()) { RETURN_NOT_OK(storage_manager_->compute_tp()->wait(compute_task)); } - compute_task = storage_manager_->compute_tp()->execute( - [&, - filtered_data = std::move(filtered_data), - qc_names, - t_start, - t_end, - num_range_threads, - result_tiles]() { - // For easy reference. 
- const auto& tile_coords = subarray.tile_coords(); - const auto dim_num = array_schema_.dim_num(); - auto stride = array_schema_.domain().stride(layout_); - const auto cell_order = array_schema_.cell_order(); - const auto global_order = layout_ == Layout::GLOBAL_ORDER; - - // Unfilter tiles. - for (auto& name : qc_names) { - RETURN_NOT_OK(unfilter_tiles(name, false, result_tiles)); - } + compute_task = storage_manager_->compute_tp()->execute([&, + filtered_data, + qc_names, + t_start, + t_end, + num_range_threads, + result_tiles]() { + // For easy reference. + const auto& tile_coords = subarray.tile_coords(); + const auto dim_num = array_schema_.dim_num(); + auto stride = array_schema_.domain().stride(layout_); + const auto cell_order = array_schema_.cell_order(); + const auto global_order = layout_ == Layout::GLOBAL_ORDER; + + // Unfilter tiles. + for (auto& name : qc_names) { + RETURN_NOT_OK(unfilter_tiles(name, false, result_tiles)); + } - if (stride == UINT64_MAX) { - stride = 1; - } + if (stride == UINT64_MAX) { + stride = 1; + } - // Process all tiles in parallel. - auto status = parallel_for_2d( - storage_manager_->compute_tp(), - t_start, - t_end, - 0, - num_range_threads, - [&](uint64_t t, uint64_t range_thread_idx) { - // Find out result space tile and tile subarray. - const DimType* tc = (DimType*)&tile_coords[t][0]; - auto& result_space_tile = result_space_tiles.at(tc); - - // Iterate over all coordinates, retrieved in cell slab. - const auto& frag_domains = result_space_tile.frag_domains(); - TileCellSlabIter iter( - range_thread_idx, - num_range_threads, - subarray, - tile_subarrays[t], - tile_extents, - result_space_tile.start_coords(), - range_info, - cell_order); - - // Compute cell offset and destination pointer. - uint64_t cell_offset = - global_order ? tile_offsets[t] + iter.global_offset() : 0; - auto dest_ptr = qc_result.data() + cell_offset; - - while (!iter.end()) { - // Compute destination pointer for row/col major orders. 
- if (!global_order) { - cell_offset = iter.dest_offset_row_col(); - dest_ptr = qc_result.data() + cell_offset; - } + // Process all tiles in parallel. + auto status = parallel_for_2d( + storage_manager_->compute_tp(), + t_start, + t_end, + 0, + num_range_threads, + [&](uint64_t t, uint64_t range_thread_idx) { + // Find out result space tile and tile subarray. + const DimType* tc = (DimType*)&tile_coords[t][0]; + auto& result_space_tile = result_space_tiles.at(tc); - for (int32_t i = - static_cast(frag_domains.size()) - 1; - i >= 0; - --i) { - // If the cell slab overlaps this fragment domain range, - // apply clause. - auto&& [overlaps, start, end] = cell_slab_overlaps_range( - dim_num, - frag_domains[i].domain(), - iter.cell_slab_coords(), - iter.cell_slab_length()); - if (overlaps) { - // Re-initialize the bitmap to 1 in case of overlapping - // domains. - if (i != static_cast(frag_domains.size()) - 1) { - for (uint64_t c = start; c <= end; c++) { - dest_ptr[c] = 1; - } - } - - RETURN_NOT_OK(condition_->apply_dense( - *(fragment_metadata_[frag_domains[i].fid()] - ->array_schema() - .get()), - result_space_tile.result_tile(frag_domains[i].fid()), - start, - end - start + 1, - iter.pos_in_tile(), - stride, - iter.cell_slab_coords().data(), - dest_ptr)); - - // If any cell doesn't match the query condition, signal - // it in the space tile. - for (uint64_t c = start; c <= end; c++) { - if (dest_ptr[c] == 0) { - result_space_tile.set_qc_filtered_results(); - break; - } - } + // Iterate over all coordinates, retrieved in cell slab. + const auto& frag_domains = result_space_tile.frag_domains(); + TileCellSlabIter iter( + range_thread_idx, + num_range_threads, + subarray, + tile_subarrays[t], + tile_extents, + result_space_tile.start_coords(), + range_info, + cell_order); + + // Compute cell offset and destination pointer. + uint64_t cell_offset = + global_order ? 
tile_offsets[t] + iter.global_offset() : 0; + auto dest_ptr = qc_result.data() + cell_offset; + + while (!iter.end()) { + // Compute destination pointer for row/col major orders. + if (!global_order) { + cell_offset = iter.dest_offset_row_col(); + dest_ptr = qc_result.data() + cell_offset; + } + + for (int32_t i = static_cast(frag_domains.size()) - 1; + i >= 0; + --i) { + // If the cell slab overlaps this fragment domain range, + // apply clause. + auto&& [overlaps, start, end] = cell_slab_overlaps_range( + dim_num, + frag_domains[i].domain(), + iter.cell_slab_coords(), + iter.cell_slab_length()); + if (overlaps) { + // Re-initialize the bitmap to 1 in case of overlapping + // domains. + if (i != static_cast(frag_domains.size()) - 1) { + for (uint64_t c = start; c <= end; c++) { + dest_ptr[c] = 1; } } - // Adjust the destination pointers for global order. - if (global_order) { - dest_ptr += iter.cell_slab_length(); + RETURN_NOT_OK(condition_->apply_dense( + *(fragment_metadata_[frag_domains[i].fid()] + ->array_schema() + .get()), + result_space_tile.result_tile(frag_domains[i].fid()), + start, + end - start + 1, + iter.pos_in_tile(), + stride, + iter.cell_slab_coords().data(), + dest_ptr)); + + // If any cell doesn't match the query condition, signal + // it in the space tile. + for (uint64_t c = start; c <= end; c++) { + if (dest_ptr[c] == 0) { + result_space_tile.set_qc_filtered_results(); + break; + } } - - ++iter; } + } - return Status::Ok(); - }); - RETURN_NOT_OK(status); + // Adjust the destination pointers for global order. + if (global_order) { + dest_ptr += iter.cell_slab_length(); + } - // For `qc_coords_mode` just fill in the coordinates and skip - // attribute - // processing. 
- if (qc_coords_mode_) { - for (auto& name : qc_names) { - clear_tiles(name, result_tiles); + ++iter; } - } - return Status::Ok(); - }); + return Status::Ok(); + }); + RETURN_NOT_OK(status); + + // For `qc_coords_mode` just fill in the coordinates and skip + // attribute + // processing. + if (qc_coords_mode_) { + for (auto& name : qc_names) { + clear_tiles(name, result_tiles); + } + } + + return Status::Ok(); + }); } return Status::Ok(); @@ -1265,9 +1271,12 @@ Status DenseReader::copy_attribute( return Status::Ok(); } +template AggregateBuffer DenseReader::make_aggregate_buffer( const bool var_sized, const bool nullable, + const bool is_dim, + DimType& dim_val, const uint64_t cell_size, const uint64_t min_cell, const uint64_t max_cell, @@ -1287,6 +1296,8 @@ AggregateBuffer DenseReader::make_aggregate_buffer( std::make_optional( tile_tuple->validity_tile().data_as() + min_cell) : nullopt; + } else if (is_dim) { + fixed_data = &dim_val; } return AggregateBuffer( @@ -1864,14 +1875,28 @@ Status DenseReader::aggregate_tiles( const auto cell_order = array_schema_.cell_order(); auto stride = array_schema_.domain().stride(layout_); const auto& frag_domains = result_space_tile.frag_domains(); + const auto is_dim = array_schema_.is_dim(name); const auto attribute = array_schema_.attribute(name); const auto var_size = array_schema_.var_size(name); - const auto nullable = attribute->nullable(); + const auto nullable = !is_dim && attribute->nullable(); const auto cell_size = var_size ? constants::cell_var_offset_size : array_schema_.cell_size(name); auto& aggregates = aggregates_[name]; const bool validity_only = null_count_aggregate_only(name); + // Get the dimension index. + unsigned dim_idx = 0; + if (is_dim) { + dim_idx = array_schema_.domain().get_dimension_index(name); + } + + const bool is_slab_dim = is_dim && (cell_order == sm::Layout::ROW_MAJOR) ? + (dim_idx == dim_num - 1) : + (dim_idx == 0); + const bool is_col_dim = is_dim && (cell_order == sm::Layout::ROW_MAJOR) ? 
+ (dim_idx == 0) : + (dim_idx == dim_num - 1); + // Cache tile tuples. std::vector tile_tuples(frag_domains.size()); for (uint32_t fd = 0; fd < frag_domains.size(); ++fd) { @@ -1915,7 +1940,7 @@ Status DenseReader::aggregate_tiles( // If the cell slab overlaps this fragment domain range, copy data. bool overlaps = false; uint64_t start = 0, end = 0; - if (tile_tuples[fd] != nullptr) { + if (is_dim || tile_tuples[fd] != nullptr) { auto&& [o, s, e] = cell_slab_overlaps_range( dim_num, frag_domains[fd].domain(), @@ -1928,11 +1953,18 @@ Status DenseReader::aggregate_tiles( if (overlaps) { // If the subarray and tile are in the same order, aggregate the // whole slab. + DimType dim_val = 0; if (stride == 1) { + if (is_dim) { + dim_val = iter.cell_slab_coords()[dim_idx] + is_slab_dim * start; + } + // Compute aggregate. - AggregateBuffer aggregate_buffer{make_aggregate_buffer( + AggregateBuffer aggregate_buffer{make_aggregate_buffer( var_size & !validity_only, nullable, + is_dim, + dim_val, cell_size, iter.pos_in_tile() + start, iter.pos_in_tile() + end + 1, @@ -1944,11 +1976,18 @@ Status DenseReader::aggregate_tiles( } else { // Go cell by cell. for (uint64_t i = 0; i < end - start + 1; ++i) { + if (is_dim) { + dim_val = + iter.cell_slab_coords()[dim_idx] + is_col_dim * (i + start); + } + // Compute aggregate. 
auto start_cell = iter.pos_in_tile() + (start + i) * stride; - AggregateBuffer aggregate_buffer{make_aggregate_buffer( + AggregateBuffer aggregate_buffer{make_aggregate_buffer( var_size & !validity_only, nullable, + is_dim, + dim_val, cell_size, start_cell, start_cell + 1, @@ -2257,5 +2296,4 @@ void DenseReader::fill_dense_coords_col_slab( } } -} // namespace sm -} // namespace tiledb +} // namespace tiledb::sm diff --git a/tiledb/sm/query/readers/dense_reader.h b/tiledb/sm/query/readers/dense_reader.h index 16c997299acf..dcbf60f3fa63 100644 --- a/tiledb/sm/query/readers/dense_reader.h +++ b/tiledb/sm/query/readers/dense_reader.h @@ -162,9 +162,6 @@ class DenseReader : public ReaderBase, public IQueryStrategy { /** Target upper memory limit for tiles. */ uint64_t tile_upper_memory_limit_; - /** Memory tracker object for the array. */ - MemoryTracker* array_memory_tracker_; - /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ @@ -260,9 +257,12 @@ class DenseReader : public ReaderBase, public IQueryStrategy { const uint64_t num_range_threads); /** Make an aggregate buffer. */ + template AggregateBuffer make_aggregate_buffer( const bool var_sized, const bool nullable, + const bool is_dim, + DimType& dim_val, const uint64_t cell_size, const uint64_t min_cell, const uint64_t max_cell, @@ -283,6 +283,10 @@ class DenseReader : public ReaderBase, public IQueryStrategy { const std::string& name, ResultSpaceTile& rst, const Subarray& tile_subarray) const { + if (array_schema_.is_dim(name)) { + return false; + } + // Make sure there are no filtered results by the query condition and that // there are only one fragment domain for this tile. 
Having more fragment // domains for a tile means we'll have to merge data for many sources so we diff --git a/tiledb/sm/query/readers/filtered_data.h b/tiledb/sm/query/readers/filtered_data.h index abb2f8884ac5..4b66a38dd3a2 100644 --- a/tiledb/sm/query/readers/filtered_data.h +++ b/tiledb/sm/query/readers/filtered_data.h @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2023 TileDB, Inc. + * @copyright Copyright (c) 2023-2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -34,21 +34,18 @@ #define TILEDB_FILTERED_DATA_H #include "tiledb/common/common.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/common/status.h" #include "tiledb/sm/storage_manager/storage_manager.h" using namespace tiledb::common; -namespace tiledb { -namespace sm { +namespace tiledb::sm { /** * A filtered data block containing filtered data for multiple tiles. The block * will contain a number of contiguous on-disk tiles and the data is identified * by the fragment index and offset/size of the data in the on-disk file. - * - * This uses a vector for storage which will be replaced by datablocks when - * ready. */ class FilteredDataBlock { public: @@ -63,13 +60,26 @@ class FilteredDataBlock { * coming from. * @param offset File offset of the on-disk data for this datablock. * @param size Size of the on-disk data for this data block. + * @param resource The memory resource. 
*/ - FilteredDataBlock(unsigned frag_idx, uint64_t offset, uint64_t size) - : frag_idx_(frag_idx) + FilteredDataBlock( + unsigned frag_idx, + uint64_t offset, + uint64_t size, + tdb::pmr::memory_resource* resource) + : resource_(resource) + , frag_idx_(frag_idx) , offset_(offset) - , filtered_data_(size) { + , size_(size) + , filtered_data_(tdb::pmr::make_unique(resource_, size)) { + if (!filtered_data_) { + throw std::bad_alloc(); + } } + DISABLE_COPY_AND_COPY_ASSIGN(FilteredDataBlock); + DISABLE_MOVE_AND_MOVE_ASSIGN(FilteredDataBlock); + /* ********************************* */ /* API */ /* ********************************* */ @@ -85,21 +95,20 @@ class FilteredDataBlock { } /** - * @return Pointer to the data at a particular offset in the filtered data - * file. + * @return Pointer to the data at the given offset in the filtered data file. */ inline void* data_at(storage_size_t offset) { - return filtered_data_.data() + offset - offset_; + return filtered_data_.get() + offset - offset_; } /** @return Pointer to the data inside of the filtered data block. */ inline void* data() { - return filtered_data_.data(); + return filtered_data_.get(); } /** @return Size of the data block. */ inline storage_size_t size() const { - return filtered_data_.size(); + return size_; } /** @@ -109,13 +118,15 @@ class FilteredDataBlock { inline bool contains( unsigned frag_idx, storage_size_t offset, storage_size_t size) const { return frag_idx == frag_idx_ && offset >= offset_ && - offset + size <= offset_ + filtered_data_.size(); + offset + size <= offset_ + size_; } private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The memory resource to use. */ + tdb::pmr::memory_resource* resource_; /** Fragment index for the data this data block contains. */ unsigned frag_idx_; @@ -123,8 +134,11 @@ class FilteredDataBlock { /** File offset of the on-disk data for this datablock. 
*/ storage_size_t offset_; + /** The size of the data. */ + storage_size_t size_; + /** Data for the data block. */ - std::vector filtered_data_; + tdb::pmr::unique_ptr filtered_data_; }; /** @@ -159,6 +173,7 @@ class FilteredData { * @param validity_only Is the field read for validity only? * @param storage_manager Storage manager. * @param read_tasks Read tasks to queue new tasks on for new data blocks. + * @param memory_tracker Memory tracker. */ FilteredData( const ReaderBase& reader, @@ -172,8 +187,16 @@ class FilteredData { const bool nullable, const bool validity_only, StorageManager* storage_manager, - std::vector& read_tasks) - : name_(name) + std::vector& read_tasks, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , fixed_data_blocks_( + memory_tracker_->get_resource(MemoryType::FILTERED_DATA)) + , var_data_blocks_( + memory_tracker_->get_resource(MemoryType::FILTERED_DATA)) + , nullable_data_blocks_( + memory_tracker_->get_resource(MemoryType::FILTERED_DATA)) + , name_(name) , fragment_metadata_(fragment_metadata) , var_sized_(var_sized) , nullable_(nullable) @@ -184,6 +207,8 @@ class FilteredData { } uint64_t tiles_allocated = 0; + auto* block_resource = + memory_tracker_->get_resource(MemoryType::FILTERED_DATA_BLOCK); // Store data on the datablock in progress for fixed, var and nullable data. std::optional current_frag_idx{nullopt}; @@ -251,19 +276,28 @@ class FilteredData { // Finish by pushing the last in progress blocks. 
if (current_fixed_size != 0) { fixed_data_blocks_.emplace_back( - *current_frag_idx, current_fixed_offset, current_fixed_size); + *current_frag_idx, + current_fixed_offset, + current_fixed_size, + block_resource); queue_last_block_for_read(TileType::FIXED); } if (current_var_size != 0) { var_data_blocks_.emplace_back( - *current_frag_idx, current_var_offset, current_var_size); + *current_frag_idx, + current_var_offset, + current_var_size, + block_resource); queue_last_block_for_read(TileType::VAR); } if (current_nullable_size != 0) { nullable_data_blocks_.emplace_back( - *current_frag_idx, current_nullable_offset, current_nullable_size); + *current_frag_idx, + current_nullable_offset, + current_nullable_size, + block_resource); queue_last_block_for_read(TileType::NULLABLE); } @@ -274,6 +308,9 @@ class FilteredData { current_nullable_data_block_ = nullable_data_blocks_.begin(); } + DISABLE_COPY_AND_COPY_ASSIGN(FilteredData); + DISABLE_MOVE_AND_MOVE_ASSIGN(FilteredData); + /** Destructor. */ ~FilteredData() = default; @@ -364,7 +401,7 @@ class FilteredData { } /** @return Data blocks corresponding to the tile type. */ - inline std::vector& data_blocks(const TileType type) { + inline tdb::pmr::list& data_blocks(const TileType type) { switch (type) { case TileType::FIXED: return fixed_data_blocks_; @@ -378,7 +415,7 @@ class FilteredData { } /** @return Current data block corresponding to the tile type. */ - inline std::vector::iterator& current_data_block( + inline tdb::pmr::list::iterator& current_data_block( const TileType type) { switch (type) { case TileType::FIXED: @@ -516,7 +553,10 @@ class FilteredData { } else { // Push the old batch and start a new one. 
data_blocks(type).emplace_back( - *current_block_frag_idx, current_block_offset, current_block_size); + *current_block_frag_idx, + current_block_offset, + current_block_size, + memory_tracker_->get_resource(MemoryType::FILTERED_DATA_BLOCK)); queue_last_block_for_read(type); current_block_offset = offset; current_block_size = size; @@ -554,23 +594,26 @@ class FilteredData { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** Memory tracker for the filtered data. */ + shared_ptr memory_tracker_; + /** Fixed data blocks. */ - std::vector fixed_data_blocks_; + tdb::pmr::list fixed_data_blocks_; /** Current fixed data block used when creating fixed tiles. */ - std::vector::iterator current_fixed_data_block_; + tdb::pmr::list::iterator current_fixed_data_block_; /** Var data blocks. */ - std::vector var_data_blocks_; + tdb::pmr::list var_data_blocks_; /** Current var data block used when creating var tiles. */ - std::vector::iterator current_var_data_block_; + tdb::pmr::list::iterator current_var_data_block_; /** Nullable data blocks. */ - std::vector nullable_data_blocks_; + tdb::pmr::list nullable_data_blocks_; /** Current nullable data block used when creating nullable tiles. */ - std::vector::iterator current_nullable_data_block_; + tdb::pmr::list::iterator current_nullable_data_block_; /** Name of the attribute. 
*/ const std::string& name_; @@ -591,7 +634,6 @@ class FilteredData { std::vector& read_tasks_; }; -} // namespace sm -} // namespace tiledb +} // namespace tiledb::sm #endif // TILEDB_FILTERED_DATA_H diff --git a/tiledb/sm/query/readers/ordered_dim_label_reader.cc b/tiledb/sm/query/readers/ordered_dim_label_reader.cc index 0c618d9f17cf..33d80255b61a 100644 --- a/tiledb/sm/query/readers/ordered_dim_label_reader.cc +++ b/tiledb/sm/query/readers/ordered_dim_label_reader.cc @@ -522,7 +522,10 @@ uint64_t OrderedDimLabelReader::create_result_tiles() { std::piecewise_construct, std::forward_as_tuple(tile_idx), std::forward_as_tuple( - f, frag_tile_idx, *fragment_metadata_[f].get())); + f, + frag_tile_idx, + *fragment_metadata_[f].get(), + query_memory_tracker_)); } else { if (r == 0) { throw OrderedDimLabelReaderStatusException( diff --git a/tiledb/sm/query/readers/reader_base.cc b/tiledb/sm/query/readers/reader_base.cc index 7f6eb29c5e80..d5948d81170d 100644 --- a/tiledb/sm/query/readers/reader_base.cc +++ b/tiledb/sm/query/readers/reader_base.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2022 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -55,8 +55,7 @@ #include "tiledb/sm/subarray/subarray.h" #include "tiledb/type/apply_with_type.h" -namespace tiledb { -namespace sm { +namespace tiledb::sm { using dimension_size_type = uint32_t; @@ -74,6 +73,7 @@ class ReaderBaseStatusException : public StatusException { ReaderBase::ReaderBase( stats::Stats* stats, shared_ptr logger, StrategyParams& params) : StrategyBase(stats, logger, params) + , memory_tracker_(params.query_memory_tracker()) , condition_(params.condition()) , user_requested_timestamps_(false) , use_timestamps_(false) @@ -110,7 +110,8 @@ void ReaderBase::compute_result_space_tiles( const std::vector>& tile_coords, const TileDomain& array_tile_domain, const std::vector>& frag_tile_domains, - std::map>& result_space_tiles) { + std::map>& result_space_tiles, + shared_ptr memory_tracker) { auto fragment_num = (unsigned)frag_tile_domains.size(); auto dim_num = array_tile_domain.dim_num(); std::vector start_coords; @@ -123,7 +124,8 @@ void ReaderBase::compute_result_space_tiles( start_coords = array_tile_domain.start_coords(coords); // Create result space tile and insert into the map - auto r = result_space_tiles.emplace(coords, ResultSpaceTile()); + auto r = + result_space_tiles.emplace(coords, ResultSpaceTile(memory_tracker)); auto& result_space_tile = r.first->second; result_space_tile.set_start_coords(start_coords); @@ -152,9 +154,8 @@ void ReaderBase::compute_result_space_tiles( auto frag_idx = frag_tile_domains[f].id(); result_space_tile.append_frag_domain(frag_idx, frag_domain); auto tile_idx = frag_tile_domains[f].tile_pos(coords); - ResultTile result_tile( + result_space_tile.set_result_tile( frag_idx, tile_idx, *fragment_metadata[frag_idx].get()); - result_space_tile.set_result_tile(frag_idx, result_tile); } } } @@ -585,25 +586,25 @@ Status 
ReaderBase::read_and_unfilter_coordinate_tiles( return Status::Ok(); } -std::vector ReaderBase::read_attribute_tiles( +std::list ReaderBase::read_attribute_tiles( const std::vector& names, const std::vector& result_tiles) const { auto timer_se = stats_->start_timer("read_attribute_tiles"); return read_tiles(names, result_tiles); } -std::vector ReaderBase::read_coordinate_tiles( +std::list ReaderBase::read_coordinate_tiles( const std::vector& names, const std::vector& result_tiles) const { auto timer_se = stats_->start_timer("read_coordinate_tiles"); return read_tiles(NameToLoad::from_string_vec(names), result_tiles); } -std::vector ReaderBase::read_tiles( +std::list ReaderBase::read_tiles( const std::vector& names, const std::vector& result_tiles) const { auto timer_se = stats_->start_timer("read_tiles"); - std::vector filtered_data; + std::list filtered_data; // Shortcut for empty tile vec. if (result_tiles.empty() || names.empty()) { @@ -612,7 +613,6 @@ std::vector ReaderBase::read_tiles( uint64_t num_tiles_read{0}; std::vector read_tasks; - filtered_data.reserve(names.size()); // Run all attributes independently. for (auto n : names) { @@ -636,7 +636,8 @@ std::vector ReaderBase::read_tiles( nullable, val_only, storage_manager_, - read_tasks); + read_tasks, + memory_tracker_); // Go through each tiles and create the attribute tiles. for (auto tile : result_tiles) { @@ -1103,7 +1104,8 @@ void ReaderBase::compute_result_space_tiles( tile_coords, array_tile_domain, frag_tile_domains, - result_space_tiles); + result_space_tiles, + query_memory_tracker_); } bool ReaderBase::has_coords() const { @@ -1174,7 +1176,8 @@ void ReaderBase::validate_attribute_order( auto index_name = index_dim->name(); // See if some values will already be processed by previous fragments. 
- AttributeOrderValidator validator(attribute_name, fragment_metadata_.size()); + AttributeOrderValidator validator( + attribute_name, fragment_metadata_.size(), query_memory_tracker_); throw_if_not_ok(parallel_for( storage_manager_->compute_tp(), 0, @@ -1365,5 +1368,4 @@ template void ReaderBase::validate_attribute_order( std::vector&, std::vector&); -} // namespace sm -} // namespace tiledb +} // namespace tiledb::sm diff --git a/tiledb/sm/query/readers/reader_base.h b/tiledb/sm/query/readers/reader_base.h index ca69ada01249..d9a859d13e81 100644 --- a/tiledb/sm/query/readers/reader_base.h +++ b/tiledb/sm/query/readers/reader_base.h @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2021 TileDB, Inc. + * @copyright Copyright (c) 2017-2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -35,6 +35,7 @@ #include "../strategy_base.h" #include "tiledb/common/common.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/common/status.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/array_schema/tile_domain.h" @@ -49,8 +50,7 @@ #include "tiledb/sm/storage_manager/storage_manager_declaration.h" #include "tiledb/sm/subarray/subarray_partitioner.h" -namespace tiledb { -namespace sm { +namespace tiledb::sm { class Array; class ArraySchema; @@ -200,7 +200,8 @@ class ReaderBase : public StrategyBase { const std::vector>& tile_coords, const TileDomain& array_tile_domain, const std::vector>& frag_tile_domains, - std::map>& result_space_tiles); + std::map>& result_space_tiles, + shared_ptr memory_tracker); /** * Computes the minimum and maximum indexes of tile chunks to process based on @@ -255,6 +256,9 @@ class ReaderBase : public StrategyBase { /* PROTECTED ATTRIBUTES */ /* ********************************* */ + /** The query's memory tracker. */ + shared_ptr memory_tracker_; + /** The query condition. 
*/ std::optional& condition_; @@ -560,7 +564,7 @@ class ReaderBase : public StrategyBase { * `ResultTile` instances in this vector. * @return Filtered data blocks. */ - std::vector read_attribute_tiles( + std::list read_attribute_tiles( const std::vector& names, const std::vector& result_tiles) const; @@ -576,7 +580,7 @@ class ReaderBase : public StrategyBase { * `ResultTile` instances in this vector. * @return Filtered data blocks. */ - std::vector read_coordinate_tiles( + std::list read_coordinate_tiles( const std::vector& names, const std::vector& result_tiles) const; @@ -593,7 +597,7 @@ class ReaderBase : public StrategyBase { * @param validity_only Is the field read for validity only. * @return Filtered data blocks. */ - std::vector read_tiles( + std::list read_tiles( const std::vector& names, const std::vector& result_tiles) const; @@ -798,7 +802,6 @@ class ReaderBase : public StrategyBase { std::vector& frag_first_array_tile_idx); }; -} // namespace sm -} // namespace tiledb +} // namespace tiledb::sm #endif // TILEDB_READER_BASE_H diff --git a/tiledb/sm/query/readers/result_space_tile.h b/tiledb/sm/query/readers/result_space_tile.h index 8f15945d4ecc..2758bb9da10a 100644 --- a/tiledb/sm/query/readers/result_space_tile.h +++ b/tiledb/sm/query/readers/result_space_tile.h @@ -47,6 +47,8 @@ using namespace tiledb::common; namespace tiledb { namespace sm { +class MemoryTracker; + /** Fragment domain structure (fragment id, fragment domain). */ struct FragmentDomain { public: @@ -81,8 +83,12 @@ struct FragmentDomain { template class ResultSpaceTile { public: - /** Default constructor. */ - ResultSpaceTile() = default; + /** No default constructor. */ + ResultSpaceTile() = delete; + + ResultSpaceTile(shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) { + } /** Default destructor. */ ~ResultSpaceTile() = default; @@ -126,9 +132,13 @@ class ResultSpaceTile { } /** Sets the input result tile for the given fragment. 
*/ - void set_result_tile(unsigned frag_idx, ResultTile& result_tile) { + void set_result_tile( + unsigned frag_idx, uint64_t tile_idx, FragmentMetadata& frag_md) { assert(result_tiles_.count(frag_idx) == 0); - result_tiles_[frag_idx] = std::move(result_tile); + result_tiles_.emplace( + std::piecewise_construct, + std::forward_as_tuple(frag_idx), + std::forward_as_tuple(frag_idx, tile_idx, frag_md, memory_tracker_)); } /** Returns the result tile for the input fragment. */ @@ -175,10 +185,19 @@ class ResultSpaceTile { "fragment domain."); } - return result_tiles_[frag_domains_[0].fid()]; + auto iter = result_tiles_.find(frag_domains_[0].fid()); + if (iter == result_tiles_.end()) { + throw std::runtime_error( + "Invalid call to single_result_tile with unknown tile."); + } + + return iter->second; } private: + /** The memory tracker to use. */ + shared_ptr memory_tracker_; + /** The (global) coordinates of the first cell in the space tile. */ std::vector start_coords_; diff --git a/tiledb/sm/query/readers/result_tile.cc b/tiledb/sm/query/readers/result_tile.cc index 7848d8a9da85..2416c77309b6 100644 --- a/tiledb/sm/query/readers/result_tile.cc +++ b/tiledb/sm/query/readers/result_tile.cc @@ -68,14 +68,18 @@ bool result_tile_cmp(const ResultTile* a, const ResultTile* b) { /* ****************************** */ ResultTile::ResultTile( - unsigned frag_idx, uint64_t tile_idx, const FragmentMetadata& frag_md) - : domain_(&frag_md.array_schema()->domain()) + unsigned frag_idx, + uint64_t tile_idx, + const FragmentMetadata& frag_md, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , domain_(&frag_md.array_schema()->domain()) , frag_idx_(frag_idx) , tile_idx_(tile_idx) - , cell_num_(frag_md.cell_num(tile_idx)) { + , cell_num_(frag_md.cell_num(tile_idx)) + , attr_tiles_(frag_md.array_schema()->attribute_num()) + , coord_tiles_(domain_->dim_num()) { auto array_schema = frag_md.array_schema(); - coord_tiles_.resize(domain_->dim_num()); - 
attr_tiles_.resize(array_schema->attribute_num()); for (uint64_t i = 0; i < array_schema->attribute_num(); i++) { auto attribute = array_schema->attribute(i); attr_tiles_[i] = std::make_pair(attribute->name(), nullopt); @@ -87,41 +91,6 @@ ResultTile::ResultTile( coord_func_ = &ResultTile::zipped_coord; } -/** Move constructor. */ -ResultTile::ResultTile(ResultTile&& other) { - // Swap with the argument - swap(other); -} - -/** Move-assign operator. */ -ResultTile& ResultTile::operator=(ResultTile&& other) { - // Swap with the argument - swap(other); - - return *this; -} - -void ResultTile::swap(ResultTile& tile) { - std::swap(domain_, tile.domain_); - std::swap(frag_idx_, tile.frag_idx_); - std::swap(tile_idx_, tile.tile_idx_); - std::swap(cell_num_, tile.cell_num_); - std::swap(attr_tiles_, tile.attr_tiles_); - std::swap(timestamps_tile_, tile.timestamps_tile_); - std::swap(delete_timestamps_tile_, tile.delete_timestamps_tile_); - std::swap(coords_tile_, tile.coords_tile_); - std::swap(coord_tiles_, tile.coord_tiles_); - std::swap(compute_results_dense_func_, tile.compute_results_dense_func_); - std::swap(coord_func_, tile.coord_func_); - std::swap(compute_results_sparse_func_, tile.compute_results_sparse_func_); - std::swap( - compute_results_count_sparse_uint64_t_func_, - tile.compute_results_count_sparse_uint64_t_func_); - std::swap( - compute_results_count_sparse_uint8_t_func_, - tile.compute_results_count_sparse_uint8_t_func_); -} - /* ****************************** */ /* API */ /* ****************************** */ @@ -174,33 +143,68 @@ void ResultTile::init_attr_tile( const std::string& name, const TileSizes tile_sizes, const TileData tile_data) { - auto tuple = - TileTuple(format_version, array_schema, name, tile_sizes, tile_data); + auto tuple = TileTuple( + format_version, + array_schema, + name, + tile_sizes, + tile_data, + memory_tracker_); if (name == constants::coords) { - coords_tile_ = std::move(tuple); + coords_tile_.emplace( + format_version, + 
array_schema, + name, + tile_sizes, + tile_data, + memory_tracker_); return; } if (name == constants::timestamps) { - timestamps_tile_ = std::move(tuple); + timestamps_tile_.emplace( + format_version, + array_schema, + name, + tile_sizes, + tile_data, + memory_tracker_); return; } if (name == constants::delete_timestamps) { - delete_timestamps_tile_ = std::move(tuple); + delete_timestamps_tile_.emplace( + format_version, + array_schema, + name, + tile_sizes, + tile_data, + memory_tracker_); return; } if (name == constants::delete_condition_index) { - delete_condition_index_tile_ = std::move(tuple); + delete_condition_index_tile_.emplace( + format_version, + array_schema, + name, + tile_sizes, + tile_data, + memory_tracker_); return; } // Handle attributes for (auto& at : attr_tiles_) { if (at.first == name && at.second == nullopt) { - at.second = std::move(tuple); + at.second.emplace( + format_version, + array_schema, + name, + tile_sizes, + tile_data, + memory_tracker_); return; } } @@ -213,9 +217,14 @@ void ResultTile::init_coord_tile( const TileSizes tile_sizes, const TileData tile_data, unsigned dim_idx) { - coord_tiles_[dim_idx] = std::pair( + coord_tiles_[dim_idx].first = name; + coord_tiles_[dim_idx].second.emplace( + format_version, + array_schema, name, - TileTuple(format_version, array_schema, name, tile_sizes, tile_data)); + tile_sizes, + tile_data, + memory_tracker_); // When at least one unzipped coordinate has been initialized, we will // use the unzipped `coord()` implementation. 
@@ -599,7 +608,7 @@ void ResultTile::compute_results_sparse( const ResultTile* result_tile, unsigned dim_idx, const Range& range, - std::vector* result_bitmap, + tdb::pmr::vector* result_bitmap, const Layout& cell_order) { auto coords_num = result_tile->cell_num(); auto dim_num = result_tile->domain()->dim_num(); @@ -756,7 +765,7 @@ void ResultTile::compute_results_sparse( const ResultTile* result_tile, unsigned dim_idx, const Range& range, - std::vector* result_bitmap, + tdb::pmr::vector* result_bitmap, const Layout&) { // For easy reference. auto coords_num = result_tile->cell_num(); @@ -800,7 +809,7 @@ void ResultTile::compute_results_count_sparse_string_range( const offsets_t* buff_off, const uint64_t start, const uint64_t end, - std::vector& result_count) { + tdb::pmr::vector& result_count) { const bool non_overlapping = std::is_same::value; // Process all cells. @@ -865,8 +874,8 @@ void ResultTile::compute_results_count_sparse_string( const ResultTile* result_tile, unsigned dim_idx, const NDRange& ranges, - const std::vector& range_indexes, - std::vector& result_count, + const tdb::pmr::vector& range_indexes, + tdb::pmr::vector& result_count, const Layout& cell_order, const uint64_t min_cell, const uint64_t max_cell) { @@ -1041,8 +1050,8 @@ void ResultTile::compute_results_count_sparse( const ResultTile* result_tile, unsigned dim_idx, const NDRange& ranges, - const std::vector& range_indexes, - std::vector& result_count, + const tdb::pmr::vector& range_indexes, + tdb::pmr::vector& result_count, const Layout&, const uint64_t min_cell, const uint64_t max_cell) { @@ -1149,7 +1158,7 @@ Status ResultTile::compute_results_dense( Status ResultTile::compute_results_sparse( unsigned dim_idx, const Range& range, - std::vector* result_bitmap, + tdb::pmr::vector* result_bitmap, const Layout& cell_order) const { assert(compute_results_sparse_func_[dim_idx] != nullptr); compute_results_sparse_func_[dim_idx]( @@ -1161,8 +1170,8 @@ template <> Status 
ResultTile::compute_results_count_sparse( unsigned dim_idx, const NDRange& ranges, - const std::vector& range_indexes, - std::vector& result_count, + const tdb::pmr::vector& range_indexes, + tdb::pmr::vector& result_count, const Layout& cell_order, const uint64_t min_cell, const uint64_t max_cell) const { @@ -1183,8 +1192,8 @@ template <> Status ResultTile::compute_results_count_sparse( unsigned dim_idx, const NDRange& ranges, - const std::vector& range_indexes, - std::vector& result_count, + const tdb::pmr::vector& range_indexes, + tdb::pmr::vector& result_count, const Layout& cell_order, const uint64_t min_cell, const uint64_t max_cell) const { diff --git a/tiledb/sm/query/readers/result_tile.h b/tiledb/sm/query/readers/result_tile.h index 192fcc3332e2..189631211192 100644 --- a/tiledb/sm/query/readers/result_tile.h +++ b/tiledb/sm/query/readers/result_tile.h @@ -266,46 +266,43 @@ class ResultTile { const ArraySchema& array_schema, const std::string& name, const TileSizes tile_sizes, - const TileData tile_data) - : fixed_tile_( - tile_sizes.has_var_tile() ? - Tile( - format_version, - constants::cell_var_offset_type, - constants::cell_var_offset_size, - 0, - tile_sizes.tile_size(), - tile_data.fixed_filtered_data(), - tile_sizes.tile_persisted_size()) : - Tile( - format_version, - array_schema.type(name), - array_schema.cell_size(name), - (name == constants::coords) ? array_schema.dim_num() : 0, - tile_sizes.tile_size(), - tile_data.fixed_filtered_data(), - tile_sizes.tile_persisted_size())) { + const TileData tile_data, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , fixed_tile_( + format_version, + tile_sizes.has_var_tile() ? constants::cell_var_offset_type : + array_schema.type(name), + tile_sizes.has_var_tile() ? constants::cell_var_offset_size : + array_schema.cell_size(name), + (name == constants::coords) ? 
array_schema.dim_num() : 0, + tile_sizes.tile_size(), + tile_data.fixed_filtered_data(), + tile_sizes.tile_persisted_size(), + memory_tracker_) { if (tile_sizes.has_var_tile()) { auto type = array_schema.type(name); - var_tile_ = Tile( + var_tile_.emplace( format_version, type, datatype_size(type), 0, tile_sizes.tile_var_size(), tile_data.var_filtered_data(), - tile_sizes.tile_var_persisted_size()); + tile_sizes.tile_var_persisted_size(), + memory_tracker_); } if (tile_sizes.has_validity_tile()) { - validity_tile_ = Tile( + validity_tile_.emplace( format_version, constants::cell_validity_type, constants::cell_validity_size, 0, tile_sizes.tile_validity_size(), tile_data.validity_filtered_data(), - tile_sizes.tile_validity_persisted_size()); + tile_sizes.tile_validity_persisted_size(), + memory_tracker_); } } @@ -348,6 +345,9 @@ class ResultTile { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The memory tracker. */ + shared_ptr memory_tracker_; + /** Stores the fixed data tile. */ Tile fixed_tile_; @@ -362,30 +362,32 @@ class ResultTile { /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ - /** Default constructor. */ - ResultTile() = default; + /** No Default constructor. */ + ResultTile() = delete; + + /** + * Constructor. + * + * @param memory_tracker The memory tracker to use. + */ + ResultTile(shared_ptr memory_tracker); /** * Constructor. The number of dimensions `dim_num` is used to allocate * the separate coordinate tiles. */ ResultTile( - unsigned frag_idx, uint64_t tile_idx, const FragmentMetadata& frag_md); + unsigned frag_idx, + uint64_t tile_idx, + const FragmentMetadata& frag_md, + shared_ptr memory_tracker); DISABLE_COPY_AND_COPY_ASSIGN(ResultTile); + DISABLE_MOVE_AND_MOVE_ASSIGN(ResultTile); /** Default destructor. */ ~ResultTile() = default; - /** Move constructor. */ - ResultTile(ResultTile&& tile); - - /** Move-assign operator. 
*/ - ResultTile& operator=(ResultTile&& tile); - - /** Swaps the contents (all field values) of this tile with the given tile. */ - void swap(ResultTile& tile); - /* ********************************* */ /* API */ /* ********************************* */ @@ -547,7 +549,7 @@ class ResultTile { const ResultTile* result_tile, unsigned dim_idx, const Range& range, - std::vector* result_bitmap, + tdb::pmr::vector* result_bitmap, const Layout& cell_order); /** @@ -567,8 +569,8 @@ class ResultTile { const ResultTile* result_tile, unsigned dim_idx, const NDRange& ranges, - const std::vector& range_indexes, - std::vector& result_count, + const tdb::pmr::vector& range_indexes, + tdb::pmr::vector& result_count, const Layout& cell_order, const uint64_t min_cell, const uint64_t max_cell); @@ -592,7 +594,7 @@ class ResultTile { const uint64_t* buff_off, const uint64_t start, const uint64_t end, - std::vector& result_count); + tdb::pmr::vector& result_count); /** * Applicable only to sparse arrays. @@ -611,8 +613,8 @@ class ResultTile { const ResultTile* result_tile, unsigned dim_idx, const NDRange& ranges, - const std::vector& range_indexes, - std::vector& result_count, + const tdb::pmr::vector& range_indexes, + tdb::pmr::vector& result_count, const Layout& cell_order, const uint64_t min_cell, const uint64_t max_cell); @@ -645,7 +647,7 @@ class ResultTile { Status compute_results_sparse( unsigned dim_idx, const Range& range, - std::vector* result_bitmap, + tdb::pmr::vector* result_bitmap, const Layout& cell_order) const; /** @@ -664,8 +666,8 @@ class ResultTile { Status compute_results_count_sparse( unsigned dim_idx, const NDRange& ranges, - const std::vector& range_indexes, - std::vector& result_count, + const tdb::pmr::vector& range_indexes, + tdb::pmr::vector& result_count, const Layout& cell_order, const uint64_t min_cell, const uint64_t max_cell) const; @@ -675,6 +677,9 @@ class ResultTile { /* PROTECTED ATTRIBUTES */ /* ********************************* */ + /** The memory 
tracker. */ + shared_ptr memory_tracker_; + /** The array domain. */ const Domain* domain_; @@ -737,7 +742,7 @@ class ResultTile { const ResultTile*, unsigned, const Range&, - std::vector*, + tdb::pmr::vector*, const Layout&)>> compute_results_sparse_func_; @@ -749,8 +754,8 @@ class ResultTile { const ResultTile*, unsigned, const NDRange&, - const std::vector&, - std::vector&, + const tdb::pmr::vector&, + tdb::pmr::vector&, const Layout&, const uint64_t, const uint64_t)>> @@ -764,8 +769,8 @@ class ResultTile { const ResultTile*, unsigned, const NDRange&, - const std::vector&, - std::vector&, + const tdb::pmr::vector&, + tdb::pmr::vector&, const Layout&, const uint64_t, const uint64_t)>> @@ -814,30 +819,30 @@ class ResultTileWithBitmap : public ResultTile { /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ - ResultTileWithBitmap() = default; - - ResultTileWithBitmap( - unsigned frag_idx, uint64_t tile_idx, const FragmentMetadata& frag_md) - : ResultTile(frag_idx, tile_idx, frag_md) - , result_num_(cell_num_) { - } + ResultTileWithBitmap() = delete; - /** Move constructor. */ - ResultTileWithBitmap(ResultTileWithBitmap&& other) noexcept { - // Swap with the argument - swap(other); + /** + * Constructor + * + * @param memory_tracker The memory tracker to use. + */ + ResultTileWithBitmap(shared_ptr memory_tracker) + : ResultTile(memory_tracker) + , bitmap_(memory_tracker_->get_resource(MemoryType::TILE_BITMAP)) { } - /** Move-assign operator. 
*/ - ResultTileWithBitmap& operator=( - ResultTileWithBitmap&& other) { - // Swap with the argument - swap(other); - - return *this; + ResultTileWithBitmap( + unsigned frag_idx, + uint64_t tile_idx, + const FragmentMetadata& frag_md, + shared_ptr memory_tracker) + : ResultTile(frag_idx, tile_idx, frag_md, memory_tracker) + , bitmap_(memory_tracker_->get_resource(MemoryType::TILE_BITMAP)) + , result_num_(cell_num_) { } DISABLE_COPY_AND_COPY_ASSIGN(ResultTileWithBitmap); + DISABLE_MOVE_AND_MOVE_ASSIGN(ResultTileWithBitmap); public: /* ********************************* */ @@ -858,7 +863,7 @@ class ResultTileWithBitmap : public ResultTile { * * @param cell_idx Cell index. */ - inline std::vector& bitmap() { + inline tdb::pmr::vector& bitmap() { return bitmap_; } @@ -945,19 +950,12 @@ class ResultTileWithBitmap : public ResultTile { return false; } - /** Swaps the contents (all field values) of this tile with the given tile. */ - void swap(ResultTileWithBitmap& tile) { - ResultTile::swap(tile); - std::swap(bitmap_, tile.bitmap_); - std::swap(result_num_, tile.result_num_); - } - protected: /* ********************************* */ /* PROTECTED ATTRIBUTES */ /* ********************************* */ /** Bitmap for this tile. */ - std::vector bitmap_; + tdb::pmr::vector bitmap_; /** Number of cells in this bitmap. */ uint64_t result_num_; @@ -970,48 +968,38 @@ class GlobalOrderResultTile : public ResultTileWithBitmap { /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ + + /** No default constructor. */ + GlobalOrderResultTile() = delete; + GlobalOrderResultTile( unsigned frag_idx, uint64_t tile_idx, bool dups, bool include_delete_meta, - const FragmentMetadata& frag_md) - : ResultTileWithBitmap(frag_idx, tile_idx, frag_md) - , post_dedup_bitmap_( - !dups || include_delete_meta ? 
optional(std::vector()) : - nullopt) + const FragmentMetadata& frag_md, + shared_ptr memory_tracker) + : ResultTileWithBitmap( + frag_idx, tile_idx, frag_md, memory_tracker) + , hilbert_values_(this->memory_tracker_->get_resource( + MemoryType::TILE_HILBERT_VALUES)) + , post_dedup_bitmap_(nullopt) + , per_cell_delete_condition_(this->memory_tracker_->get_resource( + MemoryType::TILE_QUERY_CONDITIONS)) , used_(false) { - } - - /** Move constructor. */ - GlobalOrderResultTile(GlobalOrderResultTile&& other) noexcept { - // Swap with the argument - swap(other); - } - - /** Move-assign operator. */ - GlobalOrderResultTile& operator=(GlobalOrderResultTile&& other) { - // Swap with the argument - swap(other); - - return *this; + if (!dups || include_delete_meta) { + post_dedup_bitmap_.emplace( + this->memory_tracker_->get_resource(MemoryType::TILE_BITMAP)); + } } DISABLE_COPY_AND_COPY_ASSIGN(GlobalOrderResultTile); + DISABLE_MOVE_AND_MOVE_ASSIGN(GlobalOrderResultTile); /* ********************************* */ /* PUBLIC METHODS */ /* ********************************* */ - /** Swaps the contents (all field values) of this tile with the given tile. */ - void swap(GlobalOrderResultTile& tile) { - ResultTileWithBitmap::swap(tile); - std::swap(used_, tile.used_); - std::swap(hilbert_values_, tile.hilbert_values_); - std::swap(post_dedup_bitmap_, tile.post_dedup_bitmap_); - std::swap(per_cell_delete_condition_, tile.per_cell_delete_condition_); - } - /** Returns if the tile was used by the merge or not. 
*/ inline bool used() { return used_; @@ -1040,7 +1028,7 @@ class GlobalOrderResultTile : public ResultTileWithBitmap { void ensure_bitmap_for_query_condition() { if (post_dedup_bitmap_.has_value()) { if (ResultTileWithBitmap::has_bmp()) { - post_dedup_bitmap_ = ResultTileWithBitmap::bitmap_; + post_dedup_bitmap_->assign(this->bitmap_.begin(), this->bitmap_.end()); } else { post_dedup_bitmap_->resize(ResultTile::cell_num_, 1); } @@ -1056,7 +1044,7 @@ class GlobalOrderResultTile : public ResultTileWithBitmap { * Returns the bitmap that included query condition results. For this tile * type, this is 'post_dedup_bitmap_' if allocated, or the regular bitmap. */ - inline std::vector& post_dedup_bitmap() { + inline tdb::pmr::vector& post_dedup_bitmap() { return post_dedup_bitmap_.has_value() && post_dedup_bitmap_->size() > 0 ? post_dedup_bitmap_.value() : ResultTileWithBitmap::bitmap_; @@ -1164,7 +1152,7 @@ class GlobalOrderResultTile : public ResultTileWithBitmap { /* ********************************* */ /** Hilbert values for this tile. */ - std::vector hilbert_values_; + tdb::pmr::vector hilbert_values_; /** * An extra bitmap will be needed for array with no duplicates. For those, @@ -1172,13 +1160,13 @@ class GlobalOrderResultTile : public ResultTileWithBitmap { * will contain the results before query condition, and post_dedup_bitmap_ * will contain results after query condition. */ - optional> post_dedup_bitmap_; + optional> post_dedup_bitmap_; /** * Delete condition index that deleted a cell. Used for consolidation with * delete metadata. */ - std::vector per_cell_delete_condition_; + tdb::pmr::vector per_cell_delete_condition_; /** Was the tile used in the merge. 
*/ bool used_; @@ -1190,36 +1178,25 @@ class UnorderedWithDupsResultTile : public ResultTileWithBitmap { /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ - UnorderedWithDupsResultTile( - unsigned frag_idx, uint64_t tile_idx, const FragmentMetadata& frag_md) - : ResultTileWithBitmap(frag_idx, tile_idx, frag_md) { - } - - /** Move constructor. */ - UnorderedWithDupsResultTile(UnorderedWithDupsResultTile&& other) noexcept { - // Swap with the argument - swap(other); - } + /** No default memory tracker. */ + UnorderedWithDupsResultTile() = delete; - /** Move-assign operator. */ - UnorderedWithDupsResultTile& operator=(UnorderedWithDupsResultTile&& other) { - // Swap with the argument - swap(other); - - return *this; + UnorderedWithDupsResultTile( + unsigned frag_idx, + uint64_t tile_idx, + const FragmentMetadata& frag_md, + shared_ptr memory_tracker) + : ResultTileWithBitmap( + frag_idx, tile_idx, frag_md, memory_tracker) { } + DISABLE_MOVE_AND_MOVE_ASSIGN(UnorderedWithDupsResultTile); DISABLE_COPY_AND_COPY_ASSIGN(UnorderedWithDupsResultTile); /* ********************************* */ /* PUBLIC METHODS */ /* ********************************* */ - /** Swaps the contents (all field values) of this tile with the given tile. */ - void swap(UnorderedWithDupsResultTile& tile) { - ResultTileWithBitmap::swap(tile); - } - /** * Returns whether this tile has a post query condition bitmap. For this * tile type, this is stored in the regular bitmap. @@ -1243,7 +1220,7 @@ class UnorderedWithDupsResultTile : public ResultTileWithBitmap { * Returns the bitmap that included query condition results. For this tile * type, this is stored in the regular bitmap. 
*/ - inline std::vector& post_dedup_bitmap() { + inline tdb::pmr::vector& post_dedup_bitmap() { return ResultTileWithBitmap::bitmap_; } diff --git a/tiledb/sm/query/readers/sparse_global_order_reader.cc b/tiledb/sm/query/readers/sparse_global_order_reader.cc index 8f12ac915714..58b4308c0c75 100644 --- a/tiledb/sm/query/readers/sparse_global_order_reader.cc +++ b/tiledb/sm/query/readers/sparse_global_order_reader.cc @@ -135,6 +135,8 @@ Status SparseGlobalOrderReader::dowork() { return Status::Ok(); } + subarray_.reset_default_ranges(); + // Load initial data, if not loaded already. throw_if_not_ok(load_initial_data()); purge_deletes_consolidation_ = !deletes_consolidation_no_purge_ && @@ -246,9 +248,9 @@ void SparseGlobalOrderReader::load_all_tile_offsets() { // Make sure we have enough space for tile offsets data. uint64_t total_tile_offset_usage = tile_offsets_size(subarray_.relevant_fragments()); - uint64_t available_memory = array_memory_tracker_->get_memory_available() - - array_memory_tracker_->get_memory_usage( - MemoryTracker::MemoryType::TILE_OFFSETS); + uint64_t available_memory = + array_memory_tracker_->get_memory_available() - + array_memory_tracker_->get_memory_usage(MemoryType::TILE_OFFSETS); if (total_tile_offset_usage > available_memory) { throw SparseGlobalOrderReaderStatusException( "Cannot load tile offsets, computed size (" + @@ -330,7 +332,8 @@ bool SparseGlobalOrderReader::add_result_tile( t, array_schema_.allows_dups(), deletes_consolidation_no_purge_, - frag_md); + frag_md, + query_memory_tracker_); return false; } diff --git a/tiledb/sm/query/readers/sparse_index_reader_base.cc b/tiledb/sm/query/readers/sparse_index_reader_base.cc index 3d2861973d71..49d0e7bcb369 100644 --- a/tiledb/sm/query/readers/sparse_index_reader_base.cc +++ b/tiledb/sm/query/readers/sparse_index_reader_base.cc @@ -74,7 +74,6 @@ SparseIndexReaderBase::SparseIndexReaderBase( , tmp_read_state_(array_->fragment_metadata().size()) , memory_budget_(config_, reader_string) 
, include_coords_(include_coords) - , array_memory_tracker_(params.memory_tracker()) , memory_used_for_coords_total_(0) , deletes_consolidation_no_purge_( buffers_.count(constants::delete_timestamps) != 0) @@ -389,6 +388,14 @@ Status SparseIndexReaderBase::load_initial_data() { memory_budget_.ratio_tile_ranges() * memory_budget_.total_budget()) return logger_->status( Status_ReaderError("Exceeded memory budget for result tile ranges")); + } else { + for (const auto& [name, _] : aggregates_) { + if (array_schema_.is_dim(name)) { + throw_if_not_ok(subarray_.load_relevant_fragment_rtrees( + storage_manager_->compute_tp())); + break; + } + } } // Compute tile offsets to load and var size to load for attributes. @@ -487,7 +494,13 @@ void SparseIndexReaderBase::load_tile_offsets_for_fragments( load_tile_offsets(relevant_fragments, attr_tile_offsets_to_load_); // Load tile metadata. - load_tile_metadata(relevant_fragments, attr_tile_offsets_to_load_); + auto md_names_to_load = attr_tile_offsets_to_load_; + for (const auto& [name, _] : aggregates_) { + if (array_schema_.is_dim(name)) { + md_names_to_load.emplace_back(name); + } + } + load_tile_metadata(relevant_fragments, md_names_to_load); } Status SparseIndexReaderBase::read_and_unfilter_coords( @@ -619,7 +632,8 @@ void SparseIndexReaderBase::compute_tile_bitmaps( auto& ranges_for_dim = subarray_.ranges_for_dim(dim_idx); // Compute the list of range index to process. - std::vector relevant_ranges; + tdb::pmr::vector relevant_ranges( + query_memory_tracker_->get_resource(MemoryType::DIMENSIONS)); relevant_ranges.reserve(ranges_for_dim.size()); domain.dimension_ptr(dim_idx)->relevant_ranges( ranges_for_dim, mbr[dim_idx], relevant_ranges); @@ -628,9 +642,8 @@ void SparseIndexReaderBase::compute_tile_bitmaps( // there is no need to compute bitmaps. 
const bool non_overlapping = std::is_same::value; if (non_overlapping) { - std::vector covered_bitmap = - domain.dimension_ptr(dim_idx)->covered_vec( - ranges_for_dim, mbr[dim_idx], relevant_ranges); + auto covered_bitmap = domain.dimension_ptr(dim_idx)->covered_vec( + ranges_for_dim, mbr[dim_idx], relevant_ranges); // See if any range is covered. uint64_t count = std::accumulate( diff --git a/tiledb/sm/query/readers/sparse_index_reader_base.h b/tiledb/sm/query/readers/sparse_index_reader_base.h index 5ef454e20cad..c9303391001f 100644 --- a/tiledb/sm/query/readers/sparse_index_reader_base.h +++ b/tiledb/sm/query/readers/sparse_index_reader_base.h @@ -608,9 +608,6 @@ class SparseIndexReaderBase : public ReaderBase { /** Are dimensions var sized. */ std::vector is_dim_var_size_; - /** Memory tracker object for the array. */ - MemoryTracker* array_memory_tracker_; - /** Memory used for coordinates tiles. */ std::atomic memory_used_for_coords_total_; diff --git a/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc b/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc index 9de064abedd9..ec9dc6d35acf 100644 --- a/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc +++ b/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc @@ -122,9 +122,6 @@ void SparseUnorderedWithDupsReader::refresh_config() { template Status SparseUnorderedWithDupsReader::dowork() { - // Subarray is not known to be explicitly set until buffers are deserialized - include_coords_ = subarray_.is_set(); - auto timer_se = stats_->start_timer("dowork"); stats_->add_counter("loop_num", 1); @@ -153,6 +150,10 @@ Status SparseUnorderedWithDupsReader::dowork() { return Status::Ok(); } + // Subarray is not known to be explicitly set until buffers are deserialized + subarray_.reset_default_ranges(); + include_coords_ = subarray_.is_set(); + // Load initial data, if not loaded already. Coords are only included if the // subarray is set. 
throw_if_not_ok(load_initial_data()); @@ -251,9 +252,9 @@ void SparseUnorderedWithDupsReader::load_tile_offsets_data() { bool initial_load = tile_offsets_min_frag_idx_ == std::numeric_limits::max() && tile_offsets_max_frag_idx_ == 0; - uint64_t available_memory = array_memory_tracker_->get_memory_available() - - array_memory_tracker_->get_memory_usage( - MemoryTracker::MemoryType::TILE_OFFSETS); + uint64_t available_memory = + array_memory_tracker_->get_memory_available() - + array_memory_tracker_->get_memory_usage(MemoryType::TILE_OFFSETS); auto& relevant_fragments = subarray_.relevant_fragments(); if (!partial_tile_offsets_loading_) { @@ -402,7 +403,7 @@ bool SparseUnorderedWithDupsReader::add_result_tile( memory_used_for_coords_total_ += tiles_size; // Add the result tile. - result_tiles.emplace_back(f, t, frag_md); + result_tiles.emplace_back(f, t, frag_md, query_memory_tracker_); // Are all tiles loaded for this fragment. if (t == last_t) { diff --git a/tiledb/sm/query/strategy_base.cc b/tiledb/sm/query/strategy_base.cc index 393a1bff0c67..91efeee59487 100644 --- a/tiledb/sm/query/strategy_base.cc +++ b/tiledb/sm/query/strategy_base.cc @@ -47,7 +47,9 @@ namespace sm { StrategyBase::StrategyBase( stats::Stats* stats, shared_ptr logger, StrategyParams& params) - : stats_(stats) + : array_memory_tracker_(params.array_memory_tracker()) + , query_memory_tracker_(params.query_memory_tracker()) + , stats_(stats) , logger_(logger) , array_(params.array()) , array_schema_(params.array()->array_schema_latest()) @@ -61,8 +63,8 @@ StrategyBase::StrategyBase( , offsets_bitsize_(constants::cell_var_offset_size * 8) { } -stats::Stats* StrategyBase::stats() const { - return stats_; +void StrategyBase::set_stats(const stats::StatsData& data) { + stats_->populate_with_data(data); } /* ****************************** */ diff --git a/tiledb/sm/query/strategy_base.h b/tiledb/sm/query/strategy_base.h index 91bc71946403..d1d157c10c5d 100644 --- a/tiledb/sm/query/strategy_base.h +++ 
b/tiledb/sm/query/strategy_base.h @@ -66,6 +66,8 @@ class StrategyParams { /* ********************************* */ StrategyParams( + shared_ptr array_memory_tracker, + shared_ptr query_memory_tracker, StorageManager* storage_manager, shared_ptr array, Config& config, @@ -75,9 +77,10 @@ class StrategyParams { Layout layout, std::optional& condition, DefaultChannelAggregates& default_channel_aggregates, - bool skip_checks_serialization, - MemoryTracker* memory_tracker) - : storage_manager_(storage_manager) + bool skip_checks_serialization) + : array_memory_tracker_(array_memory_tracker) + , query_memory_tracker_(query_memory_tracker) + , storage_manager_(storage_manager) , array_(array) , config_(config) , buffers_(buffers) @@ -86,14 +89,22 @@ class StrategyParams { , layout_(layout) , condition_(condition) , default_channel_aggregates_(default_channel_aggregates) - , skip_checks_serialization_(skip_checks_serialization) - , memory_tracker_(memory_tracker) { + , skip_checks_serialization_(skip_checks_serialization) { } /* ********************************* */ /* API */ /* ********************************* */ + /** Return the array memory tracker. */ + inline shared_ptr array_memory_tracker() { + return array_memory_tracker_; + } + + inline shared_ptr query_memory_tracker() { + return query_memory_tracker_; + } + /** Return the storage manager. */ inline StorageManager* storage_manager() { return storage_manager_; @@ -144,15 +155,17 @@ class StrategyParams { return skip_checks_serialization_; } - inline MemoryTracker* memory_tracker() { - return memory_tracker_; - } - private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** Array Memory tracker. */ + shared_ptr array_memory_tracker_; + + /** Query Memory tracker. */ + shared_ptr query_memory_tracker_; + /** Storage manager. */ StorageManager* storage_manager_; @@ -182,9 +195,6 @@ class StrategyParams { /** Skip checks for serialization. 
*/ bool skip_checks_serialization_; - - /** Memory tracker. */ - MemoryTracker* memory_tracker_; }; /** Processes read or write queries. */ @@ -206,7 +216,17 @@ class StrategyBase { /* ********************************* */ /** Returns `stats_`. */ - stats::Stats* stats() const; + inline stats::Stats* stats() const { + return stats_; + } + + /** + * Populate the owned stats instance with data. + * To be removed when the class will get a C41 constructor. + * + * @param data Data to populate the stats with. + */ + void set_stats(const stats::StatsData& data); /** Returns the configured offsets format mode. */ std::string offsets_mode() const; @@ -231,6 +251,12 @@ class StrategyBase { /* PROTECTED ATTRIBUTES */ /* ********************************* */ + /** The array memory tracker. */ + shared_ptr array_memory_tracker_; + + /** The query memory tracker. */ + shared_ptr query_memory_tracker_; + /** The class stats. */ stats::Stats* stats_; diff --git a/tiledb/sm/query/test/CMakeLists.txt b/tiledb/sm/query/test/CMakeLists.txt index b6df998a9f9f..32827f7eae3b 100644 --- a/tiledb/sm/query/test/CMakeLists.txt +++ b/tiledb/sm/query/test/CMakeLists.txt @@ -29,7 +29,7 @@ include(unit_test) commence(unit_test query) this_target_sources(main.cc unit_validity_vector.cc unit_query_condition.cc) # Not actually testing a unit yet, but some things that ought to be units - this_target_link_libraries(TILEDB_CORE_OBJECTS TILEDB_CORE_OBJECTS_ILIB) + this_target_link_libraries(tiledb_test_support_lib) this_target_link_libraries(ast_test_support_lib) # We want tests to continue as normal even as the API is changing, diff --git a/tiledb/sm/query/test/unit_query_condition.cc b/tiledb/sm/query/test/unit_query_condition.cc index 0d53549ac7e6..60882000f871 100644 --- a/tiledb/sm/query/test/unit_query_condition.cc +++ b/tiledb/sm/query/test/unit_query_condition.cc @@ -31,11 +31,14 @@ */ #include "test/support/src/ast_helpers.h" +#include "test/support/src/mem_helpers.h" #include 
"tiledb/common/common.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/attribute.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/array_schema/domain.h" +#include "tiledb/sm/enums/array_type.h" #include "tiledb/sm/enums/datatype.h" #include "tiledb/sm/enums/query_condition_combination_op.h" #include "tiledb/sm/enums/query_condition_op.h" @@ -59,7 +62,8 @@ TEST_CASE( REQUIRE(query_condition.empty()); REQUIRE(query_condition.field_names().empty()); - shared_ptr array_schema = make_shared(HERE()); + shared_ptr array_schema = make_shared( + HERE(), ArrayType::DENSE, tiledb::test::create_test_memory_tracker()); std::vector result_cell_slabs; std::vector> frag_md; REQUIRE( @@ -198,7 +202,8 @@ TEST_CASE("QueryCondition: Test blob type", "[QueryCondition][blob]") { QueryConditionOp::LT) .ok()); - shared_ptr array_schema = make_shared(HERE()); + shared_ptr array_schema = make_shared( + HERE(), ArrayType::DENSE, tiledb::test::create_test_memory_tracker()); shared_ptr attr = make_shared(HERE(), "blob_attr", Datatype::BLOB); REQUIRE(array_schema->add_attribute(attr).ok()); @@ -1121,10 +1126,10 @@ void test_apply_cells( frag_md[0] = make_shared( HERE(), nullptr, - nullptr, array_schema, URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); REQUIRE( query_condition.apply(*array_schema, frag_md, result_cell_slabs, 1).ok()); @@ -1158,10 +1163,10 @@ void test_apply_cells( frag_md[0] = make_shared( HERE(), nullptr, - nullptr, array_schema, URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); REQUIRE(query_condition_eq_null .apply(*array_schema, frag_md, result_cell_slabs_eq_null, 1) @@ -1307,10 +1312,10 @@ void test_apply_cells( frag_md[0] = make_shared( HERE(), nullptr, - nullptr, array_schema, URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); REQUIRE( query_condition.apply(*array_schema, frag_md, 
result_cell_slabs, 1).ok()); @@ -1566,7 +1571,9 @@ void test_apply(const Datatype type, bool var_size, bool nullable) { const char* fill_value = "ac"; // Initialize the array schema. - shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::get_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_nullable(nullable); attr.set_cell_val_num(var_size ? constants::var_num : 2); @@ -1577,26 +1584,31 @@ void test_apply(const Datatype type, bool var_size, bool nullable) { REQUIRE( array_schema->add_attribute(make_shared(HERE(), attr)).ok()); - Domain domain; - auto dim{make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE(array_schema->set_domain(make_shared(HERE(), domain)).ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileSizes tile_sizes( var_size ? (cells + 1) * constants::cell_var_offset_size : 2 * cells * sizeof(char), @@ -1627,28 +1639,34 @@ void test_apply(const Datatype type, bool var_size, bool nullable) { const T fill_value = 3; // Initialize the array schema. 
- shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::get_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_cell_val_num(1); attr.set_fill_value(&fill_value, sizeof(T)); REQUIRE( array_schema->add_attribute(make_shared(HERE(), attr)).ok()); - Domain domain; - auto dim{make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE(array_schema->set_domain(make_shared(HERE(), domain)).ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. @@ -1659,7 +1677,8 @@ void test_apply(const Datatype type, bool var_size, bool nullable) { var_size ? std::optional(0) : std::nullopt, nullable ? std::optional(0) : std::nullopt, nullable ? std::optional(0) : std::nullopt); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -1729,7 +1748,9 @@ TEST_CASE( return; // Initialize the array schema. - shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_nullable(nullable); attr.set_cell_val_num(var_size ? 
constants::var_num : 2); @@ -1740,27 +1761,28 @@ TEST_CASE( REQUIRE(array_schema->add_attribute(tdb::make_shared(HERE(), attr)) .ok()); - Domain domain; - auto dim{ - make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE( - array_schema->set_domain(make_shared(HERE(), domain)) - .ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); std::vector> frag_md(1); frag_md[0] = make_shared( HERE(), nullptr, - nullptr, array_schema, URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); // Initialize the result tile. @@ -1773,7 +1795,8 @@ TEST_CASE( nullable ? std::optional(cells * constants::cell_validity_size) : std::nullopt, nullable ? std::optional(0) : std::nullopt); - ResultTile result_tile(0, 0, *frag_md[0]); + ResultTile result_tile( + 0, 0, *frag_md[0], tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -2278,7 +2301,9 @@ void test_apply_dense( const char* fill_value = "ac"; // Initialize the array schema. - shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_nullable(nullable); attr.set_cell_val_num(var_size ? 
constants::var_num : 2); @@ -2289,25 +2314,26 @@ void test_apply_dense( REQUIRE(array_schema->add_attribute(tdb::make_shared(HERE(), attr)) .ok()); - Domain domain; - auto dim{ - make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE( - array_schema->set_domain(make_shared(HERE(), domain)) - .ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. @@ -2320,7 +2346,8 @@ void test_apply_dense( nullable ? std::optional(cells * constants::cell_validity_size) : std::nullopt, nullable ? std::optional(0) : std::nullopt); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -2342,31 +2369,34 @@ void test_apply_dense(const Datatype type, bool var_size, bool nullable) { const T fill_value = 3; // Initialize the array schema. 
- shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_cell_val_num(1); attr.set_fill_value(&fill_value, sizeof(T)); REQUIRE(array_schema->add_attribute(tdb::make_shared(HERE(), attr)) .ok()); - Domain domain; - auto dim{ - make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE( - array_schema->set_domain(make_shared(HERE(), domain)) - .ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. @@ -2377,7 +2407,8 @@ void test_apply_dense(const Datatype type, bool var_size, bool nullable) { var_size ? std::optional(0) : std::nullopt, nullable ? std::optional(0) : std::nullopt, nullable ? std::optional(0) : std::nullopt); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -2448,7 +2479,9 @@ TEST_CASE( return; // Initialize the array schema. 
- shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_nullable(nullable); attr.set_cell_val_num(var_size ? constants::var_num : 2); @@ -2459,25 +2492,26 @@ TEST_CASE( REQUIRE( array_schema->add_attribute(make_shared(HERE(), attr)).ok()); - Domain domain; - auto dim{ - make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE( - array_schema->set_domain(make_shared(HERE(), domain)) - .ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. @@ -2490,7 +2524,8 @@ TEST_CASE( nullable ? std::optional(cells * constants::cell_validity_size) : std::nullopt, nullable ? std::optional(0) : std::nullopt); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -2689,7 +2724,9 @@ void test_apply_cells_sparse( } // Apply the query condition. 
- std::vector result_bitmap(cells, 1); + auto resource = tiledb::test::get_test_memory_tracker()->get_resource( + MemoryType::TILE_BITMAP); + tdb::pmr::vector result_bitmap(cells, 1, resource); REQUIRE(query_condition .apply_sparse(*array_schema, *result_tile, result_bitmap) .ok()); @@ -2714,7 +2751,9 @@ void test_apply_cells_sparse( REQUIRE(query_condition_eq_null.check(*array_schema).ok()); // Apply the query condition. - std::vector result_bitmap_eq_null(cells, 1); + auto resource = tiledb::test::get_test_memory_tracker()->get_resource( + MemoryType::TILE_BITMAP); + tdb::pmr::vector result_bitmap_eq_null(cells, 1, resource); REQUIRE(query_condition_eq_null .apply_sparse( *array_schema, *result_tile, result_bitmap_eq_null) @@ -2784,7 +2823,9 @@ void test_apply_cells_sparse( } // Apply the query condition. - std::vector result_bitmap(cells, 1); + auto resource = tiledb::test::get_test_memory_tracker()->get_resource( + MemoryType::TILE_BITMAP); + tdb::pmr::vector result_bitmap(cells, 1, resource); REQUIRE(query_condition .apply_sparse(*array_schema, *result_tile, result_bitmap) .ok()); @@ -2977,7 +3018,9 @@ void test_apply_sparse( const char* fill_value = "ac"; // Initialize the array schema. - shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_nullable(nullable); attr.set_cell_val_num(var_size ? 
constants::var_num : 2); @@ -2988,25 +3031,26 @@ void test_apply_sparse( REQUIRE(array_schema->add_attribute(tdb::make_shared(HERE(), attr)) .ok()); - Domain domain; - auto dim{ - make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE( - array_schema->set_domain(make_shared(HERE(), domain)) - .ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. @@ -3019,7 +3063,8 @@ void test_apply_sparse( nullable ? std::optional(cells * constants::cell_validity_size) : std::nullopt, nullable ? std::optional(0) : std::nullopt); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -3041,31 +3086,34 @@ void test_apply_sparse(const Datatype type, bool var_size, bool nullable) { const T fill_value = 3; // Initialize the array schema. 
- shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_cell_val_num(1); attr.set_fill_value(&fill_value, sizeof(T)); REQUIRE(array_schema->add_attribute(tdb::make_shared(HERE(), attr)) .ok()); - Domain domain; - auto dim{ - make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE( - array_schema->set_domain(make_shared(HERE(), domain)) - .ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. @@ -3076,7 +3124,8 @@ void test_apply_sparse(const Datatype type, bool var_size, bool nullable) { var_size ? std::optional(0) : std::nullopt, nullable ? std::optional(0) : std::nullopt, nullable ? 
std::optional(0) : std::nullopt); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -3213,10 +3262,10 @@ void validate_qc_apply( frag_md[0] = make_shared( HERE(), nullptr, - nullptr, array_schema, URI(), std::make_pair(0, 0), + tiledb::test::create_test_memory_tracker(), true); REQUIRE(tp.qc_.apply(*array_schema, frag_md, result_cell_slabs, 1).ok()); REQUIRE(result_cell_slabs.size() == tp.expected_slabs_.size()); @@ -3248,7 +3297,9 @@ void validate_qc_apply_sparse( shared_ptr array_schema, ResultTile& result_tile, bool negated = false) { - std::vector sparse_result_bitmap(cells, 1); + auto resource = tiledb::test::get_test_memory_tracker()->get_resource( + MemoryType::TILE_BITMAP); + tdb::pmr::vector sparse_result_bitmap(cells, 1, resource); REQUIRE(tp.qc_ .apply_sparse( *array_schema, result_tile, sparse_result_bitmap) @@ -3257,7 +3308,7 @@ void validate_qc_apply_sparse( CHECK(sparse_result_bitmap[i] == tp.expected_bitmap_[i]); } - std::vector sparse_result_bitmap1(cells, 2); + tdb::pmr::vector sparse_result_bitmap1(cells, 2, resource); REQUIRE(tp.qc_ .apply_sparse( *array_schema, result_tile, sparse_result_bitmap1) @@ -3813,29 +3864,32 @@ TEST_CASE( const Datatype type = Datatype::UINT64; // Initialize the array schema. 
- shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); REQUIRE(array_schema->add_attribute(tdb::make_shared(HERE(), attr)) .ok()); - Domain domain; - auto dim{ - make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE( - array_schema->set_domain(make_shared(HERE(), domain)) - .ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. @@ -3846,7 +3900,8 @@ TEST_CASE( std::nullopt, std::nullopt, std::nullopt); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -4098,7 +4153,9 @@ TEST_CASE( const Datatype type = GENERATE(Datatype::STRING_ASCII, Datatype::STRING_UTF8); // Initialize the array schema. 
- shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::get_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_nullable(false); attr.set_cell_val_num(constants::var_num); @@ -4106,22 +4163,26 @@ TEST_CASE( REQUIRE( array_schema->add_attribute(make_shared(HERE(), attr)).ok()); - Domain domain; - auto dim{make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE(array_schema->set_domain(make_shared(HERE(), domain)).ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. @@ -4133,7 +4194,8 @@ TEST_CASE( 0, std::nullopt, std::nullopt); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -4452,7 +4514,9 @@ TEST_CASE( const Datatype type = Datatype::STRING_UTF8; // Initialize the array schema. 
- shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_nullable(false); attr.set_cell_val_num(constants::var_num); @@ -4460,22 +4524,26 @@ TEST_CASE( REQUIRE( array_schema->add_attribute(make_shared(HERE(), attr)).ok()); - Domain domain; - auto dim{make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE(array_schema->set_domain(make_shared(HERE(), domain)).ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // For pasting into a Python shell: @@ -4545,7 +4613,8 @@ TEST_CASE( 0, std::nullopt, std::nullopt); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -4774,30 +4843,33 @@ TEST_CASE( const Datatype type = Datatype::FLOAT32; // Initialize the array schema. 
- shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_nullable(true); REQUIRE(array_schema->add_attribute(tdb::make_shared(HERE(), attr)) .ok()); - Domain domain; - auto dim{ - make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE( - array_schema->set_domain(make_shared(HERE(), domain)) - .ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. @@ -4808,7 +4880,8 @@ TEST_CASE( std::nullopt, cells * constants::cell_validity_size, 0); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -4870,7 +4943,9 @@ TEST_CASE( return; // Initialize the array schema. - shared_ptr array_schema = make_shared(HERE()); + auto memory_tracker = tiledb::test::create_test_memory_tracker(); + shared_ptr array_schema = + make_shared(HERE(), ArrayType::DENSE, memory_tracker); Attribute attr(field_name, type); attr.set_nullable(nullable); attr.set_cell_val_num(var_size ? 
constants::var_num : 2); @@ -4881,22 +4956,26 @@ TEST_CASE( REQUIRE( array_schema->add_attribute(make_shared(HERE(), attr)).ok()); - Domain domain; - auto dim{make_shared(HERE(), "dim1", Datatype::UINT32)}; + auto domain{make_shared(HERE(), memory_tracker)}; + auto dim{make_shared( + HERE(), + "dim1", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t bounds[2] = {1, cells}; Range range(bounds, 2 * sizeof(uint32_t)); REQUIRE(dim->set_domain(range).ok()); uint32_t tile_extent = 10; REQUIRE(dim->set_tile_extent(&tile_extent).ok()); - REQUIRE(domain.add_dimension(dim).ok()); - REQUIRE(array_schema->set_domain(make_shared(HERE(), domain)).ok()); + REQUIRE(domain->add_dimension(dim).ok()); + REQUIRE(array_schema->set_domain(domain).ok()); FragmentMetadata frag_md( - nullptr, nullptr, array_schema, URI(), std::make_pair(0, 0), + memory_tracker, true); // Initialize the result tile. @@ -4909,7 +4988,8 @@ TEST_CASE( nullable ? std::optional(cells * constants::cell_validity_size) : std::nullopt, nullable ? std::optional(0) : std::nullopt); - ResultTile result_tile(0, 0, frag_md); + ResultTile result_tile( + 0, 0, frag_md, tiledb::test::get_test_memory_tracker()); ResultTile::TileData tile_data{nullptr, nullptr, nullptr}; result_tile.init_attr_tile( constants::format_version, @@ -5001,7 +5081,9 @@ TEST_CASE( } // Apply the query condition. 
- std::vector result_bitmap(cells, 1); + auto resource = tiledb::test::get_test_memory_tracker()->get_resource( + MemoryType::TILE_BITMAP); + tdb::pmr::vector result_bitmap(cells, 1, resource); REQUIRE(query_condition .apply_sparse(*array_schema, result_tile, result_bitmap) .ok()); diff --git a/tiledb/sm/query/writers/dense_tiler.cc b/tiledb/sm/query/writers/dense_tiler.cc index 4b6fe9c5abb0..eaf83b388bbc 100644 --- a/tiledb/sm/query/writers/dense_tiler.cc +++ b/tiledb/sm/query/writers/dense_tiler.cc @@ -53,13 +53,15 @@ namespace sm { template DenseTiler::DenseTiler( + shared_ptr memory_tracker, const std::unordered_map* buffers, const Subarray* subarray, Stats* const parent_stats, const std::string& offsets_format_mode, uint64_t offsets_bitsize, bool offsets_extra_element) - : stats_(parent_stats->create_child("DenseTiler")) + : memory_tracker_(memory_tracker) + , stats_(parent_stats->create_child("DenseTiler")) , array_schema_(subarray->array()->array_schema_latest()) , buffers_(buffers) , subarray_(subarray) @@ -223,7 +225,8 @@ Status DenseTiler::get_tile( constants::format_version, constants::cell_var_offset_type, constants::cell_var_offset_size, - tile_off_size); + tile_off_size, + memory_tracker_); // Fill entire tile with MAX_UINT64 std::vector to_write( diff --git a/tiledb/sm/query/writers/dense_tiler.h b/tiledb/sm/query/writers/dense_tiler.h index 1e46927bb512..fdcfa6d5789c 100644 --- a/tiledb/sm/query/writers/dense_tiler.h +++ b/tiledb/sm/query/writers/dense_tiler.h @@ -146,6 +146,7 @@ class DenseTiler { * from `subarray`). Otherwise, an assertion is raised. */ DenseTiler( + shared_ptr memory_tracker, const std::unordered_map* buffers, const Subarray* subarray, stats::Stats* const parent_stats, @@ -207,6 +208,9 @@ class DenseTiler { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The memory tracker. */ + shared_ptr memory_tracker_; + /** The stats for the dense tiler. 
*/ stats::Stats* stats_; diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 5d943e792010..20d20785ad98 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -177,7 +177,7 @@ Status GlobalOrderWriter::alloc_global_write_state() { global_write_state_.reset(new GlobalWriteState); // Alloc FragmentMetadata object - global_write_state_->frag_meta_ = make_shared(HERE()); + global_write_state_->frag_meta_ = this->create_fragment_metadata(); // Used in serialization when FragmentMetadata is built from ground up global_write_state_->frag_meta_->set_context_resources( &storage_manager_->resources()); @@ -198,20 +198,18 @@ Status GlobalOrderWriter::init_global_write_state() { const auto capacity = array_schema_.capacity(); const auto cell_num_per_tile = coords_info_.has_coords_ ? capacity : domain.cell_num_per_tile(); - auto last_tile_vector = std::pair( - name, WriterTileTupleVector()); - try { - last_tile_vector.second.emplace_back(WriterTileTuple( - array_schema_, - cell_num_per_tile, - var_size, - nullable, - cell_size, - type)); - } catch (const std::logic_error& le) { - return Status_WriterError(le.what()); - } - global_write_state_->last_tiles_.emplace(std::move(last_tile_vector)); + auto last_tiles_it = global_write_state_->last_tiles_.emplace( + std::piecewise_construct, + std::forward_as_tuple(name), + std::forward_as_tuple()); + last_tiles_it.first->second.emplace_back( + array_schema_, + cell_num_per_tile, + var_size, + nullable, + cell_size, + type, + query_memory_tracker_); // Initialize cells written global_write_state_->cells_written_[name] = 0; @@ -866,7 +864,10 @@ Status GlobalOrderWriter::prepare_full_tiles( // Initialize attribute and coordinate tiles for (const auto& it : buffers_) { - (*tiles)[it.first] = WriterTileTupleVector(); + (*tiles).emplace( + std::piecewise_construct, + std::forward_as_tuple(it.first), + 
std::forward_as_tuple()); } auto num = buffers_.size(); @@ -968,16 +969,26 @@ Status GlobalOrderWriter::prepare_full_tiles_fixed( if (full_tile_num > 0) { tiles->reserve(full_tile_num); for (uint64_t i = 0; i < full_tile_num; i++) { - tiles->emplace_back(WriterTileTuple( - array_schema_, cell_num_per_tile, false, nullable, cell_size, type)); + tiles->emplace_back( + array_schema_, + cell_num_per_tile, + false, + nullable, + cell_size, + type, + query_memory_tracker_); } // Handle last tile (it must be either full or empty) auto tile_it = tiles->begin(); if (last_tile_cell_idx == cell_num_per_tile) { - tile_it->fixed_tile().swap(last_tile.fixed_tile()); + tile_it->fixed_tile().write( + last_tile.fixed_tile().data(), 0, last_tile.fixed_tile().size()); if (nullable) { - tile_it->validity_tile().swap(last_tile.validity_tile()); + tile_it->validity_tile().write( + last_tile.validity_tile().data(), + 0, + last_tile.validity_tile().size()); } tile_it++; } else if (last_tile_cell_idx != 0) { @@ -1161,8 +1172,6 @@ Status GlobalOrderWriter::prepare_full_tiles_var( ++cell_idx; } while (last_tile_cell_idx != cell_num_per_tile && cell_idx != cell_num); } - - last_tile.var_tile().set_size(last_var_offset); } // Initialize full tiles and set previous last tile as first tile @@ -1175,19 +1184,33 @@ Status GlobalOrderWriter::prepare_full_tiles_var( if (full_tile_num > 0) { tiles->reserve(full_tile_num); for (uint64_t i = 0; i < full_tile_num; i++) { - tiles->emplace_back(WriterTileTuple( - array_schema_, cell_num_per_tile, true, nullable, cell_size, type)); + tiles->emplace_back( + array_schema_, + cell_num_per_tile, + true, + nullable, + cell_size, + type, + query_memory_tracker_); } // Handle last tile (it must be either full or empty) auto tile_it = tiles->begin(); if (last_tile_cell_idx == cell_num_per_tile) { - last_var_offset = 0; - tile_it->offset_tile().swap(last_tile.offset_tile()); - tile_it->var_tile().swap(last_tile.var_tile()); + tile_it->offset_tile().write( + 
last_tile.offset_tile().data(), 0, last_tile.offset_tile().size()); + tile_it->var_tile().write_var( + last_tile.var_tile().data(), 0, last_var_offset); + tile_it->var_tile().set_size(last_var_offset); if (nullable) { - tile_it->validity_tile().swap(last_tile.validity_tile()); + tile_it->validity_tile().write( + last_tile.validity_tile().data(), + 0, + last_tile.validity_tile().size()); } + + last_var_offset = 0; + tile_it++; } else if (last_tile_cell_idx != 0) { return Status_WriterError( diff --git a/tiledb/sm/query/writers/ordered_writer.cc b/tiledb/sm/query/writers/ordered_writer.cc index 714b6b7d9b2f..cc951acc7ee3 100644 --- a/tiledb/sm/query/writers/ordered_writer.cc +++ b/tiledb/sm/query/writers/ordered_writer.cc @@ -176,12 +176,13 @@ Status OrderedWriter::ordered_write() { auto timer_se = stats_->start_timer("ordered_write"); // Create new fragment - auto frag_meta = make_shared(HERE()); + auto frag_meta = this->create_fragment_metadata(); RETURN_CANCEL_OR_ERROR(create_fragment(true, frag_meta)); frag_uri_ = frag_meta->fragment_uri(); // Create a dense tiler DenseTiler dense_tiler( + query_memory_tracker_, &buffers_, &subarray_, stats_, @@ -197,9 +198,12 @@ Status OrderedWriter::ordered_write() { auto attr_num = buffers_.size(); auto compute_tp = storage_manager_->compute_tp(); auto thread_num = compute_tp->concurrency_level(); - std::unordered_map> tiles; + std::unordered_map> tiles; for (const auto& buff : buffers_) { - tiles.emplace(buff.first, std::vector()); + tiles.emplace( + std::piecewise_construct, + std::forward_as_tuple(buff.first), + std::forward_as_tuple()); } if (attr_num > tile_num) { // Parallelize over attributes @@ -283,7 +287,7 @@ Status OrderedWriter::ordered_write() { template Status OrderedWriter::prepare_filter_and_write_tiles( const std::string& name, - std::vector& tile_batches, + IndexedList& tile_batches, shared_ptr frag_meta, DenseTiler* dense_tiler, uint64_t thread_num) { @@ -317,8 +321,14 @@ Status 
OrderedWriter::prepare_filter_and_write_tiles( assert(batch_size > 0); tile_batches[b].reserve(batch_size); for (uint64_t i = 0; i < batch_size; i++) { - tile_batches[b].emplace_back(WriterTileTuple( - array_schema_, cell_num_per_tile, var, nullable, cell_size, type)); + tile_batches[b].emplace_back( + array_schema_, + cell_num_per_tile, + var, + nullable, + cell_size, + type, + query_memory_tracker_); } { diff --git a/tiledb/sm/query/writers/ordered_writer.h b/tiledb/sm/query/writers/ordered_writer.h index 14887f26a846..1bd5943472a7 100644 --- a/tiledb/sm/query/writers/ordered_writer.h +++ b/tiledb/sm/query/writers/ordered_writer.h @@ -127,7 +127,7 @@ class OrderedWriter : public WriterBase { template Status prepare_filter_and_write_tiles( const std::string& name, - std::vector& tile_batches, + IndexedList& tile_batches, shared_ptr frag_meta, DenseTiler* dense_tiler, uint64_t thread_num); diff --git a/tiledb/sm/query/writers/unordered_writer.cc b/tiledb/sm/query/writers/unordered_writer.cc index 2084f9feb2ac..147f6c89349c 100644 --- a/tiledb/sm/query/writers/unordered_writer.cc +++ b/tiledb/sm/query/writers/unordered_writer.cc @@ -167,7 +167,7 @@ std::string UnorderedWriter::name() { Status UnorderedWriter::alloc_frag_meta() { // Alloc FragmentMetadata object. - frag_meta_ = make_shared(HERE()); + frag_meta_ = this->create_fragment_metadata(); // Used in serialization when FragmentMetadata is built from ground up. 
frag_meta_->set_context_resources(&storage_manager_->resources()); @@ -380,7 +380,10 @@ Status UnorderedWriter::prepare_tiles( for (const auto& it : buffers_) { const auto& name = it.first; if (written_buffers_.count(name) == 0) { - (*tiles).emplace(name, WriterTileTupleVector()); + (*tiles).emplace( + std::piecewise_construct, + std::forward_as_tuple(name), + std::forward_as_tuple()); } } @@ -389,8 +392,8 @@ Status UnorderedWriter::prepare_tiles( storage_manager_->compute_tp(), 0, tiles->size(), [&](uint64_t i) { auto tiles_it = tiles->begin(); std::advance(tiles_it, i); - const auto& name = tiles_it->first; - RETURN_CANCEL_OR_ERROR(prepare_tiles(name, &((*tiles)[name]))); + RETURN_CANCEL_OR_ERROR( + prepare_tiles(tiles_it->first, &(tiles_it->second))); return Status::Ok(); }); @@ -427,8 +430,14 @@ Status UnorderedWriter::prepare_tiles_fixed( // Initialize tiles tiles->reserve(tile_num); for (uint64_t i = 0; i < tile_num; i++) { - tiles->emplace_back(WriterTileTuple( - array_schema_, cell_num_per_tile, false, nullable, cell_size, type)); + tiles->emplace_back( + array_schema_, + cell_num_per_tile, + false, + nullable, + cell_size, + type, + query_memory_tracker_); } // Write all cells one by one @@ -498,8 +507,14 @@ Status UnorderedWriter::prepare_tiles_var( // Initialize tiles tiles->reserve(tile_num); for (uint64_t i = 0; i < tile_num; i++) { - tiles->emplace_back(WriterTileTuple( - array_schema_, cell_num_per_tile, true, nullable, cell_size, type)); + tiles->emplace_back( + array_schema_, + cell_num_per_tile, + true, + nullable, + cell_size, + type, + query_memory_tracker_); } // Write all cells one by one @@ -653,7 +668,7 @@ Status UnorderedWriter::unordered_write() { } // Create new fragment - frag_meta_ = make_shared(HERE()); + frag_meta_ = this->create_fragment_metadata(); RETURN_CANCEL_OR_ERROR(create_fragment(false, frag_meta_)); } diff --git a/tiledb/sm/query/writers/writer_base.cc b/tiledb/sm/query/writers/writer_base.cc index 751899c547d1..262a71331938 
100644 --- a/tiledb/sm/query/writers/writer_base.cc +++ b/tiledb/sm/query/writers/writer_base.cc @@ -309,6 +309,11 @@ void WriterBase::refresh_config() { /* PRIVATE METHODS */ /* ****************************** */ +shared_ptr WriterBase::create_fragment_metadata() { + return make_shared( + HERE(), &storage_manager_->resources(), array_memory_tracker_); +} + Status WriterBase::add_written_fragment_info(const URI& uri) { written_fragment_info_.emplace_back(uri, fragment_timestamp_range_); return Status::Ok(); @@ -705,7 +710,7 @@ Status WriterBase::compute_tiles_metadata( auto tiles_it = tiles.begin(); std::advance(tiles_it, i); const auto& attr = tiles_it->first; - auto& attr_tiles = tiles[attr]; + auto& attr_tiles = tiles.at(attr); const auto type = array_schema_.type(attr); const auto is_dim = array_schema_.is_dim(attr); const auto var_size = array_schema_.var_size(attr); @@ -787,10 +792,10 @@ Status WriterBase::create_fragment( frag_meta = make_shared( HERE(), &storage_manager_->resources(), - nullptr, array_->array_schema_latest_ptr(), fragment_uri_, timestamp_range, + array_memory_tracker_, dense, has_timestamps, has_delete_metadata); @@ -946,8 +951,14 @@ Status WriterBase::init_tiles( coords_info_.has_coords_ ? 
capacity : domain.cell_num_per_tile(); tiles->reserve(tile_num); for (uint64_t i = 0; i < tile_num; i++) { - tiles->emplace_back(WriterTileTuple( - array_schema_, cell_num_per_tile, var_size, nullable, cell_size, type)); + tiles->emplace_back( + array_schema_, + cell_num_per_tile, + var_size, + nullable, + cell_size, + type, + query_memory_tracker_); } return Status::Ok(); diff --git a/tiledb/sm/query/writers/writer_base.h b/tiledb/sm/query/writers/writer_base.h index e289f5e936dc..99be55ce2ca8 100644 --- a/tiledb/sm/query/writers/writer_base.h +++ b/tiledb/sm/query/writers/writer_base.h @@ -36,6 +36,7 @@ #include #include "tiledb/common/common.h" +#include "tiledb/common/indexed_list.h" #include "tiledb/common/status.h" #include "tiledb/sm/fragment/written_fragment_info.h" #include "tiledb/sm/query/iquery_strategy.h" @@ -57,7 +58,7 @@ class DomainBuffersView; class FragmentMetadata; class TileMetadataGenerator; -using WriterTileTupleVector = std::vector; +using WriterTileTupleVector = IndexedList; /** Processes write queries. */ class WriterBase : public StrategyBase, public IQueryStrategy { @@ -193,6 +194,9 @@ class WriterBase : public StrategyBase, public IQueryStrategy { /* PROTECTED METHODS */ /* ********************************* */ + /** Utility function for constructing new FragmentMetadata instances. */ + shared_ptr create_fragment_metadata(); + /** Adss a fragment to `written_fragment_info_`. 
*/ Status add_written_fragment_info(const URI& uri); @@ -486,7 +490,7 @@ class WriterBase : public StrategyBase, public IQueryStrategy { template Status prepare_filter_and_write_tiles( const std::string& name, - std::vector& tile_batches, + IndexedList& tile_batches, tdb_shared_ptr frag_meta, DenseTiler* dense_tiler, uint64_t thread_num); diff --git a/tiledb/sm/query_plan/query_plan.cc b/tiledb/sm/query_plan/query_plan.cc index d52160cb2aa3..12df72b6dde5 100644 --- a/tiledb/sm/query_plan/query_plan.cc +++ b/tiledb/sm/query_plan/query_plan.cc @@ -35,8 +35,10 @@ #include "tiledb/sm/array/array.h" #include "tiledb/sm/enums/array_type.h" #include "tiledb/sm/enums/layout.h" +#include "tiledb/sm/enums/query_status.h" #include "tiledb/sm/filesystem/uri.h" #include "tiledb/sm/query/query.h" +#include "tiledb/sm/rest/rest_client.h" #include "external/include/nlohmann/json.hpp" @@ -49,6 +51,24 @@ namespace sm { /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ QueryPlan::QueryPlan(Query& query) { + if (query.array()->is_remote()) { + auto rest_client = query.rest_client(); + if (!rest_client) { + throw std::runtime_error( + "Failed to create a query plan; Remote query" + "with no REST client."); + } + + rest_client->post_query_plan_from_rest( + query.array()->array_uri(), query, *this); + + // We need to transition the query status to INITIALIZED to mimic the + // behavior of getting a query plan locally + query.set_status(QueryStatus::INITIALIZED); + + return; + } + array_uri_ = query.array()->array_uri().to_string(); vfs_backend_ = URI(array_uri_).backend_name(); query_layout_ = query.layout(); diff --git a/tiledb/sm/query_plan/test/unit_query_plan.cc b/tiledb/sm/query_plan/test/unit_query_plan.cc index 23ce164c092b..fedbc453a7f0 100644 --- a/tiledb/sm/query_plan/test/unit_query_plan.cc +++ b/tiledb/sm/query_plan/test/unit_query_plan.cc @@ -30,10 +30,12 @@ * This file tests the QueryPlan class */ +#include #include #include #include "../query_plan.h" 
#include "external/include/nlohmann/json.hpp" +#include "test/support/src/mem_helpers.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/enums/array_type.h" @@ -58,8 +60,9 @@ struct QueryPlanFx { URI array_uri(const std::string& uri); - TemporaryLocalDirectory temp_dir_; + shared_ptr memory_tracker_; + TemporaryLocalDirectory temp_dir_; Config cfg_; shared_ptr logger_; ContextResources resources_; @@ -70,18 +73,18 @@ tdb_unique_ptr QueryPlanFx::create_array(const URI uri) { // Create Domain uint64_t dim_dom[2]{0, 1}; uint64_t tile_extent = 1; - shared_ptr dim = - make_shared(HERE(), std::string("dim"), Datatype::UINT64); + shared_ptr dim = make_shared( + HERE(), std::string("dim"), Datatype::UINT64, memory_tracker_); throw_if_not_ok(dim->set_domain(&dim_dom)); throw_if_not_ok(dim->set_tile_extent(&tile_extent)); std::vector> dims = {dim}; - shared_ptr domain = - make_shared(HERE(), Layout::ROW_MAJOR, dims, Layout::ROW_MAJOR); + shared_ptr domain = make_shared( + HERE(), Layout::ROW_MAJOR, dims, Layout::ROW_MAJOR, memory_tracker_); // Create the ArraySchema - shared_ptr schema = - make_shared(HERE(), ArrayType::DENSE); + shared_ptr schema = make_shared( + HERE(), ArrayType::DENSE, tiledb::test::create_test_memory_tracker()); throw_if_not_ok(schema->set_domain(domain)); throw_if_not_ok(schema->add_attribute( make_shared( @@ -109,7 +112,8 @@ URI QueryPlanFx::array_uri(const std::string& array_name) { } QueryPlanFx::QueryPlanFx() - : logger_(make_shared(HERE(), "foo")) + : memory_tracker_(tiledb::test::create_test_memory_tracker()) + , logger_(make_shared(HERE(), "foo")) , resources_(cfg_, logger_, 1, 1, "") , sm_(make_shared(resources_, logger_, cfg_)) { } diff --git a/tiledb/sm/rest/rest_client.cc b/tiledb/sm/rest/rest_client.cc index f69b55c4d234..5c1a758104a3 100644 --- a/tiledb/sm/rest/rest_client.cc +++ b/tiledb/sm/rest/rest_client.cc @@ -43,6 +43,7 @@ #include "tiledb/sm/serialization/fragments.h" #include 
"tiledb/sm/serialization/group.h" #include "tiledb/sm/serialization/query.h" +#include "tiledb/sm/serialization/query_plan.h" #include "tiledb/sm/serialization/tiledb-rest.capnp.h" #include "tiledb/sm/serialization/vacuum.h" #include "tiledb/sm/rest/curl.h" // must be included last to avoid Windows.h @@ -57,6 +58,7 @@ #include #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/enums/query_type.h" #include "tiledb/sm/group/group.h" @@ -81,7 +83,8 @@ RestClient::RestClient() : stats_(nullptr) , config_(nullptr) , compute_tp_(nullptr) - , resubmit_incomplete_(true) { + , resubmit_incomplete_(true) + , memory_tracker_(nullptr) { auto st = utils::parse::convert( Config::REST_SERIALIZATION_DEFAULT_FORMAT, &serialization_type_); throw_if_not_ok(st); @@ -91,7 +94,8 @@ Status RestClient::init( stats::Stats* const parent_stats, const Config* config, ThreadPool* compute_tp, - const std::shared_ptr& logger) { + const std::shared_ptr& logger, + ContextResources& resources) { if (config == nullptr) return LOG_STATUS( Status_RestError("Error initializing rest client; config is null.")); @@ -103,10 +107,21 @@ Status RestClient::init( config_ = config; compute_tp_ = compute_tp; + // Setting the type of the memory tracker as MemoryTrackerType::REST_CLIENT + // for now. This is because the class is used in many places not directly tied + // to an array. 
+ memory_tracker_ = resources.create_memory_tracker(); + memory_tracker_->set_type(MemoryTrackerType::REST_CLIENT); + const char* c_str; RETURN_NOT_OK(config_->get("rest.server_address", &c_str)); - if (c_str != nullptr) + if (c_str != nullptr) { rest_server_ = std::string(c_str); + if (rest_server_.ends_with('/')) { + size_t pos = rest_server_.find_last_not_of('/'); + rest_server_.resize(pos + 1); + } + } if (rest_server_.empty()) return LOG_STATUS(Status_RestError( "Error initializing rest client; server address is empty.")); @@ -115,10 +130,20 @@ Status RestClient::init( if (c_str != nullptr) RETURN_NOT_OK(serialization_type_enum(c_str, &serialization_type_)); - bool found = false; - auto status = config_->get( + return Status::Ok(); +} + +Status RestClient::set_header( + const std::string& name, const std::string& value) { + extra_headers_[name] = value; + return Status::Ok(); +} + +bool RestClient::use_refactored_query(const Config& config) { + bool found = false, use_refactored_query = false; + auto status = config.get( "rest.use_refactored_array_open_and_query_submit", - &use_refactored_array_and_query_, + &use_refactored_query, &found); if (!status.ok() || !found) { throw std::runtime_error( @@ -126,13 +151,7 @@ Status RestClient::init( "configuration option from config"); } - return Status::Ok(); -} - -Status RestClient::set_header( - const std::string& name, const std::string& value) { - extra_headers_[name] = value; - return Status::Ok(); + return use_refactored_query; } tuple> RestClient::check_array_exists_from_rest( @@ -239,10 +258,8 @@ RestClient::get_array_schema_from_rest(const URI& uri) { ensure_json_null_delimited_string(&returned_data), nullopt); return { Status::Ok(), - make_shared( - HERE(), - serialization::array_schema_deserialize( - serialization_type_, returned_data))}; + serialization::array_schema_deserialize( + serialization_type_, returned_data, memory_tracker_)}; } shared_ptr RestClient::post_array_schema_from_rest( @@ -289,10 +306,8 
@@ shared_ptr RestClient::post_array_schema_from_rest( // Ensure data has a null delimiter for cap'n proto if using JSON throw_if_not_ok(ensure_json_null_delimited_string(&returned_data)); - return make_shared( - HERE(), - serialization::deserialize_load_array_schema_response( - serialization_type_, returned_data)); + return serialization::deserialize_load_array_schema_response( + serialization_type_, returned_data, memory_tracker_); } Status RestClient::post_array_schema_to_rest( @@ -376,7 +391,11 @@ Status RestClient::post_array_from_rest( // Ensure data has a null delimiter for cap'n proto if using JSON RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data)); return serialization::array_deserialize( - array, serialization_type_, returned_data, storage_manager); + array, + serialization_type_, + returned_data, + storage_manager, + memory_tracker_); } void RestClient::delete_array_from_rest(const URI& uri) { @@ -637,12 +656,17 @@ RestClient::post_enumerations_from_rest( uint64_t timestamp_start, uint64_t timestamp_end, Array* array, - const std::vector& enumeration_names) { + const std::vector& enumeration_names, + shared_ptr memory_tracker) { if (array == nullptr) { throw Status_RestError( "Error getting enumerations from REST; array is null."); } + if (!memory_tracker) { + memory_tracker = memory_tracker_; + } + // This should never be called with an empty list of enumeration names, but // there's no reason to not check an early return case here given that code // changes. 
@@ -687,7 +711,65 @@ RestClient::post_enumerations_from_rest( // Ensure data has a null delimiter for cap'n proto if using JSON throw_if_not_ok(ensure_json_null_delimited_string(&returned_data)); return serialization::deserialize_load_enumerations_response( - serialization_type_, returned_data); + serialization_type_, returned_data, memory_tracker); +} + +void RestClient::post_query_plan_from_rest( + const URI& uri, Query& query, QueryPlan& query_plan) { + // Get array + const Array* array = query.array(); + if (array == nullptr) { + throw Status_RestError("Error submitting query plan to REST; null array."); + } + + Buffer buff; + serialization::serialize_query_plan_request( + query.config(), query, serialization_type_, buff); + + // Wrap in a list + BufferList serialized; + throw_if_not_ok(serialized.add_buffer(std::move(buff))); + + // Init curl and form the URL + Curl curlc(logger_); + std::string array_ns, array_uri; + throw_if_not_ok(uri.get_rest_components(&array_ns, &array_uri)); + const std::string cache_key = array_ns + ":" + array_uri; + throw_if_not_ok( + curlc.init(config_, extra_headers_, &redirect_meta_, &redirect_mtx_)); + std::string url; + if (use_refactored_query(query.config())) { + url = redirect_uri(cache_key) + "/v3/arrays/" + array_ns + "/" + + curlc.url_escape(array_uri) + + "/query/plan?type=" + query_type_str(query.type()); + } else { + url = redirect_uri(cache_key) + "/v2/arrays/" + array_ns + "/" + + curlc.url_escape(array_uri) + + "/query/plan?type=" + query_type_str(query.type()); + } + + // Remote array reads always supply the timestamp. 
+ url += "&start_timestamp=" + std::to_string(array->timestamp_start()); + url += "&end_timestamp=" + std::to_string(array->timestamp_end()); + + // Get the data + Buffer returned_data; + throw_if_not_ok(curlc.post_data( + stats_, + url, + serialization_type_, + &serialized, + &returned_data, + cache_key)); + if (returned_data.data() == nullptr || returned_data.size() == 0) { + throw Status_RestError( + "Error getting query plan from REST; server returned no data."); + } + + // Ensure data has a null delimiter for cap'n proto if using JSON + throw_if_not_ok(ensure_json_null_delimited_string(&returned_data)); + query_plan = serialization::deserialize_query_plan_response( + query, serialization_type_, returned_data); } Status RestClient::submit_query_to_rest(const URI& uri, Query* query) { @@ -751,7 +833,7 @@ Status RestClient::post_query_submit( RETURN_NOT_OK( curlc.init(config_, extra_headers_, &redirect_meta_, &redirect_mtx_)); std::string url; - if (use_refactored_array_and_query_) { + if (use_refactored_query(query->config())) { url = redirect_uri(cache_key) + "/v3/arrays/" + array_ns + "/" + curlc.url_escape(array_uri) + "/query/submit?type=" + query_type_str(query->type()) + @@ -977,7 +1059,7 @@ Status RestClient::finalize_query_to_rest(const URI& uri, Query* query) { RETURN_NOT_OK( curlc.init(config_, extra_headers_, &redirect_meta_, &redirect_mtx_)); std::string url; - if (use_refactored_array_and_query_) { + if (use_refactored_query(query->config())) { url = redirect_uri(cache_key) + "/v3/arrays/" + array_ns + "/" + curlc.url_escape(array_uri) + "/query/finalize?type=" + query_type_str(query->type()); @@ -1032,7 +1114,7 @@ Status RestClient::submit_and_finalize_query_to_rest( RETURN_NOT_OK( curlc.init(config_, extra_headers_, &redirect_meta_, &redirect_mtx_)); std::string url; - if (use_refactored_array_and_query_) { + if (use_refactored_query(query->config())) { url = redirect_uri(cache_key) + "/v3/arrays/" + array_ns + "/" + curlc.url_escape(array_uri) + 
"/query/submit_and_finalize?type=" + query_type_str(query->type()); @@ -1265,7 +1347,7 @@ Status RestClient::post_fragment_info_from_rest( // Ensure data has a null delimiter for cap'n proto if using JSON RETURN_NOT_OK(ensure_json_null_delimited_string(&returned_data)); return serialization::fragment_info_deserialize( - fragment_info, serialization_type_, uri, returned_data); + fragment_info, serialization_type_, uri, returned_data, memory_tracker_); } Status RestClient::post_group_metadata_from_rest(const URI& uri, Group* group) { @@ -1516,6 +1598,48 @@ Status RestClient::post_vacuum_to_rest(const URI& uri, const Config& config) { stats_, url, serialization_type_, &serialized, &returned_data, cache_key); } +std::vector> +RestClient::post_consolidation_plan_from_rest( + const URI& uri, const Config& config, uint64_t fragment_size) { + Buffer buff; + serialization::serialize_consolidation_plan_request( + fragment_size, config, serialization_type_, buff); + + // Wrap in a list + BufferList serialized; + throw_if_not_ok(serialized.add_buffer(std::move(buff))); + + // Init curl and form the URL + Curl curlc(logger_); + std::string array_ns, array_uri; + throw_if_not_ok(uri.get_rest_components(&array_ns, &array_uri)); + const std::string cache_key = array_ns + ":" + array_uri; + throw_if_not_ok( + curlc.init(config_, extra_headers_, &redirect_meta_, &redirect_mtx_)); + const std::string url = redirect_uri(cache_key) + "/v1/arrays/" + array_ns + + "/" + curlc.url_escape(array_uri) + + "/consolidate/plan"; + + // Get the data + Buffer returned_data; + throw_if_not_ok(curlc.post_data( + stats_, + url, + serialization_type_, + &serialized, + &returned_data, + cache_key)); + if (returned_data.data() == nullptr || returned_data.size() == 0) { + throw Status_RestError( + "Error getting query plan from REST; server returned no data."); + } + + // Ensure data has a null delimiter for cap'n proto if using JSON + throw_if_not_ok(ensure_json_null_delimited_string(&returned_data)); 
+ return serialization::deserialize_consolidation_plan_response( + serialization_type_, returned_data); +} + #else RestClient::RestClient() { @@ -1525,7 +1649,11 @@ RestClient::RestClient() { } Status RestClient::init( - stats::Stats*, const Config*, ThreadPool*, const std::shared_ptr&) { + stats::Stats*, + const Config*, + ThreadPool*, + const std::shared_ptr&, + ContextResources&) { return LOG_STATUS( Status_RestError("Cannot use rest client; serialization not enabled.")); } @@ -1603,7 +1731,16 @@ Status RestClient::post_array_metadata_to_rest( std::vector> RestClient::post_enumerations_from_rest( - const URI&, uint64_t, uint64_t, Array*, const std::vector&) { + const URI&, + uint64_t, + uint64_t, + Array*, + const std::vector&, + shared_ptr) { + throw Status_RestError("Cannot use rest client; serialization not enabled."); +} + +void RestClient::post_query_plan_from_rest(const URI&, Query&, QueryPlan&) { throw Status_RestError("Cannot use rest client; serialization not enabled."); } @@ -1694,6 +1831,13 @@ Status RestClient::post_vacuum_to_rest(const URI&, const Config&) { Status_RestError("Cannot use rest client; serialization not enabled.")); } +std::vector> +RestClient::post_consolidation_plan_from_rest( + const URI&, const Config&, uint64_t) { + throw StatusException( + Status_RestError("Cannot use rest client; serialization not enabled.")); +} + #endif // TILEDB_SERIALIZATION } // namespace sm diff --git a/tiledb/sm/rest/rest_client.h b/tiledb/sm/rest/rest_client.h index 63d9044db9e2..25eb29507ddd 100644 --- a/tiledb/sm/rest/rest_client.h +++ b/tiledb/sm/rest/rest_client.h @@ -53,6 +53,8 @@ class ArraySchemaEvolution; class Config; class FragmentInfo; class Query; +class MemoryTracker; +class QueryPlan; enum class SerializationType : uint8_t; @@ -66,11 +68,21 @@ class RestClient { stats::Stats* parent_stats, const Config* config, ThreadPool* compute_tp, - const std::shared_ptr& logger); + const std::shared_ptr& logger, + ContextResources& resources); /** Sets 
a header that will be attached to all requests. */ Status set_header(const std::string& name, const std::string& value); + /** + * Check if use_refactored_array_open_and_query_submit is set in + * input config so that rest_client chooses the right URI + * + * @param config Config to check + * + * */ + static bool use_refactored_query(const Config& config); + /** * Check if an array exists by making a REST call. To start with this fetches * the schema but ignores the body returned if non-error @@ -252,7 +264,18 @@ class RestClient { uint64_t timestamp_start, uint64_t timestamp_end, Array* array, - const std::vector& enumeration_names); + const std::vector& enumeration_names, + shared_ptr memory_tracker = nullptr); + + /** + * Get the requested query plan from the REST server via POST request. + * + * @param uri Array URI. + * @param query Query to fetch query plan for. + * @param query_plan The requested query plan. + */ + void post_query_plan_from_rest( + const URI& uri, Query& query, QueryPlan& query_plan); /** * Post a data query to rest server @@ -382,6 +405,21 @@ class RestClient { */ Status post_vacuum_to_rest(const URI& uri, const Config& config); + inline std::string rest_server() const { + return rest_server_; + } + + /** + * Get consolidation plan from the REST server via POST request. + * + * @param uri Array URI. + * @param config Config of the array. + * @param fragment_size Maximum fragment size for constructing the plan. 
+ * @return The requested consolidation plan + */ + std::vector> post_consolidation_plan_from_rest( + const URI& uri, const Config& config, uint64_t fragment_size); + private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ @@ -413,12 +451,6 @@ class RestClient { */ bool resubmit_incomplete_; - /** - * If true, the new, experimental REST routes and APIs for opening an array - * and submitting a query will be used - */ - bool use_refactored_array_and_query_; - /** Collection of extra headers that are attached to REST requests. */ std::unordered_map extra_headers_; @@ -434,6 +466,9 @@ class RestClient { /** UID of the logger instance */ inline static std::atomic logger_id_ = 0; + /** The class MemoryTracker. */ + shared_ptr memory_tracker_; + /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ diff --git a/tiledb/sm/rtree/rtree.cc b/tiledb/sm/rtree/rtree.cc index 8578636c3714..e91a1838f792 100644 --- a/tiledb/sm/rtree/rtree.cc +++ b/tiledb/sm/rtree/rtree.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2021 TileDB, Inc. + * @copyright Copyright (c) 2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -32,6 +32,7 @@ #include "tiledb/sm/rtree/rtree.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/enums/datatype.h" @@ -53,43 +54,18 @@ namespace sm { /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ -RTree::RTree() { - domain_ = nullptr; - fanout_ = 0; - deserialized_buffer_size_ = 0; -} - -RTree::RTree(const Domain* domain, unsigned fanout) - : domain_(domain) - , fanout_(fanout) { +RTree::RTree( + const Domain* domain, + unsigned fanout, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , domain_(domain) + , fanout_(fanout) + , levels_(memory_tracker_->get_resource(MemoryType::RTREE)) { } RTree::~RTree() = default; -RTree::RTree(const RTree& rtree) - : RTree() { - auto clone = rtree.clone(); - swap(clone); -} - -RTree::RTree(RTree&& rtree) noexcept - : RTree() { - swap(rtree); -} - -RTree& RTree::operator=(const RTree& rtree) { - auto clone = rtree.clone(); - swap(clone); - - return *this; -} - -RTree& RTree::operator=(RTree&& rtree) noexcept { - swap(rtree); - - return *this; -} - /* ****************************** */ /* API */ /* ****************************** */ @@ -240,7 +216,7 @@ const NDRange& RTree::leaf(uint64_t leaf_idx) const { return levels_.back()[leaf_idx]; } -const std::vector& RTree::leaves() const { +const tdb::pmr::vector& RTree::leaves() const { assert(!levels_.empty()); return levels_.back(); } @@ -297,10 +273,10 @@ Status RTree::set_leaf(uint64_t leaf_id, const NDRange& mbr) { return Status::Ok(); } -Status RTree::set_leaves(const std::vector& mbrs) { +Status RTree::set_leaves(const tdb::pmr::vector& mbrs) { levels_.clear(); levels_.resize(1); - levels_[0] = mbrs; + levels_[0].assign(mbrs.begin(), mbrs.end()); 
return Status::Ok(); } @@ -328,13 +304,21 @@ void RTree::deserialize( deserialize_v5(deserializer, domain); } +void RTree::reset(const Domain* domain, unsigned int fanout) { + domain_ = domain; + fanout_ = fanout; + free_memory(); +} + /* ****************************** */ /* PRIVATE METHODS */ /* ****************************** */ RTree::Level RTree::build_level(const Level& level) { auto cur_mbr_num = (uint64_t)level.size(); - Level new_level((uint64_t)std::ceil((double)cur_mbr_num / fanout_)); + Level new_level( + (uint64_t)std::ceil((double)cur_mbr_num / fanout_), + memory_tracker_->get_resource(MemoryType::RTREE)); auto new_mbr_num = (uint64_t)new_level.size(); uint64_t mbrs_visited = 0; @@ -344,16 +328,7 @@ RTree::Level RTree::build_level(const Level& level) { domain_->expand_ndrange(level[mbrs_visited], &new_level[i]); } - return new_level; -} - -RTree RTree::clone() const { - RTree clone; - clone.domain_ = domain_; - clone.fanout_ = fanout_; - clone.levels_ = levels_; - - return clone; + return {new_level, memory_tracker_->get_resource(MemoryType::RTREE)}; } void RTree::deserialize_v1_v4( @@ -424,11 +399,5 @@ void RTree::deserialize_v5(Deserializer& deserializer, const Domain* domain) { domain_ = domain; } -void RTree::swap(RTree& rtree) { - std::swap(domain_, rtree.domain_); - std::swap(fanout_, rtree.fanout_); - std::swap(levels_, rtree.levels_); -} - } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/rtree/rtree.h b/tiledb/sm/rtree/rtree.h index ad70a0b8066c..35fd505d7bed 100644 --- a/tiledb/sm/rtree/rtree.h +++ b/tiledb/sm/rtree/rtree.h @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2017-2021 TileDB, Inc. + * @copyright Copyright (c) 2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -36,6 +36,7 @@ #include #include "tiledb/common/common.h" +#include "tiledb/common/pmr.h" #include "tiledb/common/status.h" #include "tiledb/sm/array_schema/domain.h" #include "tiledb/sm/misc/tile_overlap.h" @@ -48,6 +49,7 @@ namespace sm { class Buffer; class ConstBuffer; +class MemoryTracker; enum class Datatype : uint8_t; enum class Layout : uint8_t; @@ -62,27 +64,19 @@ class RTree { /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ + RTree() = delete; /** Constructor. */ - RTree(); - - /** Constructor. */ - RTree(const Domain* domain, unsigned fanout); + RTree( + const Domain* domain, + unsigned fanout, + shared_ptr memory_tracker); /** Destructor. */ ~RTree(); - /** Copy constructor. This performs a deep copy. */ - RTree(const RTree& rtree); - - /** Move constructor. */ - RTree(RTree&& rtree) noexcept; - - /** Copy-assign operator. This performs a deep copy. */ - RTree& operator=(const RTree& rtree); - - /** Move-assign operator. */ - RTree& operator=(RTree&& rtree) noexcept; + DISABLE_COPY_AND_COPY_ASSIGN(RTree); + DISABLE_MOVE_AND_MOVE_ASSIGN(RTree); /* ********************************* */ /* API */ @@ -125,7 +119,7 @@ class RTree { const NDRange& leaf(uint64_t leaf_idx) const; /** Returns the leaves of the tree. */ - const std::vector& leaves() const; + const tdb::pmr::vector& leaves() const; /** * Returns the number of leaves that are stored in a (full) subtree @@ -158,7 +152,7 @@ class RTree { * Sets the input MBRs as leaves. This will destroy the existing * RTree. */ - Status set_leaves(const std::vector& mbrs); + Status set_leaves(const tdb::pmr::vector& mbrs); /** * Resizes the leaf level. 
It destroys the upper levels @@ -176,6 +170,14 @@ class RTree { void deserialize( Deserializer& deserializer, const Domain* domain, uint32_t version); + /** + * Resets the RTree with the input domain and fanout. + * + * @param domain The domain to use for the RTree. + * @param fanout The fanout of the RTree. + */ + void reset(const Domain* domain, unsigned fanout); + private: /* ********************************* */ /* PRIVATE TYPE DEFINITIONS */ @@ -203,7 +205,7 @@ class RTree { * `levels_`, where the first level is the root. This is how * we can infer which tree level each `Level` object corresponds to. */ - typedef std::vector Level; + typedef tdb::pmr::vector Level; /** * Defines an R-Tree level entry, which corresponds to a node @@ -222,6 +224,9 @@ class RTree { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** Memory tracker for the RTree. */ + shared_ptr memory_tracker_; + /** * The domain for which this R-tree provides an index. * @@ -237,7 +242,7 @@ class RTree { * The tree levels. The first level is the root. Note that the root * always consists of a single MBR. */ - std::vector levels_; + tdb::pmr::vector levels_; /** * Stores the size of the buffer used to deserialize the data, used for @@ -252,9 +257,6 @@ class RTree { /** Builds a single tree level on top of the input level. */ Level build_level(const Level& level); - /** Returns a deep copy of this RTree. */ - RTree clone() const; - /** * Deserializes the contents of the object from the input buffer based * on the format version. @@ -272,12 +274,6 @@ class RTree { * Applicable to versions >= 5 */ void deserialize_v5(Deserializer& deserializer, const Domain* domain); - - /** - * Swaps the contents (all field values) of this RTree with the - * given ``rtree``. 
- */ - void swap(RTree& rtree); }; } // namespace sm diff --git a/tiledb/sm/rtree/test/CMakeLists.txt b/tiledb/sm/rtree/test/CMakeLists.txt index ef40ea8a1224..13aade8948ec 100644 --- a/tiledb/sm/rtree/test/CMakeLists.txt +++ b/tiledb/sm/rtree/test/CMakeLists.txt @@ -28,5 +28,6 @@ include(unit_test) commence(unit_test rtree) this_target_sources(main.cc unit_rtree.cc) + this_target_link_libraries(tiledb_test_support_lib) this_target_object_libraries(rtree) conclude(unit_test) diff --git a/tiledb/sm/rtree/test/compile_rtree_main.cc b/tiledb/sm/rtree/test/compile_rtree_main.cc index d720c36ba364..f1f30d98cb9c 100644 --- a/tiledb/sm/rtree/test/compile_rtree_main.cc +++ b/tiledb/sm/rtree/test/compile_rtree_main.cc @@ -29,6 +29,6 @@ #include "../rtree.h" int main() { - tiledb::sm::RTree x{}; + tiledb::sm::RTree x{nullptr, 0, nullptr}; return 0; } diff --git a/tiledb/sm/rtree/test/unit_rtree.cc b/tiledb/sm/rtree/test/unit_rtree.cc index 029d75988348..6d81dd0c9499 100644 --- a/tiledb/sm/rtree/test/unit_rtree.cc +++ b/tiledb/sm/rtree/test/unit_rtree.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2022 TileDB, Inc. + * @copyright Copyright (c) 2024 TileDB, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -30,7 +30,9 @@ * Tests the `RTree` class. 
*/ +#include "test/support/src/mem_helpers.h" #include "tiledb/common/common.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/enums/datatype.h" #include "tiledb/sm/enums/layout.h" @@ -40,15 +42,18 @@ #include using namespace tiledb::sm; +using tiledb::test::create_test_memory_tracker; // `mbrs` contains a flattened vector of values (low, high) // per dimension per MBR template -std::vector create_mbrs(const std::vector& mbrs) { +tdb::pmr::vector create_mbrs( + const std::vector& mbrs, shared_ptr tracker) { assert(mbrs.size() % 2 * D == 0); uint64_t mbr_num = (uint64_t)(mbrs.size() / (2 * D)); - std::vector ret(mbr_num); + tdb::pmr::vector ret( + mbr_num, tracker->get_resource(MemoryType::RTREE)); uint64_t r_size = 2 * sizeof(T); for (uint64_t m = 0; m < mbr_num; ++m) { ret[m].resize(D); @@ -57,16 +62,19 @@ std::vector create_mbrs(const std::vector& mbrs) { } } - return ret; + return {ret, tracker->get_resource(MemoryType::RTREE)}; } template -std::vector create_mbrs( - const std::vector& r1, const std::vector& r2) { +tdb::pmr::vector create_mbrs( + const std::vector& r1, + const std::vector& r2, + shared_ptr tracker) { assert(r1.size() == r2.size()); uint64_t mbr_num = (uint64_t)(r1.size() / 2); - std::vector ret(mbr_num); + tdb::pmr::vector ret( + mbr_num, tracker->get_resource(MemoryType::RTREE)); uint64_t r1_size = 2 * sizeof(T1); uint64_t r2_size = 2 * sizeof(T2); for (uint64_t m = 0; m < mbr_num; ++m) { @@ -75,14 +83,15 @@ std::vector create_mbrs( ret[m][1] = Range(&r2[2 * m], r2_size); } - return ret; + return {ret, tracker->get_resource(MemoryType::RTREE)}; } Domain create_domain( const std::vector& dim_names, const std::vector& dim_types, const std::vector& dim_domains, - const std::vector& dim_tile_extents) { + const std::vector& dim_tile_extents, + shared_ptr memory_tracker) { assert(!dim_names.empty()); assert(dim_names.size() == dim_types.size()); assert(dim_names.size() == 
dim_domains.size()); @@ -109,16 +118,19 @@ Domain create_domain( cell_val_num, range, FilterPipeline(), - tile_extent); + tile_extent, + tiledb::test::get_test_memory_tracker()); dimensions.emplace_back(std::move(dim)); } - return Domain(Layout::ROW_MAJOR, dimensions, Layout::ROW_MAJOR); + return Domain( + Layout::ROW_MAJOR, dimensions, Layout::ROW_MAJOR, memory_tracker); } TEST_CASE("RTree: Test R-Tree, basic functions", "[rtree][basic]") { // Empty tree - RTree rtree0; + auto tracker = create_test_memory_tracker(); + RTree rtree0(nullptr, 0, tracker); CHECK(rtree0.height() == 0); CHECK(rtree0.dim_num() == 0); CHECK(rtree0.domain() == nullptr); @@ -129,11 +141,12 @@ TEST_CASE("RTree: Test R-Tree, basic functions", "[rtree][basic]") { // 1D int32_t dim_dom[] = {1, 1000}; int32_t dim_extent = 10; - Domain dom1 = - create_domain({"d"}, {Datatype::INT32}, {dim_dom}, {&dim_extent}); - std::vector mbrs_1d = create_mbrs({1, 3, 5, 10, 20, 22}); - const Domain d1{dom1}; - RTree rtree1(&d1, 3); + Domain dom1 = create_domain( + {"d"}, {Datatype::INT32}, {dim_dom}, {&dim_extent}, tracker); + const Domain d1 = create_domain( + {"d"}, {Datatype::INT32}, {dim_dom}, {&dim_extent}, tracker); + auto mbrs_1d = create_mbrs({1, 3, 5, 10, 20, 22}, tracker); + RTree rtree1(&d1, 3, tracker); CHECK(!rtree1.set_leaf(0, mbrs_1d[0]).ok()); CHECK(rtree1.set_leaf_num(mbrs_1d.size()).ok()); for (size_t m = 0; m < mbrs_1d.size(); ++m) @@ -190,11 +203,17 @@ TEST_CASE("RTree: Test R-Tree, basic functions", "[rtree][basic]") { {"d1", "d2"}, {Datatype::INT64, Datatype::INT64}, {dim_dom_2, dim_dom_2}, - {&dim_extent_2, &dim_extent_2}); - std::vector mbrs_2d = - create_mbrs({1, 3, 5, 10, 20, 22, 24, 25, 11, 15, 30, 31}); - const Domain d2{dom2}; - RTree rtree2(&d2, 5); + {&dim_extent_2, &dim_extent_2}, + tracker); + const Domain d2 = create_domain( + {"d1", "d2"}, + {Datatype::INT64, Datatype::INT64}, + {dim_dom_2, dim_dom_2}, + {&dim_extent_2, &dim_extent_2}, + tracker); + auto mbrs_2d = create_mbrs( 
+ {1, 3, 5, 10, 20, 22, 24, 25, 11, 15, 30, 31}, tracker); + RTree rtree2(&d2, 5, tracker); CHECK(rtree2.set_leaves(mbrs_2d).ok()); rtree2.build_tree(); CHECK(rtree2.height() == 2); @@ -228,12 +247,13 @@ TEST_CASE("RTree: Test R-Tree, basic functions", "[rtree][basic]") { // Float datatype float dim_dom_f[] = {1.0, 1000.0}; float dim_extent_f = 10.0; - std::vector mbrs_f = - create_mbrs({1.0f, 3.0f, 5.0f, 10.0f, 20.0f, 22.0f}); - Domain dom2f = - create_domain({"d"}, {Datatype::FLOAT32}, {dim_dom_f}, {&dim_extent_f}); - const Domain d2f{dom2f}; - RTree rtreef(&d2f, 5); + auto mbrs_f = + create_mbrs({1.0f, 3.0f, 5.0f, 10.0f, 20.0f, 22.0f}, tracker); + Domain dom2f = create_domain( + {"d"}, {Datatype::FLOAT32}, {dim_dom_f}, {&dim_extent_f}, tracker); + const Domain d2f = create_domain( + {"d"}, {Datatype::FLOAT32}, {dim_dom_f}, {&dim_extent_f}, tracker); + RTree rtreef(&d2f, 5, tracker); CHECK(rtreef.set_leaves(mbrs_f).ok()); rtreef.build_tree(); @@ -269,14 +289,14 @@ TEST_CASE("RTree: Test R-Tree, basic functions", "[rtree][basic]") { TEST_CASE("RTree: Test 1D R-tree, height 2", "[rtree][1d][2h]") { // Build tree + auto tracker = create_test_memory_tracker(); std::vector is_default(1, false); int32_t dim_dom[] = {1, 1000}; int32_t dim_extent = 10; - Domain dom1 = - create_domain({"d"}, {Datatype::INT32}, {dim_dom}, {&dim_extent}); - std::vector mbrs = create_mbrs({1, 3, 5, 10, 20, 22}); - const Domain d1{dom1}; - RTree rtree(&d1, 3); + Domain dom1 = create_domain( + {"d"}, {Datatype::INT32}, {dim_dom}, {&dim_extent}, tracker); + auto mbrs = create_mbrs({1, 3, 5, 10, 20, 22}, tracker); + RTree rtree(&dom1, 3, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 2); @@ -315,15 +335,15 @@ TEST_CASE("RTree: Test 1D R-tree, height 2", "[rtree][1d][2h]") { TEST_CASE("RTree: Test 1D R-tree, height 3", "[rtree][1d][3h]") { // Build tree + auto tracker = create_test_memory_tracker(); std::vector is_default(1, false); int32_t dim_dom[] = 
{1, 1000}; int32_t dim_extent = 10; - std::vector mbrs = create_mbrs( - {1, 3, 5, 10, 20, 22, 30, 35, 36, 38, 40, 49, 50, 51, 65, 69}); - Domain dom1 = - create_domain({"d"}, {Datatype::INT32}, {dim_dom}, {&dim_extent}); - const Domain d1(dom1); - RTree rtree(&d1, 3); + auto mbrs = create_mbrs( + {1, 3, 5, 10, 20, 22, 30, 35, 36, 38, 40, 49, 50, 51, 65, 69}, tracker); + Domain dom1 = create_domain( + {"d"}, {Datatype::INT32}, {dim_dom}, {&dim_extent}, tracker); + RTree rtree(&dom1, 3, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 3); @@ -381,18 +401,19 @@ TEST_CASE("RTree: Test 1D R-tree, height 3", "[rtree][1d][3h]") { TEST_CASE("RTree: Test 2D R-tree, height 2", "[rtree][2d][2h]") { // Build tree + auto tracker = create_test_memory_tracker(); std::vector is_default(2, false); int32_t dim_dom[] = {1, 1000}; int32_t dim_extent = 10; - Domain dom2 = create_domain( + Domain dom1 = create_domain( {"d1", "d2"}, {Datatype::INT32, Datatype::INT32}, {dim_dom, dim_dom}, - {&dim_extent, &dim_extent}); - std::vector mbrs = - create_mbrs({1, 3, 2, 4, 5, 7, 6, 9, 10, 12, 10, 15}); - const Domain d2{dom2}; - RTree rtree(&d2, 3); + {&dim_extent, &dim_extent}, + tracker); + auto mbrs = create_mbrs( + {1, 3, 2, 4, 5, 7, 6, 9, 10, 12, 10, 15}, tracker); + RTree rtree(&dom1, 3, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 2); @@ -434,19 +455,21 @@ TEST_CASE("RTree: Test 2D R-tree, height 2", "[rtree][2d][2h]") { TEST_CASE("RTree: Test 2D R-tree, height 3", "[rtree][2d][3h]") { // Build tree + auto tracker = create_test_memory_tracker(); std::vector is_default(2, false); int32_t dim_dom[] = {1, 1000}; int32_t dim_extent = 10; - Domain dom2 = create_domain( + Domain dom1 = create_domain( {"d1", "d2"}, {Datatype::INT32, Datatype::INT32}, {dim_dom, dim_dom}, - {&dim_extent, &dim_extent}); - std::vector mbrs = create_mbrs( + {&dim_extent, &dim_extent}, + tracker); + auto mbrs = create_mbrs( {1, 3, 2, 
4, 5, 7, 6, 9, 10, 12, 10, 15, 11, 15, 20, 22, 16, 16, - 23, 23, 19, 20, 24, 26, 25, 28, 30, 32, 30, 35, 35, 37, 40, 42, 40, 42}); - const Domain d2{dom2}; - RTree rtree(&d2, 3); + 23, 23, 19, 20, 24, 26, 25, 28, 30, 32, 30, 35, 35, 37, 40, 42, 40, 42}, + tracker); + RTree rtree(&dom1, 3, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 3); @@ -511,20 +534,27 @@ TEST_CASE( "RTree: Test R-Tree, heterogeneous (uint8, int32), basic functions", "[rtree][basic][heter]") { // Create RTree with dimensions uint8, int32 + auto tracker = create_test_memory_tracker(); std::vector is_default(2, false); uint8_t uint8_dom[] = {0, 10}; int32_t int32_dom[] = {5, 10}; uint8_t uint8_extent = 2; int32_t int32_extent = 2; - Domain dom = create_domain( + Domain dom1 = create_domain( {"d1", "d2"}, {Datatype::UINT8, Datatype::INT32}, {uint8_dom, int32_dom}, - {&uint8_extent, &int32_extent}); - std::vector mbrs = - create_mbrs({0, 1, 3, 5}, {5, 6, 7, 9}); - const Domain d1{dom}; - RTree rtree(&d1, 5); + {&uint8_extent, &int32_extent}, + tracker); + const Domain d1 = create_domain( + {"d1", "d2"}, + {Datatype::UINT8, Datatype::INT32}, + {uint8_dom, int32_dom}, + {&uint8_extent, &int32_extent}, + tracker); + auto mbrs = + create_mbrs({0, 1, 3, 5}, {5, 6, 7, 9}, tracker); + RTree rtree(&d1, 5, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 2); @@ -539,7 +569,7 @@ TEST_CASE( int32_t int32_r_no[] = {1, 10}; range_no[0] = Range(uint8_r_no, sizeof(uint8_r_no)); range_no[1] = Range(int32_r_no, sizeof(int32_r_no)); - double ratio = dom.overlap_ratio(range_no, is_default, mbrs[0]); + double ratio = dom1.overlap_ratio(range_no, is_default, mbrs[0]); CHECK(ratio == 0.0); // Check full domain overlap @@ -548,9 +578,9 @@ TEST_CASE( int32_t int32_r_full[] = {1, 10}; range_full[0] = Range(uint8_r_full, sizeof(uint8_r_full)); range_full[1] = Range(int32_r_full, sizeof(int32_r_full)); - ratio = dom.overlap_ratio(range_full, 
is_default, mbrs[0]); + ratio = dom1.overlap_ratio(range_full, is_default, mbrs[0]); CHECK(ratio == 1.0); - ratio = dom.overlap_ratio(range_full, is_default, mbrs[1]); + ratio = dom1.overlap_ratio(range_full, is_default, mbrs[1]); CHECK(ratio == 1.0); // Check partial domain overlap @@ -559,7 +589,7 @@ TEST_CASE( int32_t int32_r_part[] = {5, 5}; range_part[0] = Range(uint8_r_part, sizeof(uint8_r_part)); range_part[1] = Range(int32_r_part, sizeof(int32_r_part)); - ratio = dom.overlap_ratio(range_part, is_default, mbrs[0]); + ratio = dom1.overlap_ratio(range_part, is_default, mbrs[0]); CHECK(ratio == 0.25); } @@ -567,20 +597,27 @@ TEST_CASE( "RTree: Test R-Tree, heterogeneous (uint64, float32), basic functions", "[rtree][basic][heter]") { // Create RTree with dimensions uint64, float32 + auto tracker = create_test_memory_tracker(); std::vector is_default(2, false); uint64_t uint64_dom[] = {0, 10}; float float_dom[] = {0.1f, 0.9f}; uint64_t uint64_extent = 2; float float_extent = 0.1f; - Domain dom = create_domain( + Domain dom1 = create_domain( + {"d1", "d2"}, + {Datatype::UINT64, Datatype::FLOAT32}, + {uint64_dom, float_dom}, + {&uint64_extent, &float_extent}, + tracker); + const Domain d1 = create_domain( {"d1", "d2"}, {Datatype::UINT64, Datatype::FLOAT32}, {uint64_dom, float_dom}, - {&uint64_extent, &float_extent}); - std::vector mbrs = - create_mbrs({0, 1, 3, 5}, {.5f, .6f, .7f, .9f}); - const Domain d1{dom}; - RTree rtree(&d1, 5); + {&uint64_extent, &float_extent}, + tracker); + auto mbrs = + create_mbrs({0, 1, 3, 5}, {.5f, .6f, .7f, .9f}, tracker); + RTree rtree(&d1, 5, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 2); @@ -595,7 +632,7 @@ TEST_CASE( float float_r_no[] = {.1f, .9f}; range_no[0] = Range(uint64_r_no, sizeof(uint64_r_no)); range_no[1] = Range(float_r_no, sizeof(float_r_no)); - double ratio = dom.overlap_ratio(range_no, is_default, mbrs[0]); + double ratio = dom1.overlap_ratio(range_no, is_default, 
mbrs[0]); CHECK(ratio == 0.0); // Check full domain overlap @@ -604,9 +641,9 @@ TEST_CASE( float float_r_full[] = {.1f, 1.0f}; range_full[0] = Range(uint64_r_full, sizeof(uint64_r_full)); range_full[1] = Range(float_r_full, sizeof(float_r_full)); - ratio = dom.overlap_ratio(range_full, is_default, mbrs[0]); + ratio = dom1.overlap_ratio(range_full, is_default, mbrs[0]); CHECK(ratio == 1.0); - ratio = dom.overlap_ratio(range_full, is_default, mbrs[1]); + ratio = dom1.overlap_ratio(range_full, is_default, mbrs[1]); CHECK(ratio == 1.0); // Check partial domain overlap @@ -615,7 +652,7 @@ TEST_CASE( float float_r_part[] = {.5f, .55f}; range_part[0] = Range(uint64_r_part, sizeof(uint64_r_part)); range_part[1] = Range(float_r_part, sizeof(float_r_part)); - ratio = dom.overlap_ratio(range_part, is_default, mbrs[0]); + ratio = dom1.overlap_ratio(range_part, is_default, mbrs[0]); CHECK(ratio == 0.25); } @@ -623,6 +660,7 @@ TEST_CASE( "RTree: Test 2D R-tree, height 2, heterogeneous (uint8, int32)", "[rtree][2d][2h][heter]") { // Create RTree with dimensions uint8, int32 + auto tracker = create_test_memory_tracker(); std::vector is_default(2, false); uint8_t uint8_dom[] = {0, 200}; int32_t int32_dom[] = {5, 100}; @@ -632,11 +670,11 @@ TEST_CASE( {"d1", "d2"}, {Datatype::UINT8, Datatype::INT32}, {uint8_dom, int32_dom}, - {&uint8_extent, &int32_extent}); - std::vector mbrs = - create_mbrs({0, 1, 3, 5, 11, 20}, {5, 6, 7, 9, 11, 30}); - const Domain d1{dom}; - RTree rtree(&d1, 3); + {&uint8_extent, &int32_extent}, + tracker); + auto mbrs = create_mbrs( + {0, 1, 3, 5, 11, 20}, {5, 6, 7, 9, 11, 30}, tracker); + RTree rtree(&dom, 3, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 2); @@ -692,6 +730,7 @@ TEST_CASE( "RTree: Test 2D R-tree, height 3, heterogeneous (uint8, int32)", "[rtree][2d][2h][heter]") { // Create RTree with dimensions uint8, int32 + auto tracker = create_test_memory_tracker(); std::vector is_default(2, false); uint8_t 
uint8_dom[] = {0, 200}; int32_t int32_dom[] = {5, 100}; @@ -701,11 +740,11 @@ TEST_CASE( {"d1", "d2"}, {Datatype::UINT8, Datatype::INT32}, {uint8_dom, int32_dom}, - {&uint8_extent, &int32_extent}); - std::vector mbrs = create_mbrs( - {0, 1, 3, 5, 11, 20, 21, 26}, {5, 6, 7, 9, 11, 30, 31, 40}); - const Domain d1{dom}; - RTree rtree(&d1, 2); + {&uint8_extent, &int32_extent}, + tracker); + auto mbrs = create_mbrs( + {0, 1, 3, 5, 11, 20, 21, 26}, {5, 6, 7, 9, 11, 30, 31, 40}, tracker); + RTree rtree(&dom, 2, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 3); @@ -788,11 +827,13 @@ TEST_CASE( // `mbrs` contains a flattened vector of values (low, high) // per dimension per MBR template -std::vector create_str_mbrs(const std::vector& mbrs) { +tdb::pmr::vector create_str_mbrs( + const std::vector& mbrs, shared_ptr tracker) { assert(mbrs.size() % 2 * D == 0); uint64_t mbr_num = (uint64_t)(mbrs.size() / (2 * D)); - std::vector ret(mbr_num); + tdb::pmr::vector ret( + mbr_num, tracker->get_resource(MemoryType::RTREE)); for (uint64_t m = 0; m < mbr_num; ++m) { ret[m].resize(D); for (unsigned d = 0; d < D; ++d) { @@ -802,19 +843,21 @@ std::vector create_str_mbrs(const std::vector& mbrs) { } } - return ret; + return {ret, tracker->get_resource(MemoryType::RTREE)}; } // `mbrs` contains a flattened vector of values (low, high) // per dimension per MBR -std::vector create_str_int32_mbrs( +tdb::pmr::vector create_str_int32_mbrs( const std::vector& mbrs_str, - const std::vector mbrs_int) { + const std::vector mbrs_int, + shared_ptr tracker) { assert(mbrs_str.size() == mbrs_int.size()); assert(mbrs_str.size() % 2 == 0); uint64_t mbr_num = (uint64_t)(mbrs_str.size() / 2); - std::vector ret(mbr_num); + tdb::pmr::vector ret( + mbr_num, tracker->get_resource(MemoryType::RTREE)); for (uint64_t m = 0; m < mbr_num; ++m) { ret[m].resize(2); const auto& start = mbrs_str[2 * m]; @@ -824,25 +867,21 @@ std::vector create_str_int32_mbrs( ret[m][1] = 
Range(range, sizeof(range)); } - return ret; -} - -std::pair range_to_str(const Range& r) { - return std::pair(r.start_str(), r.end_str()); + return {ret, tracker->get_resource(MemoryType::RTREE)}; } TEST_CASE( "RTree: Test 1D R-tree, string dims, height 2", "[rtree][1d][string-dims][2h]") { // Build tree + auto tracker = create_test_memory_tracker(); std::vector is_default(1, false); - Domain dom1 = - create_domain({"d"}, {Datatype::STRING_ASCII}, {nullptr}, {nullptr}); - std::vector mbrs = - create_str_mbrs<1>({"aa", "b", "eee", "g", "gggg", "ii"}); + Domain dom1 = create_domain( + {"d"}, {Datatype::STRING_ASCII}, {nullptr}, {nullptr}, tracker); + auto mbrs = + create_str_mbrs<1>({"aa", "b", "eee", "g", "gggg", "ii"}, tracker); - const Domain d1{dom1}; - RTree rtree(&d1, 3); + RTree rtree(&dom1, 3, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 2); @@ -913,10 +952,11 @@ TEST_CASE( "RTree: Test 1D R-tree, string dims, height 3", "[rtree][1d][string-dims][3h]") { // Build tree + auto tracker = create_test_memory_tracker(); std::vector is_default(1, false); - Domain dom1 = - create_domain({"d"}, {Datatype::STRING_ASCII}, {nullptr}, {nullptr}); - std::vector mbrs = create_str_mbrs<1>( + Domain dom1 = create_domain( + {"d"}, {Datatype::STRING_ASCII}, {nullptr}, {nullptr}, tracker); + auto mbrs = create_str_mbrs<1>( {"aa", "b", "eee", @@ -928,10 +968,10 @@ TEST_CASE( "mm", "mmn", "oo", - "oop"}); + "oop"}, + tracker); - const Domain d1{dom1}; - RTree rtree(&d1, 3); + RTree rtree(&dom1, 3, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 3); @@ -1005,28 +1045,19 @@ TEST_CASE( "RTree: Test 2D R-tree, string dims, height 2", "[rtree][2d][string-dims][2h]") { // Build tree + auto tracker = create_test_memory_tracker(); std::vector is_default(2, false); Domain dom = create_domain( {"d1", "d2"}, {Datatype::STRING_ASCII, Datatype::STRING_ASCII}, {nullptr, nullptr}, - {nullptr, nullptr}); - 
std::vector mbrs = create_str_mbrs<2>( - {"aa", - "b", - "eee", - "g", - "gggg", - "ii", - "jj", - "lll", - "m", - "n", - "oo", - "qqq"}); + {nullptr, nullptr}, + tracker); + auto mbrs = create_str_mbrs<2>( + {"aa", "b", "eee", "g", "gggg", "ii", "jj", "lll", "m", "n", "oo", "qqq"}, + tracker); - const Domain d1{dom}; - RTree rtree(&d1, 3); + RTree rtree(&dom, 3, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 2); @@ -1123,16 +1154,17 @@ TEST_CASE( std::vector is_default(2, false); int32_t dom_int32[] = {1, 20}; int32_t tile_extent = 5; + auto tracker = create_test_memory_tracker(); Domain dom = create_domain( {"d1", "d2"}, {Datatype::STRING_ASCII, Datatype::INT32}, {nullptr, dom_int32}, - {nullptr, &tile_extent}); - std::vector mbrs = create_str_int32_mbrs( - {"aa", "b", "eee", "g", "gggg", "ii"}, {1, 5, 7, 8, 10, 14}); + {nullptr, &tile_extent}, + tracker); + auto mbrs = create_str_int32_mbrs( + {"aa", "b", "eee", "g", "gggg", "ii"}, {1, 5, 7, 8, 10, 14}, tracker); - const Domain d1{dom}; - RTree rtree(&d1, 3); + RTree rtree(&dom, 3, tracker); CHECK(rtree.set_leaves(mbrs).ok()); rtree.build_tree(); CHECK(rtree.height() == 2); diff --git a/tiledb/sm/serialization/array.cc b/tiledb/sm/serialization/array.cc index 7792c1ca5b6d..72c6a1c142e2 100644 --- a/tiledb/sm/serialization/array.cc +++ b/tiledb/sm/serialization/array.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2023 TileDB, Inc. + * @copyright Copyright (c) 2023-2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -55,9 +55,7 @@ using namespace tiledb::common; using namespace tiledb::sm::stats; -namespace tiledb { -namespace sm { -namespace serialization { +namespace tiledb::sm::serialization { class ArraySerializationException : public StatusException { public: @@ -102,8 +100,7 @@ Status metadata_from_capnp( auto entry_reader = entries_reader[i]; auto key = std::string{std::string_view{ entry_reader.getKey().cStr(), entry_reader.getKey().size()}}; - Datatype type = Datatype::UINT8; - RETURN_NOT_OK(datatype_enum(entry_reader.getType(), &type)); + Datatype type = datatype_enum(entry_reader.getType()); uint32_t value_num = entry_reader.getValueNum(); auto value_ptr = entry_reader.getValue(); @@ -206,10 +203,8 @@ Status array_to_capnp( // If this is the Cloud server, it should load and serialize metadata // If this is the client, it should have previously received the array // metadata from the Cloud server, so it should just serialize it - Metadata* metadata = nullptr; - // Get metadata. If not loaded, load it first. 
- RETURN_NOT_OK(array->metadata(&metadata)); - RETURN_NOT_OK(metadata_to_capnp(metadata, &array_metadata_builder)); + auto& metadata = array->metadata(); + RETURN_NOT_OK(metadata_to_capnp(&metadata, &array_metadata_builder)); } } else { if (array->non_empty_domain_computed()) { @@ -232,7 +227,8 @@ Status array_from_capnp( const capnp::Array::Reader& array_reader, StorageManager* storage_manager, Array* array, - const bool client_side) { + const bool client_side, + shared_ptr memory_tracker) { // The serialized URI is set if it exists // this is used for backwards compatibility with pre TileDB 2.5 clients that // want to serialized a query object TileDB >= 2.5 no longer needs to receive @@ -269,11 +265,10 @@ Status array_from_capnp( if (all_schemas_reader.hasEntries()) { auto entries = array_reader.getArraySchemasAll().getEntries(); for (auto array_schema_build : entries) { - auto schema{array_schema_from_capnp( - array_schema_build.getValue(), array->array_uri())}; - schema.set_array_uri(array->array_uri()); - all_schemas[array_schema_build.getKey()] = - make_shared(HERE(), schema); + auto schema = array_schema_from_capnp( + array_schema_build.getValue(), array->array_uri(), memory_tracker); + schema->set_array_uri(array->array_uri()); + all_schemas[array_schema_build.getKey()] = schema; } } array->set_array_schemas_all(std::move(all_schemas)); @@ -282,10 +277,9 @@ Status array_from_capnp( if (array_reader.hasArraySchemaLatest()) { auto array_schema_latest_reader = array_reader.getArraySchemaLatest(); auto array_schema_latest{array_schema_from_capnp( - array_schema_latest_reader, array->array_uri())}; - array_schema_latest.set_array_uri(array->array_uri()); - array->set_array_schema_latest( - make_shared(HERE(), array_schema_latest)); + array_schema_latest_reader, array->array_uri(), memory_tracker)}; + array_schema_latest->set_array_uri(array->array_uri()); + array->set_array_schema_latest(array_schema_latest); } // Deserialize array directory @@ -307,13 +301,10 
@@ Status array_from_capnp( auto fragment_metadata_all_reader = array_reader.getFragmentMetadataAll(); fragment_metadata.reserve(fragment_metadata_all_reader.size()); for (auto frag_meta_reader : fragment_metadata_all_reader) { - auto meta = make_shared(HERE()); + auto meta = make_shared( + HERE(), &storage_manager->resources(), array->memory_tracker()); RETURN_NOT_OK(fragment_metadata_from_capnp( - array->array_schema_latest_ptr(), - frag_meta_reader, - meta, - &storage_manager->resources(), - array->memory_tracker())); + array->array_schema_latest_ptr(), frag_meta_reader, meta)); if (client_side) { meta->set_rtree_loaded(); } @@ -546,7 +537,8 @@ Status array_deserialize( Array* array, SerializationType serialize_type, const Buffer& serialized_buffer, - StorageManager* storage_manager) { + StorageManager* storage_manager, + shared_ptr memory_tracker) { try { switch (serialize_type) { case SerializationType::JSON: { @@ -558,7 +550,8 @@ Status array_deserialize( kj::StringPtr(static_cast(serialized_buffer.data())), array_builder); capnp::Array::Reader array_reader = array_builder.asReader(); - RETURN_NOT_OK(array_from_capnp(array_reader, storage_manager, array)); + RETURN_NOT_OK(array_from_capnp( + array_reader, storage_manager, array, true, memory_tracker)); break; } case SerializationType::CAPNP: { @@ -578,7 +571,8 @@ Status array_deserialize( serialized_buffer.size() / sizeof(::capnp::word)), readerOptions); capnp::Array::Reader array_reader = reader.getRoot(); - RETURN_NOT_OK(array_from_capnp(array_reader, storage_manager, array)); + RETURN_NOT_OK(array_from_capnp( + array_reader, storage_manager, array, true, memory_tracker)); break; } default: { @@ -711,7 +705,11 @@ Status array_serialize(Array*, SerializationType, Buffer*, const bool) { } Status array_deserialize( - Array*, SerializationType, const Buffer&, StorageManager*) { + Array*, + SerializationType, + const Buffer&, + StorageManager*, + shared_ptr) { return LOG_STATUS(Status_SerializationError( "Cannot 
deserialize; serialization not enabled.")); } @@ -738,6 +736,4 @@ Status metadata_deserialize(Metadata*, SerializationType, const Buffer&) { #endif // TILEDB_SERIALIZATION -} // namespace serialization -} // namespace sm -} // namespace tiledb +} // namespace tiledb::sm::serialization diff --git a/tiledb/sm/serialization/array.h b/tiledb/sm/serialization/array.h index bef9d5143de4..e048510c5349 100644 --- a/tiledb/sm/serialization/array.h +++ b/tiledb/sm/serialization/array.h @@ -72,15 +72,18 @@ Status array_to_capnp( * @param storage_manager the storage manager associated with the array * @param array Array to deserialize into * @param client_side Allows to specify different behavior depending on who is + * @param memory_tracker Memory tracker to use for memory allocations. * serializing, the client (1) or the Cloud server (0). This is sometimes needed * since they are both using the same Core library APIs for serialization. + * @param memory_tracker Memory tracker to use on the deserialized object. 
* @return Status */ Status array_from_capnp( const capnp::Array::Reader& array_reader, StorageManager* storage_manager, Array* array, - const bool client_side = true); + const bool client_side, + shared_ptr memory_tracker); /** * Convert info for opening and array to Cap'n Proto message @@ -135,7 +138,8 @@ Status array_deserialize( Array* array, SerializationType serialize_type, const Buffer& serialized_buffer, - StorageManager* storage_manager); + StorageManager* storage_manager, + shared_ptr memory_tracker); /** * Serialize an open array request via Cap'n Proto diff --git a/tiledb/sm/serialization/array_schema.cc b/tiledb/sm/serialization/array_schema.cc index bc36b4ed970f..5f3bc76d3a7c 100644 --- a/tiledb/sm/serialization/array_schema.cc +++ b/tiledb/sm/serialization/array_schema.cc @@ -37,6 +37,7 @@ #include "tiledb/common/heap_memory.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array_schema/attribute.h" #include "tiledb/sm/array_schema/dimension.h" @@ -400,8 +401,7 @@ void attribute_to_capnp( shared_ptr attribute_from_capnp( const capnp::Attribute::Reader& attribute_reader) { // Get datatype - Datatype datatype = Datatype::ANY; - throw_if_not_ok(datatype_enum(attribute_reader.getType(), &datatype)); + Datatype datatype = datatype_enum(attribute_reader.getType()); // Set nullable const bool nullable = attribute_reader.getNullable(); @@ -610,12 +610,12 @@ Range range_from_capnp( /** Deserialize a dimension from a cap'n proto object. 
*/ shared_ptr dimension_from_capnp( - const capnp::Dimension::Reader& dimension_reader) { + const capnp::Dimension::Reader& dimension_reader, + shared_ptr memory_tracker) { Status st; // Deserialize datatype - Datatype dim_type; - st = datatype_enum(dimension_reader.getType().cStr(), &dim_type); + Datatype dim_type = datatype_enum(dimension_reader.getType().cStr()); if (!st.ok()) { throw std::runtime_error( "[Deserialization::dimension_from_capnp] " + @@ -678,7 +678,8 @@ shared_ptr dimension_from_capnp( cell_val_num, domain, *(filters.get()), - tile_extent); + tile_extent, + memory_tracker); } Status domain_to_capnp( @@ -705,7 +706,8 @@ Status domain_to_capnp( /* Deserialize a domain from a cap'n proto object. */ shared_ptr domain_from_capnp( - const capnp::Domain::Reader& domain_reader) { + const capnp::Domain::Reader& domain_reader, + shared_ptr memory_tracker) { Status st; // Deserialize and validate cell order @@ -745,10 +747,11 @@ shared_ptr domain_from_capnp( std::vector> dims; auto dimensions = domain_reader.getDimensions(); for (auto dimension : dimensions) { - dims.emplace_back(dimension_from_capnp(dimension)); + dims.emplace_back(dimension_from_capnp(dimension, memory_tracker)); } - return make_shared(HERE(), cell_order, dims, tile_order); + return make_shared( + HERE(), cell_order, dims, tile_order, memory_tracker); } void dimension_label_to_capnp( @@ -780,16 +783,15 @@ void dimension_label_to_capnp( } shared_ptr dimension_label_from_capnp( - const capnp::DimensionLabel::Reader& dim_label_reader) { + const capnp::DimensionLabel::Reader& dim_label_reader, + shared_ptr memory_tracker) { // Get datatype - Datatype datatype = Datatype::ANY; - throw_if_not_ok(datatype_enum(dim_label_reader.getType(), &datatype)); + Datatype datatype = datatype_enum(dim_label_reader.getType()); shared_ptr schema{nullptr}; if (dim_label_reader.hasSchema()) { auto schema_reader = dim_label_reader.getSchema(); - schema = make_shared( - HERE(), 
array_schema_from_capnp(schema_reader, URI())); + schema = array_schema_from_capnp(schema_reader, URI(), memory_tracker); } auto is_relative = dim_label_reader.getRelative(); @@ -914,8 +916,10 @@ Status array_schema_to_capnp( } // #TODO Add security validation on incoming URI -ArraySchema array_schema_from_capnp( - const capnp::ArraySchema::Reader& schema_reader, const URI& uri) { +shared_ptr array_schema_from_capnp( + const capnp::ArraySchema::Reader& schema_reader, + const URI& uri, + shared_ptr memory_tracker) { // Deserialize and validate array_type ArrayType array_type = ArrayType::DENSE; Status st = array_type_enum(schema_reader.getArrayType(), &array_type); @@ -996,7 +1000,7 @@ ArraySchema array_schema_from_capnp( // Deserialize domain // Note: Security validation delegated to invoked API auto domain_reader = schema_reader.getDomain(); - auto domain{domain_from_capnp(domain_reader)}; + auto domain{domain_from_capnp(domain_reader, memory_tracker)}; // Set coords filter pipelines // Note: Security validation delegated to invoked API @@ -1069,7 +1073,7 @@ ArraySchema array_schema_from_capnp( try { for (auto dim_label_reader : dim_labels_reader) { dimension_labels.emplace_back( - dimension_label_from_capnp(dim_label_reader)); + dimension_label_from_capnp(dim_label_reader, memory_tracker)); } } catch (const std::exception& e) { std::throw_with_nested(std::runtime_error( @@ -1085,7 +1089,8 @@ ArraySchema array_schema_from_capnp( enumerations.reserve(enmr_readers.size()); try { for (auto&& enmr_reader : enmr_readers) { - enumerations.emplace_back(enumeration_from_capnp(enmr_reader)); + enumerations.emplace_back( + enumeration_from_capnp(enmr_reader, memory_tracker)); } } catch (const std::exception& e) { std::throw_with_nested(std::runtime_error( @@ -1127,7 +1132,8 @@ ArraySchema array_schema_from_capnp( name = schema_reader.getName().cStr(); } - return ArraySchema( + return make_shared( + HERE(), uri_deserialized, version, timestamp_range, @@ -1144,7 +1150,8 @@ 
ArraySchema array_schema_from_capnp( enmr_path_map, cell_var_offsets_filters, cell_validity_filters, - coords_filters); + coords_filters, + memory_tracker); } Status array_schema_serialize( @@ -1201,8 +1208,10 @@ Status array_schema_serialize( return Status::Ok(); } -ArraySchema array_schema_deserialize( - SerializationType serialize_type, const Buffer& serialized_buffer) { +shared_ptr array_schema_deserialize( + SerializationType serialize_type, + const Buffer& serialized_buffer, + shared_ptr memory_tracker) { capnp::ArraySchema::Reader array_schema_reader; ::capnp::MallocMessageBuilder message_builder; @@ -1216,7 +1225,8 @@ ArraySchema array_schema_deserialize( kj::StringPtr(static_cast(serialized_buffer.data())), array_schema_builder); array_schema_reader = array_schema_builder.asReader(); - return array_schema_from_capnp(array_schema_reader, URI()); + return array_schema_from_capnp( + array_schema_reader, URI(), memory_tracker); } case SerializationType::CAPNP: { const auto mBytes = @@ -1225,7 +1235,8 @@ ArraySchema array_schema_deserialize( reinterpret_cast(mBytes), serialized_buffer.size() / sizeof(::capnp::word))); array_schema_reader = reader.getRoot(); - return array_schema_from_capnp(array_schema_reader, URI()); + return array_schema_from_capnp( + array_schema_reader, URI(), memory_tracker); } default: { throw StatusException(Status_SerializationError( @@ -1963,14 +1974,17 @@ void serialize_load_array_schema_response( } } -ArraySchema load_array_schema_response_from_capnp( - capnp::LoadArraySchemaResponse::Reader& reader) { +shared_ptr load_array_schema_response_from_capnp( + capnp::LoadArraySchemaResponse::Reader& reader, + shared_ptr memory_tracker) { auto schema_reader = reader.getSchema(); - return array_schema_from_capnp(schema_reader, URI()); + return array_schema_from_capnp(schema_reader, URI(), memory_tracker); } -ArraySchema deserialize_load_array_schema_response( - SerializationType serialization_type, const Buffer& data) { +shared_ptr 
deserialize_load_array_schema_response( + SerializationType serialization_type, + const Buffer& data, + shared_ptr memory_tracker) { try { switch (serialization_type) { case SerializationType::JSON: { @@ -1981,7 +1995,7 @@ ArraySchema deserialize_load_array_schema_response( json.decode( kj::StringPtr(static_cast(data.data())), builder); auto reader = builder.asReader(); - return load_array_schema_response_from_capnp(reader); + return load_array_schema_response_from_capnp(reader, memory_tracker); } case SerializationType::CAPNP: { const auto mBytes = reinterpret_cast(data.data()); @@ -1989,7 +2003,7 @@ ArraySchema deserialize_load_array_schema_response( reinterpret_cast(mBytes), data.size() / sizeof(::capnp::word))); auto reader = array_reader.getRoot(); - return load_array_schema_response_from_capnp(reader); + return load_array_schema_response_from_capnp(reader, memory_tracker); } default: { throw Status_SerializationError( @@ -2016,7 +2030,8 @@ Status array_schema_serialize( "Cannot serialize; serialization not enabled.")); } -ArraySchema array_schema_deserialize(SerializationType, const Buffer&) { +shared_ptr array_schema_deserialize( + SerializationType, const Buffer&, shared_ptr) { throw StatusException(Status_SerializationError( "Cannot serialize; serialization not enabled.")); } @@ -2076,8 +2091,8 @@ void serialize_load_array_schema_response( "Cannot serialize; serialization not enabled."); } -ArraySchema deserialize_load_array_schema_response( - SerializationType, const Buffer&) { +shared_ptr deserialize_load_array_schema_response( + SerializationType, const Buffer&, shared_ptr) { throw Status_SerializationError( "Cannot serialize; serialization not enabled."); } diff --git a/tiledb/sm/serialization/array_schema.h b/tiledb/sm/serialization/array_schema.h index f0c2d7ee6666..14b51d7b6203 100644 --- a/tiledb/sm/serialization/array_schema.h +++ b/tiledb/sm/serialization/array_schema.h @@ -51,6 +51,7 @@ class Array; class Buffer; class ArraySchema; class 
Dimension; +class MemoryTracker; enum class SerializationType : uint8_t; namespace serialization { @@ -109,10 +110,13 @@ Status array_schema_to_capnp( * * @param schema_reader Cap'n proto object * @param uri A URI object + * @param memory_tracker The memory tracker to use. * @return a new ArraySchema */ -ArraySchema array_schema_from_capnp( - const capnp::ArraySchema::Reader& schema_reader, const URI& uri); +shared_ptr array_schema_from_capnp( + const capnp::ArraySchema::Reader& schema_reader, + const URI& uri, + shared_ptr memory_tracker); /** * Serialize a dimension label to cap'n proto object @@ -130,10 +134,12 @@ void dimension_label_to_capnp( * Deserialize a dimension label from a cap'n proto object * * @param reader Cap'n proto reader object. + * @param memory_tracker The memory tracker to use. * @return A new DimensionLabel. */ shared_ptr dimension_label_from_capnp( - const capnp::DimensionLabel::Reader& reader); + const capnp::DimensionLabel::Reader& reader, + shared_ptr memory_tracker); #endif // TILEDB_SERIALIZATION @@ -152,8 +158,10 @@ Status array_schema_serialize( Buffer* serialized_buffer, const bool client_side); -ArraySchema array_schema_deserialize( - SerializationType serialize_type, const Buffer& serialized_buffer); +shared_ptr array_schema_deserialize( + SerializationType serialize_type, + const Buffer& serialized_buffer, + shared_ptr memory_tracker); Status nonempty_domain_serialize( const Array* array, @@ -204,8 +212,10 @@ void serialize_load_array_schema_response( SerializationType serialization_type, Buffer& data); -ArraySchema deserialize_load_array_schema_response( - SerializationType serialization_type, const Buffer& data); +shared_ptr deserialize_load_array_schema_response( + SerializationType serialization_type, + const Buffer& data, + shared_ptr memory_tracker); } // namespace serialization } // namespace sm diff --git a/tiledb/sm/serialization/array_schema_evolution.cc b/tiledb/sm/serialization/array_schema_evolution.cc index 
c60b9882a674..36a3c329fec9 100644 --- a/tiledb/sm/serialization/array_schema_evolution.cc +++ b/tiledb/sm/serialization/array_schema_evolution.cc @@ -156,7 +156,8 @@ Status array_schema_evolution_to_capnp( } tdb_unique_ptr array_schema_evolution_from_capnp( - const capnp::ArraySchemaEvolution::Reader& evolution_reader) { + const capnp::ArraySchemaEvolution::Reader& evolution_reader, + shared_ptr memory_tracker) { // Create attributes to add std::unordered_map> attrs_to_add; auto attrs_to_add_reader = evolution_reader.getAttributesToAdd(); @@ -177,7 +178,7 @@ tdb_unique_ptr array_schema_evolution_from_capnp( std::unordered_map> enmrs_to_add; auto enmrs_to_add_reader = evolution_reader.getEnumerationsToAdd(); for (auto enmr_reader : enmrs_to_add_reader) { - auto enmr = enumeration_from_capnp(enmr_reader); + auto enmr = enumeration_from_capnp(enmr_reader, memory_tracker); enmrs_to_add[enmr->name()] = enmr; } @@ -186,7 +187,7 @@ tdb_unique_ptr array_schema_evolution_from_capnp( enmrs_to_extend; auto enmrs_to_extend_reader = evolution_reader.getEnumerationsToExtend(); for (auto enmr_reader : enmrs_to_extend_reader) { - auto enmr = enumeration_from_capnp(enmr_reader); + auto enmr = enumeration_from_capnp(enmr_reader, memory_tracker); enmrs_to_extend[enmr->name()] = enmr; } @@ -213,7 +214,8 @@ tdb_unique_ptr array_schema_evolution_from_capnp( enmrs_to_add, enmrs_to_extend, enmrs_to_drop, - ts_range)); + ts_range, + memory_tracker)); } Status array_schema_evolution_serialize( @@ -275,7 +277,8 @@ Status array_schema_evolution_serialize( Status array_schema_evolution_deserialize( ArraySchemaEvolution** array_schema_evolution, SerializationType serialize_type, - const Buffer& serialized_buffer) { + const Buffer& serialized_buffer, + shared_ptr memory_tracker) { try { tdb_unique_ptr decoded_array_schema_evolution = nullptr; @@ -291,8 +294,8 @@ Status array_schema_evolution_deserialize( array_schema_evolution_builder); capnp::ArraySchemaEvolution::Reader 
array_schema_evolution_reader = array_schema_evolution_builder.asReader(); - decoded_array_schema_evolution = - array_schema_evolution_from_capnp(array_schema_evolution_reader); + decoded_array_schema_evolution = array_schema_evolution_from_capnp( + array_schema_evolution_reader, memory_tracker); break; } case SerializationType::CAPNP: { @@ -303,8 +306,8 @@ Status array_schema_evolution_deserialize( serialized_buffer.size() / sizeof(::capnp::word))); capnp::ArraySchemaEvolution::Reader array_schema_evolution_reader = reader.getRoot(); - decoded_array_schema_evolution = - array_schema_evolution_from_capnp(array_schema_evolution_reader); + decoded_array_schema_evolution = array_schema_evolution_from_capnp( + array_schema_evolution_reader, memory_tracker); break; } default: { @@ -343,7 +346,10 @@ Status array_schema_evolution_serialize( } Status array_schema_evolution_deserialize( - ArraySchemaEvolution**, SerializationType, const Buffer&) { + ArraySchemaEvolution**, + SerializationType, + const Buffer&, + shared_ptr) { return LOG_STATUS(Status_SerializationError( "Cannot serialize; serialization not enabled.")); } diff --git a/tiledb/sm/serialization/array_schema_evolution.h b/tiledb/sm/serialization/array_schema_evolution.h index c1b1d8d25841..b75d873e5389 100644 --- a/tiledb/sm/serialization/array_schema_evolution.h +++ b/tiledb/sm/serialization/array_schema_evolution.h @@ -47,12 +47,13 @@ class Buffer; class ArraySchema; class ArraySchemaEvolution; class Dimension; +class MemoryTracker; enum class SerializationType : uint8_t; namespace serialization { /** - * Serialize an array schema evolution via Cap'n Prto + * Serialize an array schema evolution via Cap'n Proto * @param array_schema_evolution evolution object to serialize * @param serialize_type format to serialize into Cap'n Proto or JSON * @param serialized_buffer buffer to store serialized bytes in @@ -66,10 +67,19 @@ Status array_schema_evolution_serialize( Buffer* serialized_buffer, const bool 
client_side); +/** + * Deserialize an array schema evolution via Cap'n Proto + * @param array_schema_evolution pointer to store evolution object in + * @param serialize_type format to serialize into Cap'n Proto or JSON + * @param serialized_buffer buffer where serialized bytes are stored + * @param memory_tracker memory tracker associated with the evolution object + * @return + */ Status array_schema_evolution_deserialize( ArraySchemaEvolution** array_schema_evolution, SerializationType serialize_type, - const Buffer& serialized_buffer); + const Buffer& serialized_buffer, + shared_ptr memory_tracker); } // namespace serialization } // namespace sm diff --git a/tiledb/sm/serialization/enumeration.cc b/tiledb/sm/serialization/enumeration.cc index 16eb87916636..197a2d8834d5 100644 --- a/tiledb/sm/serialization/enumeration.cc +++ b/tiledb/sm/serialization/enumeration.cc @@ -71,11 +71,11 @@ void enumeration_to_capnp( } shared_ptr enumeration_from_capnp( - const capnp::Enumeration::Reader& reader) { + const capnp::Enumeration::Reader& reader, + shared_ptr memory_tracker) { auto name = reader.getName(); auto path_name = reader.getPathName(); - Datatype datatype = Datatype::ANY; - throw_if_not_ok(datatype_enum(reader.getType(), &datatype)); + Datatype datatype = datatype_enum(reader.getType()); const void* data = nullptr; uint64_t data_size = 0; @@ -104,7 +104,8 @@ shared_ptr enumeration_from_capnp( data, data_size, offsets, - offsets_size); + offsets_size, + memory_tracker); } void load_enumerations_request_to_capnp( @@ -150,12 +151,13 @@ void load_enumerations_response_to_capnp( std::vector> load_enumerations_response_from_capnp( - const capnp::LoadEnumerationsResponse::Reader& reader) { + const capnp::LoadEnumerationsResponse::Reader& reader, + shared_ptr memory_tracker) { std::vector> ret; if (reader.hasEnumerations()) { auto enmr_readers = reader.getEnumerations(); for (auto enmr_reader : enmr_readers) { - ret.push_back(enumeration_from_capnp(enmr_reader)); + 
ret.push_back(enumeration_from_capnp(enmr_reader, memory_tracker)); } } return ret; @@ -306,7 +308,9 @@ void serialize_load_enumerations_response( std::vector> deserialize_load_enumerations_response( - SerializationType serialize_type, const Buffer& response) { + SerializationType serialize_type, + const Buffer& response, + shared_ptr memory_tracker) { try { switch (serialize_type) { case SerializationType::JSON: { @@ -317,7 +321,7 @@ deserialize_load_enumerations_response( json.decode( kj::StringPtr(static_cast(response.data())), builder); capnp::LoadEnumerationsResponse::Reader reader = builder.asReader(); - return load_enumerations_response_from_capnp(reader); + return load_enumerations_response_from_capnp(reader, memory_tracker); } case SerializationType::CAPNP: { const auto mBytes = reinterpret_cast(response.data()); @@ -326,7 +330,7 @@ deserialize_load_enumerations_response( response.size() / sizeof(::capnp::word))); capnp::LoadEnumerationsResponse::Reader reader = array_reader.getRoot(); - return load_enumerations_response_from_capnp(reader); + return load_enumerations_response_from_capnp(reader, memory_tracker); } default: { throw Status_SerializationError( @@ -371,7 +375,8 @@ void serialize_load_enumerations_response( } std::vector> -deserialize_load_enumerations_response(SerializationType, const Buffer&) { +deserialize_load_enumerations_response( + SerializationType, const Buffer&, shared_ptr) { throw Status_SerializationError( "Cannot serialize; serialization not enabled."); } diff --git a/tiledb/sm/serialization/enumeration.h b/tiledb/sm/serialization/enumeration.h index 85e5faaaf7a3..c92129f973cc 100644 --- a/tiledb/sm/serialization/enumeration.h +++ b/tiledb/sm/serialization/enumeration.h @@ -64,10 +64,13 @@ void enumeration_to_capnp( * Deserialize an enumeration from a cap'n proto object * * @param reader Cap'n proto reader object + * @param memory_tracker The memory tracker associated with the Enumeration + * object. 
* @return A new Enumeration */ shared_ptr enumeration_from_capnp( - const capnp::Enumeration::Reader& reader); + const capnp::Enumeration::Reader& reader, + shared_ptr memory_tracker); #endif @@ -87,7 +90,9 @@ void serialize_load_enumerations_response( std::vector> deserialize_load_enumerations_response( - SerializationType serialization_type, const Buffer& response); + SerializationType serialization_type, + const Buffer& response, + shared_ptr memory_tracker); } // namespace serialization } // namespace tiledb::sm diff --git a/tiledb/sm/serialization/fragment_info.cc b/tiledb/sm/serialization/fragment_info.cc index 4d2badb9e8c9..3c18e57d139b 100644 --- a/tiledb/sm/serialization/fragment_info.cc +++ b/tiledb/sm/serialization/fragment_info.cc @@ -199,8 +199,7 @@ Status fragment_info_request_deserialize( std::tuple> single_fragment_info_from_capnp( const capnp::SingleFragmentInfo::Reader& single_frag_info_reader, - const std::unordered_map>& - array_schemas) { + FragmentInfo* fragment_info) { // Get array schema name std::string schema_name; if (single_frag_info_reader.hasArraySchemaName()) { @@ -213,8 +212,8 @@ single_fragment_info_from_capnp( } // Use the array schema name to find the corresponding array schema - auto schema = array_schemas.find(schema_name); - if (schema == array_schemas.end()) { + auto schema = fragment_info->array_schemas_all().find(schema_name); + if (schema == fragment_info->array_schemas_all().end()) { return { Status_SerializationError( "Could not find schema" + schema_name + @@ -226,7 +225,10 @@ single_fragment_info_from_capnp( shared_ptr meta; if (single_frag_info_reader.hasMeta()) { auto frag_meta_reader = single_frag_info_reader.getMeta(); - meta = make_shared(HERE()); + + auto memory_tracker = fragment_info->resources()->create_memory_tracker(); + meta = make_shared( + HERE(), fragment_info->resources(), memory_tracker); auto st = fragment_metadata_from_capnp(schema->second, frag_meta_reader, meta); } else { @@ -274,16 +276,16 @@ 
Status single_fragment_info_to_capnp( Status fragment_info_from_capnp( const capnp::FragmentInfo::Reader& fragment_info_reader, const URI& array_uri, - FragmentInfo* fragment_info) { + FragmentInfo* fragment_info, + shared_ptr memory_tracker) { // Get array_schema_latest from capnp if (fragment_info_reader.hasArraySchemaLatest()) { auto array_schema_latest_reader = fragment_info_reader.getArraySchemaLatest(); - auto array_schema_latest{ - array_schema_from_capnp(array_schema_latest_reader, array_uri)}; - array_schema_latest.set_array_uri(array_uri); - fragment_info->array_schema_latest() = - make_shared(HERE(), array_schema_latest); + auto array_schema_latest{array_schema_from_capnp( + array_schema_latest_reader, array_uri, memory_tracker)}; + array_schema_latest->set_array_uri(array_uri); + fragment_info->array_schema_latest() = array_schema_latest; } // Get array_schemas_all from capnp @@ -293,14 +295,13 @@ Status fragment_info_from_capnp( if (all_schemas_reader.hasEntries()) { auto entries = fragment_info_reader.getArraySchemasAll().getEntries(); for (auto array_schema_build : entries) { - auto schema{ - array_schema_from_capnp(array_schema_build.getValue(), array_uri)}; - schema.set_array_uri(array_uri); + auto schema{array_schema_from_capnp( + array_schema_build.getValue(), array_uri, memory_tracker)}; + schema->set_array_uri(array_uri); auto key = std::string_view{ array_schema_build.getKey().cStr(), array_schema_build.getKey().size()}; - fragment_info->array_schemas_all()[std::string{key}] = - make_shared(HERE(), schema); + fragment_info->array_schemas_all()[std::string{key}] = schema; } } } @@ -312,7 +313,7 @@ Status fragment_info_from_capnp( fragment_info_list_reader.size()); for (auto single_frag_info_reader : fragment_info_list_reader) { auto&& [st, single_frag_info] = single_fragment_info_from_capnp( - single_frag_info_reader, fragment_info->array_schemas_all()); + single_frag_info_reader, fragment_info); RETURN_NOT_OK(st); 
fragment_info->single_fragment_info_vec().emplace_back( single_frag_info.value()); @@ -451,7 +452,8 @@ Status fragment_info_deserialize( FragmentInfo* fragment_info, SerializationType serialize_type, const URI& uri, - const Buffer& serialized_buffer) { + const Buffer& serialized_buffer, + shared_ptr memory_tracker) { if (fragment_info == nullptr) return LOG_STATUS( Status_SerializationError("Error deserializing fragment info; null " @@ -469,7 +471,8 @@ Status fragment_info_deserialize( auto reader = builder.asReader(); // Deserialize - RETURN_NOT_OK(fragment_info_from_capnp(reader, uri, fragment_info)); + RETURN_NOT_OK(fragment_info_from_capnp( + reader, uri, fragment_info, memory_tracker)); break; } case SerializationType::CAPNP: { @@ -491,7 +494,8 @@ Status fragment_info_deserialize( auto reader = msg_reader.getRoot(); // Deserialize - RETURN_NOT_OK(fragment_info_from_capnp(reader, uri, fragment_info)); + RETURN_NOT_OK(fragment_info_from_capnp( + reader, uri, fragment_info, memory_tracker)); break; } default: { @@ -522,7 +526,11 @@ Status fragment_info_serialize( } Status fragment_info_deserialize( - FragmentInfo*, SerializationType, const URI&, const Buffer&) { + FragmentInfo*, + SerializationType, + const URI&, + const Buffer&, + shared_ptr) { return LOG_STATUS(Status_SerializationError( "Cannot deserialize; serialization not enabled.")); } diff --git a/tiledb/sm/serialization/fragment_info.h b/tiledb/sm/serialization/fragment_info.h index bd69c37c60f7..968a13d38a81 100644 --- a/tiledb/sm/serialization/fragment_info.h +++ b/tiledb/sm/serialization/fragment_info.h @@ -59,12 +59,14 @@ namespace serialization { * @param fragment_info_reader cap'n proto class. * @param uri array uri that the fragment belongs to * @param fragment_info fragment info object to deserialize into. + * @param memory_tracker The memory tracker to use. 
* @return Status */ Status fragment_info_from_capnp( const capnp::FragmentInfo::Reader& fragment_info_reader, const URI& uri, - FragmentInfo* fragment_info); + FragmentInfo* fragment_info, + shared_ptr memory_tracker); /** * Convert Fragment Info to Cap'n Proto message. @@ -124,13 +126,15 @@ Status fragment_info_serialize( * @param serialize_type format the data is serialized in: Cap'n Proto of JSON. * @param uri array uri that the fragment belongs to * @param serialized_buffer buffer to read serialized bytes from. + * @param memory_tracker The memory tracker to use. * @return Status */ Status fragment_info_deserialize( FragmentInfo* fragment_info, SerializationType serialize_type, const URI& uri, - const Buffer& serialized_buffer); + const Buffer& serialized_buffer, + shared_ptr memory_tracker); /** * Serialize a fragment info request via Cap'n Proto. diff --git a/tiledb/sm/serialization/fragment_metadata.cc b/tiledb/sm/serialization/fragment_metadata.cc index 74482c0b4c15..999c860db8fb 100644 --- a/tiledb/sm/serialization/fragment_metadata.cc +++ b/tiledb/sm/serialization/fragment_metadata.cc @@ -119,17 +119,7 @@ void generic_tile_offsets_from_capnp( Status fragment_metadata_from_capnp( const shared_ptr& array_schema, const capnp::FragmentMetadata::Reader& frag_meta_reader, - shared_ptr frag_meta, - ContextResources* resources, - MemoryTracker* memory_tracker) { - // TODO: consider a new constructor for fragment meta or using the - // existing one - if (resources) { - frag_meta->set_context_resources(resources); - } - if (memory_tracker) { - frag_meta->set_memory_tracker(memory_tracker); - } + shared_ptr frag_meta) { if (frag_meta_reader.hasFileSizes()) { auto filesizes_reader = frag_meta_reader.getFileSizes(); frag_meta->file_sizes().reserve(filesizes_reader.size()); @@ -368,7 +358,7 @@ Status fragment_metadata_from_capnp( auto data = frag_meta_reader.getRtree(); auto& domain = array_schema->domain(); // If there are no levels, we still need domain_ properly 
initialized - frag_meta->rtree() = RTree(&domain, constants::rtree_fanout); + frag_meta->rtree().reset(&domain, constants::rtree_fanout); Deserializer deserializer(data.begin(), data.size()); // What we actually deserialize is not something written on disk in a // possibly historical format, but what has been serialized in diff --git a/tiledb/sm/serialization/fragment_metadata.h b/tiledb/sm/serialization/fragment_metadata.h index 270f50a2a9c4..281fc875f04e 100644 --- a/tiledb/sm/serialization/fragment_metadata.h +++ b/tiledb/sm/serialization/fragment_metadata.h @@ -64,9 +64,7 @@ namespace serialization { Status fragment_metadata_from_capnp( const shared_ptr& array_schema, const capnp::FragmentMetadata::Reader& frag_meta_reader, - shared_ptr frag_meta, - ContextResources* resources = nullptr, - MemoryTracker* memory_tracker = nullptr); + shared_ptr frag_meta); /** * Serialize Fragment Metadata sizes and offsets diff --git a/tiledb/sm/serialization/group.cc b/tiledb/sm/serialization/group.cc index 3b5d59a27adb..de8611677273 100644 --- a/tiledb/sm/serialization/group.cc +++ b/tiledb/sm/serialization/group.cc @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2023 TileDB, Inc. + * @copyright Copyright (c) 2023-2024 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -58,9 +58,7 @@ using namespace tiledb::common; -namespace tiledb { -namespace sm { -namespace serialization { +namespace tiledb::sm::serialization { #ifdef TILEDB_SERIALIZATION @@ -72,13 +70,7 @@ Status group_metadata_to_capnp( auto config_builder = group_metadata_builder->initConfig(); RETURN_NOT_OK(config_to_capnp(group->config(), &config_builder)); - Metadata* metadata; - if (load) { - RETURN_NOT_OK(group->metadata(&metadata)); - } else { - metadata = const_cast(group->metadata()); - } - + auto metadata = group->metadata(); if (metadata->num()) { auto metadata_builder = group_metadata_builder->initMetadata(); RETURN_NOT_OK(metadata_to_capnp(metadata, &metadata_builder)); @@ -165,13 +157,7 @@ Status group_details_to_capnp( } } - Metadata* metadata; - if (group->group_uri().is_tiledb()) { - metadata = const_cast(group->metadata()); - } else { - RETURN_NOT_OK(group->metadata(&metadata)); - } - + auto metadata = group->metadata(); if (metadata->num()) { auto group_metadata_builder = group_details_builder->initMetadata(); RETURN_NOT_OK(metadata_to_capnp(metadata, &group_metadata_builder)); @@ -831,6 +817,4 @@ Status group_metadata_serialize(Group*, SerializationType, Buffer*, bool) { #endif // TILEDB_SERIALIZATION -} // namespace serialization -} // namespace sm -} // namespace tiledb +} // namespace tiledb::sm::serialization diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index 75162c69347c..7d98392eddaf 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -88,7 +88,7 @@ namespace serialization { shared_ptr dummy_logger = make_shared(HERE(), ""); -Status stats_to_capnp(Stats& stats, capnp::Stats::Builder* stats_builder) { +void stats_to_capnp(const Stats& stats, capnp::Stats::Builder* stats_builder) { // Build counters const auto counters 
= stats.counters(); if (counters != nullptr && !counters->empty()) { @@ -114,31 +114,29 @@ Status stats_to_capnp(Stats& stats, capnp::Stats::Builder* stats_builder) { ++index; } } - - return Status::Ok(); } -Status stats_from_capnp( - const capnp::Stats::Reader& stats_reader, Stats* stats) { +StatsData stats_from_capnp(const capnp::Stats::Reader& stats_reader) { + std::unordered_map counters; + std::unordered_map timers; + if (stats_reader.hasCounters()) { - auto counters = stats->counters(); auto counters_reader = stats_reader.getCounters(); for (const auto entry : counters_reader.getEntries()) { auto key = std::string_view{entry.getKey().cStr(), entry.getKey().size()}; - (*counters)[std::string{key}] = entry.getValue(); + counters[std::string(key)] = entry.getValue(); } } if (stats_reader.hasTimers()) { - auto timers = stats->timers(); auto timers_reader = stats_reader.getTimers(); for (const auto entry : timers_reader.getEntries()) { auto key = std::string_view{entry.getKey().cStr(), entry.getKey().size()}; - (*timers)[std::string{key}] = entry.getValue(); + timers[std::string(key)] = entry.getValue(); } } - return Status::Ok(); + return stats::StatsData(counters, timers); } void range_buffers_to_capnp( @@ -246,11 +244,9 @@ Status subarray_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = subarray->stats(); - if (stats != nullptr) { - auto stats_builder = builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = subarray->stats(); + auto stats_builder = builder->initStats(); + stats_to_capnp(stats, &stats_builder); if (subarray->relevant_fragments().relevant_fragments_size() > 0) { auto relevant_fragments_builder = builder->initRelevantFragments( @@ -272,8 +268,6 @@ Status subarray_from_capnp( uint32_t dim_num = ranges_reader.size(); for (uint32_t i = 0; i < dim_num; i++) { auto range_reader = ranges_reader[i]; - Datatype type = Datatype::UINT8; - 
RETURN_NOT_OK(datatype_enum(range_reader.getType(), &type)); auto data = range_reader.getBuffer(); auto data_ptr = data.asBytes(); @@ -328,11 +322,8 @@ Status subarray_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader.hasStats()) { - stats::Stats* stats = subarray->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader.getStats()); + subarray->set_stats(stats_data); } if (reader.hasRelevantFragments()) { @@ -428,11 +419,9 @@ Status subarray_partitioner_to_capnp( builder->setMemoryBudgetValidity(mem_budget_validity); // If stats object exists set its cap'n proto object - stats::Stats* stats = partitioner.stats(); - if (stats != nullptr) { - auto stats_builder = builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = partitioner.stats(); + auto stats_builder = builder->initStats(); + stats_to_capnp(stats, &stats_builder); return Status::Ok(); } @@ -566,11 +555,8 @@ Status subarray_partitioner_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader.hasStats()) { - auto stats = partitioner->stats(); - // We should always have stats - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader.getStats()); + partitioner->set_stats(stats_data); } return Status::Ok(); @@ -913,11 +899,9 @@ Status reader_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = reader.stats(); - if (stats != nullptr) { - auto stats_builder = reader_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *reader.stats(); + auto stats_builder = reader_builder->initStats(); + stats_to_capnp(stats, &stats_builder); return Status::Ok(); } @@ -947,11 +931,9 @@ Status index_reader_to_capnp( } // If stats object exists set 
its cap'n proto object - stats::Stats* stats = reader.stats(); - if (stats != nullptr) { - auto stats_builder = reader_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *reader.stats(); + auto stats_builder = reader_builder->initStats(); + stats_to_capnp(stats, &stats_builder); return Status::Ok(); } @@ -982,11 +964,9 @@ Status dense_reader_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = reader.stats(); - if (stats != nullptr) { - auto stats_builder = reader_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *reader.stats(); + auto stats_builder = reader_builder->initStats(); + stats_to_capnp(stats, &stats_builder); return Status::Ok(); } @@ -1146,11 +1126,8 @@ Status reader_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader_reader.hasStats()) { - stats::Stats* stats = reader->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader_reader.getStats()); + reader->set_stats(stats_data); } return Status::Ok(); @@ -1187,11 +1164,8 @@ Status index_reader_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader_reader.hasStats()) { - stats::Stats* stats = reader->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader_reader.getStats()); + reader->set_stats(stats_data); } return Status::Ok(); @@ -1229,11 +1203,8 @@ Status dense_reader_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader_reader.hasStats()) { - stats::Stats* stats = reader->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats)); - } + 
auto stats_data = stats_from_capnp(reader_reader.getStats()); + reader->set_stats(stats_data); } return Status::Ok(); @@ -1252,11 +1223,8 @@ Status delete_from_capnp( // If cap'n proto object has stats set it on c++ object if (delete_reader.hasStats()) { - stats::Stats* stats = delete_strategy->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(delete_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(delete_reader.getStats()); + delete_strategy->set_stats(stats_data); } return Status::Ok(); @@ -1273,11 +1241,9 @@ Status delete_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = delete_strategy.stats(); - if (stats != nullptr) { - auto stats_builder = delete_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *delete_strategy.stats(); + auto stats_builder = delete_builder->initStats(); + stats_to_capnp(stats, &stats_builder); return Status::Ok(); } @@ -1301,11 +1267,9 @@ Status writer_to_capnp( } // If stats object exists set its cap'n proto object - stats::Stats* stats = writer.stats(); - if (stats != nullptr) { - auto stats_builder = writer_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *writer.stats(); + auto stats_builder = writer_builder->initStats(); + stats_to_capnp(stats, &stats_builder); if (query.layout() == Layout::GLOBAL_ORDER) { auto& global_writer = dynamic_cast(writer); @@ -1341,11 +1305,8 @@ Status writer_from_capnp( // If cap'n proto object has stats set it on c++ object if (writer_reader.hasStats()) { - stats::Stats* stats = writer->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(writer_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(writer_reader.getStats()); + writer->set_stats(stats_data); } if (query.layout() == Layout::GLOBAL_ORDER && @@ 
-1557,10 +1518,10 @@ Status query_to_capnp( RETURN_NOT_OK(config_to_capnp(query.config(), &config_builder)); // If stats object exists set its cap'n proto object - stats::Stats* stats = query.stats(); - if (stats != nullptr) { + auto stats = query.stats(); + if (stats) { auto stats_builder = query_builder->initStats(); - RETURN_NOT_OK(stats_to_capnp(*stats, &stats_builder)); + stats_to_capnp(*stats, &stats_builder); } auto& written_fragment_info = query.get_written_fragment_info(); @@ -1589,10 +1550,10 @@ Status query_to_capnp( // The server should throw if it's about to serialize an incomplete query // that has aggregates on it, this behavior is currently not supported. if (!client_side && query.status() == QueryStatus::INCOMPLETE && - !query.has_aggregates()) { - throw Status_SerializationError( + query.has_aggregates()) { + throw StatusException(Status_SerializationError( "Aggregates are not currently supported in incomplete remote " - "queries"); + "queries")); } query_channels_to_capnp(query, query_builder); @@ -2270,11 +2231,8 @@ Status query_from_capnp( // If cap'n proto object has stats set it on c++ object if (query_reader.hasStats()) { - stats::Stats* stats = query->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(query_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(query_reader.getStats()); + query->set_stats(stats_data); } if (query_reader.hasWrittenFragmentInfo()) { @@ -2324,7 +2282,8 @@ Status array_from_query_deserialize( const Buffer& serialized_buffer, SerializationType serialize_type, Array& array, - StorageManager* storage_manager) { + StorageManager* storage_manager, + shared_ptr memory_tracker) { try { switch (serialize_type) { case SerializationType::JSON: { @@ -2339,7 +2298,11 @@ Status array_from_query_deserialize( capnp::Query::Reader query_reader = query_builder.asReader(); // Deserialize array instance. 
RETURN_NOT_OK(array_from_capnp( - query_reader.getArray(), storage_manager, &array, false)); + query_reader.getArray(), + storage_manager, + &array, + false, + memory_tracker)); break; } case SerializationType::CAPNP: { @@ -2367,7 +2330,11 @@ Status array_from_query_deserialize( capnp::Query::Reader query_reader = reader.getRoot(); // Deserialize array instance. RETURN_NOT_OK(array_from_capnp( - query_reader.getArray(), storage_manager, &array, false)); + query_reader.getArray(), + storage_manager, + &array, + false, + memory_tracker)); break; } default: @@ -3180,11 +3147,9 @@ void ordered_dim_label_reader_to_capnp( query.dimension_label_increasing_order()); // If stats object exists set its cap'n proto object - stats::Stats* stats = reader.stats(); - if (stats != nullptr) { - auto stats_builder = reader_builder->initStats(); - throw_if_not_ok(stats_to_capnp(*stats, &stats_builder)); - } + const auto& stats = *reader.stats(); + auto stats_builder = reader_builder->initStats(); + stats_to_capnp(stats, &stats_builder); } void ordered_dim_label_reader_from_capnp( @@ -3212,11 +3177,8 @@ void ordered_dim_label_reader_from_capnp( // If cap'n proto object has stats set it on c++ object if (reader_reader.hasStats()) { - stats::Stats* stats = reader->stats(); - // We should always have a stats here - if (stats != nullptr) { - throw_if_not_ok(stats_from_capnp(reader_reader.getStats(), stats)); - } + auto stats_data = stats_from_capnp(reader_reader.getStats()); + reader->set_stats(stats_data); } } @@ -3234,7 +3196,11 @@ Status query_deserialize( } Status array_from_query_deserialize( - const Buffer&, SerializationType, Array&, StorageManager*) { + const Buffer&, + SerializationType, + Array&, + StorageManager*, + shared_ptr) { return LOG_STATUS(Status_SerializationError( "Cannot deserialize; serialization not enabled.")); } diff --git a/tiledb/sm/serialization/query.h b/tiledb/sm/serialization/query.h index 1a711db6b7de..a5ad85c03482 100644 --- 
a/tiledb/sm/serialization/query.h +++ b/tiledb/sm/serialization/query.h @@ -137,12 +137,14 @@ using CopyState = * @param serialized_buffer Buffer containing serialized query * @param serialize_type Serialization type of serialized query * @param array Array object to deserialize into + * @param memory_tracker Memory tracker to use for allocations. */ Status array_from_query_deserialize( const Buffer& serialized_buffer, SerializationType serialize_type, Array& array, - StorageManager* storage_manager); + StorageManager* storage_manager, + shared_ptr memory_tracker); /** * Serialize a query diff --git a/tiledb/sm/serialization/query_aggregates.cc b/tiledb/sm/serialization/query_aggregates.cc index d3785e1ebd03..63ecf23d297b 100644 --- a/tiledb/sm/serialization/query_aggregates.cc +++ b/tiledb/sm/serialization/query_aggregates.cc @@ -87,7 +87,7 @@ void query_channels_from_capnp( auto channel_reader = channels_reader[i]; if (channel_reader.hasAggregates()) { - QueryChannel::ChannelAggregates aggregates; + LegacyQueryAggregatesOverDefault::ChannelAggregates aggregates; auto aggregates_reader = channel_reader.getAggregates(); for (const auto& aggregate : aggregates_reader) { @@ -119,8 +119,8 @@ void query_channels_from_capnp( } } if (!aggregates.empty()) { - query->add_channel( - QueryChannel{channel_reader.getDefault(), aggregates}); + query->add_channel(LegacyQueryAggregatesOverDefault{ + channel_reader.getDefault(), aggregates}); } } } diff --git a/tiledb/sm/serialization/test/CMakeLists.txt b/tiledb/sm/serialization/test/CMakeLists.txt index abd02d04503c..832eaae79a9f 100644 --- a/tiledb/sm/serialization/test/CMakeLists.txt +++ b/tiledb/sm/serialization/test/CMakeLists.txt @@ -36,15 +36,16 @@ conclude(unit_test) commence(unit_test capnp_array_schema) this_target_sources(main.cc unit_capnp_array_schema.cc) - this_target_link_libraries(TILEDB_CORE_OBJECTS TILEDB_CORE_OBJECTS_ILIB) + this_target_link_libraries(TILEDB_CORE_OBJECTS TILEDB_CORE_OBJECTS_ILIB 
tiledb_test_support_lib) # Enable serialization target_compile_definitions(unit_capnp_array_schema PRIVATE -DTILEDB_SERIALIZATION) + this_target_link_libraries(tiledb_test_support_lib) conclude(unit_test) commence(unit_test capnp_nonempty_domain) this_target_sources(main.cc unit_capnp_nonempty_domain.cc) - this_target_link_libraries(TILEDB_CORE_OBJECTS TILEDB_CORE_OBJECTS_ILIB) + this_target_link_libraries(tiledb_test_support_lib) # Enable serialization target_compile_definitions(unit_capnp_nonempty_domain PRIVATE -DTILEDB_SERIALIZATION) diff --git a/tiledb/sm/serialization/test/unit_capnp_array_schema.cc b/tiledb/sm/serialization/test/unit_capnp_array_schema.cc index f532d950a624..0bd703210f48 100644 --- a/tiledb/sm/serialization/test/unit_capnp_array_schema.cc +++ b/tiledb/sm/serialization/test/unit_capnp_array_schema.cc @@ -33,7 +33,7 @@ #include #include - +#include "test/support/src/mem_helpers.h" #include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/array_schema/attribute.h" #include "tiledb/sm/array_schema/dimension.h" @@ -101,14 +101,22 @@ TEST_CASE( SECTION("Internal dimension label") { // Create dimension label array schema. 
Status st; - auto schema = make_shared(HERE(), ArrayType::DENSE); - std::vector> dims{ - make_shared(HERE(), "index", Datatype::UINT32)}; + auto schema = make_shared( + HERE(), ArrayType::DENSE, tiledb::test::create_test_memory_tracker()); + std::vector> dims{make_shared( + HERE(), + "index", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker())}; uint32_t domain1[2]{1, 64}; st = dims[0]->set_domain(&domain1[0]); REQUIRE(st.ok()); st = schema->set_domain(make_shared( - HERE(), Layout::ROW_MAJOR, dims, Layout::ROW_MAJOR)); + HERE(), + Layout::ROW_MAJOR, + dims, + Layout::ROW_MAJOR, + tiledb::test::get_test_memory_tracker())); REQUIRE(st.ok()); st = schema->add_attribute( make_shared(HERE(), "label", Datatype::FLOAT64)); @@ -153,8 +161,8 @@ TEST_CASE( message.initRoot(); tiledb::sm::serialization::dimension_label_to_capnp( *dim_label.get(), &builder, true); - auto dim_label_clone = - tiledb::sm::serialization::dimension_label_from_capnp(builder); + auto dim_label_clone = tiledb::sm::serialization::dimension_label_from_capnp( + builder, tiledb::test::get_test_memory_tracker()); // Check dimension label properties and components. 
CHECK(dim_label->has_schema() == dim_label_clone->has_schema()); diff --git a/tiledb/sm/serialization/test/unit_capnp_nonempty_domain.cc b/tiledb/sm/serialization/test/unit_capnp_nonempty_domain.cc index 6c61dc7c08ae..727f09812378 100644 --- a/tiledb/sm/serialization/test/unit_capnp_nonempty_domain.cc +++ b/tiledb/sm/serialization/test/unit_capnp_nonempty_domain.cc @@ -34,6 +34,7 @@ #include +#include "test/support/src/mem_helpers.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/serialization/array_schema.h" @@ -44,7 +45,11 @@ TEST_CASE( "Check serialization correctly marks nonempty domain as " "var/fixed size", "[nonemptydomain][serialization]") { - auto dim = make_shared(HERE(), "index", Datatype::UINT32); + auto dim = make_shared( + HERE(), + "index", + Datatype::UINT32, + tiledb::test::get_test_memory_tracker()); uint32_t domain1[2]{1, 64}; auto st = dim->set_domain(&domain1[0]); REQUIRE(st.ok()); diff --git a/tiledb/sm/stats/global_stats.h b/tiledb/sm/stats/global_stats.h index 7d5635ef4830..66d9e3dfab58 100644 --- a/tiledb/sm/stats/global_stats.h +++ b/tiledb/sm/stats/global_stats.h @@ -49,6 +49,71 @@ #include "tiledb/common/heap_memory.h" #include "tiledb/sm/stats/stats.h" +/** + * Documenting the current stats behavior and architecture as close as + * possible to the code so it's helpful next time someone tries to refactor. + * + * Statistics collection is done at the top level via the GlobalStats class + * defined in this file. + * We maintain a global object called `all_stats` which is used to register + * Stats objects, enable/disable collection, reset or dumping the collected + * stats. + * + * The TileDB C API uses the `all_stats` object directly to execute the + * actions iterated above. + * + * The GlobalStats class owns a list called `registered_stats` that has one + * Stats object registered for each Context used. 
In consequence, + * ContextResources registers a Stats object for each Context created; this + * object serves as the root for the tree of all children Stats used in a + * Context. + * + * As mentioned above, the Stats objects used under a Context form a tree. + * Each Stats object maintains a list of children Stats and a pointer to the + * parent Stats object. + * The Stats object created by ContextResources (named "Context.StorageManager") + * is the only Stats constructed in a standalone fashion using the Stats + * constructor; all the other objects under this root Stats are created via + * the Stats::create_child API. + * + * The (current, please update if not accurate anymore) exhaustive list of + * Stats we maintain under a Context is: + * --------------------------- + * ContextResources + * - Query + * - Reader + * - Writer + * - DenseTiler + * - Subarray + * - Deletes + * - Subarray + * - subSubarray + * - SubarrayPartitioner + * - VFS + * - S3 + * - ArrayDirectory + * - RestClient + * - Consolidator + * --------------------------- + * Please visualize this as a tree; it was much easier to write + * like this because the tree is too wide. + * + * + * Observed issues: + * - Stats objects currently are created via Stats::create_child API from a + * parent stats object. Child objects such as e.g. Subarray only hold a + * pointer to the Stats object; this means that the Stats objects outlive + * the objects they represent and are kept alive by the tree-like structure + * defined by the Stats class. + * In theory, a Context running for a long time would OOM the machine with + * Stats objects. + * + * - Stats::populate_flattened_stats aggregates the collected statistics via + * summing. But we also collect ".min" and ".max" statistics; + * sum-aggregating those is incorrect. Currently the dump function just + * doesn't print those statistics. 
+ */ + namespace tiledb { namespace sm { namespace stats { @@ -158,8 +223,7 @@ class GlobalStats { /* ********************************* */ /** - * The singleton instance holding all global stats counters. The report will - * be automatically made when this object is destroyed (at program termination). + * The singleton instance holding all global stats counters and timers. */ extern GlobalStats all_stats; diff --git a/tiledb/sm/stats/stats.cc b/tiledb/sm/stats/stats.cc index 5773c312b583..82de3ed25494 100644 --- a/tiledb/sm/stats/stats.cc +++ b/tiledb/sm/stats/stats.cc @@ -48,9 +48,14 @@ namespace stats { /* ****************************** */ Stats::Stats(const std::string& prefix) + : Stats(prefix, StatsData{}) { +} + +Stats::Stats(const std::string& prefix, const StatsData& data) : enabled_(true) , prefix_(prefix + ".") , parent_(nullptr) { + this->populate_with_data(data); } /* ****************************** */ @@ -246,8 +251,12 @@ Stats* Stats::parent() { } Stats* Stats::create_child(const std::string& prefix) { + return create_child(prefix, StatsData{}); +} + +Stats* Stats::create_child(const std::string& prefix, const StatsData& data) { std::unique_lock lck(mtx_); - children_.emplace_back(prefix_ + prefix); + children_.emplace_back(prefix_ + prefix, data); Stats* const child = &children_.back(); child->parent_ = this; return child; @@ -272,15 +281,26 @@ void Stats::populate_flattened_stats( } } -std::unordered_map* Stats::timers() { +const std::unordered_map* Stats::timers() const { return &timers_; } /** Return pointer to conters map, used for serialization only. 
*/ -std::unordered_map* Stats::counters() { +const std::unordered_map* Stats::counters() const { return &counters_; } +void Stats::populate_with_data(const StatsData& data) { + auto& timers = data.timers(); + for (const auto& timer : timers) { + timers_[timer.first] = timer.second; + } + auto& counters = data.counters(); + for (const auto& counter : counters) { + counters_[counter.first] = counter.second; + } +} + } // namespace stats } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/stats/stats.h b/tiledb/sm/stats/stats.h index 018ed721f637..47691c8d2943 100644 --- a/tiledb/sm/stats/stats.h +++ b/tiledb/sm/stats/stats.h @@ -51,6 +51,58 @@ namespace tiledb { namespace sm { namespace stats { +/** + * Class that holds measurement data that Stats objects can be + * initialized with. + */ +class StatsData { + public: + /* ****************************** */ + /* CONSTRUCTORS & DESTRUCTORS */ + /* ****************************** */ + + /* Default constructor */ + StatsData() = default; + + /** + * Value constructor. + * + * @param counters A map of counters + * @param timers A map of timers + */ + StatsData( + std::unordered_map& counters, + std::unordered_map& timers) + : counters_(counters) + , timers_(timers) { + } + + /* ****************************** */ + /* API */ + /* ****************************** */ + + /** Get a reference to internal counters */ + const std::unordered_map& counters() const { + return counters_; + } + + /** Get a reference to internal timers */ + const std::unordered_map& timers() const { + return timers_; + } + + private: + /* ****************************** */ + /* PRIVATE ATTRIBUTES */ + /* ****************************** */ + + /** Map of counters and values */ + std::unordered_map counters_; + + /** Map of timers and values */ + std::unordered_map timers_; +}; + /** * Class that defines stats counters and methods to manipulate them. 
*/ @@ -72,6 +124,14 @@ class Stats { */ Stats(const std::string& prefix); + /** + * Value constructor. + * + * @param prefix The stat name prefix. + * @param data Initial data to populate the Stats object with. + */ + Stats(const std::string& prefix, const StatsData& data); + /** Destructor. */ ~Stats() = default; @@ -116,11 +176,29 @@ class Stats { /** Creates a child instance, managed by this instance. */ Stats* create_child(const std::string& prefix); + /** + * Creates a child instance, managed by this instance, the instance is + * constructed with initial data. + * + * @param prefix The stat name prefix. + * @param data Initial data to populate the Stats object with. + */ + Stats* create_child(const std::string& prefix, const StatsData& data); + /** Return pointer to timers map, used for serialization only. */ - std::unordered_map* timers(); + const std::unordered_map* timers() const; /** Return pointer to conters map, used for serialization only. */ - std::unordered_map* counters(); + const std::unordered_map* counters() const; + + /** + * Populate the counters and timers internal maps from a StatsData object + * Please be aware that the data is not being added up, it will override the + * existing data on the Stats object. + * + * @param data Data to populate the stats with. 
+ */ + void populate_with_data(const StatsData& data); private: /* ****************************** */ diff --git a/tiledb/sm/storage_manager/context_resources.cc b/tiledb/sm/storage_manager/context_resources.cc index 1dd423a163ac..ee9f4df9ecf8 100644 --- a/tiledb/sm/storage_manager/context_resources.cc +++ b/tiledb/sm/storage_manager/context_resources.cc @@ -31,6 +31,7 @@ */ #include "tiledb/sm/storage_manager/context_resources.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/rest/rest_client.h" using namespace tiledb::common; @@ -53,12 +54,18 @@ ContextResources::ContextResources( size_t compute_thread_count, size_t io_thread_count, std::string stats_name) - : config_(config) + : memory_tracker_manager_(make_shared(HERE())) + , ephemeral_memory_tracker_(memory_tracker_manager_->create_tracker()) + , memory_tracker_reporter_(make_shared( + HERE(), config, memory_tracker_manager_)) + , config_(config) , logger_(logger) , compute_tp_(compute_thread_count) , io_tp_(io_thread_count) , stats_(make_shared(HERE(), stats_name)) , vfs_(stats_.get(), &compute_tp_, &io_tp_, config) { + ephemeral_memory_tracker_->set_type(MemoryTrackerType::EPHEMERAL); + /* * Explicitly register our `stats` object with the global. 
*/ @@ -72,11 +79,21 @@ ContextResources::ContextResources( auto server_address = config_.get("rest.server_address"); if (server_address) { auto client = tdb::make_shared(HERE()); - auto st = client->init(&stats(), &config_, &compute_tp(), logger_); + auto st = client->init(&stats(), &config_, &compute_tp(), logger_, *this); throw_if_not_ok(st); rest_client_ = client; } } + + memory_tracker_reporter_->start(); +} + +shared_ptr ContextResources::create_memory_tracker() const { + return memory_tracker_manager_->create_tracker(); +} + +shared_ptr ContextResources::ephemeral_memory_tracker() const { + return ephemeral_memory_tracker_; } } // namespace tiledb::sm diff --git a/tiledb/sm/storage_manager/context_resources.h b/tiledb/sm/storage_manager/context_resources.h index 3bf63e6fbcc8..dea51cfde416 100644 --- a/tiledb/sm/storage_manager/context_resources.h +++ b/tiledb/sm/storage_manager/context_resources.h @@ -44,6 +44,9 @@ using namespace tiledb::common; namespace tiledb::sm { +class MemoryTracker; +class MemoryTrackerManager; +class MemoryTrackerReporter; class RestClient; /** @@ -109,11 +112,40 @@ class ContextResources { return rest_client_; } + /** + * Create a new MemoryTracker + * + * @return The created MemoryTracker. + */ + shared_ptr create_memory_tracker() const; + + /** + * Return the ephemeral memory tracker. + * + * Use this tracker when you have a case where you need a memory tracker + * temporarily, without access to a more appropriate tracker. For instance, + * when using GenericTileIO when deserializing various objects we can use + * this for the GenericTileIO. Make sure to not confuse this with the + * memory tracker that might exists on what's being deserialized. + * + * @return The ephemeral MemoryTracker. + */ + shared_ptr ephemeral_memory_tracker() const; + private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The MemoryTrackerManager for this context. 
*/ + mutable shared_ptr memory_tracker_manager_; + + /** The ephemeral MemoryTracker. */ + mutable shared_ptr ephemeral_memory_tracker_; + + /** The MemoryTrackerReporter for this context. */ + mutable shared_ptr memory_tracker_reporter_; + /** The configuration for this ContextResources */ mutable Config config_; diff --git a/tiledb/sm/storage_manager/storage_manager.cc b/tiledb/sm/storage_manager/storage_manager.cc index 51649923d6a0..1a0cc587c89a 100644 --- a/tiledb/sm/storage_manager/storage_manager.cc +++ b/tiledb/sm/storage_manager/storage_manager.cc @@ -41,6 +41,7 @@ #include "tiledb/common/heap_memory.h" #include "tiledb/common/logger.h" #include "tiledb/common/memory.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/common/stdx_string.h" #include "tiledb/sm/array/array.h" #include "tiledb/sm/array/array_directory.h" @@ -382,7 +383,8 @@ Status StorageManager::array_evolve_schema( "' not exists")); } - auto&& array_schema = array_dir.load_array_schema_latest(encryption_key); + auto&& array_schema = array_dir.load_array_schema_latest( + encryption_key, resources_.ephemeral_memory_tracker()); // Load required enumerations before evolution. 
auto enmr_names = schema_evolution->enumeration_names_to_extend(); @@ -396,9 +398,8 @@ Status StorageManager::array_evolve_schema( enmr_paths.emplace_back(path); } - MemoryTracker tracker; auto loaded_enmrs = array_dir.load_enumerations_from_paths( - enmr_paths, encryption_key, tracker); + enmr_paths, encryption_key, resources_.create_memory_tracker()); for (auto enmr : loaded_enmrs) { array_schema->store_enumeration(enmr); @@ -456,7 +457,8 @@ Status StorageManagerCanonical::array_upgrade_version( static_cast(encryption_key_from_cfg.size()))); } - auto&& array_schema = array_dir.load_array_schema_latest(encryption_key_cfg); + auto&& array_schema = array_dir.load_array_schema_latest( + encryption_key_cfg, resources().ephemeral_memory_tracker()); if (array_schema->version() < constants::format_version) { array_schema->generate_uri(); @@ -1008,24 +1010,25 @@ StorageManagerCanonical::load_delete_and_update_conditions( auto& uri = locations[i].uri(); // Read the condition from storage. - auto&& tile = GenericTileIO::load( + auto tile = GenericTileIO::load( resources_, uri, locations[i].offset(), - *(opened_array.encryption_key())); + *(opened_array.encryption_key()), + resources_.ephemeral_memory_tracker()); if (tiledb::sm::utils::parse::ends_with( locations[i].condition_marker(), tiledb::sm::constants::delete_file_suffix)) { conditions[i] = tiledb::sm::deletes_and_updates::serialization::deserialize_condition( - i, locations[i].condition_marker(), tile.data(), tile.size()); + i, locations[i].condition_marker(), tile->data(), tile->size()); } else if (tiledb::sm::utils::parse::ends_with( locations[i].condition_marker(), tiledb::sm::constants::update_file_suffix)) { auto&& [cond, uvs] = tiledb::sm::deletes_and_updates::serialization:: deserialize_update_condition_and_values( - i, locations[i].condition_marker(), tile.data(), tile.size()); + i, locations[i].condition_marker(), tile->data(), tile->size()); conditions[i] = std::move(cond); update_values[i] = std::move(uvs); 
} else { @@ -1264,12 +1267,14 @@ Status StorageManagerCanonical::store_group_detail( SizeComputationSerializer size_computation_serializer; group->serialize(members, size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), + resources_.ephemeral_memory_tracker())}; - Serializer serializer(tile.data(), tile.size()); + Serializer serializer(tile->data(), tile->size()); group->serialize(members, serializer); - stats()->add_counter("write_group_size", tile.size()); + stats()->add_counter("write_group_size", tile->size()); // Check if the array schema directory exists // If not create it, this is caused by a pre-v10 array @@ -1279,8 +1284,7 @@ Status StorageManagerCanonical::store_group_detail( if (!group_detail_dir_exists) RETURN_NOT_OK(vfs()->create_dir(group_detail_folder_uri)); - RETURN_NOT_OK( - store_data_to_generic_tile(tile, group_detail_uri, encryption_key)); + GenericTileIO::store_data(resources_, group_detail_uri, tile, encryption_key); return Status::Ok(); } @@ -1294,11 +1298,13 @@ Status StorageManagerCanonical::store_array_schema( SizeComputationSerializer size_computation_serializer; array_schema->serialize(size_computation_serializer); - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; - Serializer serializer(tile.data(), tile.size()); + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), + resources_.ephemeral_memory_tracker())}; + Serializer serializer(tile->data(), tile->size()); array_schema->serialize(serializer); - stats()->add_counter("write_array_schema_size", tile.size()); + stats()->add_counter("write_array_schema_size", tile->size()); // Delete file if it exists already bool exists; @@ -1316,7 +1322,7 @@ Status StorageManagerCanonical::store_array_schema( if (!schema_dir_exists) RETURN_NOT_OK(vfs()->create_dir(array_schema_dir_uri)); - 
RETURN_NOT_OK(store_data_to_generic_tile(tile, schema_uri, encryption_key)); + GenericTileIO::store_data(resources_, schema_uri, tile, encryption_key); // Create the `__enumerations` directory under `__schema` if it doesn't // exist. This might happen if someone tries to add an enumeration to an @@ -1342,14 +1348,14 @@ Status StorageManagerCanonical::store_array_schema( SizeComputationSerializer enumeration_size_serializer; enmr->serialize(enumeration_size_serializer); - WriterTile tile{ - WriterTile::from_generic(enumeration_size_serializer.size())}; - Serializer serializer(tile.data(), tile.size()); + auto tile{WriterTile::from_generic( + enumeration_size_serializer.size(), + resources_.ephemeral_memory_tracker())}; + Serializer serializer(tile->data(), tile->size()); enmr->serialize(serializer); auto abs_enmr_uri = array_enumerations_dir_uri.join_path(enmr->path_name()); - RETURN_NOT_OK( - store_data_to_generic_tile(tile, abs_enmr_uri, encryption_key)); + GenericTileIO::store_data(resources_, abs_enmr_uri, tile, encryption_key); } return Status::Ok(); @@ -1372,28 +1378,22 @@ Status StorageManagerCanonical::store_metadata( if (0 == size_computation_serializer.size()) { return Status::Ok(); } - WriterTile tile{WriterTile::from_generic(size_computation_serializer.size())}; - Serializer serializer(tile.data(), tile.size()); + auto tile{WriterTile::from_generic( + size_computation_serializer.size(), + resources_.ephemeral_memory_tracker())}; + Serializer serializer(tile->data(), tile->size()); metadata->serialize(serializer); - stats()->add_counter("write_meta_size", serializer.size()); + stats()->add_counter("write_meta_size", size_computation_serializer.size()); // Create a metadata file name URI metadata_uri = metadata->get_uri(uri); - RETURN_NOT_OK(store_data_to_generic_tile(tile, metadata_uri, encryption_key)); + GenericTileIO::store_data(resources_, metadata_uri, tile, encryption_key); return Status::Ok(); } -Status 
StorageManagerCanonical::store_data_to_generic_tile( - WriterTile& tile, const URI& uri, const EncryptionKey& encryption_key) { - GenericTileIO tile_io(resources_, uri); - uint64_t nbytes = 0; - tile_io.write_generic(&tile, encryption_key, &nbytes); - return vfs()->close_file(uri); -} - void StorageManagerCanonical::wait_for_zero_in_progress() { std::unique_lock lck(queries_in_progress_mtx_); queries_in_progress_cv_.wait( diff --git a/tiledb/sm/storage_manager/storage_manager_canonical.h b/tiledb/sm/storage_manager/storage_manager_canonical.h index 50c374516430..4309f946e320 100644 --- a/tiledb/sm/storage_manager/storage_manager_canonical.h +++ b/tiledb/sm/storage_manager/storage_manager_canonical.h @@ -591,17 +591,6 @@ class StorageManagerCanonical { Status store_metadata( const URI& uri, const EncryptionKey& encryption_key, Metadata* metadata); - /** - * Stores data into persistent storage. - * - * @param tile Tile to store. - * @param uri The object URI. - * @param encryption_key The encryption key to use. 
- * @return Status - */ - Status store_data_to_generic_tile( - WriterTile& tile, const URI& uri, const EncryptionKey& encryption_key); - [[nodiscard]] inline ContextResources& resources() const { return resources_; } diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 486c88b9c949..d535b75e4900 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -527,10 +527,8 @@ Status Subarray::add_range_by_name( const void* start, const void* end, const void* stride) { - unsigned dim_idx; - RETURN_NOT_OK(array_->array_schema_latest().domain().get_dimension_index( - dim_name, &dim_idx)); - + unsigned dim_idx = + array_->array_schema_latest().domain().get_dimension_index(dim_name); return add_range(dim_idx, start, end, stride); } @@ -586,10 +584,8 @@ Status Subarray::add_range_var_by_name( uint64_t start_size, const void* end, uint64_t end_size) { - unsigned dim_idx; - RETURN_NOT_OK(array_->array_schema_latest().domain().get_dimension_index( - dim_name, &dim_idx)); - + unsigned dim_idx = + array_->array_schema_latest().domain().get_dimension_index(dim_name); return add_range_var(dim_idx, start, start_size, end, end_size); } @@ -697,9 +693,8 @@ Status Subarray::get_range_var( Status Subarray::get_range_num_from_name( const std::string& dim_name, uint64_t* range_num) const { - unsigned dim_idx; - RETURN_NOT_OK(array_->array_schema_latest().domain().get_dimension_index( - dim_name, &dim_idx)); + unsigned dim_idx = + array_->array_schema_latest().domain().get_dimension_index(dim_name); return get_range_num(dim_idx, range_num); } @@ -720,9 +715,8 @@ Status Subarray::get_range_from_name( const void** start, const void** end, const void** stride) const { - unsigned dim_idx; - RETURN_NOT_OK(array_->array_schema_latest().domain().get_dimension_index( - dim_name, &dim_idx)); + unsigned dim_idx = + array_->array_schema_latest().domain().get_dimension_index(dim_name); return get_range(dim_idx, range_idx, start, end, stride); } @@ 
-732,10 +726,8 @@ Status Subarray::get_range_var_size_from_name( uint64_t range_idx, uint64_t* start_size, uint64_t* end_size) const { - unsigned dim_idx; - RETURN_NOT_OK(array_->array_schema_latest().domain().get_dimension_index( - dim_name, &dim_idx)); - + unsigned dim_idx = + array_->array_schema_latest().domain().get_dimension_index(dim_name); return get_range_var_size(dim_idx, range_idx, start_size, end_size); } @@ -744,10 +736,8 @@ Status Subarray::get_range_var_from_name( uint64_t range_idx, void* start, void* end) const { - unsigned dim_idx; - RETURN_NOT_OK(array_->array_schema_latest().domain().get_dimension_index( - dim_name, &dim_idx)); - + unsigned dim_idx = + array_->array_schema_latest().domain().get_dimension_index(dim_name); return get_range_var(dim_idx, range_idx, start, end); } const shared_ptr Subarray::array() const { @@ -2131,6 +2121,29 @@ void Subarray::add_default_label_ranges(dimension_size_type dim_num) { label_range_subset_.resize(dim_num, nullopt); } +void Subarray::reset_default_ranges() { + if (array_->non_empty_domain_computed()) { + auto dim_num = array_->array_schema_latest().dim_num(); + auto& domain{array_->array_schema_latest().domain()}; + + // Process all dimensions one by one. + for (unsigned d = 0; d < dim_num; d++) { + // Only enter the check if there are only one range set on the dimension. + if (!is_default_[d] && range_subset_[d].num_ranges() == 1) { + // If the range set is the same as the non empty domain. + auto& ned = array_->non_empty_domain()[d]; + if (ned == range_subset_[d][0]) { + // Reset the default flag and reset the range subset to be default. 
+ is_default_[d] = true; + auto dim{domain.dimension_ptr(d)}; + range_subset_[d] = RangeSetAndSuperset( + dim->type(), dim->domain(), true, coalesce_ranges_); + } + } + } + } +} + void Subarray::compute_range_offsets() { range_offsets_.clear(); @@ -2187,7 +2200,7 @@ Status Subarray::compute_est_result_size( const auto& array_schema = array_->array_schema_latest(); auto attribute_num = array_schema.attribute_num(); auto dim_num = array_schema.dim_num(); - auto attributes = array_schema.attributes(); + auto& attributes = array_schema.attributes(); auto num = attribute_num + dim_num + 1; auto range_num = this->range_num(); @@ -3080,8 +3093,12 @@ RelevantFragments& Subarray::relevant_fragments() { return relevant_fragments_; } -stats::Stats* Subarray::stats() const { - return stats_; +const stats::Stats& Subarray::stats() const { + return *stats_; +} + +void Subarray::set_stats(const stats::StatsData& data) { + stats_->populate_with_data(data); } tuple> Subarray::non_overlapping_ranges_for_dim( diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index a16e92b376e0..bd36e8647700 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -205,6 +205,60 @@ class Subarray { uint64_t size_validity_; }; + /** + * Wrapper for optional> for + * cleaner data access. + */ + struct LabelRangeSubset { + public: + /** + * Default constructor is not C.41. + **/ + LabelRangeSubset() = delete; + + /** + * Constructor + * + * @param ref Dimension label the ranges will be set on. + * @param coalesce_ranges Set if ranges should be combined when adjacent. + */ + LabelRangeSubset(const DimensionLabel& ref, bool coalesce_ranges = true); + + /** + * Constructor + * + * @param name The name of the dimension label the ranges will be set on. + * @param type The type of the label the ranges will be set on. + * @param coalesce_ranges Set if ranges should be combined when adjacent. 
+ */ + LabelRangeSubset( + const std::string& name, Datatype type, bool coalesce_ranges = true); + + /** + * Constructor + * + * @param name The name of the dimension label the ranges will be set on. + * @param type The type of the label the ranges will be set on. + * @param ranges The range subset for the dimension label. + * @param coalesce_ranges Set if ranges should be combined when adjacent. + */ + LabelRangeSubset( + const std::string& name, + Datatype type, + std::vector ranges, + bool coalesce_ranges = true); + + inline const std::vector& get_ranges() const { + return ranges_.ranges(); + } + + /** Name of the dimension label. */ + std::string name_; + + /** The ranges set on the dimension label. */ + RangeSetAndSuperset ranges_; + }; + /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ @@ -1276,7 +1330,15 @@ class Subarray { std::vector* end_coords) const; /** Returns `stats_`. */ - stats::Stats* stats() const; + const stats::Stats& stats() const; + + /** + * Populate the owned stats instance with data. + * To be removed when the class will get a C41 constructor. + * + * @param data Data to populate the stats with. + */ + void set_stats(const stats::StatsData& data); /** Stores a vector of 1D ranges per dimension. */ std::vector> original_range_idx_; @@ -1306,6 +1368,20 @@ class Subarray { */ void add_default_label_ranges(dimension_size_type dim_num); + /** + * Reset ranges to default if possible before a read operation for sparse + * reads. We have a lot of optimizations in the sparse readers when no ranges + * are specified. Python will set ranges that are equal to the non empty + * domain, which will negate those optimizations. When the non empty domain is + * computed for the array, it is low performance cost to see if the ranges set + * are equal to the non empty domain. If they are, we can reset them to be + * default. 
+ */ + void reset_default_ranges(); + + /** Loads the R-Trees of all relevant fragments in parallel. */ + Status load_relevant_fragment_rtrees(ThreadPool* compute_tp) const; + private: /* ********************************* */ /* PRIVATE DATA TYPES */ @@ -1332,46 +1408,6 @@ class Subarray { uint64_t range_len_; }; - /** - * Wrapper for optional> for - * cleaner data access. - */ - struct LabelRangeSubset { - public: - /** - * Default constructor is not C.41. - **/ - LabelRangeSubset() = delete; - - /** - * Constructor - * - * @param ref Dimension label the ranges will be set on. - * @param coalesce_ranges Set if ranges should be combined when adjacent. - */ - LabelRangeSubset(const DimensionLabel& ref, bool coalesce_ranges = true); - - /** - * Constructor - * - * @param name The name of the dimension label the ranges will be set on. - * @param type The type of the label the ranges will be set on. - * @param coalesce_ranges Set if ranges should be combined when adjacent. - */ - LabelRangeSubset( - const std::string& name, Datatype type, bool coalesce_ranges = true); - - inline const std::vector& get_ranges() const { - return ranges_.ranges(); - } - - /** Name of the dimension label. */ - std::string name_; - - /** The ranges set on the dimension label. */ - RangeSetAndSuperset ranges_; - }; - /** * A hash function capable of hashing std::vector for use by * the tile_coords_map_ unordered_map for caching coords indices. @@ -1577,9 +1613,6 @@ class Subarray { */ void swap(Subarray& subarray); - /** Loads the R-Trees of all relevant fragments in parallel. */ - Status load_relevant_fragment_rtrees(ThreadPool* compute_tp) const; - /** * Computes the tile overlap for each range and relevant fragment. 
* diff --git a/tiledb/sm/subarray/subarray_partitioner.cc b/tiledb/sm/subarray/subarray_partitioner.cc index db6dab81cf42..e22fbf28ac51 100644 --- a/tiledb/sm/subarray/subarray_partitioner.cc +++ b/tiledb/sm/subarray/subarray_partitioner.cc @@ -666,8 +666,12 @@ Subarray& SubarrayPartitioner::subarray() { return subarray_; } -stats::Stats* SubarrayPartitioner::stats() const { - return stats_; +const stats::Stats& SubarrayPartitioner::stats() const { + return *stats_; +} + +void SubarrayPartitioner::set_stats(const stats::StatsData& data) { + stats_->populate_with_data(data); } /* ****************************** */ diff --git a/tiledb/sm/subarray/subarray_partitioner.h b/tiledb/sm/subarray/subarray_partitioner.h index 919b9a1b6261..2915fd7aefe7 100644 --- a/tiledb/sm/subarray/subarray_partitioner.h +++ b/tiledb/sm/subarray/subarray_partitioner.h @@ -333,7 +333,14 @@ class SubarrayPartitioner { Subarray& subarray(); /** Returns `stats_`. */ - stats::Stats* stats() const; + const stats::Stats& stats() const; + + /** Populate the owned stats instance with data. + * To be removed when the class will get a C41 constructor. + * + * @param data Data to populate the stats with. + */ + void set_stats(const stats::StatsData& data); private: /* ********************************* */ diff --git a/tiledb/sm/subarray/test/unit_add_ranges_list.cc b/tiledb/sm/subarray/test/unit_add_ranges_list.cc index c8168cc65ed7..040c7a129d06 100644 --- a/tiledb/sm/subarray/test/unit_add_ranges_list.cc +++ b/tiledb/sm/subarray/test/unit_add_ranges_list.cc @@ -37,11 +37,13 @@ #endif #include #include +#include #include #include #include #include +#include using namespace tiledb; using namespace tiledb::common; @@ -51,14 +53,23 @@ using namespace tiledb::type; TEST_CASE("Subarray::add_ranges_list", "[subarray]") { // Setup an Array needed to construct the Subarray for testing // add_ranges_list. 
+ auto memory_tracker = tiledb::test::create_test_memory_tracker(); std::shared_ptr sp_dim1 = - make_shared(HERE(), "d1", Datatype::INT64); + make_shared( + HERE(), + "d1", + Datatype::INT64, + tiledb::test::get_test_memory_tracker()); std::shared_ptr sp_dim2 = - make_shared(HERE(), "d2", Datatype::INT64); + make_shared( + HERE(), + "d2", + Datatype::INT64, + tiledb::test::get_test_memory_tracker()); uint64_t tile_extents[] = {2, 2}; std::vector> dims{sp_dim1, sp_dim2}; std::shared_ptr sp_dom = make_shared( - HERE(), Layout::ROW_MAJOR, dims, Layout::ROW_MAJOR); + HERE(), Layout::ROW_MAJOR, dims, Layout::ROW_MAJOR, memory_tracker); uint64_t local_DIM_DOMAIN[4] = {1, 12, 1, 12}; CHECK(sp_dim1->set_domain(&local_DIM_DOMAIN[0]).ok()); CHECK(sp_dim2->set_domain(&local_DIM_DOMAIN[2]).ok()); @@ -66,9 +77,13 @@ TEST_CASE("Subarray::add_ranges_list", "[subarray]") { CHECK(sp_dim2->set_tile_extent(&tile_extents[1]).ok()); std::shared_ptr sp_attrib = make_shared(HERE(), "a1", Datatype::INT32); - tiledb::sm::Domain dom{Layout::ROW_MAJOR, dims, Layout::ROW_MAJOR}; + tiledb::sm::Domain dom{ + Layout::ROW_MAJOR, dims, Layout::ROW_MAJOR, memory_tracker}; std::shared_ptr sp_as = - make_shared(HERE()); + make_shared( + HERE(), + tiledb::sm::ArrayType::DENSE, + tiledb::test::create_test_memory_tracker()); CHECK(sp_as->set_domain(sp_dom).ok()); CHECK(sp_as->add_attribute(sp_attrib).ok()); tiledb::sm::Config cfg; diff --git a/tiledb/sm/tile/generic_tile_io.cc b/tiledb/sm/tile/generic_tile_io.cc index 5968114d7866..976f333291c5 100644 --- a/tiledb/sm/tile/generic_tile_io.cc +++ b/tiledb/sm/tile/generic_tile_io.cc @@ -33,6 +33,7 @@ #include "tiledb/sm/tile/generic_tile_io.h" #include "tiledb/common/heap_memory.h" #include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/common/unreachable.h" #include "tiledb/sm/crypto/encryption_key.h" #include "tiledb/sm/filesystem/vfs.h" @@ -67,28 +68,32 @@ GenericTileIO::GenericTileIO(ContextResources& resources, 
const URI& uri) /* API */ /* ****************************** */ -Tile GenericTileIO::load( +shared_ptr GenericTileIO::load( ContextResources& resources, const URI& uri, uint64_t offset, - const EncryptionKey& encryption_key) { + const EncryptionKey& encryption_key, + shared_ptr memory_tracker) { GenericTileIO tile_io(resources, uri); // Get encryption key from config if (encryption_key.encryption_type() == EncryptionType::NO_ENCRYPTION) { EncryptionKey cfg_enc_key(resources.config()); - return tile_io.read_generic(offset, cfg_enc_key, resources.config()); + return tile_io.read_generic( + offset, cfg_enc_key, resources.config(), memory_tracker); } else { - return tile_io.read_generic(offset, encryption_key, resources.config()); + return tile_io.read_generic( + offset, encryption_key, resources.config(), memory_tracker); } stdx::unreachable(); } -Tile GenericTileIO::read_generic( +shared_ptr GenericTileIO::read_generic( uint64_t file_offset, const EncryptionKey& encryption_key, - const Config& config) { + const Config& config, + shared_ptr memory_tracker) { auto&& header = read_generic_tile_header(resources_, uri_, file_offset); if (encryption_key.encryption_type() != @@ -106,26 +111,28 @@ Tile GenericTileIO::read_generic( GenericTileHeader::BASE_SIZE + header.filter_pipeline_size; std::vector filtered_data(header.persisted_size); - Tile tile( + shared_ptr tile = make_shared( + HERE(), header.version_number, (Datatype)header.datatype, header.cell_size, 0, header.tile_size, filtered_data.data(), - header.persisted_size); + header.persisted_size, + memory_tracker->get_resource(MemoryType::GENERIC_TILE_IO)); // Read the tile. 
throw_if_not_ok(resources_.vfs().read( uri_, file_offset + tile_data_offset, - tile.filtered_data(), + tile->filtered_data(), header.persisted_size)); // Unfilter - assert(tile.filtered()); - header.filters.run_reverse_generic_tile(&resources_.stats(), tile, config); - assert(!tile.filtered()); + assert(tile->filtered()); + header.filters.run_reverse_generic_tile(&resources_.stats(), *tile, config); + assert(!tile->filtered()); return tile; } @@ -168,16 +175,29 @@ GenericTileIO::GenericTileHeader GenericTileIO::read_generic_tile_header( return header; } +void GenericTileIO::store_data( + ContextResources& resources, + const URI& uri, + shared_ptr tile, + const EncryptionKey& encryption_key) { + GenericTileIO tile_io(resources, uri); + uint64_t nbytes = 0; + tile_io.write_generic(tile, encryption_key, &nbytes); + throw_if_not_ok(resources.vfs().close_file(uri)); +} + void GenericTileIO::write_generic( - WriterTile* tile, const EncryptionKey& encryption_key, uint64_t* nbytes) { + shared_ptr tile, + const EncryptionKey& encryption_key, + uint64_t* nbytes) { // Create a header GenericTileHeader header; - init_generic_tile_header(tile, &header, encryption_key); + init_generic_tile_header(tile.get(), &header, encryption_key); // Filter tile assert(!tile->filtered()); throw_if_not_ok(header.filters.run_forward( - &resources_.stats(), tile, nullptr, &resources_.compute_tp())); + &resources_.stats(), tile.get(), nullptr, &resources_.compute_tp())); header.persisted_size = tile->filtered_buffer().size(); assert(tile->filtered()); diff --git a/tiledb/sm/tile/generic_tile_io.h b/tiledb/sm/tile/generic_tile_io.h index 7410784df30f..ad75af43f81e 100644 --- a/tiledb/sm/tile/generic_tile_io.h +++ b/tiledb/sm/tile/generic_tile_io.h @@ -120,13 +120,14 @@ class GenericTileIO { * @param uri The object URI. * @param offset The offset into the file to read from. * @param encryption_key The encryption key to use. - * @return Status, Tile with the data. + * @return Tile with the data. 
*/ - static Tile load( + static shared_ptr load( ContextResources& resources, const URI& uri, uint64_t offset, - const EncryptionKey& encryption_key); + const EncryptionKey& encryption_key, + shared_ptr memory_tracker); /** * Reads a generic tile from the file. A generic tile is a tile residing @@ -141,12 +142,13 @@ class GenericTileIO { * @param file_offset The offset in the file to read from. * @param encryption_key The encryption key to use. * @param config The storage manager's config. - * @return Status, Tile + * @return Tile */ - Tile read_generic( + shared_ptr read_generic( uint64_t file_offset, const EncryptionKey& encryption_key, - const Config& config); + const Config& config, + shared_ptr memory_tracker); /** * Reads the generic tile header from the file. @@ -154,13 +156,25 @@ class GenericTileIO { * @param resources The ContextResources instance to use for reading. * @param uri The URI of the generic tile. * @param file_offset The offset where the header read will begin. - * @param encryption_key If the array is encrypted, the private encryption - * key. For unencrypted arrays, pass `nullptr`. - * @return Status, Header + * @return Header */ static GenericTileHeader read_generic_tile_header( ContextResources& resources, const URI& uri, uint64_t file_offset); + /** + * Writes a generic tile to a file. + * + * @param resources The ContextResources instance to use for writing. + * @param uri The URI of the generic tile. + * @param tile The tile to write. + * @param encryption_key The encryption key to use. + */ + static void store_data( + ContextResources& resources, + const URI& uri, + shared_ptr tile, + const EncryptionKey& encryption_key); + /** * Writes a tile generically to the file. This means that a header will be * prepended to the file before writing the tile contents. The reason is @@ -170,10 +184,11 @@ class GenericTileIO { * @param tile The tile to be written. * @param encryption_key The encryption key to use. 
* @param nbytes The total number of bytes written to the file. - * @return Status */ void write_generic( - WriterTile* tile, const EncryptionKey& encryption_key, uint64_t* nbytes); + shared_ptr tile, + const EncryptionKey& encryption_key, + uint64_t* nbytes); /** * Serialize a generic tile header. diff --git a/tiledb/sm/tile/test/CMakeLists.txt b/tiledb/sm/tile/test/CMakeLists.txt index fceae274d674..6feeb4eafb18 100644 --- a/tiledb/sm/tile/test/CMakeLists.txt +++ b/tiledb/sm/tile/test/CMakeLists.txt @@ -27,6 +27,9 @@ include(unit_test) commence(unit_test tile) - this_target_object_libraries(tile) - this_target_sources(main.cc unit_tile.cc) + this_target_sources( + main.cc + unit_tile.cc + ) + this_target_object_libraries(tile mem_helpers) conclude(unit_test) diff --git a/tiledb/sm/tile/test/unit_tile.cc b/tiledb/sm/tile/test/unit_tile.cc index 1c6b549d89b5..35754dbbfe6a 100644 --- a/tiledb/sm/tile/test/unit_tile.cc +++ b/tiledb/sm/tile/test/unit_tile.cc @@ -30,6 +30,7 @@ * Tests the `Tile` class. */ +#include "test/support/src/mem_helpers.h" #include "tiledb/sm/enums/datatype.h" #include "tiledb/sm/tile/tile.h" @@ -39,6 +40,9 @@ using namespace tiledb::sm; TEST_CASE("Tile: Test basic IO", "[Tile][basic_io]") { + // Create our test memory tracker. + auto tracker = tiledb::test::create_test_memory_tracker(); + // Initialize the test Tile. const format_version_t format_version = 0; const Datatype data_type = Datatype::UINT32; @@ -46,7 +50,14 @@ TEST_CASE("Tile: Test basic IO", "[Tile][basic_io]") { const uint64_t cell_size = sizeof(uint32_t); const unsigned int dim_num = 1; Tile tile( - format_version, data_type, cell_size, dim_num, tile_size, nullptr, 0); + format_version, + data_type, + cell_size, + dim_num, + tile_size, + nullptr, + 0, + tracker); CHECK(tile.size() == tile_size); // Create a buffer to write to the test Tile. 
@@ -114,83 +125,3 @@ TEST_CASE("Tile: Test basic IO", "[Tile][basic_io]") { CHECK_NOTHROW(tile.read(read_buffer.data(), read_offset, tile_size)); CHECK(memcmp(read_buffer.data(), write_buffer_copy.data(), tile_size) == 0); } - -TEST_CASE("Tile: Test move constructor", "[Tile][move_constructor]") { - // Instantiate and initialize the first test Tile. - const format_version_t format_version = 0; - const Datatype data_type = Datatype::UINT32; - const uint64_t tile_size = 1024 * 1024; - const uint64_t cell_size = sizeof(uint32_t); - const unsigned int dim_num = 1; - Tile tile1( - format_version, data_type, cell_size, dim_num, tile_size, nullptr, 0); - - // Create a buffer to write to the first test Tile. - const uint32_t buffer_len = tile_size / sizeof(uint32_t); - std::vector buffer(buffer_len); - for (uint32_t i = 0; i < buffer_len; ++i) { - buffer[i] = i; - } - - // Write the buffer to the first test Tile. - CHECK_NOTHROW(tile1.write(buffer.data(), 0, tile_size)); - - // Instantiate a second test tile with the move constructor. - Tile tile2(std::move(tile1)); - - // Verify all public attributes are identical. - CHECK(tile2.cell_size() == cell_size); - CHECK(tile2.zipped_coords_dim_num() == dim_num); - CHECK(tile2.filtered() == false); - CHECK(tile2.format_version() == format_version); - CHECK(tile2.size() == tile_size); - CHECK(tile2.stores_coords() == true); - CHECK(tile2.type() == Datatype::UINT32); - - // Read the second test tile to verify it contains the data - // written to the first test tile. - std::vector read_buffer(buffer_len); - uint64_t read_offset = 0; - CHECK_NOTHROW(tile2.read(read_buffer.data(), read_offset, tile_size)); - CHECK(memcmp(read_buffer.data(), buffer.data(), tile_size) == 0); -} - -TEST_CASE("Tile: Test move-assignment", "[Tile][move_assignment]") { - // Instantiate and initialize the first test Tile. 
- const format_version_t format_version = 0; - const Datatype data_type = Datatype::UINT32; - const uint64_t tile_size = 1024 * 1024; - const uint64_t cell_size = sizeof(uint32_t); - const unsigned int dim_num = 1; - Tile tile1( - format_version, data_type, cell_size, dim_num, tile_size, nullptr, 0); - - // Create a buffer to write to the first test Tile. - const uint32_t buffer_len = tile_size / sizeof(uint32_t); - std::vector buffer(buffer_len); - for (uint32_t i = 0; i < buffer_len; ++i) { - buffer[i] = i; - } - - // Write the buffer to the first test Tile. - CHECK_NOTHROW(tile1.write(buffer.data(), 0, tile_size)); - - // Instantiate a third test tile with the move constructor. - Tile tile2 = std::move(tile1); - - // Verify all public attributes are identical. - CHECK(tile2.cell_size() == cell_size); - CHECK(tile2.zipped_coords_dim_num() == dim_num); - CHECK(tile2.filtered() == false); - CHECK(tile2.format_version() == format_version); - CHECK(tile2.size() == tile_size); - CHECK(tile2.stores_coords() == true); - CHECK(tile2.type() == Datatype::UINT32); - - // Read the second test tile to verify it contains the data - // written to the first test tile. 
- std::vector read_buffer(buffer_len); - uint64_t read_offset = 0; - CHECK_NOTHROW(tile2.read(read_buffer.data(), read_offset, tile_size)); - CHECK(memcmp(read_buffer.data(), buffer.data(), tile_size) == 0); -} diff --git a/tiledb/sm/tile/tile.cc b/tiledb/sm/tile/tile.cc index 76bd8af3f06a..b678cdfdbbbb 100644 --- a/tiledb/sm/tile/tile.cc +++ b/tiledb/sm/tile/tile.cc @@ -33,12 +33,10 @@ #include "tiledb/sm/tile/tile.h" #include "tiledb/common/exception/exception.h" #include "tiledb/common/heap_memory.h" -#include "tiledb/common/logger.h" +#include "tiledb/common/memory_tracker.h" #include "tiledb/sm/enums/datatype.h" #include "tiledb/storage_format/serialization/serializers.h" -#include - using namespace tiledb::common; namespace tiledb { @@ -60,23 +58,29 @@ uint64_t WriterTile::max_tile_chunk_size_ = constants::max_tile_chunk_size; /* STATIC API */ /* ****************************** */ -Tile Tile::from_generic(storage_size_t tile_size) { - return { +shared_ptr Tile::from_generic( + storage_size_t tile_size, shared_ptr memory_tracker) { + return make_shared( + HERE(), 0, constants::generic_tile_datatype, constants::generic_tile_cell_size, 0, tile_size, nullptr, - 0}; + 0, + memory_tracker->get_resource(MemoryType::GENERIC_TILE_IO)); } -WriterTile WriterTile::from_generic(storage_size_t tile_size) { - return { +shared_ptr WriterTile::from_generic( + storage_size_t tile_size, shared_ptr memory_tracker) { + return make_shared( + HERE(), 0, constants::generic_tile_datatype, constants::generic_tile_cell_size, - tile_size}; + tile_size, + memory_tracker->get_resource(MemoryType::GENERIC_TILE_IO)); } uint32_t WriterTile::compute_chunk_size( @@ -106,8 +110,10 @@ TileBase::TileBase( const format_version_t format_version, const Datatype type, const uint64_t cell_size, - const uint64_t size) - : data_(static_cast(tdb_malloc(size)), tiledb_free) + const uint64_t size, + tdb::pmr::memory_resource* resource) + : resource_(resource) + , data_(tdb::pmr::make_unique(resource_, 
size)) , size_(size) , cell_size_(cell_size) , format_version_(format_version) @@ -122,19 +128,24 @@ TileBase::TileBase( } } -TileBase::TileBase(TileBase&& tile) - : data_(std::move(tile.data_)) - , size_(std::move(tile.size_)) - , cell_size_(std::move(tile.cell_size_)) - , format_version_(std::move(tile.format_version_)) - , type_(std::move(tile.type_)) { -} - -TileBase& TileBase::operator=(TileBase&& tile) { - // Swap with the argument - swap(tile); - - return *this; +Tile::Tile( + const format_version_t format_version, + const Datatype type, + const uint64_t cell_size, + const unsigned int zipped_coords_dim_num, + const uint64_t size, + void* filtered_data, + uint64_t filtered_size, + shared_ptr memory_tracker) + : Tile( + format_version, + type, + cell_size, + zipped_coords_dim_num, + size, + filtered_data, + filtered_size, + memory_tracker->get_resource(MemoryType::TILE_DATA)) { } Tile::Tile( @@ -144,60 +155,43 @@ Tile::Tile( const unsigned int zipped_coords_dim_num, const uint64_t size, void* filtered_data, - uint64_t filtered_size) - : TileBase(format_version, type, cell_size, size) + uint64_t filtered_size, + tdb::pmr::memory_resource* resource) + : TileBase(format_version, type, cell_size, size, resource) , zipped_coords_dim_num_(zipped_coords_dim_num) , filtered_data_(filtered_data) , filtered_size_(filtered_size) { } -Tile::Tile(Tile&& tile) - : TileBase(std::move(tile)) - , zipped_coords_dim_num_(std::move(tile.zipped_coords_dim_num_)) - , filtered_data_(std::move(tile.filtered_data_)) - , filtered_size_(std::move(tile.filtered_size_)) { -} - -Tile& Tile::operator=(Tile&& tile) { - // Swap with the argument - swap(tile); - - return *this; -} - WriterTile::WriterTile( const format_version_t format_version, const Datatype type, const uint64_t cell_size, - const uint64_t size) - : TileBase(format_version, type, cell_size, size) + const uint64_t size, + shared_ptr memory_tracker) + : TileBase( + format_version, + type, + cell_size, + size, + 
memory_tracker->get_resource(MemoryType::TILE_WRITER_DATA)) , filtered_buffer_(0) { } -WriterTile::WriterTile(WriterTile&& tile) - : TileBase(std::move(tile)) - , filtered_buffer_(std::move(tile.filtered_buffer_)) { -} - -WriterTile& WriterTile::operator=(WriterTile&& tile) { - // Swap with the argument - swap(tile); - - return *this; +WriterTile::WriterTile( + const format_version_t format_version, + const Datatype type, + const uint64_t cell_size, + const uint64_t size, + tdb::pmr::memory_resource* resource) + : TileBase(format_version, type, cell_size, size, resource) + , filtered_buffer_(0) { } /* ****************************** */ /* API */ /* ****************************** */ -void TileBase::swap(TileBase& tile) { - std::swap(size_, tile.size_); - std::swap(data_, tile.data_); - std::swap(cell_size_, tile.cell_size_); - std::swap(format_version_, tile.format_version_); - std::swap(type_, tile.type_); -} - void TileBase::read( void* const buffer, const uint64_t offset, const uint64_t nbytes) const { if (nbytes > size_ - offset) { @@ -256,13 +250,6 @@ uint64_t Tile::load_offsets_chunk_data(ChunkData& chunk_data) { return load_chunk_data(chunk_data, s - 8); } -void Tile::swap(Tile& tile) { - TileBase::swap(tile); - std::swap(filtered_data_, tile.filtered_data_); - std::swap(filtered_size_, tile.filtered_size_); - std::swap(zipped_coords_dim_num_, tile.zipped_coords_dim_num_); -} - void WriterTile::clear_data() { data_ = nullptr; size_ = 0; @@ -274,23 +261,21 @@ void WriterTile::write_var(const void* data, uint64_t offset, uint64_t nbytes) { while (new_alloc_size < offset + nbytes) new_alloc_size *= 2; - auto new_data = - static_cast(tdb_realloc(data_.release(), new_alloc_size)); + auto new_data = tdb::pmr::make_unique(resource_, new_alloc_size); + if (new_data == nullptr) { throw TileException("Cannot reallocate buffer; Memory allocation failed"); } - data_.reset(new_data); + + std::memcpy(new_data.get(), data_.get(), std::min(size_, new_alloc_size)); + + data_ = 
std::move(new_data); size_ = new_alloc_size; } write(data, offset, nbytes); } -void WriterTile::swap(WriterTile& tile) { - TileBase::swap(tile); - std::swap(filtered_buffer_, tile.filtered_buffer_); -} - /* ********************************* */ /* PRIVATE FUNCTIONS */ /* ********************************* */ diff --git a/tiledb/sm/tile/tile.h b/tiledb/sm/tile/tile.h index 0bfcd906d4e3..cfd1bea986c2 100644 --- a/tiledb/sm/tile/tile.h +++ b/tiledb/sm/tile/tile.h @@ -34,6 +34,7 @@ #define TILEDB_TILE_H #include "tiledb/common/common.h" +#include "tiledb/common/pmr.h" #include "tiledb/common/status.h" #include "tiledb/sm/array_schema/attribute.h" #include "tiledb/sm/tile/filtered_buffer.h" @@ -46,6 +47,8 @@ using namespace tiledb::common; namespace tiledb { namespace sm { +class MemoryTracker; + /** * Base class for common code between Tile and WriterTile objects. */ @@ -58,20 +61,17 @@ class TileBase { * @param type The data type. * @param cell_size The cell size. * @param size The size of the tile. + * @param resource The memory resource to use. */ TileBase( const format_version_t format_version, const Datatype type, const uint64_t cell_size, - const uint64_t size); - - /** Move constructor. */ - TileBase(TileBase&& tile); - - /** Move-assign operator. */ - TileBase& operator=(TileBase&& tile); + const uint64_t size, + tdb::pmr::memory_resource* resource); DISABLE_COPY_AND_COPY_ASSIGN(TileBase); + DISABLE_MOVE_AND_MOVE_ASSIGN(TileBase); /* ********************************* */ /* API */ @@ -139,21 +139,16 @@ class TileBase { data_as()[size_ / cell_size_ - 1] = var_tile.size(); } - /** Swaps the contents (all field values) of this tile with the given tile. */ - void swap(TileBase& tile); - protected: /* ********************************* */ /* PROTECTED ATTRIBUTES */ /* ********************************* */ - /** - * The buffer backing the tile data. 
- * - * TODO: Convert to regular allocations once tdb_realloc is not used for var - * size data anymore and remove custom deleter. - */ - std::unique_ptr data_; + /** The memory resource to use. */ + tdb::pmr::memory_resource* resource_; + + /** The buffer backing the tile data. */ + tdb::pmr::unique_ptr data_; /** Size of the data. */ uint64_t size_; @@ -179,7 +174,8 @@ class Tile : public TileBase { * * @param tile_size to be provided to init_unfiltered call */ - static Tile from_generic(storage_size_t tile_size); + static shared_ptr from_generic( + storage_size_t tile_size, shared_ptr memory_tracker); /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ @@ -204,14 +200,33 @@ class Tile : public TileBase { const unsigned int zipped_coords_dim_num, const uint64_t size, void* filtered_data, - uint64_t filtered_size); + uint64_t filtered_size, + shared_ptr memory_tracker); - /** Move constructor. */ - Tile(Tile&& tile); - - /** Move-assign operator. */ - Tile& operator=(Tile&& tile); + /** + * Constructor. + * + * @param format_version The format version. + * @param type The data type. + * @param cell_size The cell size. + * @param zipped_coords_dim_num The number of dimensions in case the tile + * stores coordinates. + * @param size The size of the tile. + * @param filtered_data Pointer to the external filtered data. + * @param filtered_size The filtered size to allocate. + * @param resource The memory resource to use. 
+ */ + Tile( + const format_version_t format_version, + const Datatype type, + const uint64_t cell_size, + const unsigned int zipped_coords_dim_num, + const uint64_t size, + void* filtered_data, + uint64_t filtered_size, + tdb::pmr::memory_resource* resource); + DISABLE_MOVE_AND_MOVE_ASSIGN(Tile); DISABLE_COPY_AND_COPY_ASSIGN(Tile); /* ********************************* */ @@ -284,9 +299,6 @@ class Tile : public TileBase { */ uint64_t load_offsets_chunk_data(ChunkData& chunk_data); - /** Swaps the contents (all field values) of this tile with the given tile. */ - void swap(Tile& tile); - private: /* ********************************* */ /* PRIVATE FUNCTIONS */ @@ -354,8 +366,10 @@ class WriterTile : public TileBase { * generic data storage. * * @param tile_size to be provided to init_unfiltered call + * @param memory_tracker The memory tracker to use. */ - static WriterTile from_generic(storage_size_t tile_size); + static shared_ptr from_generic( + storage_size_t tile_size, shared_ptr memory_tracker); /** * Computes the chunk size for a tile. @@ -385,20 +399,33 @@ class WriterTile : public TileBase { * @param type The data type. * @param cell_size The cell size. * @param size The size of the tile. + * @param memory_tracker The memory tracker to use. */ WriterTile( const format_version_t format_version, const Datatype type, const uint64_t cell_size, - const uint64_t size); - - /** Move constructor. */ - WriterTile(WriterTile&& tile); + const uint64_t size, + shared_ptr memory_tracker); - /** Move-assign operator. */ - WriterTile& operator=(WriterTile&& tile); + /** + * Constructor. + * + * @param format_version The format version. + * @param type The data type. + * @param cell_size The cell size. + * @param size The size of the tile. + * @param resource The memory resource to use.
+ */ + WriterTile( + const format_version_t format_version, + const Datatype type, + const uint64_t cell_size, + const uint64_t size, + tdb::pmr::memory_resource* resource); DISABLE_COPY_AND_COPY_ASSIGN(WriterTile); + DISABLE_MOVE_AND_MOVE_ASSIGN(WriterTile); /* ********************************* */ /* API */ @@ -445,9 +472,6 @@ class WriterTile : public TileBase { size_ = size; } - /** Swaps the contents (all field values) of this tile with the given tile. */ - void swap(WriterTile& tile); - private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ @@ -473,13 +497,13 @@ class WriterTile : public TileBase { */ class TileDeserializer : public Deserializer { public: - explicit TileDeserializer(Tile&& tile) - : Deserializer(tile.data(), tile.size()) - , tile_(std::move(tile)) { + explicit TileDeserializer(shared_ptr tile) + : Deserializer(tile->data(), tile->size()) + , tile_(tile) { } private: - Tile tile_; + shared_ptr tile_; }; } // namespace sm diff --git a/tiledb/sm/tile/writer_tile_tuple.cc b/tiledb/sm/tile/writer_tile_tuple.cc index dcd330b99b6d..9ce07d20f95c 100644 --- a/tiledb/sm/tile/writer_tile_tuple.cc +++ b/tiledb/sm/tile/writer_tile_tuple.cc @@ -48,60 +48,39 @@ WriterTileTuple::WriterTileTuple( const bool var_size, const bool nullable, const uint64_t cell_size, - const Datatype type) - : fixed_tile_( - var_size ? WriterTile( - array_schema.write_version(), - constants::cell_var_offset_type, - constants::cell_var_offset_size, - cell_num_per_tile * constants::cell_var_offset_size) : - WriterTile( - array_schema.write_version(), - type, - cell_size, - cell_num_per_tile * cell_size)) - , var_tile_( - var_size ? std::optional(WriterTile( - array_schema.write_version(), - type, - datatype_size(type), - cell_num_per_tile * constants::cell_var_offset_size)) : - std::nullopt) - , validity_tile_( - nullable ? 
std::optional(WriterTile( - array_schema.write_version(), - constants::cell_validity_type, - constants::cell_validity_size, - cell_num_per_tile * constants::cell_validity_size)) : - std::nullopt) + const Datatype type, + shared_ptr memory_tracker) + : memory_tracker_(memory_tracker) + , fixed_tile_( + array_schema.write_version(), + var_size ? constants::cell_var_offset_type : type, + var_size ? constants::cell_var_offset_size : cell_size, + var_size ? cell_num_per_tile * constants::cell_var_offset_size : + cell_num_per_tile * cell_size, + memory_tracker_) , cell_size_(cell_size) , var_pre_filtered_size_(0) , min_size_(0) , max_size_(0) , null_count_(0) , cell_num_(cell_num_per_tile) { -} - -WriterTileTuple::WriterTileTuple(WriterTileTuple&& tile) - : fixed_tile_(std::move(tile.fixed_tile_)) - , var_tile_(std::move(tile.var_tile_)) - , validity_tile_(std::move(tile.validity_tile_)) - , cell_size_(std::move(tile.cell_size_)) - , var_pre_filtered_size_(std::move(tile.var_pre_filtered_size_)) - , min_(std::move(tile.min_)) - , min_size_(std::move(tile.min_size_)) - , max_(std::move(tile.max_)) - , max_size_(std::move(tile.max_size_)) - , sum_(std::move(tile.sum_)) - , null_count_(std::move(tile.null_count_)) - , cell_num_(std::move(tile.cell_num_)) { -} - -WriterTileTuple& WriterTileTuple::operator=(WriterTileTuple&& tile) { - // Swap with the argument - swap(tile); + if (var_size) { + var_tile_.emplace( + array_schema.write_version(), + type, + datatype_size(type), + cell_num_per_tile * constants::cell_var_offset_size, + memory_tracker_); + } - return *this; + if (nullable) { + validity_tile_.emplace( + array_schema.write_version(), + constants::cell_validity_type, + constants::cell_validity_size, + cell_num_per_tile * constants::cell_validity_size, + memory_tracker_); + } } /* ****************************** */ @@ -135,20 +114,5 @@ void WriterTileTuple::set_metadata( } } -void WriterTileTuple::swap(WriterTileTuple& tile) { - std::swap(fixed_tile_, tile.fixed_tile_); 
- std::swap(var_tile_, tile.var_tile_); - std::swap(validity_tile_, tile.validity_tile_); - std::swap(cell_size_, tile.cell_size_); - std::swap(var_pre_filtered_size_, tile.var_pre_filtered_size_); - std::swap(min_, tile.min_); - std::swap(min_size_, tile.min_size_); - std::swap(max_, tile.max_); - std::swap(max_size_, tile.max_size_); - std::swap(sum_, tile.sum_); - std::swap(null_count_, tile.null_count_); - std::swap(cell_num_, tile.cell_num_); -} - } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/tile/writer_tile_tuple.h b/tiledb/sm/tile/writer_tile_tuple.h index 13364df1a669..850a17b9ca90 100644 --- a/tiledb/sm/tile/writer_tile_tuple.h +++ b/tiledb/sm/tile/writer_tile_tuple.h @@ -56,15 +56,11 @@ class WriterTileTuple { const bool var_size, const bool nullable, const uint64_t cell_size, - const Datatype type); - - /** Move constructor. */ - WriterTileTuple(WriterTileTuple&& tile); - - /** Move-assign operator. */ - WriterTileTuple& operator=(WriterTileTuple&& tile); + const Datatype type, + shared_ptr memory_tracker); DISABLE_COPY_AND_COPY_ASSIGN(WriterTileTuple); + DISABLE_MOVE_AND_MOVE_ASSIGN(WriterTileTuple); /* ********************************* */ /* API */ @@ -215,14 +211,14 @@ class WriterTileTuple { return cell_num_; } - /** Swaps the contents (all field values) of this tile with the given tile. */ - void swap(WriterTileTuple& tile); - private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The memory tracker. */ + shared_ptr memory_tracker_; + /** * Fixed data tile. Contains offsets for var size attribute/dimension and * the data itself in case of fixed sized attribute/dimension. 
diff --git a/tiledb/storage_format/uri/CMakeLists.txt b/tiledb/storage_format/uri/CMakeLists.txt index b0b7949accf7..e92ebae65092 100644 --- a/tiledb/storage_format/uri/CMakeLists.txt +++ b/tiledb/storage_format/uri/CMakeLists.txt @@ -31,7 +31,7 @@ include(object_library) # commence(object_library uri_format) this_target_sources(parse_uri.cc generate_uri.cc) - this_target_object_libraries(baseline fragment time vfs) + this_target_object_libraries(baseline fragment time uuid vfs) conclude(object_library) add_test_subdirectory() diff --git a/tiledb/storage_format/uri/generate_uri.cc b/tiledb/storage_format/uri/generate_uri.cc index d823936c13d2..fe0e0a9738aa 100644 --- a/tiledb/storage_format/uri/generate_uri.cc +++ b/tiledb/storage_format/uri/generate_uri.cc @@ -27,9 +27,9 @@ */ #include "tiledb/storage_format/uri/generate_uri.h" -#include "tiledb/common/random/random_label.h" #include "tiledb/sm/fragment/fragment_identifier.h" #include "tiledb/sm/misc/tdb_time.h" +#include "tiledb/sm/misc/uuid.h" #include "tiledb/storage_format/uri/parse_uri.h" #include @@ -42,6 +42,9 @@ std::string generate_timestamped_name( uint64_t timestamp_start, uint64_t timestamp_end, std::optional version) { + std::string uuid; + throw_if_not_ok(sm::uuid::generate_uuid(&uuid, false)); + if (timestamp_start > timestamp_end) { throw std::logic_error( "Error generating timestamped name; " @@ -49,8 +52,7 @@ std::string generate_timestamped_name( } std::stringstream ss; - ss << "/__" << timestamp_start << "_" << timestamp_end << "_" - << random_label(); + ss << "/__" << timestamp_start << "_" << timestamp_end << "_" << uuid; if (version.has_value()) { ss << "_" << version.value(); diff --git a/tiledb/type/range/range.h b/tiledb/type/range/range.h index 7009283094a4..c3011983a764 100644 --- a/tiledb/type/range/range.h +++ b/tiledb/type/range/range.h @@ -38,6 +38,7 @@ #include "tiledb/common/tag.h" #include "tiledb/sm/enums/datatype.h" +#include #include #include #include @@ -461,8 +462,8 @@ 
template < void crop_range(const Range& bounds, Range& range) { auto bounds_data = (const T*)bounds.data(); auto range_data = (T*)range.data(); - range_data[0] = std::max(bounds_data[0], range_data[0]); - range_data[1] = std::min(bounds_data[1], range_data[1]); + range_data[0] = std::clamp(range_data[0], bounds_data[0], bounds_data[1]); + range_data[1] = std::clamp(range_data[1], bounds_data[0], bounds_data[1]); }; /** diff --git a/tiledb/type/range/test/unit_crop_range.cc b/tiledb/type/range/test/unit_crop_range.cc index 46dfcf055b55..8234041d29ce 100644 --- a/tiledb/type/range/test/unit_crop_range.cc +++ b/tiledb/type/range/test/unit_crop_range.cc @@ -89,6 +89,16 @@ TEMPLATE_TEST_CASE( std::numeric_limits::max()}; test_crop_range(bounds, range, bounds); } + SECTION("Test crop outside lower bound") { + TestType range[2]{0, 0}; + TestType result[2]{1, 1}; + test_crop_range(bounds, range, result); + } + SECTION("Test crop outside upper bound") { + TestType range[2]{5, 6}; + TestType result[2]{4, 4}; + test_crop_range(bounds, range, result); + } } TEMPLATE_TEST_CASE( @@ -126,6 +136,16 @@ TEMPLATE_TEST_CASE( std::numeric_limits::max()}; test_crop_range(bounds, range, bounds); } + SECTION("Test crop outside lower bound") { + TestType range[2]{-6, -4}; + TestType result[2]{-2, -2}; + test_crop_range(bounds, range, result); + } + SECTION("Test crop outside upper bound") { + TestType range[2]{5, 6}; + TestType result[2]{2, 2}; + test_crop_range(bounds, range, result); + } } TEMPLATE_TEST_CASE( @@ -164,4 +184,14 @@ TEMPLATE_TEST_CASE( std::numeric_limits::infinity()}; test_crop_range(bounds, range, bounds); } + SECTION("Test crop outside lower bound") { + TestType range[2]{-60.1f, -40.3f}; + TestType result[2]{-10.5f, -10.5f}; + test_crop_range(bounds, range, result); + } + SECTION("Test crop outside upper bound") { + TestType range[2]{5.1f, 6.5f}; + TestType result[2]{3.33f, 3.33f}; + test_crop_range(bounds, range, result); + } } diff --git 
a/tools/src/commands/info_command.cc b/tools/src/commands/info_command.cc index eeb028e904eb..e181dd6a9948 100644 --- a/tools/src/commands/info_command.cc +++ b/tools/src/commands/info_command.cc @@ -131,7 +131,7 @@ void InfoCommand::print_tile_sizes() const { // Compute and report mean persisted tile sizes over all attributes. const auto& schema = array.array_schema_latest(); auto fragment_metadata = array.fragment_metadata(); - auto attributes = schema.attributes(); + auto& attributes = schema.attributes(); uint64_t total_persisted_size = 0, total_in_memory_size = 0; // Helper function for processing each attribute. diff --git a/vcpkg.json b/vcpkg.json index 19ed762ab250..fec72703d0e4 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -81,12 +81,5 @@ "libwebp" ] } - }, - "overrides": [ - { - "$note": "Remove this when the custom port for libmagic gets removed", - "name": "dirent", - "version": "1.23.2#3" - } - ] + } }