Skip to content

Commit

Permalink
Exported changes
Browse files Browse the repository at this point in the history
  • Loading branch information
TensorStore team authored and jbms committed Nov 1, 2024
1 parent b46a7b0 commit f25c988
Show file tree
Hide file tree
Showing 34 changed files with 725 additions and 137 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ jobs:
shell: bash
run: |
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
- uses: actions/cache@v2
- uses: actions/cache@v4
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: pip-${{ runner.os }}-buildwheel-${{ hashFiles('tools/ci/*_requirements.txt', 'third_party/pypa/*_requirements_frozen.txt') }}
- uses: actions/cache@v2
- uses: actions/cache@v4
with:
path: |
~/.cache/cibuildwheel_bazel_cache/cache/repos
Expand Down
93 changes: 47 additions & 46 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,45 +7,46 @@ jobs:
strategy:
matrix:
python-version:
- '3.9'
- "3.12"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
# Need full history to determine version number.
fetch-depth: 0
- name: 'Set up Python ${{ matrix.python-version }}'
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: 'Configure bazel remote cache write credentials'
env:
BAZEL_CACHE_SERVICE_ACCOUNT_KEY: ${{ secrets.BAZEL_CACHE_SERVICE_ACCOUNT_KEY }}
run: python ./tools/ci/configure_bazel_remote_cache.py --bazelrc ~/ci_bazelrc docs
shell: bash
- name: Get pip cache dir
id: pip-cache
shell: bash
run: |
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
- uses: actions/cache@v2
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: pip-${{ runner.os }}-docs-${{ matrix.python-version }}-${{ hashFiles('third_party/pypa/workspace.bzl') }}
- uses: actions/cache@v2
with:
path: |
~/.cache/bazel/_bazel_*/cache/repos
~/.cache/bazelisk
key: bazel-docs-${{ hashFiles('.bazelversion', 'WORKSPACE', 'external.bzl', 'third_party/**') }}
- name: Build documentation
run: CC=gcc-10 python -u bazelisk.py --bazelrc ~/ci_bazelrc run --announce_rc --show_timestamps --keep_going --color=yes --verbose_failures //docs:build_docs -- --output docs_output
shell: bash
- name: Upload docs as artifact
uses: actions/upload-artifact@v4
with:
name: docs
path: docs_output
- uses: actions/checkout@v4
with:
# Need full history to determine version number.
fetch-depth: 0
- name: "Set up Python ${{ matrix.python-version }}"
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: "Configure bazel remote cache write credentials"
env:
BAZEL_CACHE_SERVICE_ACCOUNT_KEY: ${{ secrets.BAZEL_CACHE_SERVICE_ACCOUNT_KEY }}
run: python ./tools/ci/configure_bazel_remote_cache.py --bazelrc ~/ci_bazelrc docs
shell: bash
- name: Get pip cache dir
id: pip-cache
shell: bash
run: |
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
- uses: actions/cache@v4
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: pip-${{ runner.os }}-docs-${{ matrix.python-version }}-${{ hashFiles('third_party/pypa/workspace.bzl') }}
- uses: actions/cache@v4
with:
path: |
~/.cache/bazel/_bazel_*/cache/repos
~/.cache/bazelisk
key: bazel-docs-${{ hashFiles('.bazelversion', 'WORKSPACE', 'external.bzl', 'third_party/**') }}
- name: Build documentation
run: CC=gcc-10 python -u bazelisk.py --bazelrc ~/ci_bazelrc run --announce_rc --show_timestamps --keep_going --color=yes --verbose_failures //docs:build_docs -- --output docs_output
shell: bash
- run: zip -r docs_output.zip docs_output
- name: Upload docs as artifact
uses: actions/upload-artifact@v4
with:
name: docs
path: docs_output.zip

publish-docs:
# Only publish package on push to tag or default branch.
Expand All @@ -54,12 +55,12 @@ jobs:
needs:
- build-docs
steps:
- uses: actions/download-artifact@v4
with:
name: docs
path: docs_output
- name: Publish to gh-pages
uses: peaceiris/actions-gh-pages@bbdfb200618d235585ad98e965f4aafc39b4c501 # v3.7.3 (2020-10-20)
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs_output
- uses: actions/download-artifact@v4
with:
name: docs
- run: unzip docs_output.zip
- name: Publish to gh-pages
uses: peaceiris/actions-gh-pages@bbdfb200618d235585ad98e965f4aafc39b4c501 # v3.7.3 (2020-10-20)
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs_output
79 changes: 49 additions & 30 deletions docs/context_schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,50 +17,69 @@ properties:
:literal:`<resource-type>`. The resource specification must be compatible with
:literal:`<resource-type>`.
examples:
- "cache_pool":
total_bytes_limit: 10000000
"cache_pool#remote":
total_bytes_limit: 100000000
"data_copy_concurrency":
limit: 8
- "cache_pool":
total_bytes_limit: 10000000
"cache_pool#remote":
total_bytes_limit: 100000000
"data_copy_concurrency":
limit: 8
definitions:
resource:
$id: ContextResource
description: |-
Specifies a context resource of a particular :literal:`<resource-type>`.
oneOf:
- oneOf:
- type: object
- type: boolean
- type: number
description: |-
Specifies the resource directly. Any constraints on the value are
determined by the particular :literal:`<resource-type>`.
- type: string
description: |-
References another resource of the same type in the current or parent
context using the syntax ``"<resource-type>"`` or
``"<resource-type>#<id>"``, where :literal:`<resource-type>`
matches the type of this resource.
- type: 'null'
description: |-
Specifies a new instance of the default resource of the given
:literal:`<resource-type>`. Only valid within a `Context` specification.
- oneOf:
- type: object
- type: boolean
- type: number
description: |-
Specifies the resource directly. Any constraints on the value are
determined by the particular :literal:`<resource-type>`.
- type: string
description: |-
References another resource of the same type in the current or parent
context using the syntax ``"<resource-type>"`` or
``"<resource-type>#<id>"``, where :literal:`<resource-type>`
matches the type of this resource.
- type: "null"
description: |-
Specifies a new instance of the default resource of the given
:literal:`<resource-type>`. Only valid within a `Context` specification.
cache_pool:
$id: Context.cache_pool
description: |-
Specifies the size of an in-memory Least Recently Used (LRU) cache. Each
:literal:`cache_pool` resource specifies a separate memory pool.
type: object
properties:
disabled:
type: boolean
default: false
title: |
May be set to ``true`` to disable the cache entirely.
description: |
If set to ``true``, no other properties may be specified. Compared to
setting `.total_bytes_limit` to ``0``, multiple concurrent reads (e.g.
of the same chunk of an array) won't be coalesced.
total_bytes_limit:
type: integer
minimum: 0
description: |-
Soft limit on the total number of bytes in the cache. The
least-recently used data that is not in use is evicted from the cache
when this limit is reached.
title: |-
Soft limit on the total number of bytes in the cache.
description: |
The least-recently used data that is not in use is evicted from the
cache when this limit is reached. In-use data remains cached
regardless of the limit.
default: 0
queued_for_writeback_bytes_limit:
type: integer
minimum: 0
description: |-
Soft limit on the total number of bytes of data pending writeback.
Writeback is initiated on the least-recently used data that is pending
writeback when this limit is reached. Defaults to half of
`.total_bytes_limit`.
data_copy_concurrency:
$id: Context.data_copy_concurrency
description: |-
Expand All @@ -70,9 +89,9 @@ definitions:
properties:
limit:
oneOf:
- type: integer
minimum: 1
- const: "shared"
- type: integer
minimum: 1
- const: "shared"
description: |-
The maximum number of CPU cores that may be used. If the special
value of ``"shared"`` is specified, a shared global limit equal to the
Expand Down
57 changes: 40 additions & 17 deletions tensorstore/driver/driver_testutil.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <algorithm>
#include <atomic>
#include <cassert>
#include <iostream>
#include <map>
#include <memory>
#include <optional>
Expand Down Expand Up @@ -582,9 +583,13 @@ absl::Status TestDriverWriteReadChunks(
absl::BitGenRef gen, const TestDriverWriteReadChunksOptions& options) {
Context context(options.context_spec);
const auto is_write = options.total_write_bytes != 0;
tensorstore::OpenMode open_mode = is_write
? tensorstore::OpenMode::open_or_create
: tensorstore::OpenMode::open;
tensorstore::OpenMode open_mode =
is_write
? (options.delete_existing ? (tensorstore::OpenMode::create |
tensorstore::OpenMode::delete_existing)
: (tensorstore::OpenMode::open |
tensorstore::OpenMode::create))
: tensorstore::OpenMode::open;

tensorstore::ReadWriteMode read_write_mode =
is_write ? tensorstore::ReadWriteMode::read_write
Expand Down Expand Up @@ -629,11 +634,20 @@ absl::Status TestDriverWriteReadChunks(
ABSL_LOG(INFO) << "read/write shape " << span(chunk_shape, ts.rank());
ABSL_LOG(INFO) << "Starting writes: " << options.repeat_writes
<< ", total_write_bytes=" << options.total_write_bytes;

auto result_callback = options.result_callback;
if (!result_callback) {
result_callback =
[](const TestDriverWriteReadChunksOptions::Results& results) {
ABSL_LOG(INFO) << results.FormatSummary();
return absl::OkStatus();
};
}
for (int64_t i = 0; i < options.repeat_writes; i++) {
TENSORSTORE_RETURN_IF_ERROR(
TestDriverReadOrWriteChunks(gen, ts, span(chunk_shape, ts.rank()),
options.total_write_bytes, options.strategy,
/*read=*/false));
/*read=*/false, result_callback));
}

ABSL_LOG(INFO) << "Starting reads: " << options.repeat_reads
Expand All @@ -642,7 +656,7 @@ absl::Status TestDriverWriteReadChunks(
TENSORSTORE_RETURN_IF_ERROR(
TestDriverReadOrWriteChunks(gen, ts, span(chunk_shape, ts.rank()),
options.total_read_bytes, options.strategy,
/*read=*/true));
/*read=*/true, result_callback));
}
return absl::OkStatus();
}
Expand Down Expand Up @@ -696,10 +710,22 @@ void ForEachChunk(BoxView<> domain, DataType dtype, absl::BitGenRef gen,

} // namespace

std::string TestDriverWriteReadChunksOptions::Results::FormatSummary() const {
auto elapsed_s = absl::FDivDuration(elapsed_time, absl::Seconds(1));
double bytes_mb = static_cast<double>(total_bytes) / 1e6;

return absl::StrFormat(
"%s summary: %d bytes in %.0f ms: %.3f MB/second (%d chunks of %d "
"bytes)",
(read ? "Read" : "Write"), total_bytes, elapsed_s * 1e3,
bytes_mb / elapsed_s, num_chunks, chunk_bytes);
}

absl::Status TestDriverReadOrWriteChunks(
absl::BitGenRef gen, tensorstore::TensorStore<> ts,
span<const Index> chunk_shape, int64_t total_bytes,
TestDriverWriteReadChunksOptions::Strategy strategy, bool read) {
TestDriverWriteReadChunksOptions::Strategy strategy, bool read,
const TestDriverWriteReadChunksOptions::ResultCallback& result_callback) {
if (total_bytes == 0) return absl::OkStatus();

if (total_bytes < 0) {
Expand Down Expand Up @@ -742,18 +768,15 @@ absl::Status TestDriverReadOrWriteChunks(
op.future.Wait();
TENSORSTORE_RETURN_IF_ERROR(op.future.result());

auto elapsed_s =
absl::FDivDuration(absl::Now() - start_time, absl::Seconds(1));
double bytes_mb = static_cast<double>(bytes_completed.load()) / 1e6;

ABSL_LOG(INFO)
<< (read ? "Read" : "Write") << " summary: "
<< absl::StrFormat(
"%d bytes in %.0f ms: %.3f MB/second (%d chunks of %d bytes)",
bytes_completed.load(), elapsed_s * 1e3, bytes_mb / elapsed_s,
chunks_completed.load(), chunk_bytes);
TestDriverWriteReadChunksOptions::Results results;
results.chunk_shape = chunk_shape;
results.total_bytes = bytes_completed.load();
results.chunk_bytes = chunk_bytes;
results.num_chunks = chunks_completed.load();
results.elapsed_time = absl::Now() - start_time;
results.read = read;

return absl::OkStatus();
return result_callback(results);
}

void RegisterTensorStoreDriverBasicFunctionalityTest(
Expand Down
22 changes: 21 additions & 1 deletion tensorstore/driver/driver_testutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,25 @@ struct TestDriverWriteReadChunksOptions {

// Number of times to repeat the writes.
int64_t repeat_writes = 1;

// Delete existing data before writing.
bool delete_existing = true;

// Measurements from one batch of chunk reads or writes, passed to the result
// callback.
//
// Scalar members are zero/false-initialized so that a default-constructed
// `Results` never holds indeterminate values; class-type members use their
// default constructors.
struct Results {
  // Shape of each chunk that was read or written.
  // NOTE(review): `span` is presumably a non-owning view, so this is likely
  // only valid while the callback is running -- confirm before storing it.
  span<const Index> chunk_shape;

  // Number of bytes completed in this batch.
  int64_t total_bytes = 0;

  // Size in bytes of a single chunk.
  int64_t chunk_bytes = 0;

  // Number of chunks completed in this batch.
  int64_t num_chunks = 0;

  // Wall-clock time taken by the batch.
  absl::Duration elapsed_time;

  // True if the batch consisted of reads, false if it consisted of writes.
  bool read = false;

  // Returns a one-line, human-readable summary of these results.
  std::string FormatSummary() const;
};

using ResultCallback = std::function<absl::Status(const Results& results)>;

// Callback to invoke instead of logging results.
ResultCallback result_callback;
};

// Tests concurrently reading and/or writing multiple chunks.
Expand All @@ -296,7 +315,8 @@ absl::Status TestDriverWriteReadChunks(
absl::Status TestDriverReadOrWriteChunks(
absl::BitGenRef gen, tensorstore::TensorStore<> ts,
span<const Index> chunk_shape, int64_t total_bytes,
TestDriverWriteReadChunksOptions::Strategy strategy, bool read);
TestDriverWriteReadChunksOptions::Strategy strategy, bool read,
const TestDriverWriteReadChunksOptions::ResultCallback& result_callback);

void TestTensorStoreCreateWithSchemaImpl(::nlohmann::json json_spec,
const Schema& schema);
Expand Down
Loading

0 comments on commit f25c988

Please sign in to comment.