Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter pipeline support for datatype conversions based on filtered output datatype. #4165

Merged
merged 22 commits into from
Jul 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion format_spec/filter_pipeline.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,13 @@ The filter options are configuration parameters for the filters that do not chan

### Main Compressor Options

For the compression filters \(any of the filter types `TILEDB_FILTER_{GZIP,ZSTD,LZ4,RLE,BZIP2,DOUBLE_DELTA,DICTIONARY}`\) the filter options have internal format:
For the compression filters \(any of the filter types `TILEDB_FILTER_{GZIP,ZSTD,LZ4,RLE,BZIP2,DOUBLE_DELTA,DELTA,DICTIONARY}`\) the filter options have internal format:

| **Field** | **Type** | **Description** |
| :--- | :--- | :--- |
| Compressor type | `uint8_t` | Type of compression \(e.g. `TILEDB_BZIP2`\) |
| Compression level | `int32_t` | Compression level used \(ignored by some compressors\). |
| Reinterpret datatype | `uint8_t` | Datatype that the data is reinterpreted as prior to compression. Used for DOUBLE_DELTA and DELTA only. |

### Bit-width Reduction Options

Expand Down
45 changes: 39 additions & 6 deletions test/regression/targets/sc-24079.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ std::string array_name = "cpp_unit_array_24079";
TEST_CASE(
"C++ API: DoubleDelta filter typecheck should account for output type of "
"FloatScaleFilter",
"[cppapi][filter][float-scaling][!shouldfail]") {
"[cppapi][filter][float-scaling]") {
tiledb::Context ctx;
tiledb::VFS vfs(ctx);

Expand All @@ -24,7 +24,7 @@ TEST_CASE(

// Create and initialize dimension.
auto d1 = tiledb::Dimension::create<float>(
ctx, "soma_joinid", {{domain_lo, domain_hi}}, 2048);
ctx, "d1", {{domain_lo, domain_hi}}, 2048);

tiledb::Filter float_scale(ctx, TILEDB_FILTER_SCALE_FLOAT);
double scale = 1.0f;
Expand All @@ -40,22 +40,55 @@ TEST_CASE(
tiledb::FilterList filters(ctx);
filters.add_filter(float_scale);
filters.add_filter(dd);
d1.set_filter_list(filters);

d1.set_filter_list(filters);
domain.add_dimension(d1);

auto a = tiledb::Attribute::create<float>(ctx, "A");
auto a1 = tiledb::Attribute::create<float>(ctx, "a1");
a1.set_filter_list(filters);

tiledb::ArraySchema schema(ctx, TILEDB_SPARSE);
schema.set_domain(domain);
schema.add_attribute(a);
schema.add_attribute(a1);
schema.set_capacity(100000);
schema.set_cell_order(TILEDB_ROW_MAJOR);
schema.set_tile_order(TILEDB_ROW_MAJOR);
CHECK_NOTHROW(tiledb::Array::create(array_name, schema));
std::vector<float> d1_data = {
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f};
std::vector<float> a1_data = {
1.0f, 2.1f, 3.2f, 4.3f, 5.4f, 6.5f, 7.6f, 8.7f, 9.8f, 10.9f};

// Write to array.
{
tiledb::Array array(ctx, array_name, TILEDB_WRITE);
tiledb::Query query(ctx, array);
query.set_data_buffer("d1", d1_data);
query.set_data_buffer("a1", a1_data);
query.submit();
CHECK(tiledb::Query::Status::COMPLETE == query.query_status());
}

// Read from array.
{
std::vector<float> d1_read(10);
std::vector<float> a1_read(10);
tiledb::Array array(ctx, array_name, TILEDB_READ);
tiledb::Query query(ctx, array);
query.set_subarray({domain_lo, domain_hi});
query.set_data_buffer("a1", a1_read);
query.set_data_buffer("d1", d1_read);
query.submit();
CHECK(tiledb::Query::Status::COMPLETE == query.query_status());
CHECK(
std::vector<float>{
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f} ==
a1_read);
CHECK(d1_data == d1_read);
}

// Cleanup.
if (vfs.is_dir(array_name)) {
vfs.remove_dir(array_name);
}
}
}
8 changes: 8 additions & 0 deletions test/src/unit-capi-sparse_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2486,6 +2486,14 @@ TEST_CASE_METHOD(
TILEDB_ROW_MAJOR,
TILEDB_COL_MAJOR);
}

// Runs check_sorted_reads with the delta compression filter
// (TILEDB_FILTER_DELTA), passing TILEDB_ROW_MAJOR and TILEDB_COL_MAJOR as the
// two order arguments.
SECTION("- delta compression, row/col-major") {
// TODO: refactor for each supported FS.
// Only the first entry of fs_vec_ is used; the array name is that entry's
// temp dir with ARRAY appended.
std::string temp_dir = fs_vec_[0]->temp_dir();
array_name = temp_dir + ARRAY;
check_sorted_reads(
array_name, TILEDB_FILTER_DELTA, TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR);
}
}

TEST_CASE_METHOD(
Expand Down
17 changes: 4 additions & 13 deletions test/src/unit-cppapi-webp-filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,10 @@ TEMPLATE_LIST_TEST_CASE(

// Create an invalid attribute for use with WebP filter.
auto invalid_attr = Attribute::create<TestType>(ctx, "rgb");
invalid_attr.set_filter_list(filterList);
REQUIRE_THROWS_WITH(
invalid_attr.set_filter_list(filterList),
Catch::Matchers::ContainsSubstring(
"Filter WEBP does not accept input type"));

// WebP filter requires exactly 2 dimensions for Y, X.
{
Expand Down Expand Up @@ -299,18 +302,6 @@ TEMPLATE_LIST_TEST_CASE(
"In dense arrays, all dimensions must have the same datatype"));
}

// WebP filter supports only uint8 attributes.
{
ArraySchema invalid_schema(ctx, TILEDB_DENSE);
invalid_schema.set_domain(valid_domain);

invalid_schema.add_attribute(invalid_attr);
REQUIRE_THROWS_WITH(
Array::create(webp_array_name, invalid_schema),
Catch::Matchers::ContainsSubstring(
"WebP filter supports only uint8 attributes"));
}

// WebP filter can only be applied to dense arrays.
{
ArraySchema invalid_schema(ctx, TILEDB_SPARSE);
Expand Down
Loading