Skip to content

Commit

Permalink
Enable the user to set arbitrary fill values to attributes. The fill …
Browse files Browse the repository at this point in the history
…values are used when empty attribute values are retrieved during a read query. This can happen either (1) in the case of a dense array with "empty" regions, or (2) any array with missing attributes. Note that the latter (i.e., enabling writes with a subset of attributes) will be supported in a subsequent PR.
  • Loading branch information
stavrospapadopoulos committed Jun 7, 2020
1 parent 732a618 commit 9ab9f5f
Show file tree
Hide file tree
Showing 21 changed files with 1,014 additions and 30 deletions.
12 changes: 12 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

## Breaking behavior

* Empty dense arrays now return cells with fill values.

## New features

## Improvements
Expand All @@ -14,8 +16,18 @@

## Bug fixes

* Fixed bug in setting a fill value for var-sized attributes.

## API additions

### C API

* Added functions `tiledb_attribute_{set,get}_fill_value` to set/get default fill values.

### C++ API

* Added functions `Attribute::{set,get}_fill_value` to set/get default fill values.

# TileDB v2.0.2 Release Notes

## Bug fixes
Expand Down
4 changes: 4 additions & 0 deletions doc/source/c-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,10 @@ Attribute
:project: TileDB-C
.. doxygenfunction:: tiledb_attribute_dump
:project: TileDB-C
.. doxygenfunction:: tiledb_attribute_set_fill_value
:project: TileDB-C
.. doxygenfunction:: tiledb_attribute_get_fill_value
:project: TileDB-C

Domain
------
Expand Down
2 changes: 2 additions & 0 deletions format_spec/array_schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,6 @@ The attribute has internal format:
| Attribute datatype | `uint8_t` | Datatype of the attribute values |
| Cell val num | `uint32_t` | Number of attribute values per cell. For variable-length attributes, this is `std::numeric_limits<uint32_t>::max()` |
| Filters | [Filter Pipeline](./filter_pipeline.md) | The filter pipeline used on attribute value tiles |
| Fill value size | `uint64_t` | The size in bytes of the fill value |
| Fill value | `uint8_t[]` | The fill value |

2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ set(TILEDB_TEST_SOURCES
src/unit-capi-empty-var-length.cc
src/unit-capi-enum_values.cc
src/unit-capi-error.cc
src/unit-capi-fill_values.cc
src/unit-capi-filter.cc
src/unit-capi-incomplete.cc
src/unit-capi-incomplete-2.cc
Expand Down Expand Up @@ -119,6 +120,7 @@ if (TILEDB_CPP_API)
src/unit-cppapi-consolidation.cc
src/unit-cppapi-consolidation-sparse.cc
src/unit-cppapi-datetimes.cc
src/unit-cppapi-fill_values.cc
src/unit-cppapi-filter.cc
src/unit-cppapi-metadata.cc
src/unit-cppapi-query.cc
Expand Down
8 changes: 5 additions & 3 deletions test/src/unit-capi-array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,7 @@ TEST_CASE_METHOD(
tiledb_query_free(&query);

// Check correctness
int buffer_read_c[] = {1, 2, 3, 4, 50, 60, 70, 8, 9, 10};
int32_t buffer_read_c[] = {1, 2, 3, 4, 50, 60, 70, 8, 9, 10};
CHECK(!std::memcmp(buffer_read, buffer_read_c, sizeof(buffer_read_c)));
CHECK(buffer_read_size == sizeof(buffer_read_c));

Expand Down Expand Up @@ -987,7 +987,8 @@ TEST_CASE_METHOD(
tiledb_query_free(&query);

// Check correctness
CHECK(buffer_read_size == 0); // Empty array
// Empty array still returns fill values
CHECK(buffer_read_size == 10 * sizeof(int32_t));

// ---- READ AT TIMESTAMP BEFORE UPDATE ----
buffer_read_size = sizeof(buffer_read);
Expand Down Expand Up @@ -1241,7 +1242,8 @@ TEST_CASE_METHOD(
tiledb_query_free(&query);

// Check correctness
CHECK(buffer_read_size == 0); // Empty array
// Empty array still returns fill values
CHECK(buffer_read_size == 10 * sizeof(int32_t));

// ---- READ AT THE WRITTEN TIMESTAMP ----
buffer_read_size = sizeof(buffer_read);
Expand Down
3 changes: 2 additions & 1 deletion test/src/unit-capi-array_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,8 @@ void ArraySchemaFx::load_and_check_array_schema(const std::string& path) {
"- Type: " + ATTR_TYPE_STR + "\n" +
"- Cell val num: " + CELL_VAL_NUM_STR + "\n" + "- Filters: 2\n" +
" > BZIP2: COMPRESSION_LEVEL=5\n" +
" > BitWidthReduction: BIT_WIDTH_MAX_WINDOW=1000\n";
" > BitWidthReduction: BIT_WIDTH_MAX_WINDOW=1000\n" +
"- Fill value: -2147483648\n";
FILE* gold_fout = fopen("gold_fout.txt", "w");
const char* dump = dump_str.c_str();
fwrite(dump, sizeof(char), strlen(dump), gold_fout);
Expand Down
2 changes: 1 addition & 1 deletion test/src/unit-capi-consolidation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4103,7 +4103,7 @@ TEST_CASE_METHOD(
CHECK(status == TILEDB_COMPLETED);

// Check buffers
CHECK(a_size == 0);
CHECK(a_size == 410 * sizeof(int32_t));

// Close array
rc = tiledb_array_close(ctx_, array);
Expand Down
2 changes: 1 addition & 1 deletion test/src/unit-capi-error.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

#include <iostream>

TEST_CASE("C API: Test error and error message", "[capi], [error]") {
TEST_CASE("C API: Test error and error message", "[capi][error]") {
tiledb_ctx_t* ctx;
int rc = tiledb_ctx_alloc(nullptr, &ctx);
CHECK(rc == TILEDB_OK);
Expand Down
187 changes: 187 additions & 0 deletions test/src/unit-capi-fill_values.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
/**
* @file unit-capi-fill_values.cc
*
* @section LICENSE
*
* The MIT License
*
* @copyright Copyright (c) 2017-2020 TileDB Inc.
* @copyright Copyright (c) 2016 MIT and Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* @section DESCRIPTION
*
* Tests the attribute fill values with the C API.
*/

#include "catch.hpp"
#include "tiledb/sm/c_api/tiledb.h"

#include <iostream>

/**
 * Checks that the textual dump of an attribute matches an expected string.
 *
 * Writes `gold_out` to "gold_fout.txt" and the output of
 * `tiledb_attribute_dump` to "fout.txt" (both relative to the current
 * working directory), compares the two files with the platform tool
 * (`FC` on Windows, `diff` elsewhere) and finally removes both files
 * through the TileDB VFS API.
 *
 * @param ctx The TileDB context used for the dump and for the VFS clean-up.
 * @param a The attribute whose dump is checked.
 * @param gold_out The expected dump text.
 */
void check_dump(
    tiledb_ctx_t* ctx, tiledb_attribute_t* a, const std::string& gold_out) {
  // Write the expected output. A failed fopen would hand a null FILE* to
  // fwrite/fclose, so abort the test case immediately in that situation.
  FILE* gold_fout = fopen("gold_fout.txt", "w");
  REQUIRE(gold_fout != nullptr);
  const size_t written =
      fwrite(gold_out.c_str(), sizeof(char), gold_out.size(), gold_fout);
  fclose(gold_fout);
  CHECK(written == gold_out.size());

  // Write the actual dump
  FILE* fout = fopen("fout.txt", "w");
  REQUIRE(fout != nullptr);
  const int32_t dump_rc = tiledb_attribute_dump(ctx, a, fout);
  fclose(fout);
  CHECK(dump_rc == TILEDB_OK);

  // Compare the two files
#ifdef _WIN32
  CHECK(!system("FC gold_fout.txt fout.txt > nul"));
#else
  CHECK(!system("diff gold_fout.txt fout.txt"));
#endif

  // Clean up. The VFS handle must be valid before it is used below.
  tiledb_vfs_t* vfs = nullptr;
  REQUIRE(tiledb_vfs_alloc(ctx, nullptr, &vfs) == TILEDB_OK);
  CHECK(tiledb_vfs_remove_file(ctx, vfs, "gold_fout.txt") == TILEDB_OK);
  CHECK(tiledb_vfs_remove_file(ctx, vfs, "fout.txt") == TILEDB_OK);
  tiledb_vfs_free(&vfs);
}

/**
 * Exercises `tiledb_attribute_{set,get}_fill_value` on a single INT32
 * attribute: invalid arguments, the default fill value, user-set fill values
 * for one and two values per cell and for a var-sized attribute, and the
 * reset of the fill value when the cell val num changes. After each step the
 * attribute dump is compared against the expected text.
 */
TEST_CASE(
    "C API: Test fill values, basic errors", "[capi][fill-values][basic]") {
  int32_t value = 5;
  uint64_t value_size = sizeof(int32_t);

  // The context and attribute are used by every call below, so a failed
  // allocation must stop the test instead of continuing with bad handles.
  tiledb_ctx_t* ctx = nullptr;
  int32_t rc = tiledb_ctx_alloc(nullptr, &ctx);
  REQUIRE(rc == TILEDB_OK);

  // Fixed-sized
  tiledb_attribute_t* a = nullptr;
  rc = tiledb_attribute_alloc(ctx, "a", TILEDB_INT32, &a);
  REQUIRE(rc == TILEDB_OK);

  // Null value
  rc = tiledb_attribute_set_fill_value(ctx, a, nullptr, value_size);
  CHECK(rc == TILEDB_ERR);

  // Zero size
  rc = tiledb_attribute_set_fill_value(ctx, a, &value, 0);
  CHECK(rc == TILEDB_ERR);

  // Wrong size
  rc = tiledb_attribute_set_fill_value(ctx, a, &value, 100);
  CHECK(rc == TILEDB_ERR);

  // Get default. The getter result is REQUIREd everywhere in this test
  // because `value_ptr` is dereferenced right after each call.
  const void* value_ptr = nullptr;
  rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size);
  REQUIRE(rc == TILEDB_OK);
  REQUIRE(value_ptr != nullptr);
  CHECK(*static_cast<const int32_t*>(value_ptr) == -2147483648);
  CHECK(value_size == sizeof(int32_t));

  // Check dump
  std::string dump = std::string("### Attribute ###\n") + "- Name: a\n" +
                     "- Type: INT32\n" + "- Cell val num: 1\n" +
                     "- Filters: 0\n" + "- Fill value: -2147483648\n";
  check_dump(ctx, a, dump);

  // Correct setter
  rc = tiledb_attribute_set_fill_value(ctx, a, &value, value_size);
  CHECK(rc == TILEDB_OK);

  // Get the set value
  rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size);
  REQUIRE(rc == TILEDB_OK);
  REQUIRE(value_ptr != nullptr);
  CHECK(*static_cast<const int32_t*>(value_ptr) == 5);
  CHECK(value_size == sizeof(int32_t));

  // Check dump
  dump = std::string("### Attribute ###\n") + "- Name: a\n" +
         "- Type: INT32\n" + "- Cell val num: 1\n" + "- Filters: 0\n" +
         "- Fill value: 5\n";
  check_dump(ctx, a, dump);

  // Setting the cell val num also resets the fill value to a new default
  rc = tiledb_attribute_set_cell_val_num(ctx, a, 2);
  CHECK(rc == TILEDB_OK);
  rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size);
  REQUIRE(rc == TILEDB_OK);
  REQUIRE(value_ptr != nullptr);
  CHECK(static_cast<const int32_t*>(value_ptr)[0] == -2147483648);
  CHECK(static_cast<const int32_t*>(value_ptr)[1] == -2147483648);
  CHECK(value_size == 2 * sizeof(int32_t));

  // Check dump
  dump = std::string("### Attribute ###\n") + "- Name: a\n" +
         "- Type: INT32\n" + "- Cell val num: 2\n" + "- Filters: 0\n" +
         "- Fill value: -2147483648, -2147483648\n";
  check_dump(ctx, a, dump);

  // Set a fill value that is comprised of two integers
  int32_t value_2[2] = {1, 2};
  rc = tiledb_attribute_set_fill_value(ctx, a, value_2, sizeof(value_2));
  CHECK(rc == TILEDB_OK);

  // Get the new value back
  rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size);
  REQUIRE(rc == TILEDB_OK);
  REQUIRE(value_ptr != nullptr);
  CHECK(static_cast<const int32_t*>(value_ptr)[0] == 1);
  CHECK(static_cast<const int32_t*>(value_ptr)[1] == 2);
  CHECK(value_size == 2 * sizeof(int32_t));

  // Check dump
  dump = std::string("### Attribute ###\n") + "- Name: a\n" +
         "- Type: INT32\n" + "- Cell val num: 2\n" + "- Filters: 0\n" +
         "- Fill value: 1, 2\n";
  check_dump(ctx, a, dump);

  // Make the attribute var-sized
  rc = tiledb_attribute_set_cell_val_num(ctx, a, TILEDB_VAR_NUM);
  CHECK(rc == TILEDB_OK);

  // Check dump
  dump = std::string("### Attribute ###\n") + "- Name: a\n" +
         "- Type: INT32\n" + "- Cell val num: var\n" + "- Filters: 0\n" +
         "- Fill value: -2147483648\n";
  check_dump(ctx, a, dump);

  // Get the default var-sized fill value
  rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size);
  REQUIRE(rc == TILEDB_OK);
  REQUIRE(value_ptr != nullptr);
  CHECK(*static_cast<const int32_t*>(value_ptr) == -2147483648);
  CHECK(value_size == sizeof(int32_t));

  // Set a new fill value for the var-sized attribute
  int32_t value_3[3] = {1, 2, 3};
  rc = tiledb_attribute_set_fill_value(ctx, a, value_3, sizeof(value_3));
  CHECK(rc == TILEDB_OK);

  // Get the new fill value
  rc = tiledb_attribute_get_fill_value(ctx, a, &value_ptr, &value_size);
  REQUIRE(rc == TILEDB_OK);
  REQUIRE(value_ptr != nullptr);
  CHECK(static_cast<const int32_t*>(value_ptr)[0] == 1);
  CHECK(static_cast<const int32_t*>(value_ptr)[1] == 2);
  CHECK(static_cast<const int32_t*>(value_ptr)[2] == 3);
  CHECK(value_size == 3 * sizeof(int32_t));

  // Check dump
  dump = std::string("### Attribute ###\n") + "- Name: a\n" +
         "- Type: INT32\n" + "- Cell val num: var\n" + "- Filters: 0\n" +
         "- Fill value: 1, 2, 3\n";
  check_dump(ctx, a, dump);

  // Clean up in reverse order of allocation: the attribute was created
  // with the context, so release it before the context goes away.
  tiledb_attribute_free(&a);
  tiledb_ctx_free(&ctx);
}
4 changes: 2 additions & 2 deletions test/src/unit-cppapi-array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,7 @@ TEST_CASE("C++ API: Open array at", "[cppapi][open-array-at]") {
query_r_at_0.submit();
array_r_at_0.close();
auto result = query_r_at_0.result_buffer_elements();
CHECK(result["a"].second == 0);
CHECK(result["a"].second == 4); // Empty arrays return fill values
CHECK(!std::equal(a_r_at_0.begin(), a_r_at_0.end(), a_w.begin()));

// Read from later timestamp
Expand Down Expand Up @@ -885,7 +885,7 @@ TEST_CASE(
query_r_at_0.submit();
array_r_at_0.close();
auto result = query_r_at_0.result_buffer_elements();
CHECK(result["a"].second == 0);
CHECK(result["a"].second == 4); // Empty arrays return fill values
CHECK(!std::equal(a_r_at_0.begin(), a_r_at_0.end(), a_w.begin()));

// Read from later timestamp
Expand Down
Loading

0 comments on commit 9ab9f5f

Please sign in to comment.