Skip to content

Commit

Permalink
Percent-encode reserved characters in file names
Browse files Browse the repository at this point in the history
This bumps the format version from 7 to 8. For version 8, dimension and
attribute names are percent-encoded to RFC 3986 + a few extra to handle
reserved characters on Windows.

This is intended to be a temporary measure. In the future, we will include
a mapping between names and their ordered index in the array schema.
  • Loading branch information
Joe Maley committed Jan 27, 2021
1 parent 73c7cc4 commit 5defb05
Show file tree
Hide file tree
Showing 9 changed files with 344 additions and 15 deletions.
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

## Improvements

* Support for dimension/attribute names that contain commonly reserved filesystem characters [#2047](https://github.com/TileDB-Inc/TileDB/pull/2047)

## Deprecations

## Bug fixes
Expand Down
2 changes: 1 addition & 1 deletion format_spec/FORMAT_SPEC.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


:information_source: **Notes:**
- The current TileDB format version number is **7** (`uint32_t`).
- The current TileDB format version number is **8** (`uint32_t`).
- All data written by TileDB and referenced in this document is **little-endian**.

## Table of Contents
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ set(TILEDB_TEST_SOURCES
src/unit-capi-array.cc
src/unit-capi-array_schema.cc
src/unit-capi-async.cc
src/unit-capi-attributes.cc
src/unit-capi-buffer.cc
src/unit-capi-config.cc
src/unit-capi-consolidation.cc
Expand Down
272 changes: 272 additions & 0 deletions test/src/unit-capi-attributes.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
/**
* @file unit-capi-attributes.cc
*
* @section LICENSE
*
* The MIT License
*
* @copyright Copyright (c) 2021 TileDB Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* @section DESCRIPTION
*
* Tests of C API for attributes.
*/

#include "catch.hpp"
#include "tiledb/sm/c_api/tiledb.h"

#include <iostream>

#include "catch.hpp"
#include "test/src/helpers.h"
#include "test/src/vfs_helpers.h"
#ifdef _WIN32
#include <Windows.h>
#include "tiledb/sm/filesystem/win.h"
#else
#include "tiledb/sm/filesystem/posix.h"
#endif
#include "tiledb/sm/c_api/tiledb.h"
#include "tiledb/sm/misc/utils.h"

#include <chrono>
#include <iostream>
#include <sstream>
#include <thread>

using namespace tiledb::test;

struct Attributesfx {
// TileDB context
tiledb_ctx_t* ctx_;
tiledb_vfs_t* vfs_;

// Vector of supported filesystems
const std::vector<std::unique_ptr<SupportedFs>> fs_vec_;

// Functions
Attributesfx();
~Attributesfx();
void create_temp_dir(const std::string& path);
void remove_temp_dir(const std::string& path);
void create_dense_vector(
const std::string& path, const std::string& attr_name);
static std::string random_name(const std::string& prefix);
};

Attributesfx::Attributesfx()
: fs_vec_(vfs_test_get_fs_vec()) {
// Initialize vfs test
REQUIRE(vfs_test_init(fs_vec_, &ctx_, &vfs_).ok());
}

Attributesfx::~Attributesfx() {
// Close vfs test
REQUIRE(vfs_test_close(fs_vec_, ctx_, vfs_).ok());
tiledb_vfs_free(&vfs_);
tiledb_ctx_free(&ctx_);
}

void Attributesfx::create_temp_dir(const std::string& path) {
remove_temp_dir(path);
REQUIRE(tiledb_vfs_create_dir(ctx_, vfs_, path.c_str()) == TILEDB_OK);
}

void Attributesfx::remove_temp_dir(const std::string& path) {
int is_dir = 0;
REQUIRE(tiledb_vfs_is_dir(ctx_, vfs_, path.c_str(), &is_dir) == TILEDB_OK);
if (is_dir)
REQUIRE(tiledb_vfs_remove_dir(ctx_, vfs_, path.c_str()) == TILEDB_OK);
}

std::string Attributesfx::random_name(const std::string& prefix) {
std::stringstream ss;
ss << prefix << "-" << std::this_thread::get_id() << "-"
<< TILEDB_TIMESTAMP_NOW_MS;
return ss.str();
}

void Attributesfx::create_dense_vector(
const std::string& path, const std::string& attr_name) {
int rc;
int64_t dim_domain[] = {1, 10};
int64_t tile_extent = 2;

// Create domain
tiledb_domain_t* domain;
rc = tiledb_domain_alloc(ctx_, &domain);
REQUIRE(rc == TILEDB_OK);
tiledb_dimension_t* dim;
rc = tiledb_dimension_alloc(
ctx_, "d1", TILEDB_INT64, dim_domain, &tile_extent, &dim);
REQUIRE(rc == TILEDB_OK);
rc = tiledb_domain_add_dimension(ctx_, domain, dim);
REQUIRE(rc == TILEDB_OK);

tiledb_attribute_t* attr;
rc = tiledb_attribute_alloc(ctx_, attr_name.c_str(), TILEDB_INT32, &attr);
REQUIRE(rc == TILEDB_OK);

// Create array schema
tiledb_array_schema_t* array_schema;
rc = tiledb_array_schema_alloc(ctx_, TILEDB_DENSE, &array_schema);
REQUIRE(rc == TILEDB_OK);
rc = tiledb_array_schema_set_cell_order(ctx_, array_schema, TILEDB_ROW_MAJOR);
REQUIRE(rc == TILEDB_OK);
rc = tiledb_array_schema_set_tile_order(ctx_, array_schema, TILEDB_ROW_MAJOR);
REQUIRE(rc == TILEDB_OK);
rc = tiledb_array_schema_set_domain(ctx_, array_schema, domain);
REQUIRE(rc == TILEDB_OK);
rc = tiledb_array_schema_add_attribute(ctx_, array_schema, attr);
REQUIRE(rc == TILEDB_OK);

rc = tiledb_array_schema_check(ctx_, array_schema);
REQUIRE(rc == TILEDB_OK);

// Create array
rc = tiledb_array_create(ctx_, path.c_str(), array_schema);

REQUIRE(rc == TILEDB_OK);
tiledb_attribute_free(&attr);
tiledb_dimension_free(&dim);
tiledb_array_schema_free(&array_schema);
}

TEST_CASE_METHOD(
Attributesfx,
"C API: Test attributes with illegal filesystem characters in the name",
"[capi][attributes][illegal_name]") {
const std::vector<std::string> attr_names = {
"miles!hour", "miles#hour", "miles$hour", "miles%hour", "miles&hour",
"miles'hour", "miles(hour", "miles)hour", "miles*hour", "miles+hour",
"miles,hour", "miles/hour", "miles:hour", "miles;hour", "miles=hour",
"miles?hour", "miles@hour", "miles[hour", "miles]hour", "miles[hour",
"miles\"hour", "miles<hour", "miles>hour", "miles\\hour", "miles|hour"};

for (const auto& attr_name : attr_names) {
for (const auto& fs : fs_vec_) {
// Skip this test for Azure and Windows because they are broken for
// certain characters. The path to handle illegal characters is a
// temporary fix. We will get this working on Azure and Windows in the
// future.
bool skip = false;
if (dynamic_cast<SupportedFsAzure*>(fs.get()) != nullptr)
skip = true;
#ifdef _WIN32
if (dynamic_cast<SupportedFsLocal*>(fs.get()) != nullptr)
skip = true;
#endif
if (skip)
continue;

std::string temp_dir = fs->temp_dir();
std::string array_name = temp_dir + "array-illegal-char";

// Create new TileDB context with file lock config disabled, rest the
// same.
tiledb_ctx_free(&ctx_);
tiledb_vfs_free(&vfs_);

tiledb_config_t* config = nullptr;
tiledb_error_t* error = nullptr;
REQUIRE(tiledb_config_alloc(&config, &error) == TILEDB_OK);
REQUIRE(error == nullptr);

REQUIRE(vfs_test_init(fs_vec_, &ctx_, &vfs_, config).ok());

tiledb_config_free(&config);

create_temp_dir(temp_dir);

create_dense_vector(array_name, attr_name);

// Prepare cell buffers
int buffer_a1[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
uint64_t buffer_a1_size = sizeof(buffer_a1);

// Open array
int64_t subarray[] = {1, 10};
tiledb_array_t* array;
int rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_open(ctx_, array, TILEDB_WRITE);
CHECK(rc == TILEDB_OK);

// Submit query
tiledb_query_t* query;
rc = tiledb_query_alloc(ctx_, array, TILEDB_WRITE, &query);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_layout(ctx_, query, TILEDB_GLOBAL_ORDER);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_subarray(ctx_, query, subarray);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_buffer(
ctx_, query, attr_name.c_str(), buffer_a1, &buffer_a1_size);
CHECK(rc == TILEDB_OK);

rc = tiledb_query_submit(ctx_, query);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_finalize(ctx_, query);
CHECK(rc == TILEDB_OK);

// Close array and clean up
rc = tiledb_array_close(ctx_, array);
CHECK(rc == TILEDB_OK);
tiledb_array_free(&array);
tiledb_query_free(&query);

int buffer_read[10];
uint64_t buffer_read_size = sizeof(buffer_read);

// Open array
rc = tiledb_array_alloc(ctx_, array_name.c_str(), &array);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_open(ctx_, array, TILEDB_READ);
CHECK(rc == TILEDB_OK);

// Submit query
rc = tiledb_query_alloc(ctx_, array, TILEDB_READ, &query);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_layout(ctx_, query, TILEDB_ROW_MAJOR);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_subarray(ctx_, query, subarray);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_buffer(
ctx_, query, attr_name.c_str(), buffer_read, &buffer_read_size);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_submit(ctx_, query);
CHECK(rc == TILEDB_OK);

// Close array and clean up
rc = tiledb_array_close(ctx_, array);
CHECK(rc == TILEDB_OK);
tiledb_array_free(&array);
tiledb_query_free(&query);

// Check correctness
int buffer_read_c[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
CHECK(!std::memcmp(buffer_read, buffer_read_c, sizeof(buffer_read_c)));
CHECK(buffer_read_size == sizeof(buffer_read_c));

remove_temp_dir(temp_dir);
}
}
}
12 changes: 6 additions & 6 deletions test/src/unit-capi-fragment_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1001,16 +1001,16 @@ TEST_CASE("C API: Test fragment info, dump", "[capi][fragment_info][dump]") {
"- Fragment #1:\n" + " > URI: " + written_frag_uri_1 + "\n" +
" > Type: dense\n" + " > Non-empty domain: [1, 6]\n" +
" > Size: 1584\n" + " > Cell num: 10\n" +
" > Timestamp range: [1, 1]\n" + " > Format version: 7\n" +
" > Timestamp range: [1, 1]\n" + " > Format version: 8\n" +
" > Has consolidated metadata: no\n" + "- Fragment #2:\n" +
" > URI: " + written_frag_uri_2 + "\n" + " > Type: sparse\n" +
" > Non-empty domain: [1, 7]\n" + " > Size: 1708\n" +
" > Cell num: 4\n" + " > Timestamp range: [2, 2]\n" +
" > Format version: 7\n" + " > Has consolidated metadata: no\n" +
" > Format version: 8\n" + " > Has consolidated metadata: no\n" +
"- Fragment #3:\n" + " > URI: " + written_frag_uri_3 + "\n" +
" > Type: sparse\n" + " > Non-empty domain: [2, 9]\n" +
" > Size: 1696\n" + " > Cell num: 3\n" +
" > Timestamp range: [3, 3]\n" + " > Format version: 7\n" +
" > Timestamp range: [3, 3]\n" + " > Format version: 8\n" +
" > Has consolidated metadata: no\n";
FILE* gold_fout = fopen("gold_fout.txt", "w");
const char* dump = dump_str.c_str();
Expand Down Expand Up @@ -1130,7 +1130,7 @@ TEST_CASE(
"- Fragment #1:\n" + " > URI: " + uri + "\n" + " > Type: dense\n" +
" > Non-empty domain: [1, 10]\n" + " > Size: 1584\n" +
" > Cell num: 10\n" + " > Timestamp range: [1, 3]\n" +
" > Format version: 7\n" + " > Has consolidated metadata: no\n";
" > Format version: 8\n" + " > Has consolidated metadata: no\n";
FILE* gold_fout = fopen("gold_fout.txt", "w");
const char* dump = dump_str.c_str();
fwrite(dump, sizeof(char), strlen(dump), gold_fout);
Expand Down Expand Up @@ -1212,7 +1212,7 @@ TEST_CASE(
"- Fragment #1:\n" + " > URI: " + written_frag_uri + "\n" +
" > Type: sparse\n" + " > Non-empty domain: [a, ddd]\n" +
" > Size: 1833\n" + " > Cell num: 4\n" +
" > Timestamp range: [1, 1]\n" + " > Format version: 7\n" +
" > Timestamp range: [1, 1]\n" + " > Format version: 8\n" +
" > Has consolidated metadata: no\n";
FILE* gold_fout = fopen("gold_fout.txt", "w");
const char* dump = dump_str.c_str();
Expand All @@ -1234,4 +1234,4 @@ TEST_CASE(
tiledb_ctx_free(&ctx);
tiledb_vfs_free(&vfs);
tiledb_fragment_info_free(&fragment_info);
}
}
8 changes: 4 additions & 4 deletions test/src/unit-cppapi-fragment_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -626,16 +626,16 @@ TEST_CASE(
"- Fragment #1:\n" + " > URI: " + written_frag_uri_1 + "\n" +
" > Type: dense\n" + " > Non-empty domain: [1, 6]\n" +
" > Size: 1584\n" + " > Cell num: 10\n" +
" > Timestamp range: [1, 1]\n" + " > Format version: 7\n" +
" > Timestamp range: [1, 1]\n" + " > Format version: 8\n" +
" > Has consolidated metadata: no\n" + "- Fragment #2:\n" +
" > URI: " + written_frag_uri_2 + "\n" + " > Type: sparse\n" +
" > Non-empty domain: [1, 7]\n" + " > Size: 1708\n" +
" > Cell num: 4\n" + " > Timestamp range: [2, 2]\n" +
" > Format version: 7\n" + " > Has consolidated metadata: no\n" +
" > Format version: 8\n" + " > Has consolidated metadata: no\n" +
"- Fragment #3:\n" + " > URI: " + written_frag_uri_3 + "\n" +
" > Type: sparse\n" + " > Non-empty domain: [2, 9]\n" +
" > Size: 1696\n" + " > Cell num: 3\n" +
" > Timestamp range: [3, 3]\n" + " > Format version: 7\n" +
" > Timestamp range: [3, 3]\n" + " > Format version: 8\n" +
" > Has consolidated metadata: no\n";
FILE* gold_fout = fopen("gold_fout.txt", "w");
const char* dump = dump_str.c_str();
Expand All @@ -654,4 +654,4 @@ TEST_CASE(

// Clean up
remove_dir(array_name, ctx.ptr().get(), vfs.ptr().get());
}
}
Loading

0 comments on commit 5defb05

Please sign in to comment.