From 14cae1f02bae1d13aa1b5398c9038c24220e3a9b Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Mon, 14 Oct 2024 23:18:13 +0300 Subject: [PATCH 01/21] Fix randomly failing test `DenseArrayTest::test_open_with_timestamp[False]` (#2090) --- tiledb/tests/test_libtiledb.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index a0e40b9600..9122ad0f7e 100644 --- a/tiledb/tests/test_libtiledb.py +++ b/tiledb/tests/test_libtiledb.py @@ -836,9 +836,7 @@ def test_open_with_timestamp(self, use_timestamps): with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[:] = A - read1_timestamp = -1 with tiledb.DenseArray(self.path("foo"), mode="r") as T: - read1_timestamp = T.timestamp_range self.assertEqual(T[0], 0) self.assertEqual(T[1], 0) self.assertEqual(T[2], 0) @@ -849,25 +847,20 @@ def test_open_with_timestamp(self, use_timestamps): with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[0:1] = 1 - read2_timestamp = -1 - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - read2_timestamp = T.timestamp_range - self.assertTrue(read2_timestamp > read1_timestamp) - if use_timestamps: # sleep 200ms and write time.sleep(0.2) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[1:2] = 2 - read3_timestamp = -1 - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - read3_timestamp = T.timestamp_range - self.assertTrue(read3_timestamp > read2_timestamp > read1_timestamp) + frags = tiledb.array_fragments(self.path("foo")) + # timestamps are in the form of (start, end) for each fragment, with start == end, + # as we are not dealing with consolidated fragments. Let's simply read from 0 to the end timestamp. 
+ read_timestamps = [(0, frag.timestamp_range[1]) for frag in frags] # read at first timestamp with tiledb.DenseArray( - self.path("foo"), timestamp=read1_timestamp, mode="r" + self.path("foo"), timestamp=read_timestamps[0], mode="r" ) as T: self.assertEqual(T[0], 0) self.assertEqual(T[1], 0) @@ -875,7 +868,7 @@ def test_open_with_timestamp(self, use_timestamps): # read at second timestamp with tiledb.DenseArray( - self.path("foo"), timestamp=read2_timestamp, mode="r" + self.path("foo"), timestamp=read_timestamps[1], mode="r" ) as T: self.assertEqual(T[0], 1) self.assertEqual(T[1], 0) @@ -883,7 +876,7 @@ def test_open_with_timestamp(self, use_timestamps): # read at third timestamp with tiledb.DenseArray( - self.path("foo"), timestamp=read3_timestamp, mode="r" + self.path("foo"), timestamp=read_timestamps[2], mode="r" ) as T: self.assertEqual(T[0], 1) self.assertEqual(T[1], 2) From b29e61d779d4c6563a85091ecd3b78f5e73a6848 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Du=C5=A1an=20Baran?= Date: Wed, 16 Oct 2024 10:44:52 +0200 Subject: [PATCH 02/21] Add CI to test against the TileDB core library built from source with a custom version (#2091) Co-authored-by: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> --- .github/workflows/ci-tiledb-from-source.yml | 94 +++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 .github/workflows/ci-tiledb-from-source.yml diff --git a/.github/workflows/ci-tiledb-from-source.yml b/.github/workflows/ci-tiledb-from-source.yml new file mode 100644 index 0000000000..6d222f3ac5 --- /dev/null +++ b/.github/workflows/ci-tiledb-from-source.yml @@ -0,0 +1,94 @@ +name: TileDB Python CI Using TileDB Core Source Build + +on: + workflow_dispatch: + inputs: + libtiledb_ref: + default: dev + type: string + libtiledb_version: + type: string + +jobs: + + build_libtiledb: + runs-on: ubuntu-latest + steps: + - name: Checkout TileDB Core ${{ inputs.libtiledb_ref || 'dev' }} + uses: actions/checkout@v4 + with: + repository: 
TileDB-Inc/TileDB + ref: ${{ inputs.libtiledb_ref || 'dev' }} + + - name: Configure TileDB + run: | + cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_SHARED_LIBS=ON \ + -DCMAKE_INSTALL_PREFIX=./dist \ + -DTILEDB_INSTALL_LIBDIR=lib \ + -DTILEDB_S3=ON \ + -DTILEDB_AZURE=ON \ + -DTILEDB_GCS=ON \ + -DTILEDB_HDFS=ON \ + -DTILEDB_SERIALIZATION=ON \ + -DTILEDB_WEBP=ON \ + -DTILEDB_TESTS=OFF \ + -DVCPKG_TARGET_TRIPLET=x64-linux-release + + - name: Build TileDB + env: + TILEDB_PACKAGE_VERSION: ${{ inputs.libtiledb_version || '0.1' }} + run: cmake --build build --config Release --target package + + - name: Upload TileDB Core Artifact + uses: actions/upload-artifact@v4 + with: + name: libtiledb + path: | + build/tiledb-*.tar.gz* + build/tiledb-*.zip* + + build_tiledb_py: + needs: + - build_libtiledb + runs-on: ubuntu-latest + steps: + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Checkout TileDB-Py + uses: actions/checkout@v4 + + - name: Download TileDB Core Artifact + uses: actions/download-artifact@v4 + with: + name: libtiledb + path: ${{ github.workspace }}/libtiledb + + - name: Unpack Release Archive + run: tar xvf ${{ github.workspace }}/libtiledb/*.tar.gz --directory ${{ github.workspace }}/libtiledb + + - name: Build TileDB-Py Wheel + env: + TILEDB_PATH: ${{ github.workspace }}/libtiledb + run: | + python -m pip wheel -w dist --verbose . 
+ WHEEL=$(ls dist/tiledb-*.whl) + python -m pip install ${WHEEL}[test] + + - name: Upload TileDB Core Artifact + uses: actions/upload-artifact@v4 + with: + name: tiledb-py + path: | + dist/tiledb-*.whl + + - name: Run tests + run: | + PROJECT_CWD=$PWD + rm tiledb/__init__.py + cd /tmp + pytest -vv --showlocals $PROJECT_CWD From f1b09f3569dc3ab672a657e27faeb6450f57033f Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Wed, 16 Oct 2024 12:33:35 +0300 Subject: [PATCH 03/21] Make error message for non-existing `Enumeration` in test conditional (#2087) --- tiledb/tests/test_enumeration.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tiledb/tests/test_enumeration.py b/tiledb/tests/test_enumeration.py index f6af612b2c..c8b5d36aaa 100644 --- a/tiledb/tests/test_enumeration.py +++ b/tiledb/tests/test_enumeration.py @@ -92,10 +92,16 @@ def test_array_schema_enumeration(self): with self.assertRaises(tiledb.TileDBError) as excinfo: assert A.enum("enmr3") == [] - assert ( - "ArraySchema: Unable to check if unknown enumeration is loaded. No enumeration named 'enmr3'." - == str(excinfo.value) - ) + if tiledb.libtiledb.version() >= (2, 27): + assert ( + "Array: Unable to get enumeration; Enumeration 'enmr3' does not exist." + == str(excinfo.value) + ) + else: + assert ( + "ArraySchema: Unable to check if unknown enumeration is loaded. No enumeration named 'enmr3'." 
+ == str(excinfo.value) + ) assert attr3.enum_label is None assert A.attr("attr3").enum_label is None From dcef1552508c3f397e5484ee3925407006674317 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Wed, 16 Oct 2024 12:49:38 +0300 Subject: [PATCH 04/21] Make default value for `vfs.s3.region` in test conditional (#2086) --- tiledb/tests/test_fixes.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tiledb/tests/test_fixes.py b/tiledb/tests/test_fixes.py index 1a9c605111..da17a2ea87 100644 --- a/tiledb/tests/test_fixes.py +++ b/tiledb/tests/test_fixes.py @@ -196,7 +196,7 @@ def test_py1078_df_all_empty_strings(self): def test_sc23827_aws_region(self): # Test for SC-23287 # The expected behavior here for `vfs.s3.region` is: - # - default to 'us-east-1' if no environment variables are set + # - default to '' if no environment variables are set # - empty if AWS_REGION or AWS_DEFAULT_REGION is set (to any value) def get_config_with_env(env, key): @@ -209,7 +209,10 @@ def get_config_with_env(env, key): ) return sp_output.decode("UTF-8").strip() - assert get_config_with_env({}, "vfs.s3.region") == "us-east-1" + if tiledb.libtiledb.version() >= (2, 27, 0): + assert get_config_with_env({}, "vfs.s3.region") == "" + else: + assert get_config_with_env({}, "vfs.s3.region") == "us-east-1" assert get_config_with_env({"AWS_DEFAULT_REGION": ""}, "vfs.s3.region") == "" assert get_config_with_env({"AWS_REGION": ""}, "vfs.s3.region") == "" From 33ff06d969c93b77143f21b2f2fcbdda8dabbb73 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Thu, 17 Oct 2024 23:21:11 +0300 Subject: [PATCH 05/21] Handle removal of 'StorageManager' from stats dumps in 2.27 (#2088) --- tiledb/tests/test_fixes.py | 13 +++++++++---- tiledb/tests/test_libtiledb.py | 3 ++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tiledb/tests/test_fixes.py 
b/tiledb/tests/test_fixes.py index da17a2ea87..3ba9b81d92 100644 --- a/tiledb/tests/test_fixes.py +++ b/tiledb/tests/test_fixes.py @@ -161,10 +161,15 @@ def test_sc16301_arrow_extra_estimate_dense(self): with tiledb.open(uri) as A: tiledb.stats_enable() A[:] - assert ( - """"Context.StorageManager.Query.Reader.loop_num": 1""" - in tiledb.stats_dump(print_out=False) - ) + + stats_dump_str = tiledb.stats_dump(print_out=False) + if tiledb.libtiledb.version() >= (2, 27): + assert """"Context.Query.Reader.loop_num": 1""" in stats_dump_str + else: + assert ( + """"Context.StorageManager.Query.Reader.loop_num": 1""" + in stats_dump_str + ) tiledb.stats_disable() @pytest.mark.skipif( diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index 9122ad0f7e..83067239db 100644 --- a/tiledb/tests/test_libtiledb.py +++ b/tiledb/tests/test_libtiledb.py @@ -3500,7 +3500,8 @@ def test_query(self): q[:] stats = q.get_stats(print_out=False) - assert "Context.StorageManager.Query" in stats + # check that the stats are non-empty + assert stats class NullableIOTest(DiskTestCase): From d8109c3e85a26c1668a10d8ec569525ddf307646 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Fri, 18 Oct 2024 21:13:39 +0300 Subject: [PATCH 06/21] Add extra argument to Group::add_member Add extra argument to Group::add_member Should fix the related error in TileDB-Inc/centralized-tiledb-nightlies#24 --- tiledb/cc/group.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tiledb/cc/group.cc b/tiledb/cc/group.cc index 3e9a4829e6..c1a77be423 100644 --- a/tiledb/cc/group.cc +++ b/tiledb/cc/group.cc @@ -120,7 +120,12 @@ void init_group(py::module &m) { .def("_get_key_from_index", get_key_from_index) .def("_add", &Group::add_member, py::arg("uri"), - py::arg("relative") = false, py::arg("name") = std::nullopt) + py::arg("relative") = false, py::arg("name") = std::nullopt +#if TILEDB_VERSION_MAJOR >= 2 && 
TILEDB_VERSION_MINOR >= 27 + , + py::arg("type") = std::nullopt +#endif + ) .def("_remove", &Group::remove_member) .def("_delete_group", &Group::delete_group) .def("_member_count", &Group::member_count) From 61b1ce2d57ee75470d4b4c578eabee84eb022946 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:33:05 +0300 Subject: [PATCH 07/21] Extend the `GroupMetadata` functionality to support NumPy arrays (#2085) * Extend GroupMetadata functionality * Add tests --- tiledb/cc/common.cc | 20 +++--- tiledb/cc/group.cc | 118 +++++++++++++++++++++++++------ tiledb/group.py | 79 +++++++++++---------- tiledb/tests/cc/test_group.py | 4 +- tiledb/tests/test_group.py | 128 +++++++++++++++++++++++++++++++++- 5 files changed, 279 insertions(+), 70 deletions(-) diff --git a/tiledb/cc/common.cc b/tiledb/cc/common.cc index 9a6be60cf7..7015cebcec 100644 --- a/tiledb/cc/common.cc +++ b/tiledb/cc/common.cc @@ -192,18 +192,20 @@ bool is_tdb_str(tiledb_datatype_t type) { } py::size_t get_ncells(py::dtype type) { - if (type.is(py::dtype("S"))) - return type.itemsize() == 0 ? TILEDB_VAR_NUM : type.itemsize(); - - if (type.is(py::dtype("U"))) { - auto np_unicode_size = py::dtype("U").itemsize(); - return type.itemsize() == 0 ? 
TILEDB_VAR_NUM - : type.itemsize() / np_unicode_size; - } - auto np = py::module::import("numpy"); auto np_issubdtype = np.attr("issubdtype"); auto np_complexfloating = np.attr("complexfloating"); + auto np_character = np.attr("character"); + + py::bool_ ischaracter = np_issubdtype(type, np_character); + if (ischaracter) { + py::dtype base_dtype = + np.attr("dtype")(py::make_tuple(type.attr("kind"), 1)); + if (type.itemsize() == 0) + return TILEDB_VAR_NUM; + return type.itemsize() / base_dtype.itemsize(); + } + py::bool_ iscomplexfloating = np_issubdtype(type, np_complexfloating); if (iscomplexfloating) return 2; diff --git a/tiledb/cc/group.cc b/tiledb/cc/group.cc index c1a77be423..130b9266fb 100644 --- a/tiledb/cc/group.cc +++ b/tiledb/cc/group.cc @@ -22,16 +22,14 @@ void put_metadata_numpy(Group &group, const std::string &key, py::array value) { throw py::type_error(e.what()); } - if (is_tdb_str(value_type) && value.size() > 1) - throw py::type_error("array/list of strings not supported"); - - py::buffer_info value_buffer = value.request(); - if (value_buffer.ndim != 1) + if (value.ndim() != 1) throw py::type_error("Only 1D Numpy arrays can be stored as metadata"); py::size_t ncells = get_ncells(value.dtype()); if (ncells != 1) - throw py::type_error("Unsupported dtype for metadata"); + throw py::type_error("Unsupported dtype '" + + std::string(py::str(value.dtype())) + + "' for metadata"); auto value_num = is_tdb_str(value_type) ? 
value.nbytes() : value.size(); group.put_metadata(key, value_type, value_num, @@ -40,8 +38,10 @@ void put_metadata_numpy(Group &group, const std::string &key, py::array value) { void put_metadata(Group &group, const std::string &key, tiledb_datatype_t value_type, uint32_t value_num, - const char *value) { - group.put_metadata(key, value_type, value_num, value); + py::buffer &value) { + + py::buffer_info info = value.request(); + group.put_metadata(key, value_type, value_num, info.ptr); } bool has_metadata(Group &group, const std::string &key) { @@ -60,28 +60,102 @@ std::string get_key_from_index(Group &group, uint64_t index) { return key; } -py::tuple get_metadata(Group &group, const std::string &key) { - tiledb_datatype_t tdb_type; - uint32_t value_num; - const void *value; +py::object unpack_metadata_val(tiledb_datatype_t value_type, uint32_t value_num, + const char *value_ptr) { + if (value_num == 0) + throw TileDBError("internal error: unexpected value_num==0"); + + if (value_type == TILEDB_STRING_UTF8) { + return value_ptr == nullptr ? py::str() : py::str(value_ptr, value_num); + } + + if (value_type == TILEDB_BLOB || value_type == TILEDB_CHAR || + value_type == TILEDB_STRING_ASCII) { + return value_ptr == nullptr ? 
py::bytes() : py::bytes(value_ptr, value_num); + } - group.get_metadata(key, &tdb_type, &value_num, &value); + if (value_ptr == nullptr) + return py::tuple(); + + py::tuple unpacked(value_num); + for (uint32_t i = 0; i < value_num; i++) { + switch (value_type) { + case TILEDB_INT64: + unpacked[i] = *((int64_t *)value_ptr); + break; + case TILEDB_FLOAT64: + unpacked[i] = *((double *)value_ptr); + break; + case TILEDB_FLOAT32: + unpacked[i] = *((float *)value_ptr); + break; + case TILEDB_INT32: + unpacked[i] = *((int32_t *)value_ptr); + break; + case TILEDB_UINT32: + unpacked[i] = *((uint32_t *)value_ptr); + break; + case TILEDB_UINT64: + unpacked[i] = *((uint64_t *)value_ptr); + break; + case TILEDB_INT8: + unpacked[i] = *((int8_t *)value_ptr); + break; + case TILEDB_UINT8: + unpacked[i] = *((uint8_t *)value_ptr); + break; + case TILEDB_INT16: + unpacked[i] = *((int16_t *)value_ptr); + break; + case TILEDB_UINT16: + unpacked[i] = *((uint16_t *)value_ptr); + break; + default: + throw TileDBError("TileDB datatype not supported"); + } + value_ptr += tiledb_datatype_size(value_type); + } + + if (value_num > 1) + return unpacked; - py::dtype value_type = tdb_to_np_dtype(tdb_type, 1); + // for single values, return the value directly + return unpacked[0]; +} - py::array py_buf; - if (value == nullptr) { - py_buf = py::array(value_type, 0); - return py::make_tuple(py_buf, tdb_type); +py::array unpack_metadata_ndarray(tiledb_datatype_t value_type, + uint32_t value_num, const char *value_ptr) { + py::dtype dtype = tdb_to_np_dtype(value_type, 1); + + if (value_ptr == nullptr) { + auto np = py::module::import("numpy"); + return np.attr("empty")(py::make_tuple(0), dtype); } - if (tdb_type == TILEDB_STRING_UTF8) { - value_type = py::dtype("|S1"); + // special case for TILEDB_STRING_UTF8: TileDB assumes size=1 + if (value_type != TILEDB_STRING_UTF8) { + value_num *= tiledb_datatype_size(value_type); } - py_buf = py::array(value_type, value_num, value); + auto buf = 
py::memoryview::from_memory(value_ptr, value_num); - return py::make_tuple(py_buf, tdb_type); + auto np = py::module::import("numpy"); + return np.attr("frombuffer")(buf, dtype); +} + +py::tuple get_metadata(Group &group, const py::str &key, bool is_ndarray) { + tiledb_datatype_t tdb_type; + uint32_t value_num; + const char *value_ptr; + + group.get_metadata(key, &tdb_type, &value_num, (const void **)&value_ptr); + if (is_ndarray) { + auto arr = unpack_metadata_ndarray(tdb_type, value_num, value_ptr); + return py::make_tuple(arr, tdb_type); + } else { + auto arr = unpack_metadata_val(tdb_type, value_num, value_ptr); + return py::make_tuple(arr, tdb_type); + } } bool has_member(Group &group, std::string obj) { diff --git a/tiledb/group.py b/tiledb/group.py index a3435565e2..f32c394228 100644 --- a/tiledb/group.py +++ b/tiledb/group.py @@ -75,6 +75,7 @@ class Group(CtxMixin, lt.Group): """ _NP_DATA_PREFIX = "__np_flat_" + _NP_SHAPE_PREFIX = "__np_shape_" _mode_to_query_type = { "r": lt.QueryType.READ, @@ -112,19 +113,21 @@ def __setitem__(self, key: str, value: GroupMetadataValueType): put_metadata = self._group._put_metadata if isinstance(value, np.ndarray): - put_metadata(f"{Group._NP_DATA_PREFIX}{key}", np.array(value)) - elif isinstance(value, bytes): - put_metadata(key, lt.DataType.BLOB, len(value), value) - elif isinstance(value, str): - value = value.encode("UTF-8") - put_metadata(key, lt.DataType.STRING_UTF8, len(value), value) - elif isinstance(value, (list, tuple)): - put_metadata(key, np.array(value)) + flat_value = value.ravel() + put_metadata(f"{Group._NP_DATA_PREFIX}{key}", flat_value) + if value.shape != flat_value.shape: + # If the value is not a 1D ndarray, store its associated shape. + # The value's shape will be stored as separate metadata with the correct prefix. 
+ self.__setitem__(f"{Group._NP_SHAPE_PREFIX}{key}", value.shape) else: - if isinstance(value, int): - # raise OverflowError too large to convert to int64 - value = np.int64(value) - put_metadata(key, np.array([value])) + from .metadata import pack_metadata_val + + packed_buf = pack_metadata_val(value) + tiledb_type = packed_buf.tdbtype + value_num = packed_buf.value_num + data_view = packed_buf.data + + put_metadata(key, tiledb_type, value_num, data_view) def __getitem__(self, key: str, include_type=False) -> GroupMetadataValueType: """ @@ -137,25 +140,20 @@ def __getitem__(self, key: str, include_type=False) -> GroupMetadataValueType: raise TypeError(f"Unexpected key type '{type(key)}': expected str") if self._group._has_metadata(key): - pass + data, tdb_type = self._group._get_metadata(key, False) elif self._group._has_metadata(f"{Group._NP_DATA_PREFIX}{key}"): - key = f"{Group._NP_DATA_PREFIX}{key}" + data, tdb_type = self._group._get_metadata( + f"{Group._NP_DATA_PREFIX}{key}", True + ) + # reshape numpy array back to original shape, if needed + shape_key = f"{Group._NP_SHAPE_PREFIX}{key}" + if self._group._has_metadata(shape_key): + shape, tdb_type = self._group._get_metadata(shape_key, False) + data = data.reshape(shape) else: raise KeyError(f"KeyError: {key}") - data, tdb_type = self._group._get_metadata(key) - dtype = DataType.from_tiledb(tdb_type).np_dtype - if np.issubdtype(dtype, np.character): - value = data.tobytes() - if np.issubdtype(dtype, np.str_): - value = value.decode("UTF-8") - elif key.startswith(Group._NP_DATA_PREFIX): - value = data - elif len(data) == 1: - value = data[0] - else: - value = tuple(data) - return (value, tdb_type) if include_type else value + return (data, tdb_type) if include_type else data def __delitem__(self, key: str): """Removes the entry from the Group metadata. 
@@ -168,8 +166,8 @@ def __delitem__(self, key: str): # key may be stored as is or it may be prefixed (for numpy values) # we don't know this here so delete all potential internal keys - self._group._delete_metadata(key) - self._group._delete_metadata(f"{Group._NP_DATA_PREFIX}{key}") + for k in key, Group._NP_DATA_PREFIX + key, Group._NP_SHAPE_PREFIX + key: + self._group._delete_metadata(k) def __contains__(self, key: str) -> bool: """ @@ -193,12 +191,19 @@ def __len__(self) -> int: :return: Number of entries in the Group metadata """ - return self._group._metadata_num() + num = self._group._metadata_num() + # subtract the _NP_SHAPE_PREFIX prefixed keys + for key in self._iter(keys_only=True): + if key.startswith(Group._NP_SHAPE_PREFIX): + num -= 1 + + return num def _iter(self, keys_only: bool = True, dump: bool = False): """ Iterate over Group metadata keys or (key, value) tuples :param keys_only: whether to yield just keys or values too + :param dump: whether to yield a formatted string for each metadata entry """ if keys_only and dump: raise ValueError("keys_only and dump cannot both be True") @@ -207,9 +212,6 @@ def _iter(self, keys_only: bool = True, dump: bool = False): for i in range(metadata_num): key = self._group._get_key_from_index(i) - if key.startswith(Group._NP_DATA_PREFIX): - key = key[len(Group._NP_DATA_PREFIX) :] - if keys_only: yield key else: @@ -226,11 +228,16 @@ def _iter(self, keys_only: bool = True, dump: bool = False): yield key, val def __iter__(self): - for key in self._iter(): - yield key + np_data_prefix_len = len(Group._NP_DATA_PREFIX) + for key in self._iter(keys_only=True): + if key.startswith(Group._NP_DATA_PREFIX): + yield key[np_data_prefix_len:] + elif not key.startswith(Group._NP_SHAPE_PREFIX): + yield key + # else: ignore the shape keys def __repr__(self): - return str(dict(self._iter(keys_only=False))) + return str(dict(self)) def setdefault(self, key, default=None): raise NotImplementedError( diff --git 
a/tiledb/tests/cc/test_group.py b/tiledb/tests/cc/test_group.py index def6dbe5b3..b9513990db 100644 --- a/tiledb/tests/cc/test_group.py +++ b/tiledb/tests/cc/test_group.py @@ -23,9 +23,9 @@ def test_group_metadata(tmp_path): grp._open(lt.QueryType.READ) assert grp._metadata_num() == 2 assert grp._has_metadata("int") - assert_array_equal(grp._get_metadata("int")[0], int_data) + assert_array_equal(grp._get_metadata("int", False)[0], int_data) assert grp._has_metadata("flt") - assert_array_equal(grp._get_metadata("flt")[0], flt_data) + assert_array_equal(grp._get_metadata("flt", False)[0], flt_data) grp._close() time.sleep(0.001) diff --git a/tiledb/tests/test_group.py b/tiledb/tests/test_group.py index a20f9fb0a5..659b4f4826 100644 --- a/tiledb/tests/test_group.py +++ b/tiledb/tests/test_group.py @@ -9,7 +9,7 @@ import tiledb -from .common import DiskTestCase +from .common import DiskTestCase, assert_captured MIN_INT = np.iinfo(np.int64).min MAX_INT = np.iinfo(np.int64).max @@ -585,6 +585,59 @@ def test_basic(self, test_vals): self.assert_metadata_roundtrip(grp.meta, test_vals) grp.close() + @given(st_metadata, st_ndarray) + @settings(deadline=None) + def test_numpy(self, test_vals, ndarray): + test_vals["ndarray"] = ndarray + + path = self.path() + tiledb.Group.create(path) + + grp = tiledb.Group(path, "w") + grp.meta.update(test_vals) + grp.close() + + grp = tiledb.Group(path, "r") + self.assert_metadata_roundtrip(grp.meta, test_vals) + grp.close() + + grp = tiledb.Group(path, "w") + grp.meta["ndarray"] = 42 + test_vals["ndarray"] = 42 + grp.close() + + grp = tiledb.Group(path, "r") + self.assert_metadata_roundtrip(grp.meta, test_vals) + grp.close() + + # test resetting a key with a non-ndarray value to a ndarray value + grp = tiledb.Group(path, "w") + grp.meta["bytes"] = ndarray + test_vals["bytes"] = ndarray + grp.close() + + grp = tiledb.Group(path, "r") + self.assert_metadata_roundtrip(grp.meta, test_vals) + grp.close() + + grp = tiledb.Group(path, "w") + del 
grp.meta["ndarray"] + del test_vals["ndarray"] + grp.close() + + grp = tiledb.Group(path, "r") + self.assert_metadata_roundtrip(grp.meta, test_vals) + grp.close() + + grp = tiledb.Group(path, "w") + test_vals.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) + grp.meta.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) + grp.close() + + grp = tiledb.Group(path, "r") + self.assert_metadata_roundtrip(grp.meta, test_vals) + grp.close() + @pytest.mark.parametrize("use_timestamps", [True, False]) def test_consolidation_and_vac(self, use_timestamps): vfs = tiledb.VFS() @@ -636,3 +689,76 @@ def test_consolidation_and_vac_no_config(self): tiledb.Group.vacuum_metadata(path) assert len(vfs.ls(meta_path)) == 1 + + def test_string_metadata(self, capfd): + # this test ensures that string metadata is correctly stored and + # retrieved from the metadata store. It also tests that the metadata + # dump method works correctly for string metadata. + uri = self.path("test_ascii_metadata") + tiledb.Group.create(uri) + + grp = tiledb.Group(uri, "w") + grp.meta["abc"] = "xyz" + grp.close() + + grp = tiledb.Group(uri, "r") + assert grp.meta["abc"] == "xyz" + grp.meta.dump() + assert_captured(capfd, "Type: DataType.STRING_UTF8") + grp.close() + + def test_array_or_list_of_strings_metadata_error(self): + # this test ensures that an error is raised when trying to store + # an array or list of strings as metadata in a group. + # numpy arrays of single characters are supported since we don't need + # any extra offset information to retrieve them. 
+ uri = self.path("test_ascii_metadata") + tiledb.Group.create(uri) + + grp = tiledb.Group(uri, "w") + with pytest.raises(TypeError) as exc: + grp.meta["abc"] = ["x", "1"] + assert "Unsupported item type" in str(exc.value) + + with pytest.raises(TypeError) as exc: + grp.meta["abc"] = ["foo", "foofoo"] + + with pytest.raises(TypeError) as exc: + grp.meta["abc"] = np.array(["foo", "12345"]) + + grp.meta["abc"] = np.array(["1", "2", "3", "f", "o", "o"], dtype="U1") + grp.close() + + grp = tiledb.Group(uri, "r") + self.assert_metadata_roundtrip( + grp.meta, {"abc": np.array(["1", "2", "3", "f", "o", "o"], dtype="U1")} + ) + grp.close() + + grp = tiledb.Group(uri, "w") + grp.meta["abc"] = np.array(["T", "i", "l", "e", "D", "B", "!"], dtype="S1") + grp.close() + + grp = tiledb.Group(uri, "r") + self.assert_metadata_roundtrip( + grp.meta, + {"abc": np.array([b"T", b"i", b"l", b"e", b"D", b"B", b"!"], dtype="S1")}, + ) + grp.close() + + def test_bytes_metadata(self, capfd): + # this test ensures that bytes metadata is correctly stored and + # retrieved from the metadata store. It also tests that the metadata + # dump method works correctly for bytes metadata. 
+ path = self.path() + tiledb.Group.create(path) + + grp = tiledb.Group(path, "w") + grp.meta["bytes"] = b"blob" + grp.close() + + grp = tiledb.Group(path, "r") + assert grp.meta["bytes"] == b"blob" + grp.meta.dump() + assert_captured(capfd, "Type: DataType.BLOB") + grp.close() From f206545c1b29839bd2831295acdb7782e88c3475 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:33:43 +0300 Subject: [PATCH 08/21] Add `TILEDB_DATETIME_DAY` type support for Arrow (#2002) * Add in place buffer shift for TILEDB_DATETIME_DAY * Add tests --- tiledb/py_arrow_io_impl.h | 32 ++++-- tiledb/tests/test_pandas_dataframe.py | 135 +++++++++++++++++++++++++- 2 files changed, 160 insertions(+), 7 deletions(-) diff --git a/tiledb/py_arrow_io_impl.h b/tiledb/py_arrow_io_impl.h index f2e8a60dda..060bb5d374 100644 --- a/tiledb/py_arrow_io_impl.h +++ b/tiledb/py_arrow_io_impl.h @@ -233,6 +233,8 @@ ArrowInfo tiledb_buffer_arrow_fmt(BufferInfo bufferinfo, bool use_list = true) { return ArrowInfo("tsu:"); case TILEDB_DATETIME_NS: return ArrowInfo("tsn:"); + case TILEDB_DATETIME_DAY: + return ArrowInfo("tdD"); // TILEDB_BOOL is stored as a uint8_t but arrow::Type::BOOL is 1 bit case TILEDB_BOOL: return ArrowInfo("C"); @@ -242,7 +244,6 @@ ArrowInfo tiledb_buffer_arrow_fmt(BufferInfo bufferinfo, bool use_list = true) { case TILEDB_DATETIME_YEAR: case TILEDB_DATETIME_MONTH: case TILEDB_DATETIME_WEEK: - case TILEDB_DATETIME_DAY: case TILEDB_DATETIME_HR: case TILEDB_DATETIME_MIN: case TILEDB_DATETIME_PS: @@ -739,6 +740,14 @@ int64_t flags_for_buffer(BufferInfo binfo) { return 0; } +template T cast_checked(uint64_t val) { + if (val > std::numeric_limits::max()) { + throw tiledb::TileDBError( + "[TileDB-Arrow] Value too large to cast to requested type"); + } + return static_cast(val); +} + void ArrowExporter::export_(const std::string &name, ArrowArray *array, ArrowSchema *schema, ArrowAdapter::release_cb cb, void *cb_data) { @@ 
-762,13 +771,11 @@ void ArrowExporter::export_(const std::string &name, ArrowArray *array, if (bufferinfo.is_var) { buffers = {nullptr, bufferinfo.offsets, bufferinfo.data}; } else { - cpp_schema = new CPPArrowSchema(name, arrow_fmt.fmt_, std::nullopt, - arrow_flags, {}, {}); buffers = {nullptr, bufferinfo.data}; } cpp_schema->export_ptr(schema); - size_t elem_num = 0; + size_t elem_num = bufferinfo.data_num; if (bufferinfo.is_var) { // adjust for arrow offset unless empty result elem_num = (bufferinfo.offsets_num == 0) ? 0 : bufferinfo.offsets_num - 1; @@ -778,8 +785,21 @@ void ArrowExporter::export_(const std::string &name, ArrowArray *array, // take the size of the entire buffer and divide by the size of each // element elem_num = bufferinfo.data_num / bufferinfo.tdbtype.cell_val_num; - } else { - elem_num = bufferinfo.data_num; + } else if (arrow_fmt.fmt_ == "tdD") { + // for Arrow date32 we only need the first 4 bytes of each 8-byte + // TILEDB_DATETIME_DAY element which we keep by in-place left shifting + for (size_t i = 0; i < bufferinfo.data_num; i++) { + uint32_t lost_data = *(reinterpret_cast( + static_cast(buffers[1]) + i * 8 + 4)); + if (lost_data != 0) { + throw tiledb::TileDBError( + "[TileDB-Arrow] Non-zero data detected in the memory buffer at " + "position that will be overwritten"); + } + + static_cast(buffers[1])[i] = + cast_checked(static_cast(buffers[1])[i]); + } } } diff --git a/tiledb/tests/test_pandas_dataframe.py b/tiledb/tests/test_pandas_dataframe.py index 4d80aeba47..f7374252c0 100644 --- a/tiledb/tests/test_pandas_dataframe.py +++ b/tiledb/tests/test_pandas_dataframe.py @@ -5,6 +5,7 @@ import string import sys import uuid +from collections import OrderedDict import numpy as np import pyarrow @@ -16,6 +17,7 @@ from .common import ( DiskTestCase, + assert_dict_arrays_equal, dtype_max, dtype_min, has_pandas, @@ -219,7 +221,6 @@ def test_object_dtype(self): " Date: Thu, 24 Oct 2024 00:01:19 +0300 Subject: [PATCH 09/21] Update HISTORY for 
0.32.2 and 0.32.3 (#2095) * Update HISTORY for 0.32.2 against TileDB 2.26.1 * Update HISTORY for 0.32.3 against TileDB 2.26.2 --------- Co-authored-by: Isaiah Norton --- CMakeLists.txt | 4 ++-- HISTORY.md | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 973aa94870..dfd75c62dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,8 +46,8 @@ if (NOT TileDB_FOUND) message(STATUS "Downloading TileDB default version ...") # Download latest release fetch_prebuilt_tiledb( - VERSION 2.26.1 - RELLIST_HASH SHA256=256216aa989015397f4efbbd319ebeccfead568baa73611aa0c1c0fcea35f8d5 + VERSION 2.26.2 + RELLIST_HASH SHA256=86c19d7c5246cb18e370a4272cead63ea84bd651789842e618de4d57d4510522 ) endif() find_package(TileDB REQUIRED) diff --git a/HISTORY.md b/HISTORY.md index 336d8af540..07e71c36e5 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,17 @@ +# Release 0.32.3 + +* TileDB-Py 0.32.3 includes TileDB Embedded [2.26.2](https://github.com/TileDB-Inc/TileDB/releases/tag/2.26.2) + +## Build system changes + +* Override tag version in manylinux container by @dudoslav in https://github.com/TileDB-Inc/TileDB-Py/pull/2077 + +# Release 0.32.2 + +## Improvements + +* Fix object_type return value by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2073 + # Release 0.32.1 * TileDB-Py 0.32.1 includes TileDB Embedded [2.26.1](https://github.com/TileDB-Inc/TileDB/releases/tag/2.26.1) From f627920a7c327102bf8230ae6027e4f17c3a90d9 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:33:50 +0300 Subject: [PATCH 10/21] Handle removal of 'StorageManager' from stats dumps in 2.27 - Part 2 (#2098) --- tiledb/tests/test_libtiledb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index 83067239db..bddefd68c9 100644 --- a/tiledb/tests/test_libtiledb.py +++ 
b/tiledb/tests/test_libtiledb.py @@ -3480,7 +3480,8 @@ def test_ctx(self): T[:] = np.random.randint(10, size=3) stats = ctx.get_stats(print_out=False) - assert "Context.StorageManager.write_store" in stats + # check that the stats are non-empty + assert stats def test_query(self): tiledb.stats_enable() From e63fe29f70e9eb89cb79d20e4383d76cf4387c60 Mon Sep 17 00:00:00 2001 From: Shaun M Reed Date: Thu, 24 Oct 2024 10:53:59 -0400 Subject: [PATCH 11/21] Update enumerations exception message. (#2096) --- tiledb/tests/test_enumeration.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tiledb/tests/test_enumeration.py b/tiledb/tests/test_enumeration.py index c8b5d36aaa..f6af612b2c 100644 --- a/tiledb/tests/test_enumeration.py +++ b/tiledb/tests/test_enumeration.py @@ -92,16 +92,10 @@ def test_array_schema_enumeration(self): with self.assertRaises(tiledb.TileDBError) as excinfo: assert A.enum("enmr3") == [] - if tiledb.libtiledb.version() >= (2, 27): - assert ( - "Array: Unable to get enumeration; Enumeration 'enmr3' does not exist." - == str(excinfo.value) - ) - else: - assert ( - "ArraySchema: Unable to check if unknown enumeration is loaded. No enumeration named 'enmr3'." - == str(excinfo.value) - ) + assert ( + "ArraySchema: Unable to check if unknown enumeration is loaded. No enumeration named 'enmr3'." + == str(excinfo.value) + ) assert attr3.enum_label is None assert A.attr("attr3").enum_label is None From 34575efedecc0929987e14001f5fc24cbf7a1fc4 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Fri, 25 Oct 2024 16:06:13 +0300 Subject: [PATCH 12/21] Revert "Do not run CI in PR twice (#2082)" (#2100) This reverts commit 0cd2714af004becb71d4f65f2c471a91788dd422. 
--- .github/workflows/ci.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1baf4bc553..7bb1af3b77 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,11 +1,6 @@ name: TileDB Python CI -on: - push: - branches: [dev] - pull_request: - branches: [dev] - workflow_dispatch: +on: [push, pull_request, workflow_dispatch] concurrency: group: ${{ github.head_ref || github.run_id }} From 8ccbb1cdf9682fc06dce03049571b6c7a01f2615 Mon Sep 17 00:00:00 2001 From: Nick Vigilante Date: Fri, 25 Oct 2024 17:51:45 -0400 Subject: [PATCH 13/21] Fix typo in API doc for create_bucket (#2101) --- tiledb/vfs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tiledb/vfs.py b/tiledb/vfs.py index 3ed9e848c5..7a703fae06 100644 --- a/tiledb/vfs.py +++ b/tiledb/vfs.py @@ -188,9 +188,9 @@ def is_empty_bucket(self, uri: _AnyPath) -> bool: return self._is_empty_bucket(_to_path_str(uri)) def create_dir(self, uri: _AnyPath): - """Check if an object store bucket is empty. + """Create a directory at the specified input URI. 
- :param str uri: Input URI of the bucket + :param str uri: Input URI of the directory """ return self._create_dir(_to_path_str(uri)) From 53208af32a6c7c018d041f7961a2831ccdcec7cf Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Tue, 29 Oct 2024 12:54:07 +0200 Subject: [PATCH 14/21] Fix GroupMetadata backwards compatibility (#2102) --- tiledb/cc/group.cc | 4 +- tiledb/group.py | 15 +++++ tiledb/tests/test_group.py | 132 +++++++++++++++++++++++++++++++++++++ 3 files changed, 150 insertions(+), 1 deletion(-) diff --git a/tiledb/cc/group.cc b/tiledb/cc/group.cc index 130b9266fb..dcdf918e6b 100644 --- a/tiledb/cc/group.cc +++ b/tiledb/cc/group.cc @@ -26,7 +26,9 @@ void put_metadata_numpy(Group &group, const std::string &key, py::array value) { throw py::type_error("Only 1D Numpy arrays can be stored as metadata"); py::size_t ncells = get_ncells(value.dtype()); - if (ncells != 1) + // we can't store multi-cell arrays as metadata + // e.g. an array of strings containing strings of more than one character + if (ncells != 1 && value.size() > 1) throw py::type_error("Unsupported dtype '" + std::string(py::str(value.dtype())) + "' for metadata"); diff --git a/tiledb/group.py b/tiledb/group.py index f32c394228..ff1366adef 100644 --- a/tiledb/group.py +++ b/tiledb/group.py @@ -119,6 +119,16 @@ def __setitem__(self, key: str, value: GroupMetadataValueType): # If the value is not a 1D ndarray, store its associated shape. # The value's shape will be stored as separate metadata with the correct prefix. 
self.__setitem__(f"{Group._NP_SHAPE_PREFIX}{key}", value.shape) + elif isinstance(value, np.generic): + tiledb_type = DataType.from_numpy(value.dtype).tiledb_type + if tiledb_type in (lt.DataType.BLOB, lt.DataType.CHAR): + put_metadata(key, tiledb_type, len(value), value) + elif tiledb_type == lt.DataType.STRING_UTF8: + put_metadata( + key, lt.DataType.STRING_UTF8, len(value), value.encode("UTF-8") + ) + else: + put_metadata(key, tiledb_type, 1, value) else: from .metadata import pack_metadata_val @@ -141,11 +151,16 @@ def __getitem__(self, key: str, include_type=False) -> GroupMetadataValueType: if self._group._has_metadata(key): data, tdb_type = self._group._get_metadata(key, False) + dtype = DataType.from_tiledb(tdb_type).np_dtype + # we return all int and float values as numpy scalars + if dtype.kind in ("i", "f") and not isinstance(data, tuple): + data = np.dtype(dtype).type(data) elif self._group._has_metadata(f"{Group._NP_DATA_PREFIX}{key}"): data, tdb_type = self._group._get_metadata( f"{Group._NP_DATA_PREFIX}{key}", True ) # reshape numpy array back to original shape, if needed + # this will not be found in any case for TileDB-Py <= 0.32.3. shape_key = f"{Group._NP_SHAPE_PREFIX}{key}" if self._group._has_metadata(shape_key): shape, tdb_type = self._group._get_metadata(shape_key, False) diff --git a/tiledb/tests/test_group.py b/tiledb/tests/test_group.py index 659b4f4826..4fe3536b0e 100644 --- a/tiledb/tests/test_group.py +++ b/tiledb/tests/test_group.py @@ -1,5 +1,8 @@ +import base64 +import io import os import pathlib +import tarfile import numpy as np import pytest @@ -762,3 +765,132 @@ def test_bytes_metadata(self, capfd): grp.meta.dump() assert_captured(capfd, "Type: DataType.BLOB") grp.close() + + def test_group_metadata_backwards_compat(self): + # This test ensures that metadata written with the TileDB-Py 0.32.3 + # will be read correctly in the future versions. 
+ + # === The following code creates a group with metadata using the current version of TileDB-Py === + path_new = self.path("new_group") + tiledb.Group.create(path_new) + group = tiledb.Group(path_new, "w") + + # python primitive types + group.meta["python_int"] = -1234 + group.meta["python_float"] = 3.14 + group.meta["python_str"] = "hello" + group.meta["python_bytes"] = b"hello" + group.meta["python_bool"] = False + + # numpy primitive types + group.meta["numpy_int"] = np.int64(-93) + group.meta["numpy_uint"] = np.uint64(42) + group.meta["numpy_float64"] = np.float64(3.14) + group.meta["numpy_bytes"] = np.bytes_("hello") + group.meta["numpy_str"] = np.str_("hello") + group.meta["numpy_bool"] = np.bool(False) + + # lists/tuples + group.meta["list_int"] = [7] + group.meta["tuple_int"] = (7,) + group.meta["list_ints"] = [1, -2, 3] + group.meta["tuple_ints"] = (1, 2, 3) + group.meta["list_float"] = [1.1] + group.meta["tuple_float"] = (1.1,) + group.meta["list_floats"] = [1.1, 2.2, 3.3] + group.meta["tuple_floats"] = (1.1, 2.2, 3.3) + group.meta["list_empty"] = [] + group.meta["tuple_empty"] = () + + # numpy arrays + group.meta["numpy_int"] = np.array([-11], dtype=np.int64) + group.meta["numpy_ints"] = np.array([1, -2, 3], dtype=np.int64) + group.meta["numpy_uint"] = np.array([22], dtype=np.uint64) + group.meta["numpy_uints"] = np.array([1, 2, 3], dtype=np.uint64) + group.meta["numpy_float"] = np.array([3.14], dtype=np.float64) + group.meta["numpy_floats"] = np.array([1.1, 2.2, 3.3], dtype=np.float64) + group.meta["numpy_byte"] = np.array([b"hello"], dtype="S5") + group.meta["numpy_str"] = np.array(["hello"], dtype="U5") + group.meta["numpy_bool"] = np.array([True, False, True]) + + group.close() + # === End of the code that creates the group with metadata === + + # The following commented out code was used to generate the base64 encoded string of the group + # from the TileDB-Py 0.32.3 after creating the group with metadata in the exact same way as above. 
+ ''' + # Compress the contents of the group folder to tgz + with tarfile.open("test.tar.gz", "w:gz") as tar: + with os.scandir(path_new) as entries: + for entry in entries: + tar.add(entry.path, arcname=entry.name) + + # Read the .tgz file and encode it to base64 + with open("test.tar.gz", 'rb') as f: + s = base64.encodebytes(f.read()) + + # Print the base64 encoded string + group_tgz = f"""{s.decode():>32}""" + print(group_tgz) + ''' + + # The following base64 encoded string is the contents of the group folder compressed + # to a tgz file using TileDB-Py 0.32.3. + group_tgz = b"""H4sICO/+G2cC/3Rlc3QudGFyANPT19N3CEis8EhNTEktYqAJMIAAXLSBgbEJgg0SNzQwMjRiUKhg + oAMoLS5JLAJazzAygZGFQm5JZm6qraG5kaWFhbmlhbGekaGphbGlJRfDKBj2ID4+N7UkUZ+mdoAy + tbmpKYQ2g9AGRqh53tDE3MDM3Nzc2NQcmP8NDc3NGRRM6Zn/E9Mzi/GpAypLSxt+8a83KMp/Y8zy + 33C0/KdL+W+Otfy3NBot/kdS+R8fj4h/YPSj8UxTktOSjQxMjNPMzS0MDCxTjVLNTUwS01IMzMxM + zJMTicj/ZiYmuMp/QwNjM9Ty38jQAFhdKBjQM/+P0PJfDIhfMULYV1khNAsjTFYITDIygAQYQbKM + YBYDQv0xIEcAymdEEqtgbA1x9DtsIBATrJgRpRfwgC18R8GqqqXxD1gDJwZtnTTb5YbtE0YbprhD + 8y0KH7SwVJTnps9d9sorMOX8Met7M8+yMHzas+bz0rgbMet7z3b75kqb3mSdtisqonQnu8GrGvHI + 6WGxX/Jm+7UW7V45+8/OVSZ3+O+Ic/0Sloo+8OKG6hqutaun9NgfXjqDz9ftBZNBwLvXt6+fX94/ + ++EfK0X1S2nBpVv5jQ0cut7nS8T3/wn7rOpq5q9/Jn2XW8OhQ/frZTLrkycxHt1evlKvrtbsXeIX + 2dw33D0fd0yt5vqe8T/k3d3wtO4UI5Vm8yMvspXTJE+ozFY+13ZA7e+avDertDwP+b1mcjq0JPar + QLS26mvFLQH6D97dDbyZlx1b8X/ZHYmHWpqMjTP6QiVvrZX/3nsqxv3WwofHjtgmbk+YGnhC/U1D + v5+z0SvXZ5YfmXhYiw4Ynmi727rZteXvpZULJ/jvNikQV1/tuiM73XDytc2ZVu6PRcy4NN3Cuze9 + 0GJc1KHr+mXOAxexJaUFAv/kVgi/K+FaI+2wZfqOxoYWocQPGzNeG9h9edh+3DfBJMYzOKL2l+em + ezc0Hyq98xaQ8eT40PDoxpYX60KKnogs7Ht2d+cf9lm5m9pGy8fhDvRG+/+j/X+M9p+JqYGJ+WgD + cES0/0oyc1JTkuLTi/JLC/RKUpJok//xtP+w9P+NTUD9v9H232j5P1r+D0j5b2ZoYDZa/o+I8h9c + 8NN0AJiM8V8TA9PR8d9RMApGwSgYBaNgFIyCUTAKRsEooCYAAP1+F2wAKAAA""" + + # Ceate a new group by extracting the contents of the tgz file + path_original = self.path("original_group") + with 
tarfile.open(fileobj=io.BytesIO(base64.b64decode(group_tgz))) as tf: + try: + tf.extractall(path_original, filter="fully_trusted") + except TypeError: + tf.extractall(path_original) + + # Open both the original and the new group and compare the metadata both in values and types + group_original = tiledb.Group(path_original, "r") + group_new = tiledb.Group(path_new, "r") + + self.assert_metadata_roundtrip(group_new.meta, group_original.meta) + + group_original.close() + group_new.close() + + def test_group_metadata_new_types(self): + # This kind of data was not supported for TileDB-Py <= 0.32.3 + path_new = self.path("new_group") + + tiledb.Group.create(path_new) + group = tiledb.Group(path_new, "w") + test_vals = { + "int64": np.array(-1111, dtype=np.int64), + "uint64": np.array(2, dtype=np.uint64), + "float64": np.array(3.14, dtype=np.float64), + "bool": np.array(True, dtype=bool), + "str": np.array(["a", "b", "c"], dtype="S"), + "unicode": np.array(["a", "b", "c"], dtype="U"), + "bytes": np.array([b"a", b"b", b"c"]), + "datetime": np.array( + [np.datetime64("2021-01-01"), np.datetime64("2021-01-02")] + ), + } + group.meta.update(test_vals) + group.close() + + group = tiledb.Group(path_new, "r") + self.assert_metadata_roundtrip(group.meta, test_vals) + group.close() From c18a63c25bb5d9d5ec63afb96f7086e62ff9db9a Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:23:29 +0200 Subject: [PATCH 15/21] Update HISTORY for 0.32.5 against TileDB 2.26.2 and 0.32.4 (#2097) --- HISTORY.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index 07e71c36e5..c8f42bf90c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,46 @@ +# Release 0.32.5 + +* TileDB-Py 0.32.5 includes TileDB Embedded [2.26.2](https://github.com/TileDB-Inc/TileDB/releases/tag/2.26.2) + +## Improvements + +* Fix GroupMetadata backwards compatibility by @kounelisagis in 
https://github.com/TileDB-Inc/TileDB-Py/pull/2102 +* Fix typo in API doc for create_bucket by @nickvigilante in https://github.com/TileDB-Inc/TileDB-Py/pull/2101 +* Update enumerations exception message by @shaunrd0 in https://github.com/TileDB-Inc/TileDB-Py/pull/2096 +* Handle removal of 'StorageManager' from stats dumps in 2.27 - Part 2 by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2098 + +## Build system changes + +* Revert "Do not run CI in PR twice (#2082)" by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2100 + +# Release 0.32.4 + +TileDB-Py 0.32.4 was inadvertently released against TileDB [2.26.1](https://github.com/TileDB-Inc/TileDB/releases/tag/2.26.1). This will be corrected in 0.32.5, but the version delta does not justify yanking 0.32.4. + +## Improvements + +* Add `TILEDB_DATETIME_DAY` type support for Arrow by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2002 +* Extend the `GroupMetadata` functionality to support NumPy arrays by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2085 +* Add extra argument to Group::add_member by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2093 +* Handle removal of 'StorageManager' from stats dumps in 2.27 by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2088 +* Make default value for `vfs.s3.region` in test conditional by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2086 +* Make error message for non-existing `Enumeration` in test conditional by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2087 +* Fix randomly failing test `DenseArrayTest::test_open_with_timestamp[False]` by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2090 +* Fix skipif condition for test_cloud by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2084 +* Raise error when sparse=True is passed to `tiledb.from_numpy` by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2080 +* 
Documentation and CI updates by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2078 +* Move `DenseArrayImpl` to pure Python by @kounelisagis and @nguyenv in https://github.com/TileDB-Inc/TileDB-Py/pull/2071 +* Move `PackedBuffer` and `pack_metadata_val` to pure Python, and wrap `tiledb_datatype_size` with pybind by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2072 +* Move `SparseArrayImpl` to pure Python by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2065 + +## Build system changes + +* Add CI to test against the TileDB core library built from source with a custom version by @dudoslav and @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2091 +* Do not run CI in PR twice by @dudoslav in https://github.com/TileDB-Inc/TileDB-Py/pull/2082 +* Disable release for Python 3.8 by @dudoslav in https://github.com/TileDB-Inc/TileDB-Py/pull/2081 +* Re-enable automatic upload to pypi by @dudoslav and @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2034 +* Drop Python 3.8 support by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/2079 + # Release 0.32.3 * TileDB-Py 0.32.3 includes TileDB Embedded [2.26.2](https://github.com/TileDB-Inc/TileDB/releases/tag/2.26.2) From 36742e264edaa42bc957b7b7b8d9dfd8feba2210 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Thu, 31 Oct 2024 20:52:17 +0200 Subject: [PATCH 16/21] Change `np.bool` to `np.bool_` - Daily Tests fix (#2103) --- tiledb/tests/test_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tiledb/tests/test_group.py b/tiledb/tests/test_group.py index 4fe3536b0e..000120d879 100644 --- a/tiledb/tests/test_group.py +++ b/tiledb/tests/test_group.py @@ -788,7 +788,7 @@ def test_group_metadata_backwards_compat(self): group.meta["numpy_float64"] = np.float64(3.14) group.meta["numpy_bytes"] = np.bytes_("hello") group.meta["numpy_str"] = np.str_("hello") - 
group.meta["numpy_bool"] = np.bool(False) + group.meta["numpy_bool"] = np.bool_(False) # lists/tuples group.meta["list_int"] = [7] From 65536d8fcee8a3eece5a69458cc9036135e3cea7 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:21:43 +0200 Subject: [PATCH 17/21] Fix `stats_dump` broken return value type (#2104) * Raise error if stats_enable is not called before stats_dump * Fix stats dump return type --- tiledb/core.cc | 3 ++ tiledb/stats.py | 13 ++++++--- tiledb/tests/test_fixes.py | 56 ++++++++++++++++++++++++++++++++++++++ tiledb/tests/test_stats.py | 2 +- 4 files changed, 69 insertions(+), 5 deletions(-) diff --git a/tiledb/core.cc b/tiledb/core.cc index fc8af53ea5..22ddf5d003 100644 --- a/tiledb/core.cc +++ b/tiledb/core.cc @@ -1594,6 +1594,8 @@ void init_stats() { void disable_stats() { g_stats.reset(nullptr); } +bool stats_enabled() { return (bool)g_stats; } + void increment_stat(std::string key, double value) { auto &stats_counters = g_stats.get()->counters; @@ -1759,6 +1761,7 @@ void init_core(py::module &m) { Stats::disable(); disable_stats(); }); + m.def("stats_enabled", &stats_enabled); m.def("reset_stats", []() { Stats::reset(); init_stats(); diff --git a/tiledb/stats.py b/tiledb/stats.py index 47aeeb4e4c..cbcc73823b 100644 --- a/tiledb/stats.py +++ b/tiledb/stats.py @@ -1,6 +1,8 @@ from json import dumps as json_dumps from json import loads as json_loads +from tiledb import TileDBError + def stats_enable(): """Enable TileDB internal statistics.""" @@ -34,9 +36,12 @@ def stats_dump( :param json: Return stats JSON object (default: False) :param verbose: Print extended internal statistics (default: True) """ - from .main import stats_dump_str, stats_raw_dump_str + from .main import stats_dump_str, stats_enabled, stats_raw_dump_str - stats_str = None + if not stats_enabled(): + raise TileDBError( + "Statistics are not enabled. Call tiledb.stats_enable() first." 
+ ) if json or not verbose: stats_str = stats_raw_dump_str() @@ -50,9 +55,9 @@ def stats_dump( if include_python: from .main import python_internal_stats - stats_json_core["python"] = json_dumps(python_internal_stats(True)) + stats_json_core["python"] = python_internal_stats(True) if json: - return stats_json_core + return json_dumps(stats_json_core) stats_str = "" diff --git a/tiledb/tests/test_fixes.py b/tiledb/tests/test_fixes.py index 3ba9b81d92..a4ab4f65f0 100644 --- a/tiledb/tests/test_fixes.py +++ b/tiledb/tests/test_fixes.py @@ -1,5 +1,6 @@ import concurrent import concurrent.futures +import json import os import subprocess import sys @@ -172,6 +173,61 @@ def test_sc16301_arrow_extra_estimate_dense(self): ) tiledb.stats_disable() + def test_sc58286_fix_stats_dump_return_value_broken(self): + uri = self.path("test_sc58286_fix_stats_dump_return_value_broken") + dim1 = tiledb.Dim(name="d1", dtype="int64", domain=(1, 3)) + att = tiledb.Attr(name="a1", dtype=" Date: Tue, 12 Nov 2024 16:39:26 +0200 Subject: [PATCH 18/21] Enable Python 3.13 (#2107) --- .github/workflows/build-wheels.yml | 6 +++--- .github/workflows/ci.yml | 2 +- .github/workflows/daily-test-build-numpy.yml | 4 +++- pyproject.toml | 1 + 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml index 58f12c1b4c..f67cf374a1 100644 --- a/.github/workflows/build-wheels.yml +++ b/.github/workflows/build-wheels.yml @@ -33,7 +33,7 @@ jobs: - [macos-13, macosx_x86_64] - [macos-14, macosx_arm64] - [windows-2022, win_amd64] - python: ["cp39", "cp310", "cp311", "cp312"] + python: ["cp39", "cp310", "cp311", "cp312", "cp313"] steps: - uses: actions/checkout@v4 @@ -46,7 +46,7 @@ jobs: brew install automake pkg-config ninja llvm - name: Build wheels - uses: pypa/cibuildwheel@v2.18.1 + uses: pypa/cibuildwheel@v2.21.3 env: CIBW_BUILD_VERBOSITY: 3 CIBW_ENVIRONMENT_PASS_LINUX: SETUPTOOLS_SCM_PRETEND_VERSION_FOR_TILEDB S3_BUCKET TILEDB_TOKEN 
TILEDB_NAMESPACE @@ -99,7 +99,7 @@ jobs: - macos-14 - windows-2022 - ubuntu-22.04 - python: ["3.9", "3.10", "3.11", "3.12"] + python: ["3.9", "3.10", "3.11", "3.12", "3.13"] runs-on: ${{ matrix.os }} steps: - name: Set up Python ${{ matrix.python }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7bb1af3b77..71fc1ceb77 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: # libfaketime tests fail on macos arm. Disable tests for now. # - macos-14 - windows-latest - python-version: [ "3.9", "3.10", "3.11", "3.12"] + python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ] fail-fast: false env: MACOSX_DEPLOYMENT_TARGET: "11" diff --git a/.github/workflows/daily-test-build-numpy.yml b/.github/workflows/daily-test-build-numpy.yml index 5802898169..a6ecb592a5 100644 --- a/.github/workflows/daily-test-build-numpy.yml +++ b/.github/workflows/daily-test-build-numpy.yml @@ -22,9 +22,11 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-13, macos-14, windows-latest] - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] include: # https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg + - python-version: "3.13" + numpy-version: "2.1.0" - python-version: "3.12" numpy-version: "1.26.4" - python-version: "3.12" diff --git a/pyproject.toml b/pyproject.toml index d026bbf88e..8ee52a5001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ classifiers=[ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] dependencies = [ "numpy>=1.25", From a2f8ccc99c9062455da867a609f0dce6d6d88041 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Fri, 15 Nov 2024 17:37:04 +0200 Subject: [PATCH 19/21] Add test for dropping a fixed attribute and adding it back as var-sized (#2083) --- 
tiledb/tests/test_schema_evolution.py | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tiledb/tests/test_schema_evolution.py b/tiledb/tests/test_schema_evolution.py index 5110e8ab1e..efadd0a741 100644 --- a/tiledb/tests/test_schema_evolution.py +++ b/tiledb/tests/test_schema_evolution.py @@ -227,3 +227,60 @@ def test_schema_evolution_extend_check_bad_type(): with pytest.raises(tiledb.TileDBError): enmr.extend([1, 2, 3]) enmr.extend([True, False]) + + +@pytest.mark.skipif( + tiledb.libtiledb.version() < (2, 27), + reason="Dropping a fixed-sized attribute and adding it back" + "as a var-sized attribute is not supported in TileDB < 2.27", +) +def test_schema_evolution_drop_fixed_attribute_and_add_back_as_var_sized(tmp_path): + ctx = tiledb.default_ctx() + uri = str(tmp_path) + attrs = [ + tiledb.Attr(name="a", dtype=np.int32), + tiledb.Attr(name="b", dtype=np.int32), + ] + dims = [tiledb.Dim(domain=(1, 10), dtype=np.int32)] + domain = tiledb.Domain(*dims) + schema = tiledb.ArraySchema(domain=domain, attrs=attrs, sparse=False) + tiledb.Array.create(uri, schema) + + original_data = np.arange(1, 11) + with tiledb.open(uri, "w") as A: + A[:] = {"a": original_data, "b": original_data} + + se = tiledb.ArraySchemaEvolution(ctx) + se.drop_attribute("a") + se.array_evolve(uri) + + # check schema after dropping attribute + with tiledb.open(uri) as A: + assert not A.schema.has_attr("a") + assert A.schema.attr("b").dtype == np.int32 + + se = tiledb.ArraySchemaEvolution(ctx) + newattr = tiledb.Attr("a", dtype="S", var=True) + se.add_attribute(newattr) + se.array_evolve(uri) + + # check schema and data after adding attribute back as a var-sized attribute + with tiledb.open(uri) as A: + assert A.schema.has_attr("a") + assert A.schema.attr("a").dtype == "S" + assert A.schema.attr("b").dtype == np.int32 + # check that each value == b'\x80' (empty byte) + assert_array_equal(A[:]["a"], np.array([b"\x80" for _ in range(10)])) + + # add new data to the array 
+ new_data = np.array( + ["tiledb-string-n.{}".format(i) for i in range(1, 11)], dtype="S" + ) + with tiledb.open(uri, "w") as A: + A[:] = {"a": new_data, "b": original_data} + + # check data for both attributes + with tiledb.open(uri) as A: + res = A[:] + assert_array_equal(res["a"], new_data) + assert_array_equal(res["b"], original_data) From 484d9d113815e1d8caffe7028ce8891e95b2a1e5 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Tue, 19 Nov 2024 07:54:32 +0200 Subject: [PATCH 20/21] Remove workaround from `CurrentDomain`/`NDRectangle` (#2111) --- tiledb/array_schema.py | 4 +- tiledb/cc/current_domain.cc | 115 ++++++++++++++++++------------------ tiledb/current_domain.py | 10 +--- tiledb/ndrectangle.py | 9 +-- 4 files changed, 61 insertions(+), 77 deletions(-) diff --git a/tiledb/array_schema.py b/tiledb/array_schema.py index 09cf727f50..d44d4b5138 100644 --- a/tiledb/array_schema.py +++ b/tiledb/array_schema.py @@ -396,11 +396,9 @@ def current_domain(self) -> CurrentDomain: :rtype: tiledb.CurrentDomain """ - curr_dom = CurrentDomain.from_pybind11( + return CurrentDomain.from_pybind11( self._ctx, self._current_domain(self._ctx) ) - curr_dom._set_domain(self.domain) - return curr_dom def set_current_domain(self, current_domain): """Set the current domain diff --git a/tiledb/cc/current_domain.cc b/tiledb/cc/current_domain.cc index f45b1c33f4..2436fbf730 100644 --- a/tiledb/cc/current_domain.cc +++ b/tiledb/cc/current_domain.cc @@ -118,87 +118,86 @@ void init_current_domain(py::module &m) { py::arg("dim_name"), py::arg("start"), py::arg("end")) .def("_range", - [](NDRectangle &ndrect, const std::string &dim_name, - const py::dtype &n_type) -> py::tuple { - if (n_type.is(py::dtype::of())) { + [](NDRectangle &ndrect, const std::string &dim_name) -> py::tuple { + const tiledb_datatype_t n_type = ndrect.range_dtype(dim_name); + if (n_type == TILEDB_UINT64) { auto range = ndrect.range(dim_name); return 
py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { + } else if (n_type == TILEDB_INT64) { auto range = ndrect.range(dim_name); return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { + } else if (n_type == TILEDB_UINT32) { auto range = ndrect.range(dim_name); return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { + } else if (n_type == TILEDB_INT32) { auto range = ndrect.range(dim_name); return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { + } else if (n_type == TILEDB_UINT16) { auto range = ndrect.range(dim_name); return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { + } else if (n_type == TILEDB_INT16) { auto range = ndrect.range(dim_name); return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { + } else if (n_type == TILEDB_UINT8) { auto range = ndrect.range(dim_name); return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { + } else if (n_type == TILEDB_INT8) { auto range = ndrect.range(dim_name); return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { + } else if (n_type == TILEDB_FLOAT64) { auto range = ndrect.range(dim_name); return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { + } else if (n_type == TILEDB_FLOAT32) { auto range = ndrect.range(dim_name); return py::make_tuple(range[0], range[1]); - } else if (py::getattr(n_type, "kind").is(py::str("S")) || - py::getattr(n_type, "kind").is(py::str("U"))) { + } else if (n_type == TILEDB_STRING_ASCII || + n_type == TILEDB_STRING_UTF8) { auto range = ndrect.range(dim_name); return py::make_tuple(range[0], range[1]); } else { TPY_ERROR_LOC("Unsupported type for NDRectangle's range"); } }) - .def("_range", - [](NDRectangle &ndrect, unsigned dim_idx, - const py::dtype &n_type) -> py::tuple { - if (n_type.is(py::dtype::of())) { - auto range = 
ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { - auto range = ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { - auto range = ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { - auto range = ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { - auto range = ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { - auto range = ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { - auto range = ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { - auto range = ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { - auto range = ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else if (n_type.is(py::dtype::of())) { - auto range = ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else if (py::getattr(n_type, "kind").is(py::str("S")) || - py::getattr(n_type, "kind").is(py::str("U"))) { - auto range = ndrect.range(dim_idx); - return py::make_tuple(range[0], range[1]); - } else { - TPY_ERROR_LOC("Unsupported type for NDRectangle's range"); - } - }); + .def("_range", [](NDRectangle &ndrect, unsigned dim_idx) -> py::tuple { + const tiledb_datatype_t n_type = ndrect.range_dtype(dim_idx); + if (n_type == TILEDB_UINT64) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else if (n_type == TILEDB_INT64) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else if (n_type == TILEDB_UINT32) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else if (n_type == 
TILEDB_INT32) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else if (n_type == TILEDB_UINT16) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else if (n_type == TILEDB_INT16) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else if (n_type == TILEDB_UINT8) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else if (n_type == TILEDB_INT8) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else if (n_type == TILEDB_FLOAT64) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else if (n_type == TILEDB_FLOAT32) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else if (n_type == TILEDB_STRING_ASCII || + n_type == TILEDB_STRING_UTF8) { + auto range = ndrect.range(dim_idx); + return py::make_tuple(range[0], range[1]); + } else { + TPY_ERROR_LOC("Unsupported type for NDRectangle's range"); + } + }); py::class_(m, "CurrentDomain") .def(py::init()) @@ -210,14 +209,14 @@ void init_current_domain(py::module &m) { return py::capsule(curr_dom.ptr().get(), "curr_dom"); }) + .def_property_readonly("_is_empty", &CurrentDomain::is_empty) + .def_property_readonly("_type", &CurrentDomain::type) .def("_set_ndrectangle", &CurrentDomain::set_ndrectangle, py::arg("ndrect")) - .def("_ndrectangle", &CurrentDomain::ndrectangle) - - .def("_is_empty", &CurrentDomain::is_empty); + .def("_ndrectangle", &CurrentDomain::ndrectangle); #endif } diff --git a/tiledb/current_domain.py b/tiledb/current_domain.py index f00755f845..b842ad73b7 100644 --- a/tiledb/current_domain.py +++ b/tiledb/current_domain.py @@ -18,9 +18,6 @@ def __init__(self, ctx: Ctx): """ super().__init__(ctx) - def _set_domain(self, domain: Domain): - self._domain = domain - @property def type(self): """The type of the current domain. 
@@ -35,7 +32,7 @@ def is_empty(self): :rtype: bool """ - return self._is_empty() + return self._is_empty def set_ndrectangle(self, ndrect: NDRectangle): """Sets an N-dimensional rectangle representation on a current domain. @@ -44,7 +41,6 @@ def set_ndrectangle(self, ndrect: NDRectangle): :raises tiledb.TileDBError: """ self._set_ndrectangle(ndrect) - self._domain = ndrect._get_domain() @property def ndrectangle(self): @@ -53,6 +49,4 @@ def ndrectangle(self): :rtype: NDRectangle :raises tiledb.TileDBError: """ - ndrect = NDRectangle.from_pybind11(self._ctx, self._ndrectangle()) - ndrect._set_domain(self._domain) - return ndrect + return NDRectangle.from_pybind11(self._ctx, self._ndrectangle()) diff --git a/tiledb/ndrectangle.py b/tiledb/ndrectangle.py index a81940911f..e071e403bf 100644 --- a/tiledb/ndrectangle.py +++ b/tiledb/ndrectangle.py @@ -19,7 +19,6 @@ def __init__(self, ctx: Ctx, domain: Domain): :raises tiledb.TileDBError: """ super().__init__(ctx, domain) - self._set_domain(domain) def __str__(self) -> str: dimensions_str = ", ".join( @@ -28,12 +27,6 @@ def __str__(self) -> str: ) return f"NDRectangle({dimensions_str})" - def _set_domain(self, domain: Domain): - self._domain = domain - - def _get_domain(self) -> Domain: - return self._domain - def set_range( self, dim: Union[str, int], @@ -58,4 +51,4 @@ def range( :return: Range as a tuple (start, end) :raises tiledb.TileDBError: """ - return self._range(dim, self._domain.dim(dim).dtype) + return self._range(dim) From e9d05cd5be4a4abcb75df70305ecd06cee216783 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Wed, 20 Nov 2024 00:27:32 +0200 Subject: [PATCH 21/21] Change default value of `TILEDB_REMOVE_DEPRECATIONS` to `OFF` (#2112) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8ee52a5001..0e822ea338 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,7 @@ 
sdist.include = ["tiledb/_generated_version.py"] TILEDB_PATH = {env="TILEDB_PATH"} TILEDB_VERSION = {env="TILEDB_VERSION"} TILEDB_HASH = {env="TILEDB_HASH"} -TILEDB_REMOVE_DEPRECATIONS = "ON" +TILEDB_REMOVE_DEPRECATIONS = "OFF" TILEDB_SERIALIZATION = "OFF" [tool.pytest.ini_options]