From 53cca7e583cbf87c7be187ea4916a3ec5bc5fe8f Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Wed, 17 Jan 2024 13:29:19 -0600 Subject: [PATCH] Move all Python tests to same directory, Require KWs for `SOMADataFrame.open` --- .github/workflows/python-ci-single.yml | 7 - Makefile | 2 +- apis/python/src/tiledbsoma/_dataframe.py | 28 +- apis/python/src/tiledbsoma/_tdb_handles.py | 13 +- apis/python/src/tiledbsoma/_tiledb_array.py | 5 +- apis/python/src/tiledbsoma/io/ingest.py | 7 +- apis/python/src/tiledbsoma/pytiledbsoma.cc | 180 +++-- apis/python/src/tiledbsoma/soma_dataframe.cc | 18 +- apis/python/tests/test_dataframe.py | 4 + apis/python/tests/test_experiment_query.py | 34 +- .../python/tests}/test_indexer.py | 8 +- .../python/tests}/test_query_condition.py | 3 +- .../test => apis/python/tests}/test_simple.py | 0 .../python/tests}/test_soma_array.py | 2 +- libtiledbsoma/src/external/khash/khash.h | 619 ++++++++++-------- libtiledbsoma/src/external/khash/khashl.h | 571 ++++++++++------ 16 files changed, 897 insertions(+), 604 deletions(-) rename {libtiledbsoma/test => apis/python/tests}/test_indexer.py (94%) rename {libtiledbsoma/test => apis/python/tests}/test_query_condition.py (98%) rename {libtiledbsoma/test => apis/python/tests}/test_simple.py (100%) rename {libtiledbsoma/test => apis/python/tests}/test_soma_array.py (99%) diff --git a/.github/workflows/python-ci-single.yml b/.github/workflows/python-ci-single.yml index 4803dfe260..a67076231b 100644 --- a/.github/workflows/python-ci-single.yml +++ b/.github/workflows/python-ci-single.yml @@ -122,13 +122,6 @@ jobs: - name: Run libtiledbsoma unit tests run: ctest --output-on-failure --test-dir build/libtiledbsoma -C Release --verbose - - name: Run pytests for C++ - shell: bash - # Setting PYTHONPATH ensures the tests load the in-tree source code unde apis/python/src - # instead of copy we `pip install`ed to site-packages above. That's needed for the code - # coverage analysis to work. 
- run: PYTHONPATH=$(pwd)/apis/python/src python -m pytest --cov=apis/python/src --cov-report=xml libtiledbsoma/test -v --durations=20 - - name: Run pytests for Python shell: bash # Setting PYTHONPATH ensures the tests load the in-tree source code unde apis/python/src diff --git a/Makefile b/Makefile index cdcebb3835..f033dce8d8 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ update: .PHONY: test test: data ctest --test-dir build/libtiledbsoma -C Release --verbose --rerun-failed --output-on-failure - pytest apis/python/tests libtiledbsoma/test + pytest apis/python/tests .PHONY: data data: diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index bc9102b0e1..fd3700ecb5 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -343,12 +343,12 @@ def read( ts = (0, self._handle._handle.timestamp) sr = clib.SOMADataFrame.open( - self._handle._handle.uri, - clib.OpenMode.read, - platform_config or {}, - column_names or [], - _util.to_clib_result_order(result_order), - ts, + uri=self._handle._handle.uri, + mode=clib.OpenMode.read, + platform_config=platform_config or {}, + column_names=column_names or [], + result_order=_util.to_clib_result_order(result_order), + timestamp=ts, ) if value_filter is not None: @@ -533,11 +533,6 @@ def _set_reader_coord( # There's no way to specify "to infinity" for strings. # We have to get the nonempty domain and use that as the end. 
ned = self._handle.non_empty_domain() - if ned is None: - raise ValueError( - "Found empty nonempty domain when setting " - "string coordinates in _set_reader_coord" - ) _, stop = ned[dim_idx] else: stop = coord.stop @@ -587,6 +582,7 @@ def _set_reader_coord_by_py_seq_or_np_array( if _util.pa_types_is_string_or_bytes(dim.type): sr.set_dim_points_string_or_bytes(dim.name, coord) + return True elif pa.types.is_timestamp(dim.type): if not isinstance(coord, (tuple, list, np.ndarray)): raise ValueError( @@ -597,15 +593,13 @@ def _set_reader_coord_by_py_seq_or_np_array( for e in coord ] sr.set_dim_points_int64(dim.name, icoord) + return True # TODO: bool - else: - raise ValueError( - f"unhandled type {dim.dtype} for index column named {dim.name}" - ) - - return True + raise ValueError( + f"unhandled type {dim.dtype} for index column named {dim.name}" + ) def _set_reader_coord_by_numeric_slice( self, sr: clib.SOMAArray, dim_idx: int, dim: pa.Field, coord: Slice[Any] diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index f192bd6e52..c28743240b 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -24,7 +24,6 @@ Type, TypeVar, Union, - cast, ) import attrs @@ -243,12 +242,9 @@ def _opener( def schema(self) -> tiledb.ArraySchema: return self._handle.schema - def non_empty_domain(self) -> Optional[Tuple[Tuple[object, object], ...]]: + def non_empty_domain(self) -> Tuple[Tuple[object, object], ...]: try: - ned = self._handle.nonempty_domain() - if ned is None: - return None - return cast(Tuple[Tuple[object, object], ...], ned) + return self._handle.nonempty_domain() or () except tiledb.TileDBError as e: raise SOMAError(e) @@ -393,9 +389,8 @@ def _cast_domain( def domain(self) -> Tuple[Tuple[object, object], ...]: return self._cast_domain(self._handle.domain) - def non_empty_domain(self) -> Optional[Tuple[Tuple[object, object], ...]]: - result = 
self._cast_domain(self._handle.non_empty_domain) - return result or None + def non_empty_domain(self) -> Tuple[Tuple[object, object], ...]: + return self._cast_domain(self._handle.non_empty_domain) or () @property def attr_names(self) -> Tuple[str, ...]: diff --git a/apis/python/src/tiledbsoma/_tiledb_array.py b/apis/python/src/tiledbsoma/_tiledb_array.py index f10c45cd50..a937ab00a3 100644 --- a/apis/python/src/tiledbsoma/_tiledb_array.py +++ b/apis/python/src/tiledbsoma/_tiledb_array.py @@ -69,10 +69,9 @@ def schema(self) -> pa.Schema: return tiledb_schema_to_arrow( self._tiledb_array_schema(), self.uri, self._ctx ) - else: - return self._tiledb_array_schema() + return self._tiledb_array_schema() - def non_empty_domain(self) -> Optional[Tuple[Tuple[Any, Any], ...]]: + def non_empty_domain(self) -> Tuple[Tuple[Any, Any], ...]: """ Retrieves the non-empty domain for each dimension, namely the smallest and largest indices in each dimension for which the array/dataframe has diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 1f21910687..67d5651fe3 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -2251,7 +2251,7 @@ def _coo_to_table( def _chunk_is_contained_in( chunk_bounds: Sequence[Tuple[int, int]], - storage_nonempty_domain: Optional[Sequence[Tuple[Optional[int], Optional[int]]]], + storage_nonempty_domain: Sequence[Tuple[Optional[int], Optional[int]]], ) -> bool: """ Determines if a dim range is included within the array's non-empty domain. Ranges are inclusive @@ -2269,7 +2269,7 @@ def _chunk_is_contained_in( user that they declare they are retrying the exact same input file -- and we do our best to fulfill their ask by checking the dimension being strided on. 
""" - if storage_nonempty_domain is None: + if len(storage_nonempty_domain) == 0: return False if len(chunk_bounds) != len(storage_nonempty_domain): @@ -2288,6 +2288,9 @@ def _chunk_is_contained_in_axis( stride_axis: int, ) -> bool: """Helper function for ``_chunk_is_contained_in``.""" + if len(storage_nonempty_domain) == 0: + return False + storage_lo, storage_hi = storage_nonempty_domain[stride_axis] if storage_lo is None or storage_hi is None: # E.g. an array has had its schema created but no data written yet diff --git a/apis/python/src/tiledbsoma/pytiledbsoma.cc b/apis/python/src/tiledbsoma/pytiledbsoma.cc index 78481aca59..eee25aad4e 100644 --- a/apis/python/src/tiledbsoma/pytiledbsoma.cc +++ b/apis/python/src/tiledbsoma/pytiledbsoma.cc @@ -1,4 +1,5 @@ #include +#include #include #include @@ -22,76 +23,135 @@ void load_soma_dataframe(py::module &); void load_query_condition(py::module &); PYBIND11_MODULE(pytiledbsoma, m) { - py::register_exception(m, "SOMAError"); + py::register_exception(m, "SOMAError"); - /* We need to make sure C++ TileDBSOMAError is translated to a correctly-typed - * Python error - */ - py::register_exception_translator([](std::exception_ptr p) { + /* We need to make sure C++ TileDBSOMAError is translated to a correctly-typed + * Python error + */ + py::register_exception_translator([](std::exception_ptr p) { auto tiledb_soma_error = (py::object)py::module::import("tiledbsoma").attr("SOMAError"); try { - if (p) + if (p) std::rethrow_exception(p); } catch (const TileDBSOMAError &e) { - PyErr_SetString(tiledb_soma_error.ptr(), e.what()); + PyErr_SetString(tiledb_soma_error.ptr(), e.what()); } catch (const TileDBSOMAPyError &e) { - PyErr_SetString(tiledb_soma_error.ptr(), e.what()); + PyErr_SetString(tiledb_soma_error.ptr(), e.what()); } catch (py::builtin_exception &e) { - throw; + throw; }; - }); - - py::enum_(m, "OpenMode") - .value("read", OpenMode::read) - .value("write", OpenMode::write); - - py::enum_(m, "ResultOrder") - 
.value("automatic", ResultOrder::automatic) - .value("rowmajor", ResultOrder::rowmajor) - .value("colmajor", ResultOrder::colmajor); - - m.doc() = "SOMA acceleration library"; - - m.def("version", []() { return tiledbsoma::version::as_string(); }); - - m.def( - "config_logging", - [](const std::string& level, const std::string& logfile) { - LOG_CONFIG(level, logfile); - }, - "level"_a, - "logfile"_a = ""); - - m.def("info", &LOG_INFO, "message"_a = ""); - m.def("debug", &LOG_DEBUG, "message"_a = ""); - - m.def( - "tiledbsoma_stats_enable", - []() { tiledbsoma::stats::enable(); }, - "Enable TileDB internal statistics. Lifecycle: experimental."); - m.def( - "tiledbsoma_stats_disable", - []() { tiledbsoma::stats::disable(); }, - "Disable TileDB internal statistics. Lifecycle: experimental."); - m.def( - "tiledbsoma_stats_reset", - []() { tiledbsoma::stats::reset(); }, - "Reset all TileDB internal statistics to 0. Lifecycle: experimental."); - m.def( - "tiledbsoma_stats_dump", - []() { - py::print(tiledbsoma::version::as_string()); - std::string stats = tiledbsoma::stats::dump(); - py::print(stats); - }, - "Print TileDB internal statistics. 
Lifecycle: experimental."); - - load_soma_array(m); - load_soma_object(m); - load_soma_dataframe(m); - load_query_condition(m); + }); + + py::enum_(m, "OpenMode") + .value("read", OpenMode::read) + .value("write", OpenMode::write); + + py::enum_(m, "ResultOrder") + .value("automatic", ResultOrder::automatic) + .value("rowmajor", ResultOrder::rowmajor) + .value("colmajor", ResultOrder::colmajor); + + m.doc() = "SOMA acceleration library"; + + m.def("version", []() { return tiledbsoma::version::as_string(); }); + + m.def( + "config_logging", + [](const std::string& level, const std::string& logfile) { + LOG_CONFIG(level, logfile); + }, + "level"_a, + "logfile"_a = ""); + + m.def("info", &LOG_INFO, "message"_a = ""); + m.def("debug", &LOG_DEBUG, "message"_a = ""); + + m.def( + "tiledbsoma_stats_enable", + []() { tiledbsoma::stats::enable(); }, + "Enable TileDB internal statistics. Lifecycle: experimental."); + m.def( + "tiledbsoma_stats_disable", + []() { tiledbsoma::stats::disable(); }, + "Disable TileDB internal statistics. Lifecycle: experimental."); + m.def( + "tiledbsoma_stats_reset", + []() { tiledbsoma::stats::reset(); }, + "Reset all TileDB internal statistics to 0. Lifecycle: experimental."); + m.def( + "tiledbsoma_stats_dump", + []() { + py::print(tiledbsoma::version::as_string()); + std::string stats = tiledbsoma::stats::dump(); + py::print(stats); + }, + "Print TileDB internal statistics. 
Lifecycle: experimental."); + + // Efficient C++ re-indexing (aka hashing unique key values to an index + // between 0 and number of keys - 1) based on khash + py::class_(m, "IntIndexer") + .def(py::init<>()) + .def(py::init&, int>()) + .def( + "map_locations", + [](IntIndexer& indexer, + py::array_t keys, + int num_threads) { + auto buffer = keys.request(); + int64_t* data = static_cast(buffer.ptr); + size_t length = buffer.shape[0]; + indexer.map_locations(keys.data(), keys.size(), num_threads); + }) + .def( + "map_locations", + [](IntIndexer& indexer, + std::vector keys, + int num_threads) { + indexer.map_locations(keys.data(), keys.size(), num_threads); + }) + // Perform lookup for a large input array of keys and return the looked + // up value array (passing ownership from C++ to python) + .def( + "get_indexer", + [](IntIndexer& indexer, py::array_t lookups) { + auto input_buffer = lookups.request(); + int64_t* input_ptr = static_cast(input_buffer.ptr); + size_t size = input_buffer.shape[0]; + auto results = py::array_t(size); + auto results_buffer = results.request(); + size_t results_size = results_buffer.shape[0]; + + int64_t* results_ptr = static_cast( + results_buffer.ptr); + + indexer.lookup(input_ptr, results_ptr, size); + return results; + }) + // Perform lookup for a large input array of keys and writes the looked + // up values into previously allocated array (works for the cases in + // which python and R pre-allocate the array) + .def( + "get_indexer", + [](IntIndexer& indexer, + py::array_t lookups, + py::array_t& results) { + auto input_buffer = lookups.request(); + int64_t* input_ptr = static_cast(input_buffer.ptr); + size_t size = input_buffer.shape[0]; + + auto results_buffer = results.request(); + int64_t* results_ptr = static_cast( + results_buffer.ptr); + size_t results_size = input_buffer.shape[0]; + indexer.lookup(input_ptr, input_ptr, size); + }); + + load_soma_array(m); + load_soma_object(m); + load_soma_dataframe(m); + 
load_query_condition(m); } }; diff --git a/apis/python/src/tiledbsoma/soma_dataframe.cc b/apis/python/src/tiledbsoma/soma_dataframe.cc index 95b6152a75..18717fc9ed 100644 --- a/apis/python/src/tiledbsoma/soma_dataframe.cc +++ b/apis/python/src/tiledbsoma/soma_dataframe.cc @@ -49,7 +49,23 @@ using namespace tiledbsoma; void load_soma_dataframe(py::module &m) { py::class_(m, "SOMADataFrame") - .def_static("open", py::overload_cast, std::vector, ResultOrder, std::optional>>(&SOMADataFrame::open)) + .def_static( + "open", + py::overload_cast< + std::string_view, + OpenMode, + std::map, + std::vector, + ResultOrder, + std::optional>>(&SOMADataFrame::open), + "uri"_a, + "mode"_a, + py::kw_only(), + "platform_config"_a = py::dict(), + "column_names"_a = py::none(), + "result_order"_a = ResultOrder::automatic, + "timestamp"_a = py::none()) + .def_static("exists", &SOMADataFrame::exists) .def("reopen", py::overload_cast>>(&SOMADataFrame::open)) .def("close", &SOMADataFrame::close) diff --git a/apis/python/tests/test_dataframe.py b/apis/python/tests/test_dataframe.py index 1412e0f55f..dbc8ef945b 100644 --- a/apis/python/tests/test_dataframe.py +++ b/apis/python/tests/test_dataframe.py @@ -1356,3 +1356,7 @@ def test_enum_extend_past_numerical_limit(tmp_path): with pytest.raises(ValueError): with soma.open(uri, mode="w") as A: A.write(tbl) + + +def test_write_str_empty_ned(tmp_path): + tmp_path.as_posix() diff --git a/apis/python/tests/test_experiment_query.py b/apis/python/tests/test_experiment_query.py index 53ec116837..e92663668f 100644 --- a/apis/python/tests/test_experiment_query.py +++ b/apis/python/tests/test_experiment_query.py @@ -628,23 +628,23 @@ def test_experiment_query_to_anndata_obsm_varm(soma_experiment): ) def test_experiment_query_to_anndata_obsp_varp(soma_experiment): with soma_experiment.axis_query("RNA") as query: - query.to_anndata("raw", obsp_layers=["foo"], varp_layers=["bar"]) - # assert set(ad.obsp.keys()) == {"foo"} - # obsp = ad.obsp["foo"] - # 
assert isinstance(obsp, np.ndarray) - # assert obsp.shape == (query.n_obs, query.n_obs) - - # assert np.array_equal( - # query.obsp("foo").coos().concat().to_scipy().todense(), obsp - # ) - - # assert set(ad.varp.keys()) == {"bar"} - # varp = ad.varp["bar"] - # assert isinstance(varp, np.ndarray) - # assert varp.shape == (query.n_vars, query.n_vars) - # assert np.array_equal( - # query.varp("bar").coos().concat().to_scipy().todense(), varp - # ) + ad = query.to_anndata("raw", obsp_layers=["foo"], varp_layers=["bar"]) + assert set(ad.obsp.keys()) == {"foo"} + obsp = ad.obsp["foo"] + assert isinstance(obsp, np.ndarray) + assert obsp.shape == (query.n_obs, query.n_obs) + + assert np.array_equal( + query.obsp("foo").coos().concat().to_scipy().todense(), obsp + ) + + assert set(ad.varp.keys()) == {"bar"} + varp = ad.varp["bar"] + assert isinstance(varp, np.ndarray) + assert varp.shape == (query.n_vars, query.n_vars) + assert np.array_equal( + query.varp("bar").coos().concat().to_scipy().todense(), varp + ) def test_axis_query(): diff --git a/libtiledbsoma/test/test_indexer.py b/apis/python/tests/test_indexer.py similarity index 94% rename from libtiledbsoma/test/test_indexer.py rename to apis/python/tests/test_indexer.py index 3bbd9ec520..c2acb53fd7 100644 --- a/libtiledbsoma/test/test_indexer.py +++ b/apis/python/tests/test_indexer.py @@ -1,14 +1,10 @@ import numpy as np import pandas as pd -import tiledb from tiledbsoma.options import SOMATileDBContext from tiledbsoma.options._soma_tiledb_context import _validate_soma_tiledb_context -from tiledbsoma.pytiledbsoma import config_logging from tiledbsoma.utils import build_index -config_logging("debug") - def indexer_test(keys: np.array, lookups: np.array, fail: bool): if fail: @@ -19,7 +15,7 @@ def indexer_test(keys: np.array, lookups: np.array, fail: bool): def indexer_test_fail(keys: np.array, lookups: np.array): try: - context = _validate_soma_tiledb_context(SOMATileDBContext(tiledb.default_ctx())) + context = 
_validate_soma_tiledb_context(SOMATileDBContext()) index = build_index(keys, context) index.get_indexer(lookups) raise AssertionError("should have failed") @@ -35,7 +31,7 @@ def indexer_test_fail(keys: np.array, lookups: np.array): def indexer_test_pass(keys: np.array, lookups: np.array): - context = _validate_soma_tiledb_context(SOMATileDBContext(tiledb.default_ctx())) + context = _validate_soma_tiledb_context(SOMATileDBContext()) indexer = build_index(keys, context) results = indexer.get_indexer(lookups) panda_indexer = pd.Index(keys) diff --git a/libtiledbsoma/test/test_query_condition.py b/apis/python/tests/test_query_condition.py similarity index 98% rename from libtiledbsoma/test/test_query_condition.py rename to apis/python/tests/test_query_condition.py index b6a070df2c..3fad4f47fa 100644 --- a/libtiledbsoma/test/test_query_condition.py +++ b/apis/python/tests/test_query_condition.py @@ -13,7 +13,7 @@ VERBOSE = False TEST_DIR = os.path.dirname(__file__) -SOMA_URI = f"{TEST_DIR}/../../test/soco/pbmc3k_processed" +SOMA_URI = f"{TEST_DIR}/../../../test/soco/pbmc3k_processed" if VERBOSE: clib.config_logging("debug") @@ -228,6 +228,7 @@ def test_eval_error_conditions(malformed_condition): with pytest.raises(SOMAError): # test function directly for codecov qc.init_query_condition(schema, []) + qc.init_query_condition(schema, ["bad_query_attr"]) if __name__ == "__main__": diff --git a/libtiledbsoma/test/test_simple.py b/apis/python/tests/test_simple.py similarity index 100% rename from libtiledbsoma/test/test_simple.py rename to apis/python/tests/test_simple.py diff --git a/libtiledbsoma/test/test_soma_array.py b/apis/python/tests/test_soma_array.py similarity index 99% rename from libtiledbsoma/test/test_soma_array.py rename to apis/python/tests/test_soma_array.py index f6b9ef4519..e090cada8e 100644 --- a/libtiledbsoma/test/test_soma_array.py +++ b/apis/python/tests/test_soma_array.py @@ -9,7 +9,7 @@ VERBOSE = False TEST_DIR = os.path.dirname(__file__) -SOMA_URI = 
f"{TEST_DIR}/../../test/soco/pbmc3k_processed" +SOMA_URI = f"{TEST_DIR}/../../../test/soco/pbmc3k_processed" if VERBOSE: clib.config_logging("debug") diff --git a/libtiledbsoma/src/external/khash/khash.h b/libtiledbsoma/src/external/khash/khash.h index f75f3474c1..9142c5df5a 100644 --- a/libtiledbsoma/src/external/khash/khash.h +++ b/libtiledbsoma/src/external/khash/khash.h @@ -29,35 +29,35 @@ #include "khash.h" KHASH_MAP_INIT_INT(32, char) int main() { - int ret, is_missing; - khiter_t k; - khash_t(32) *h = kh_init(32); - k = kh_put(32, h, 5, &ret); - kh_value(h, k) = 10; - k = kh_get(32, h, 10); - is_missing = (k == kh_end(h)); - k = kh_get(32, h, 5); - kh_del(32, h, k); - for (k = kh_begin(h); k != kh_end(h); ++k) - if (kh_exist(h, k)) kh_value(h, k) = 1; - kh_destroy(32, h); - return 0; + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; } */ /* 2013-05-02 (0.2.8): - * Use quadratic probing. When the capacity is power of 2, stepping function - i*(i+1)/2 guarantees to traverse each bucket. It is better than double - hashing on cache performance and is more robust than linear probing. + * Use quadratic probing. When the capacity is power of 2, stepping + function i*(i+1)/2 guarantees to traverse each bucket. It is better than + double hashing on cache performance and is more robust than linear probing. - In theory, double hashing should be more robust than quadratic probing. - However, my implementation is probably not for large hash tables, because - the second hash function is closely tied to the first hash function, - which reduce the effectiveness of double hashing. + In theory, double hashing should be more robust than quadratic + probing. 
However, my implementation is probably not for large hash tables, + because the second hash function is closely tied to the first hash function, + which reduce the effectiveness of double hashing. - Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php + Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php 2011-12-29 (0.2.7): @@ -65,18 +65,18 @@ int main() { 2011-09-16 (0.2.6): - * The capacity is a power of 2. This seems to dramatically improve the - speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: - - http://code.google.com/p/ulib/ - - http://nothings.org/computer/judy/ + - http://code.google.com/p/ulib/ + - http://nothings.org/computer/judy/ - * Allow to optionally use linear probing which usually has better - performance for random input. Double hashing is still the default as it - is more robust to certain non-random input. + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as + it is more robust to certain non-random input. - * Added Wang's integer hash function (not used by default). This hash - function is more robust to certain non-random input. + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. 2011-02-14 (0.2.5): @@ -88,32 +88,31 @@ int main() { 2008-09-19 (0.2.3): - * Corrected the example - * Improved interfaces + * Corrected the example + * Improved interfaces 2008-09-11 (0.2.2): - * Improved speed a little in kh_put() + * Improved speed a little in kh_put() 2008-09-10 (0.2.1): - * Added kh_clear() - * Fixed a compiling error + * Added kh_clear() + * Fixed a compiling error 2008-09-02 (0.2.0): - * Changed to token concatenation which increases flexibility. 
+ * Changed to token concatenation which increases flexibility. 2008-08-31 (0.1.2): - * Fixed a bug in kh_get(), which has not been tested previously. + * Fixed a bug in kh_get(), which has not been tested previously. 2008-08-31 (0.1.1): - * Added destructor + * Added destructor */ - #ifndef __AC_KHASH_H #define __AC_KHASH_H @@ -125,9 +124,9 @@ int main() { #define AC_VERSION_KHASH_H "0.2.8" +#include #include #include -#include /* compiler specific configuration */ @@ -152,8 +151,9 @@ typedef unsigned long long khint64_t; #endif /* kh_inline */ #ifndef klib_unused -#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) -#define klib_unused __attribute__ ((__unused__)) +#if (defined __clang__ && __clang_major__ >= 3) || \ + (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__((__unused__)) #else #define klib_unused #endif @@ -162,28 +162,38 @@ typedef unsigned long long khint64_t; typedef khint32_t khint_t; typedef khint_t khiter_t; -#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) -#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) -#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) -#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) -#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) -#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) -#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) +#define __ac_isempty(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 2) +#define __ac_isdel(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 1) +#define __ac_iseither(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 3) +#define __ac_set_isdel_false(flag, i) \ + (flag[i >> 4] &= ~(1ul << ((i & 0xfU) << 1))) +#define __ac_set_isempty_false(flag, i) \ + (flag[i >> 4] &= ~(2ul << ((i & 0xfU) << 1))) +#define __ac_set_isboth_false(flag, i) \ + (flag[i >> 4] &= ~(3ul << ((i & 0xfU) << 1))) +#define 
__ac_set_isdel_true(flag, i) (flag[i >> 4] |= 1ul << ((i & 0xfU) << 1)) -#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4) +#define __ac_fsize(m) ((m) < 16 ? 1 : (m) >> 4) #ifndef kroundup32 -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#define kroundup32(x) \ + (--(x), \ + (x) |= (x) >> 1, \ + (x) |= (x) >> 2, \ + (x) |= (x) >> 4, \ + (x) |= (x) >> 8, \ + (x) |= (x) >> 16, \ + ++(x)) #endif #ifndef kcalloc -#define kcalloc(N,Z) calloc(N,Z) +#define kcalloc(N, Z) calloc(N, Z) #endif #ifndef kmalloc #define kmalloc(Z) malloc(Z) #endif #ifndef krealloc -#define krealloc(P,Z) realloc(P,Z) +#define krealloc(P, Z) realloc(P, Z) #endif #ifndef kfree #define kfree(P) free(P) @@ -191,179 +201,240 @@ typedef khint_t khiter_t; static const double __ac_HASH_UPPER = 0.77; -#define __KHASH_TYPE(name, khkey_t, khval_t) \ - typedef struct kh_##name##_s { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ - khkey_t *keys; \ - khval_t *vals; \ - } kh_##name##_t; - -#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ - extern kh_##name##_t *kh_init_##name(void); \ - extern void kh_destroy_##name(kh_##name##_t *h); \ - extern void kh_clear_##name(kh_##name##_t *h); \ - extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ - extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ - extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ - extern void kh_del_##name(kh_##name##_t *h, khint_t x); - -#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - SCOPE kh_##name##_t *kh_init_##name(void) { \ - return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ - } \ - SCOPE void kh_destroy_##name(kh_##name##_t *h) \ - { \ - if (h) { \ - kfree((void *)h->keys); kfree(h->flags); \ - kfree((void *)h->vals); \ - kfree(h); \ - } \ - } \ - SCOPE void kh_clear_##name(kh_##name##_t *h) \ - { \ - if (h && h->flags) { \ - 
memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ - h->size = h->n_occupied = 0; \ - } \ - } \ - SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ - { \ - if (h->n_buckets) { \ - khint_t k, i, last, mask, step = 0; \ - mask = h->n_buckets - 1; \ - k = __hash_func(key); i = k & mask; \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - i = (i + (++step)) & mask; \ - if (i == last) return h->n_buckets; \ - } \ - return __ac_iseither(h->flags, i)? h->n_buckets : i; \ - } else return 0; \ - } \ - SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ - { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ - khint32_t *new_flags = 0; \ - khint_t j = 1; \ - { \ - kroundup32(new_n_buckets); \ - if (new_n_buckets < 4) new_n_buckets = 4; \ - if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ - else { /* hash table size to be changed (shrink or expand); rehash */ \ - new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (!new_flags) return -1; \ - memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (h->n_buckets < new_n_buckets) { /* expand */ \ - khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (!new_keys) { kfree(new_flags); return -1; } \ - h->keys = new_keys; \ - if (kh_is_map) { \ - khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ - if (!new_vals) { kfree(new_flags); return -1; } \ - h->vals = new_vals; \ - } \ - } /* otherwise shrink */ \ - } \ - } \ - if (j) { /* rehashing is needed */ \ - for (j = 0; j != h->n_buckets; ++j) { \ - if (__ac_iseither(h->flags, j) == 0) { \ - khkey_t key = h->keys[j]; \ - khval_t val; \ - khint_t new_mask; \ - new_mask = new_n_buckets - 1; \ - if 
(kh_is_map) val = h->vals[j]; \ - __ac_set_isdel_true(h->flags, j); \ - while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t k, i, step = 0; \ - k = __hash_func(key); \ - i = k & new_mask; \ - while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ - __ac_set_isempty_false(new_flags, i); \ - if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ - { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ - if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ - __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ - } else { /* write the element and jump out of the loop */ \ - h->keys[i] = key; \ - if (kh_is_map) h->vals[i] = val; \ - break; \ - } \ - } \ - } \ - } \ - if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ - h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ - } \ - kfree(h->flags); /* free the working space */ \ - h->flags = new_flags; \ - h->n_buckets = new_n_buckets; \ - h->n_occupied = h->size; \ - h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ - } \ - return 0; \ - } \ - SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ - { \ - khint_t x; \ - if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ - if (h->n_buckets > (h->size<<1)) { \ - if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ - *ret = -1; return h->n_buckets; \ - } \ - } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ - *ret = -1; return h->n_buckets; \ - } \ - } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ - { \ - khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ - x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ - if 
(__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ - else { \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - if (__ac_isdel(h->flags, i)) site = i; \ - i = (i + (++step)) & mask; \ - if (i == last) { x = site; break; } \ - } \ - if (x == h->n_buckets) { \ - if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ - else x = i; \ - } \ - } \ - } \ - if (__ac_isempty(h->flags, x)) { /* not present at all */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; ++h->n_occupied; \ - *ret = 1; \ - } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ - h->keys[x] = key; \ - __ac_set_isboth_false(h->flags, x); \ - ++h->size; \ - *ret = 2; \ - } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ - return x; \ - } \ - SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ - { \ - if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ - __ac_set_isdel_true(h->flags, x); \ - --h->size; \ - } \ - } - -#define KHASH_DECLARE(name, khkey_t, khval_t) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_PROTOTYPES(name, khkey_t, khval_t) - -#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - __KHASH_TYPE(name, khkey_t, khval_t) \ - __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) - -#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) +#define __KHASH_TYPE(name, khkey_t, khval_t) \ + typedef struct kh_##name##_s { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t* flags; \ + khkey_t* keys; \ + khval_t* vals; \ + } kh_##name##_t; + +#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ + extern kh_##name##_t* kh_init_##name(void); \ + extern void kh_destroy_##name(kh_##name##_t* h); \ + extern void 
kh_clear_##name(kh_##name##_t* h); \ + extern khint_t kh_get_##name(const kh_##name##_t* h, khkey_t key); \ + extern int kh_resize_##name(kh_##name##_t* h, khint_t new_n_buckets); \ + extern khint_t kh_put_##name(kh_##name##_t* h, khkey_t key, int* ret); \ + extern void kh_del_##name(kh_##name##_t* h, khint_t x); + +#define __KHASH_IMPL( \ + name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + SCOPE kh_##name##_t* kh_init_##name(void) { \ + return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ + } \ + SCOPE void kh_destroy_##name(kh_##name##_t* h) { \ + if (h) { \ + kfree((void*)h->keys); \ + kfree(h->flags); \ + kfree((void*)h->vals); \ + kfree(h); \ + } \ + } \ + SCOPE void kh_clear_##name(kh_##name##_t* h) { \ + if (h && h->flags) { \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + SCOPE khint_t kh_get_##name(const kh_##name##_t* h, khkey_t key) { \ + if (h->n_buckets) { \ + khint_t k, i, last, mask, step = 0; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); \ + i = k & mask; \ + last = i; \ + while (!__ac_isempty(h->flags, i) && \ + (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + i = (i + (++step)) & mask; \ + if (i == last) \ + return h->n_buckets; \ + } \ + return __ac_iseither(h->flags, i) ? h->n_buckets : i; \ + } else \ + return 0; \ + } \ + SCOPE int kh_resize_##name( \ + kh_##name##_t* h, \ + khint_t new_n_buckets) { /* This function uses 0.25*n_buckets bytes of \ + working space instead of \ + [sizeof(key_t+val_t)+.25]*n_buckets. 
*/ \ + khint32_t* new_flags = 0; \ + khint_t j = 1; \ + { \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) \ + new_n_buckets = 4; \ + if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) \ + j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khint32_t*)kmalloc( \ + __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_flags) \ + return -1; \ + memset( \ + new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + khkey_t* new_keys = (khkey_t*)krealloc( \ + (void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { \ + kfree(new_flags); \ + return -1; \ + } \ + h->keys = new_keys; \ + if (kh_is_map) { \ + khval_t* new_vals = (khval_t*)krealloc( \ + (void*)h->vals, new_n_buckets * sizeof(khval_t)); \ + if (!new_vals) { \ + kfree(new_flags); \ + return -1; \ + } \ + h->vals = new_vals; \ + } \ + } /* otherwise shrink */ \ + } \ + } \ + if (j) { /* rehashing is needed */ \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + khint_t new_mask; \ + new_mask = new_n_buckets - 1; \ + if (kh_is_map) \ + val = h->vals[j]; \ + __ac_set_isdel_true(h->flags, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t k, i, step = 0; \ + k = __hash_func(key); \ + i = k & new_mask; \ + while (!__ac_isempty(new_flags, i)) \ + i = (i + (++step)) & new_mask; \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && \ + __ac_iseither(h->flags, i) == \ + 0) { /* kick out the existing element */ \ + { \ + khkey_t tmp = h->keys[i]; \ + h->keys[i] = key; \ + key = tmp; \ + } \ + if (kh_is_map) { \ + khval_t tmp = h->vals[i]; \ + h->vals[i] = val; \ + val = tmp; \ + } \ + __ac_set_isdel_true( \ + h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element 
and jump out of the loop */ \ + h->keys[i] = key; \ + if (kh_is_map) \ + h->vals[i] = val; \ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)krealloc( \ + (void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) \ + h->vals = (khval_t*)krealloc( \ + (void*)h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + kfree(h->flags); /* free the working space */ \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + return 0; \ + } \ + SCOPE khint_t kh_put_##name(kh_##name##_t* h, khkey_t key, int* ret) { \ + khint_t x; \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size << 1)) { \ + if (kh_resize_##name(h, h->n_buckets - 1) < \ + 0) { /* clear "deleted" elements */ \ + *ret = -1; \ + return h->n_buckets; \ + } \ + } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the \ + hash table */ \ + *ret = -1; \ + return h->n_buckets; \ + } \ + } /* TODO: to implement automatically shrinking; resize() already support \ + shrinking */ \ + { \ + khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ + x = site = h->n_buckets; \ + k = __hash_func(key); \ + i = k & mask; \ + if (__ac_isempty(h->flags, i)) \ + x = i; /* for speed up */ \ + else { \ + last = i; \ + while (!__ac_isempty(h->flags, i) && \ + (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) \ + site = i; \ + i = (i + (++step)) & mask; \ + if (i == last) { \ + x = site; \ + break; \ + } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) \ + x = site; \ + else \ + x = i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + ++h->n_occupied; \ + *ret = 1; \ + } 
else if (__ac_isdel(h->flags, x)) { /* deleted */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else \ + *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ + return x; \ + } \ + SCOPE void kh_del_##name(kh_##name##_t* h, khint_t x) { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } + +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_PROTOTYPES(name, khkey_t, khval_t) + +#define KHASH_INIT2( \ + name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_IMPL( \ + name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +#define KHASH_INIT( \ + name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2( \ + name, \ + static kh_inline klib_unused, \ + khkey_t, \ + khval_t, \ + kh_is_map, \ + __hash_func, \ + __hash_equal) /* --- BEGIN OF HASH FUNCTIONS --- */ @@ -382,7 +453,7 @@ static const double __ac_HASH_UPPER = 0.77; @param key The integer [khint64_t] @return The hash value [khint_t] */ -#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) +#define kh_int64_hash_func(key) (khint32_t)((key) >> 33 ^ (key) ^ (key) << 11) /*! @function @abstract 64-bit integer comparison function */ @@ -392,11 +463,12 @@ static const double __ac_HASH_UPPER = 0.77; @param s Pointer to a null terminated string @return The hash value */ -static kh_inline khint_t __ac_X31_hash_string(const char *s) -{ - khint_t h = (khint_t)*s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; - return h; +static kh_inline khint_t __ac_X31_hash_string(const char* s) { + khint_t h = (khint_t)*s; + if (h) + for (++s; *s; ++s) + h = (h << 5) - h + (khint_t)*s; + return h; } /*! 
@function @abstract Another interface to const char* hash function @@ -409,15 +481,14 @@ static kh_inline khint_t __ac_X31_hash_string(const char *s) */ #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) -static kh_inline khint_t __ac_Wang_hash(khint_t key) -{ - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; +static kh_inline khint_t __ac_Wang_hash(khint_t key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; } #define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key) @@ -468,7 +539,7 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) @param r Extra return code: -1 if the operation failed; 0 if the key is present in the hash table; 1 if the bucket is empty (never used); 2 if the element in - the bucket has been deleted [int*] + the bucket has been deleted [int*] @return Iterator to the inserted element [khint_t] */ #define kh_put(name, h, k, r) kh_put_##name(h, k, r) @@ -478,7 +549,8 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] @param k Key [type of keys] - @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] + @return Iterator to the found element, or kh_end(h) if the element is + absent [khint_t] */ #define kh_get(name, h, k) kh_get_##name(h, k) @@ -555,13 +627,17 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) @param vvar Variable to which value will be assigned @param code Block of code to execute */ -#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (kvar) = kh_key(h,__i); \ - (vvar) = kh_val(h,__i); \ - code; \ - } } +#define kh_foreach(h, kvar, vvar, code) \ + { \ + khint_t __i; \ + for (__i = kh_begin(h); __i != 
kh_end(h); ++__i) { \ + if (!kh_exist(h, __i)) \ + continue; \ + (kvar) = kh_key(h, __i); \ + (vvar) = kh_val(h, __i); \ + code; \ + } \ + } /*! @function @abstract Iterate over the values in the hash table @@ -569,12 +645,16 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) @param vvar Variable to which value will be assigned @param code Block of code to execute */ -#define kh_foreach_value(h, vvar, code) { khint_t __i; \ - for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ - if (!kh_exist(h,__i)) continue; \ - (vvar) = kh_val(h,__i); \ - code; \ - } } +#define kh_foreach_value(h, vvar, code) \ + { \ + khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h, __i)) \ + continue; \ + (vvar) = kh_val(h, __i); \ + code; \ + } \ + } /* More convenient interfaces */ @@ -582,46 +662,47 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) @abstract Instantiate a hash set containing integer keys @param name Name of the hash table [symbol] */ -#define KHASH_SET_INIT_INT(name) \ - KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) +#define KHASH_SET_INIT_INT(name) \ + KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) /*! @function @abstract Instantiate a hash map containing integer keys @param name Name of the hash table [symbol] @param khval_t Type of values [type] */ -#define KHASH_MAP_INIT_INT(name, khval_t) \ - KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) /*! @function @abstract Instantiate a hash set containing 64-bit integer keys @param name Name of the hash table [symbol] */ -#define KHASH_SET_INIT_INT64(name) \ - KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) /*! 
@function @abstract Instantiate a hash map containing 64-bit integer keys @param name Name of the hash table [symbol] @param khval_t Type of values [type] */ -#define KHASH_MAP_INIT_INT64(name, khval_t) \ - KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT( \ + name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) -typedef const char *kh_cstr_t; +typedef const char* kh_cstr_t; /*! @function @abstract Instantiate a hash map containing const char* keys @param name Name of the hash table [symbol] */ -#define KHASH_SET_INIT_STR(name) \ - KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) /*! @function @abstract Instantiate a hash map containing const char* keys @param name Name of the hash table [symbol] @param khval_t Type of values [type] */ -#define KHASH_MAP_INIT_STR(name, khval_t) \ - KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) #endif /* __AC_KHASH_H */ diff --git a/libtiledbsoma/src/external/khash/khashl.h b/libtiledbsoma/src/external/khash/khashl.h index 93ce31354c..dcbffa704b 100644 --- a/libtiledbsoma/src/external/khash/khashl.h +++ b/libtiledbsoma/src/external/khash/khashl.h @@ -28,9 +28,9 @@ #define AC_VERSION_KHASHL_H "0.1" +#include #include #include -#include /************************************ * Compiler specific configurations * @@ -57,8 +57,9 @@ typedef int64_t khint64_t; #endif /* kh_inline */ #ifndef klib_unused -#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) -#define klib_unused __attribute__ ((__unused__)) +#if (defined __clang__ && __clang_major__ >= 3) || \ + (defined __GNUC__ && __GNUC__ >= 3) +#define 
klib_unused __attribute__((__unused__)) #else #define klib_unused #endif @@ -73,13 +74,13 @@ typedef khint32_t khint_t; ******************/ #ifndef kcalloc -#define kcalloc(N,Z) calloc(N,Z) +#define kcalloc(N, Z) calloc(N, Z) #endif #ifndef kmalloc #define kmalloc(Z) malloc(Z) #endif #ifndef krealloc -#define krealloc(P,Z) realloc(P,Z) +#define krealloc(P, Z) realloc(P, Z) #endif #ifndef kfree #define kfree(P) free(P) @@ -89,216 +90,364 @@ typedef khint32_t khint_t; * Simple private functions * ****************************/ -#define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U) -#define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU)) -#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU))) +#define __kh_used(flag, i) (flag[i >> 5] >> (i & 0x1fU) & 1U) +#define __kh_set_used(flag, i) (flag[i >> 5] |= 1U << (i & 0x1fU)) +#define __kh_set_unused(flag, i) (flag[i >> 5] &= ~(1U << (i & 0x1fU))) -#define __kh_fsize(m) ((m) < 32? 1 : (m)>>5) +#define __kh_fsize(m) ((m) < 32 ? 
1 : (m) >> 5) -static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } +static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { + return hash * 2654435769U >> (32 - bits); +} /******************* * Hash table base * *******************/ #define __KHASHL_TYPE(HType, khkey_t) \ - typedef struct HType { \ - khint_t bits, count; \ - khint32_t *used; \ - khkey_t *keys; \ - } HType; - -#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ - extern HType *prefix##_init(void); \ - extern void prefix##_destroy(HType *h); \ - extern void prefix##_clear(HType *h); \ - extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \ - extern int prefix##_resize(HType *h, khint_t new_n_buckets); \ - extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \ - extern void prefix##_del(HType *h, khint_t k); - -#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ - SCOPE HType *prefix##_init(void) { \ - return (HType*)kcalloc(1, sizeof(HType)); \ - } \ - SCOPE void prefix##_destroy(HType *h) { \ - if (!h) return; \ - kfree((void *)h->keys); kfree(h->used); \ - kfree(h); \ - } \ - SCOPE void prefix##_clear(HType *h) { \ - if (h && h->used) { \ - uint32_t n_buckets = 1U << h->bits; \ - memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ - h->count = 0; \ - } \ - } + typedef struct HType { \ + khint_t bits, count; \ + khint32_t* used; \ + khkey_t* keys; \ + } HType; + +#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ + extern HType* prefix##_init(void); \ + extern void prefix##_destroy(HType* h); \ + extern void prefix##_clear(HType* h); \ + extern khint_t prefix##_getp(const HType* h, const khkey_t* key); \ + extern int prefix##_resize(HType* h, khint_t new_n_buckets); \ + extern khint_t prefix##_putp(HType* h, const khkey_t* key, int* absent); \ + extern void prefix##_del(HType* h, khint_t k); + +#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + SCOPE HType* prefix##_init(void) { \ + 
return (HType*)kcalloc(1, sizeof(HType)); \ + } \ + SCOPE void prefix##_destroy(HType* h) { \ + if (!h) \ + return; \ + kfree((void*)h->keys); \ + kfree(h->used); \ + kfree(h); \ + } \ + SCOPE void prefix##_clear(HType* h) { \ + if (h && h->used) { \ + uint32_t n_buckets = 1U << h->bits; \ + memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ + h->count = 0; \ + } \ + } #define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { \ - khint_t i, last, n_buckets, mask; \ - if (h->keys == 0) return 0; \ - n_buckets = 1U << h->bits; \ - mask = n_buckets - 1U; \ - i = last = __kh_h2b(__hash_fn(*key), h->bits); \ - while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ - i = (i + 1U) & mask; \ - if (i == last) return n_buckets; \ - } \ - return !__kh_used(h->used, i)? n_buckets : i; \ - } \ - SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp(h, &key); } - -#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - SCOPE int prefix##_resize(HType *h, khint_t new_n_buckets) { \ - khint32_t *new_used = 0; \ - khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ - while ((x >>= 1) != 0) ++j; \ - if (new_n_buckets & (new_n_buckets - 1)) ++j; \ - new_bits = j > 2? j : 2; \ - new_n_buckets = 1U << new_bits; \ - if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return 0; /* requested size is too small */ \ - new_used = (khint32_t*)kmalloc(__kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ - memset(new_used, 0, __kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ - if (!new_used) return -1; /* not enough memory */ \ - n_buckets = h->keys? 
1U<<h->bits : 0U; \ - if (n_buckets < new_n_buckets) { /* expand */ \ - khkey_t *new_keys = (khkey_t*)krealloc((void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (!new_keys) { kfree(new_used); return -1; } \ - h->keys = new_keys; \ - } /* otherwise shrink */ \ - new_mask = new_n_buckets - 1; \ - for (j = 0; j != n_buckets; ++j) { \ - khkey_t key; \ - if (!__kh_used(h->used, j)) continue; \ - key = h->keys[j]; \ - __kh_set_unused(h->used, j); \ - while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t i; \ - i = __kh_h2b(__hash_fn(key), new_bits); \ - while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \ - __kh_set_used(new_used, i); \ - if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \ - { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ - __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \ - } else { /* write the element and jump out of the loop */ \ - h->keys[i] = key; \ - break; \ - } \ - } \ - } \ - if (n_buckets > new_n_buckets) /* shrink the hash table */ \ - h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ - kfree(h->used); /* free the working space */ \ - h->used = new_used, h->bits = new_bits; \ - return 0; \ - } + SCOPE khint_t prefix##_getp(const HType* h, const khkey_t* key) { \ + khint_t i, last, n_buckets, mask; \ + if (h->keys == 0) \ + return 0; \ + n_buckets = 1U << h->bits; \ + mask = n_buckets - 1U; \ + i = last = __kh_h2b(__hash_fn(*key), h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) \ + return n_buckets; \ + } \ + return !__kh_used(h->used, i) ? 
n_buckets : i; \ + } \ + SCOPE khint_t prefix##_get(const HType* h, khkey_t key) { \ + return prefix##_getp(h, &key); \ + } + +#define __KHASHL_IMPL_RESIZE( \ + SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE int prefix##_resize(HType* h, khint_t new_n_buckets) { \ + khint32_t* new_used = 0; \ + khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ + while ((x >>= 1) != 0) \ + ++j; \ + if (new_n_buckets & (new_n_buckets - 1)) \ + ++j; \ + new_bits = j > 2 ? j : 2; \ + new_n_buckets = 1U << new_bits; \ + if (h->count > (new_n_buckets >> 1) + (new_n_buckets >> 2)) \ + return 0; /* requested size is too small */ \ + new_used = \ + (khint32_t*)kmalloc(__kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ + memset(new_used, 0, __kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_used) \ + return -1; /* not enough memory */ \ + n_buckets = h->keys ? 1U << h->bits : 0U; \ + if (n_buckets < new_n_buckets) { /* expand */ \ + khkey_t* new_keys = \ + (khkey_t*)krealloc((void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { \ + kfree(new_used); \ + return -1; \ + } \ + h->keys = new_keys; \ + } /* otherwise shrink */ \ + new_mask = new_n_buckets - 1; \ + for (j = 0; j != n_buckets; ++j) { \ + khkey_t key; \ + if (!__kh_used(h->used, j)) \ + continue; \ + key = h->keys[j]; \ + __kh_set_unused(h->used, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t i; \ + i = __kh_h2b(__hash_fn(key), new_bits); \ + while (__kh_used(new_used, i)) \ + i = (i + 1) & new_mask; \ + __kh_set_used(new_used, i); \ + if (i < n_buckets && \ + __kh_used(h->used, i)) { /* kick out the existing element */ \ + { \ + khkey_t tmp = h->keys[i]; \ + h->keys[i] = key; \ + key = tmp; \ + } \ + __kh_set_unused( \ + h->used, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + break; \ + } \ + } \ + } \ + if (n_buckets > new_n_buckets) /* shrink 
the hash table */ \ + h->keys = \ + (khkey_t*)krealloc((void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + kfree(h->used); /* free the working space */ \ + h->used = new_used, h->bits = new_bits; \ + return 0; \ + } #define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { \ - khint_t n_buckets, i, last, mask; \ - n_buckets = h->keys? 1U<<h->bits : 0U; \ - *absent = -1; \ - if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \ - if (prefix##_resize(h, n_buckets + 1U) < 0) \ - return n_buckets; \ - n_buckets = 1U<<h->bits; \ - } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ - mask = n_buckets - 1; \ - i = last = __kh_h2b(__hash_fn(*key), h->bits); \ - while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ - i = (i + 1U) & mask; \ - if (i == last) break; \ - } \ - if (!__kh_used(h->used, i)) { /* not present at all */ \ - h->keys[i] = *key; \ - __kh_set_used(h->used, i); \ - ++h->count; \ - *absent = 1; \ - } else *absent = 0; /* Don't touch h->keys[i] if present */ \ - return i; \ - } \ - SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp(h, &key, absent); } - -#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ - SCOPE int prefix##_del(HType *h, khint_t i) { \ - khint_t j = i, k, mask, n_buckets; \ - if (h->keys == 0) return 0; \ - n_buckets = 1U<<h->bits; \ - mask = n_buckets - 1U; \ - while (1) { \ - j = (j + 1U) & mask; \ - if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \ - k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ - if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ - h->keys[i] = h->keys[j], i = j; \ - } \ - __kh_set_unused(h->used, i); \ - --h->count; \ - return 1; \ - } + SCOPE khint_t prefix##_putp(HType* h, const khkey_t* key, int* absent) { \ + khint_t n_buckets, i, last, mask; \ + 
n_buckets = h->keys ? 1U << h->bits : 0U; \ + *absent = -1; \ + if (h->count >= (n_buckets >> 1) + (n_buckets >> 2)) { /* rehashing */ \ + if (prefix##_resize(h, n_buckets + 1U) < 0) \ + return n_buckets; \ + n_buckets = 1U << h->bits; \ + } /* TODO: to implement automatically shrinking; resize() already support \ + shrinking */ \ + mask = n_buckets - 1; \ + i = last = __kh_h2b(__hash_fn(*key), h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) \ + break; \ + } \ + if (!__kh_used(h->used, i)) { /* not present at all */ \ + h->keys[i] = *key; \ + __kh_set_used(h->used, i); \ + ++h->count; \ + *absent = 1; \ + } else \ + *absent = 0; /* Don't touch h->keys[i] if present */ \ + return i; \ + } \ + SCOPE khint_t prefix##_put(HType* h, khkey_t key, int* absent) { \ + return prefix##_putp(h, &key, absent); \ + } + +#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ + SCOPE int prefix##_del(HType* h, khint_t i) { \ + khint_t j = i, k, mask, n_buckets; \ + if (h->keys == 0) \ + return 0; \ + n_buckets = 1U << h->bits; \ + mask = n_buckets - 1U; \ + while (1) { \ + j = (j + 1U) & mask; \ + if (j == i || !__kh_used(h->used, j)) \ + break; /* j==i only when the table is completely full */ \ + k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ + if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ + h->keys[i] = h->keys[j], i = j; \ + } \ + __kh_set_unused(h->used, i); \ + --h->count; \ + return 1; \ + } #define KHASHL_DECLARE(HType, prefix, khkey_t) \ - __KHASHL_TYPE(HType, khkey_t) \ - __KHASHL_PROTOTYPES(HType, prefix, khkey_t) + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_PROTOTYPES(HType, prefix, khkey_t) -#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - __KHASHL_TYPE(HType, khkey_t) \ - __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ - __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - __KHASHL_IMPL_RESIZE(SCOPE, HType, 
prefix, khkey_t, __hash_fn, __hash_eq) \ - __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) +#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) /***************************** * More convenient interface * *****************************/ -#define __kh_packed __attribute__ ((__packed__)) +#define __kh_packed __attribute__((__packed__)) #define __kh_cached_hash(x) ((x).hash) #define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \ - static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \ - static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \ - KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \ - SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \ - SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \ - SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \ - SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ - SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ - SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } - -#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ 
- typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ - static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ - static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ - KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ - SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ - SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ - SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ - SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ - SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } + typedef struct { \ + khkey_t key; \ + } __kh_packed HType##_s_bucket_t; \ + static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { \ + return __hash_fn(x.key); \ + } \ + static kh_inline int prefix##_s_eq( \ + HType##_s_bucket_t x, HType##_s_bucket_t y) { \ + return __hash_eq(x.key, y.key); \ + } \ + KHASHL_INIT( \ + KH_LOCAL, \ + HType, \ + prefix##_s, \ + HType##_s_bucket_t, \ + prefix##_s_hash, \ + prefix##_s_eq) \ + SCOPE HType* prefix##_init(void) { \ + return prefix##_s_init(); \ + } \ + SCOPE void prefix##_destroy(HType* h) { \ + prefix##_s_destroy(h); \ + } \ + SCOPE void prefix##_resize(HType* h, khint_t new_n_buckets) { \ + prefix##_s_resize(h, new_n_buckets); \ + } \ + SCOPE khint_t prefix##_get(const HType* h, khkey_t key) { \ + HType##_s_bucket_t t; \ + t.key = key; \ + return prefix##_s_getp(h, &t); \ + } \ + SCOPE int prefix##_del(HType* h, khint_t k) { \ + return prefix##_s_del(h, k); \ + } \ + SCOPE khint_t prefix##_put(HType* h, khkey_t key, int* absent) { \ + HType##_s_bucket_t t; \ + t.key = key; \ + return prefix##_s_putp(h, &t, absent); \ + } + +#define 
KHASHL_MAP_INIT( \ + SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { \ + khkey_t key; \ + kh_val_t val; \ + } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { \ + return __hash_fn(x.key); \ + } \ + static kh_inline int prefix##_m_eq( \ + HType##_m_bucket_t x, HType##_m_bucket_t y) { \ + return __hash_eq(x.key, y.key); \ + } \ + KHASHL_INIT( \ + KH_LOCAL, \ + HType, \ + prefix##_m, \ + HType##_m_bucket_t, \ + prefix##_m_hash, \ + prefix##_m_eq) \ + SCOPE HType* prefix##_init(void) { \ + return prefix##_m_init(); \ + } \ + SCOPE void prefix##_destroy(HType* h) { \ + prefix##_m_destroy(h); \ + } \ + SCOPE khint_t prefix##_get(const HType* h, khkey_t key) { \ + HType##_m_bucket_t t; \ + t.key = key; \ + return prefix##_m_getp(h, &t); \ + } \ + SCOPE int prefix##_del(HType* h, khint_t k) { \ + return prefix##_m_del(h, k); \ + } \ + SCOPE khint_t prefix##_put(HType* h, khkey_t key, int* absent) { \ + HType##_m_bucket_t t; \ + t.key = key; \ + return prefix##_m_putp(h, &t, absent); \ + } #define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ - typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \ - static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ - KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \ - SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \ - SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \ - SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \ - SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \ - SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); 
return prefix##_cs_putp(h, &t, absent); } - -#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ - typedef struct { khkey_t key; kh_val_t val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \ - static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ - KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \ - SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \ - SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \ - SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \ - SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \ - SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); } + typedef struct { \ + khkey_t key; \ + khint_t hash; \ + } __kh_packed HType##_cs_bucket_t; \ + static kh_inline int prefix##_cs_eq( \ + HType##_cs_bucket_t x, HType##_cs_bucket_t y) { \ + return x.hash == y.hash && __hash_eq(x.key, y.key); \ + } \ + KHASHL_INIT( \ + KH_LOCAL, \ + HType, \ + prefix##_cs, \ + HType##_cs_bucket_t, \ + __kh_cached_hash, \ + prefix##_cs_eq) \ + SCOPE HType* prefix##_init(void) { \ + return prefix##_cs_init(); \ + } \ + SCOPE void prefix##_destroy(HType* h) { \ + prefix##_cs_destroy(h); \ + } \ + SCOPE khint_t prefix##_get(const HType* h, khkey_t key) { \ + HType##_cs_bucket_t t; \ + t.key = key; \ + t.hash = __hash_fn(key); \ + return prefix##_cs_getp(h, &t); \ + } \ + SCOPE int prefix##_del(HType* h, khint_t k) { \ + return prefix##_cs_del(h, k); \ + } \ + SCOPE khint_t prefix##_put(HType* h, khkey_t key, int* absent) { \ + HType##_cs_bucket_t t; \ + t.key = key, t.hash = __hash_fn(key); \ + return prefix##_cs_putp(h, &t, absent); 
\ + } + +#define KHASHL_CMAP_INIT( \ + SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { \ + khkey_t key; \ + kh_val_t val; \ + khint_t hash; \ + } __kh_packed HType##_cm_bucket_t; \ + static kh_inline int prefix##_cm_eq( \ + HType##_cm_bucket_t x, HType##_cm_bucket_t y) { \ + return x.hash == y.hash && __hash_eq(x.key, y.key); \ + } \ + KHASHL_INIT( \ + KH_LOCAL, \ + HType, \ + prefix##_cm, \ + HType##_cm_bucket_t, \ + __kh_cached_hash, \ + prefix##_cm_eq) \ + SCOPE HType* prefix##_init(void) { \ + return prefix##_cm_init(); \ + } \ + SCOPE void prefix##_destroy(HType* h) { \ + prefix##_cm_destroy(h); \ + } \ + SCOPE khint_t prefix##_get(const HType* h, khkey_t key) { \ + HType##_cm_bucket_t t; \ + t.key = key; \ + t.hash = __hash_fn(key); \ + return prefix##_cm_getp(h, &t); \ + } \ + SCOPE int prefix##_del(HType* h, khint_t k) { \ + return prefix##_cm_del(h, k); \ + } \ + SCOPE khint_t prefix##_put(HType* h, khkey_t key, int* absent) { \ + HType##_cm_bucket_t t; \ + t.key = key, t.hash = __hash_fn(key); \ + return prefix##_cm_putp(h, &t, absent); \ + } /************************** * Public macro functions * @@ -306,7 +455,7 @@ static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 26 #define kh_bucket(h, x) ((h)->keys[x]) #define kh_size(h) ((h)->count) -#define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U) +#define kh_capacity(h) ((h)->keys ? 
1U << (h)->bits : 0U) #define kh_end(h) kh_capacity(h) #define kh_key(h, x) ((h)->keys[x].key) @@ -322,30 +471,32 @@ static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 26 #define kh_hash_dummy(x) ((khint_t)(x)) static kh_inline khint_t kh_hash_uint32(khint_t key) { - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; } static kh_inline khint_t kh_hash_uint64(khint64_t key) { - key = ~key + (key << 21); - key = key ^ key >> 24; - key = (key + (key << 3)) + (key << 8); - key = key ^ key >> 14; - key = (key + (key << 2)) + (key << 4); - key = key ^ key >> 28; - key = key + (key << 31); - return (khint_t)key; + key = ~key + (key << 21); + key = key ^ key >> 24; + key = (key + (key << 3)) + (key << 8); + key = key ^ key >> 14; + key = (key + (key << 2)) + (key << 4); + key = key ^ key >> 28; + key = key + (key << 31); + return (khint_t)key; } -static kh_inline khint_t kh_hash_str(const char *s) { - khint_t h = (khint_t)*s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; - return h; +static kh_inline khint_t kh_hash_str(const char* s) { + khint_t h = (khint_t)*s; + if (h) + for (++s; *s; ++s) + h = (h << 5) - h + (khint_t)*s; + return h; } #endif /* __AC_KHASHL_H */