From a4556eaa0b73922c039c52b163145002cb637494 Mon Sep 17 00:00:00 2001 From: Tom Scogland Date: Tue, 3 Sep 2024 10:46:06 -0700 Subject: [PATCH] interner: add finalize/open/close to interner problem: refcounted strings for resource types are turning out to be expensive enough to actually slow us down. We compare them and copy them so often they show up in the traces as 20-30% of runtime even as just shared_ptrs solution: avoid the possible denial of service issue from resource types by adding an interface to the interner type that finalizes it, preventing new strings from being added until it is explicitly "opened" for addition. This way after initialization of the graph/database we finalize the interners for subsystems and resource types, then open them back up for updates in update_resource. The danger is in JobSpec, which can no longer create new resource types. This also effectively makes it a parse error for a user to specify an unknown resource, which will happen as soon as the jobspec hits qmanager. --- resource/modules/resource_match.cpp | 6 ++++ resource/schema/data_std.hpp | 2 +- src/common/libintern/interner.cpp | 29 +++++++++++++++++++ src/common/libintern/interner.hpp | 43 +++++++++++++++++++++++------ 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/resource/modules/resource_match.cpp b/resource/modules/resource_match.cpp index 1b4210da7..af39bfcc4 100644 --- a/resource/modules/resource_match.cpp +++ b/resource/modules/resource_match.cpp @@ -1082,6 +1082,7 @@ static int grow_resource_db (std::shared_ptr &ctx, json_t *resou struct idset *grow_set = NULL; json_t *r_lite = NULL; json_t *jgf = NULL; + resource_type_t::storage_t::open (); if ((rc = unpack_resources (resources, &grow_set, &r_lite, &jgf, duration)) < 0) { flux_log_error (ctx->h, "%s: unpack_resources", __FUNCTION__); @@ -1116,6 +1117,7 @@ static int grow_resource_db (std::shared_ptr &ctx, json_t *resou done: idset_destroy (grow_set); + resource_type_t::storage_t::close (); return rc; } @@ -1460,6 +1462,10 @@ static int init_resource_graph (std::shared_ptr &ctx) } } } + + // prevent users from consuming unbounded memory with arbitrary resource types + subsystem_t::storage_t::finalize (); + resource_type_t::storage_t::finalize (); return 0; } diff --git a/resource/schema/data_std.hpp b/resource/schema/data_std.hpp index a6fc0a936..f938bdb0a 100644 --- a/resource/schema/data_std.hpp +++ b/resource/schema/data_std.hpp @@ -36,7 +36,7 @@ extern subsystem_t containment_sub; constexpr uint64_t resource_type_id{1}; struct resource_type_tag {}; -using resource_type_t = intern::interned_string>; +using resource_type_t = intern::interned_string>; extern resource_type_t slot_rt; extern resource_type_t cluster_rt; extern resource_type_t rack_rt; diff --git a/src/common/libintern/interner.cpp b/src/common/libintern/interner.cpp index 7c0d4b70e..24d9b2c58 100644 --- a/src/common/libintern/interner.cpp +++ b/src/common/libintern/interner.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,10 @@ struct dense_inner_storage { std::unordered_map> ids_by_string; // reader/writer lock to protect entries std::unique_ptr mtx = std::make_unique (); + // if the object is finalized and not opened, reject new strings + bool finalized = false; + // whether this object is allowed to accept new strings after finalization from each thread + std::unordered_map open_map; }; dense_inner_storage &get_dense_inner_storage (size_t unique_id) @@ -56,6 +61,22 @@ dense_inner_storage &get_dense_inner_storage (size_t unique_id) return groups[unique_id]; } +void dense_storage_finalize (dense_inner_storage &storage) +{ + auto ul = std::unique_lock (*storage.mtx); + storage.finalized = true; +} +void dense_storage_open (dense_inner_storage &storage) +{ + auto ul = std::unique_lock (*storage.mtx); + storage.open_map[std::this_thread::get_id ()] = true; +} +void dense_storage_close (dense_inner_storage &storage) +{ + auto ul = std::unique_lock (*storage.mtx); + storage.open_map[std::this_thread::get_id ()] = false; +} + struct sparse_inner_storage; struct sparse_string_node { // This must be carefully managed, it holds onto the sparse_inner_storage that is keeping the @@ -153,6 +174,13 @@ view_and_id get_both (dense_inner_storage &storage, const std::string_view s, ch throw std::system_error (ENOMEM, std::generic_category (), "Too many strings for configured size"); + // if storage is finalized and the thread isn't in the open map, we must not add another + // here, check while under the shared lock + if (storage.finalized && !storage.open_map[std::this_thread::get_id ()]) { + throw std::system_error (ENOMEM, + std::generic_category (), + "This interner is finalized and must be open to add strings"); + } } // release shared lock // writer lock scope @@ -167,6 +195,7 @@ view_and_id get_both (dense_inner_storage &storage, const std::string_view s, ch const std::string *get_by_id (dense_inner_storage &storage, size_t string_id) { + auto sl = std::shared_lock (*storage.mtx); return storage.strings_by_id.at (string_id); } diff --git a/src/common/libintern/interner.hpp b/src/common/libintern/interner.hpp index 748436b71..aacdfc1b3 100644 --- a/src/common/libintern/interner.hpp +++ b/src/common/libintern/interner.hpp @@ -35,12 +35,20 @@ struct dense_inner_storage; view_and_id get_both (dense_inner_storage &ds, std::string_view s, char bytes_supported); const std::string *get_by_id (dense_inner_storage &ds, size_t string_id); +void dense_storage_finalize (dense_inner_storage &storage); +void dense_storage_open (dense_inner_storage &storage); +void dense_storage_close (dense_inner_storage &storage); + dense_inner_storage &get_dense_inner_storage (size_t unique_id); std::size_t hash_combine (std::size_t lhs, std::size_t rhs); }; // namespace detail /// interner storage class providing dense, in-order IDs of configurable size +/// +/// allows addition of strings until finalized, then after that only when explicitly opened for +/// additions, this is not for thread safety, it is to prevent denial of service from addition of +/// invalid types through interfaces that take user input template requires (sizeof (Id) <= sizeof (size_t)) struct dense_storage { @@ -54,15 +62,6 @@ struct dense_storage { dense_storage &operator= (dense_storage const &) = delete; dense_storage &operator= (dense_storage &&) = delete; - static detail::view_and_id get_both (std::string_view s); - static id_instance_t get_id (std::string_view s); - static const std::string *get_by_id (id_storage_t string_id); - - static size_t unique_id () - { - return std::hash () (__PRETTY_FUNCTION__); - } - private: static detail::dense_inner_storage &get_storage () { @@ -73,6 +72,32 @@ struct dense_storage { ::intern::detail::get_dense_inner_storage (dense_storage::unique_id ()); return s; }; + + public: + static detail::view_and_id get_both (std::string_view s); + static id_instance_t get_id (std::string_view s); + static const std::string *get_by_id (id_storage_t string_id); + + /// stop new strings from being added unless the object is explicitly opened + static void finalize () + { + detail::dense_storage_finalize (get_storage ()); + } + /// open the interner for additions + static void open () + { + detail::dense_storage_open (get_storage ()); + } + /// close the interner for additions + static void close () + { + detail::dense_storage_close (get_storage ()); + } + + static size_t unique_id () + { + return std::hash () (__PRETTY_FUNCTION__); + } }; template