From 66fcf72f76f18b164e65b282ffc1738274c4facf Mon Sep 17 00:00:00 2001 From: gitamohr Date: Thu, 7 Dec 2023 09:39:33 -0800 Subject: [PATCH] tf: A token change. Make the 'immortal' bit be part of the atomic reference count. This is required for thread-safety. Instead of eagerly reaping tokens whose reference counts go to zero, sweep for garbage when an insert would trigger a rehash. (Internal change: 2307757) --- pxr/base/tf/token.cpp | 201 ++++++++++++++++++++++++------------------ pxr/base/tf/token.h | 158 ++++++++++++++++++--------------- 2 files changed, 203 insertions(+), 156 deletions(-) diff --git a/pxr/base/tf/token.cpp b/pxr/base/tf/token.cpp index e61c882efe..c1b87a9430 100644 --- a/pxr/base/tf/token.cpp +++ b/pxr/base/tf/token.cpp @@ -39,12 +39,14 @@ #include -#include -#include -#include +#include +#include #include +#include +#include +#include #include -#include +#include using std::vector; using std::string; @@ -85,29 +87,23 @@ struct Tf_TokenRegistry typedef _Hash<7> _OuterHash; typedef _Hash<5> _InnerHash; - static const size_t _NumSets = 128; - static const size_t _SetMask = _NumSets-1; - - // Utility to pad an instance to take up a whole number of cache lines to - // avoid false sharing. - template - struct _CacheLinePadded { - T val; - char _unused_padding[ - ARCH_CACHE_LINE_SIZE-(sizeof(T) % ARCH_CACHE_LINE_SIZE)]; - }; + static constexpr unsigned _NumSets = 128; + static constexpr unsigned _SetMask = _NumSets-1; + static constexpr size_t _MinInsertsUntilSweepCheck = 32; + + // TODO: Consider a better container than TfHashSet here. + using _RepSet = TfHashSet; - // It's not ideal to use TfHashSet here. It would be better to use - // boost::multi_index from boost 1.56 or newer: it lets us lookup an element - // by a key more easily. It also has better performance. Earlier versions - // suffer from the issue where erase(iterator) has to return an iterator to - // the next element, which has to scan forward to find the next, so we use - // TfHashSet for now. - typedef TfHashSet _RepSet; + // Align this structure to ensure no false sharing of 'mutex'. + struct alignas(ARCH_CACHE_LINE_SIZE) + _Set { + _RepSet reps; + unsigned insertsUntilSweepCheck = _MinInsertsUntilSweepCheck; + mutable tbb::spin_mutex mutex; + }; // Data members. - _RepSet _sets[_NumSets]; - mutable _CacheLinePadded _locks[_NumSets]; + _Set _sets[_NumSets]; static Tf_TokenRegistry& _GetInstance() { return TfSingleton::GetInstance(); @@ -136,48 +132,26 @@ struct Tf_TokenRegistry return _FindPtrImpl(s); } - /* - * rep may be dying. remove its key and raw pointer reference - * from the table if it truly is. - */ - void _PossiblyDestroyRep(_RepPtr rep) { - bool repFoundInSet = true; - string repString; - { - unsigned int setNum = rep->_setNum; - - tbb::spin_mutex::scoped_lock lock(_locks[setNum].val); - - if (!rep->_isCounted) - return; - - /* - * We hold the lock, but there could be others outside the lock - * modifying this same counter. Be safe: be atomic. 
- */ - if (--rep->_refCount != 0) - return; - - if (!_sets[setNum].erase(*rep)) { - repFoundInSet = false; - repString = rep->_str; - } - } - TF_VERIFY(repFoundInSet, - "failed to find token '%s' in table for destruction", - repString.c_str()); - } - void _DumpStats() const { std::vector > sizesWithSet; for (size_t i = 0; i != _NumSets; ++i) { - sizesWithSet.push_back(std::make_pair(_sets[i].size(), i)); + tbb::spin_mutex::scoped_lock lock(_sets[i].mutex); + sizesWithSet.push_back(std::make_pair(_sets[i].reps.size(), i)); } std::sort(sizesWithSet.begin(), sizesWithSet.end()); printf("Set # -- Size\n"); TF_FOR_ALL(i, sizesWithSet) { printf("%zu -- %zu\n", i->second, i->first); } + + // Uncomment to dump every token & refcount. + // for (size_t i = 0; i != _NumSets; ++i) { + // tbb::spin_mutex::scoped_lock lock(_sets[i].mutex); + // for (auto const &rep: _sets[i].reps) { + // printf("%d '%s'\n", rep._refCount.load(), rep._str.c_str()); + // } + // } + } private: @@ -212,53 +186,114 @@ struct Tf_TokenRegistry */ template inline _RepPtrAndBits _GetPtrImpl(Str s, bool makeImmortal) { - if (_IsEmpty(s)) + if (_IsEmpty(s)) { return _RepPtrAndBits(); + } - size_t setNum = _GetSetNum(_CStr(s)); + unsigned setNum = _GetSetNum(_CStr(s)); + _Set &set = _sets[setNum]; - tbb::spin_mutex::scoped_lock lock(_locks[setNum].val); + tbb::spin_mutex::scoped_lock lock(set.mutex); // Insert or lookup an existing. - _RepSet::iterator iter = _sets[setNum].find(_LookupRep(_CStr(s))); - if (iter != _sets[setNum].end()) { + _RepSet::iterator iter = set.reps.find(_LookupRep(_CStr(s))); + if (iter != set.reps.end()) { _RepPtr rep = &(*iter); - bool isCounted = rep->_isCounted; + bool isCounted = rep->_refCount.load(std::memory_order_relaxed) & 1; if (isCounted) { - if (makeImmortal) - isCounted = rep->_isCounted = false; - else - ++rep->_refCount; + if (makeImmortal) { + // Clear counted flag. + rep->_refCount.fetch_and(~1u, std::memory_order_relaxed); + isCounted = false; + } + else { + // Add one reference (we count by 2's). + rep->_refCount.fetch_add(2, std::memory_order_relaxed); + } } return _RepPtrAndBits(rep, isCounted); } else { - // No entry present, add a new entry. + // No entry present, add a new entry. First check if we should + // clean house. + if (set.insertsUntilSweepCheck == 0) { + // If this insert would cause a rehash, clean house first. + // Remove any reps that have refcount zero. This is safe to do + // since we hold a lock on the set's mutex: no other thread can + // observe or manipulate this set concurrently. + if ((float(set.reps.size() + 1) / + float(set.reps.bucket_count()) > + set.reps.max_load_factor())) { + // Walk the set and erase any non-immortal reps with no + // outstanding refcounts (_refCount == 1). Recall that the + // low-order bit indicates counted (non-immortal) reps, we + // count references by 2. + for (auto iter = set.reps.begin(), + end = set.reps.end(); iter != end; ) { + if (iter->_refCount == 1) { + set.reps.erase(iter++); + } + else { + ++iter; + } + } + // Reset insertsUntilSweepCheck. We want to wait at least a + // bit to avoid bad patterns. For example if we sweep the + // table and find one garbage token, we will remove it and + // then insert the new token. On the next insert we'll + // sweep for garbage again. If the calling code is + // consuming a list of tokens, discarding one and creating a + // new one on each iteration, we could end up sweeping the + // whole table like this on every insertion. 
By waiting a + // bit we avoid these situations at the cost of the + // occasional "unnecessary" rehash. + set.insertsUntilSweepCheck = std::max( + _MinInsertsUntilSweepCheck, + static_cast( + set.reps.bucket_count() * + float(set.reps.max_load_factor() - + set.reps.load_factor()))); + } + } + else { + --set.insertsUntilSweepCheck; + } + + // Create the new entry. TfAutoMallocTag noname("TfToken"); - _RepPtr rep = &(*_sets[setNum].insert(TfToken::_Rep(s)).first); - rep->_isCounted = !makeImmortal; - rep->_setNum = setNum; - rep->_compareCode = _ComputeCompareCode(rep->_cstr); - if (!makeImmortal) - rep->_refCount = 1; + uint64_t compareCode = _ComputeCompareCode(_CStr(s)); + _RepPtr rep = &(*set.reps.insert( + TfToken::_Rep(s, setNum, compareCode)).first); + // 3, because 1 for counted bit plus 2 for one refcount. We can + // store relaxed here since our lock on the set's mutex provides + // synchronization. + rep->_refCount.store(makeImmortal ? 0 : 3, + std::memory_order_relaxed); return _RepPtrAndBits(rep, !makeImmortal); } } template _RepPtrAndBits _FindPtrImpl(Str s) const { - if (_IsEmpty(s)) + if (_IsEmpty(s)) { return _RepPtrAndBits(); + } size_t setNum = _GetSetNum(_CStr(s)); + _Set const &set = _sets[setNum]; - tbb::spin_mutex::scoped_lock lock(_locks[setNum].val); + tbb::spin_mutex::scoped_lock lock(set.mutex); - _RepSet::const_iterator iter = - _sets[setNum].find(_LookupRep(_CStr(s))); - if (iter == _sets[setNum].end()) + _RepSet::const_iterator iter = set.reps.find(_LookupRep(_CStr(s))); + if (iter == set.reps.end()) { return _RepPtrAndBits(); + } + + TfToken::_Rep const *rep = &(*iter); + + unsigned oldVal = + rep->_refCount.fetch_add(2, std::memory_order_relaxed); - return _RepPtrAndBits(&(*iter), iter->IncrementIfCounted()); + return _RepPtrAndBits(rep, oldVal & 1); } }; @@ -272,12 +307,6 @@ TF_REGISTRY_FUNCTION(TfType) .Alias( TfType::GetRoot(), "vector" ); } -void -TfToken::_PossiblyDestroyRep() const -{ - Tf_TokenRegistry::_GetInstance()._PossiblyDestroyRep(_rep.Get()); -} - string const& TfToken::_GetEmptyString() { diff --git a/pxr/base/tf/token.h b/pxr/base/tf/token.h index 31fdef19c3..f61dcfc86b 100644 --- a/pxr/base/tf/token.h +++ b/pxr/base/tf/token.h @@ -90,7 +90,7 @@ class TfToken enum _ImmortalTag { Immortal }; /// Create the empty token, containing the empty string. - constexpr TfToken() noexcept {} + constexpr TfToken() noexcept = default; /// Copy constructor. TfToken(TfToken const& rhs) noexcept : _rep(rhs._rep) { _AddRef(); } @@ -269,13 +269,16 @@ class TfToken /// Allows \c TfToken to be used in \c std::set inline bool operator<(TfToken const& r) const { auto ll = _rep.GetLiteral(), rl = r._rep.GetLiteral(); - if (ll && rl) { - auto lrep = _rep.Get(), rrep = r._rep.Get(); - uint64_t lcc = lrep->_compareCode, rcc = rrep->_compareCode; - return lcc < rcc || (lcc == rcc && lrep->_str < rrep->_str); + if (!ll || !rl) { + // One or both are zero -- return true if ll is zero and rl is not. + return !ll && rl; } - // One or both are zero -- return true if ll is zero and rl is not. - return !ll && rl; + if (ll == rl) { + return false; + } + auto lrep = _rep.Get(), rrep = r._rep.Get(); + uint64_t lcc = lrep->_compareCode, rcc = rrep->_compareCode; + return lcc < rcc || (lcc == rcc && lrep->_str < rrep->_str); } /// Greater-than operator that compares tokenized strings lexicographically. 
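
The new insertion path in _GetPtrImpl (token.cpp hunk above) no longer reaps a token the moment its count reaches zero; dead reps simply sit in the table until an insert is about to force a rehash, and only then are they swept out, with a minimum number of inserts between sweep checks so that a drop-one, insert-one workload cannot trigger a full sweep on every insertion. Below is a minimal single-threaded sketch of that pattern. The container, the names, and the plain refCount == 0 test are illustrative assumptions rather than the USD code, which guards each set with a tbb::spin_mutex and stores TfToken::_Rep values.

// Minimal, single-threaded sketch of "sweep dead entries only when an
// insert is about to force a rehash".  Names and types are illustrative
// assumptions, not the USD implementation.
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
#include <unordered_set>

struct Entry {
    std::string key;
    mutable unsigned refCount = 0;   // 0 means "garbage" in this sketch
    bool operator==(Entry const &o) const { return key == o.key; }
};

struct EntryHash {
    std::size_t operator()(Entry const &e) const {
        return std::hash<std::string>()(e.key);
    }
};

class SweptSet {
public:
    // Look up or create an entry, adding one reference to it.
    Entry const *Acquire(std::string const &key) {
        auto it = _reps.find(Entry{key});
        if (it != _reps.end()) {
            ++it->refCount;
            return &*it;
        }
        if (_insertsUntilSweepCheck == 0) {
            // Only bother sweeping if this insert would push the table
            // past its load factor and trigger a rehash anyway.
            if (float(_reps.size() + 1) / float(_reps.bucket_count()) >
                _reps.max_load_factor()) {
                for (auto i = _reps.begin(); i != _reps.end();) {
                    i = (i->refCount == 0) ? _reps.erase(i) : std::next(i);
                }
                // Back off before the next sweep check so a workload that
                // drops one entry and inserts one entry per step cannot
                // make us rescan the whole table on every insert.
                float slack = _reps.max_load_factor() - _reps.load_factor();
                _insertsUntilSweepCheck = std::max<std::size_t>(
                    _MinInsertsUntilSweepCheck,
                    slack > 0 ? std::size_t(_reps.bucket_count() * slack) : 0);
            }
        } else {
            --_insertsUntilSweepCheck;
        }
        return &*_reps.insert(Entry{key, 1u}).first;
    }

    // Drop a reference; the entry stays in the table until a future sweep.
    void Release(Entry const *e) { --e->refCount; }

private:
    static constexpr std::size_t _MinInsertsUntilSweepCheck = 32;
    std::unordered_set<Entry, EntryHash> _reps;
    std::size_t _insertsUntilSweepCheck = _MinInsertsUntilSweepCheck;
};

In the patch itself the same walk is safe under concurrency because the per-set spin mutex is held for the whole insert, so no other thread can resurrect a zero-count rep mid-sweep; this sketch omits the locking to keep the rehash test and the back-off computation visible.
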
@@ -301,8 +304,23 @@ class TfToken /// Returns \c true iff this token contains the empty string \c "" bool IsEmpty() const { return _rep.GetLiteral() == 0; } - /// Returns \c true iff this is an immortal token. - bool IsImmortal() const { return !_rep->_isCounted; } + /// Returns \c true iff this is an immortal token. Note that a return of \c + /// false could be instantly stale if another thread races to immortalize + /// this token. A return of \c true is always valid since tokens cannot + /// lose immortality. + bool IsImmortal() const { + if (!_rep.BitsAs()) { + return true; + } + // There is no synchronization or ordering constraints between this read + // and other reads/writes, so relaxed memory order suffices. + bool immortal = !(_rep->_refCount.load(std::memory_order_relaxed) & 1); + if (immortal) { + // Our belief is wrong, update our cache of countedness. + _rep.SetBits(false); + } + return immortal; + } /// Stream insertion. friend TF_API std::ostream &operator <<(std::ostream &stream, TfToken const&); @@ -321,88 +339,88 @@ class TfToken } void _AddRef() const { - if (_rep.BitsAs()) { - // We believe this rep is refCounted. - if (!_rep->IncrementIfCounted()) { - // Our belief is wrong, update our cache of countedness. - _rep.SetBits(false); - } + if (!_rep.BitsAs()) { + // Not counted, do nothing. + return; + } + // We believe this rep is refCounted. + if (!_rep->IncrementAndCheckCounted()) { + // Our belief is wrong, update our cache of countedness. + _rep.SetBits(false); } } void _RemoveRef() const { - if (_rep.BitsAs()) { - // We believe this rep is refCounted. - if (_rep->_isCounted) { - if (_rep->_refCount.load(std::memory_order_relaxed) == 1) { - _PossiblyDestroyRep(); - } - else { - /* - * This is deliberately racy. It's possible the statement - * below drops our count to zero, and we leak the rep - * (i.e. we leave it in the table). That's a low - * probability event, in exchange for only grabbing the lock - * (in _PossiblyDestroyRep()) when the odds are we really do - * need to modify the table. - * - * Note that even if we leak the rep, if we look it up - * again, we'll simply repull it from the table and keep - * using it. So it's not even necessarily a true leak -- - * it's just a potential leak. - */ - _rep->_refCount.fetch_sub(1, std::memory_order_relaxed); - } - } else { - // Our belief is wrong, update our cache of countedness. - _rep.SetBits(false); - } + if (!_rep.BitsAs()) { + // Not counted, do nothing. + return; + } + // We believe this rep is refCounted. + if (!_rep->DecrementAndCheckCounted()) { + // Our belief is wrong, update our cache of countedness. + _rep.SetBits(false); } } - - void TF_API _PossiblyDestroyRep() const; - + struct _Rep { - _Rep() {} - explicit _Rep(char const *s) : _str(s), _cstr(_str.c_str()) {} - explicit _Rep(std::string const &s) : _str(s), _cstr(_str.c_str()) {} + _Rep() = default; + + explicit _Rep(std::string &&str, + unsigned setNum, + uint64_t compareCode) + : _setNum(setNum) + , _compareCode(compareCode) + , _str(std::move(str)) + , _cstr(_str.c_str()) {} + + explicit _Rep(std::string const &str, + unsigned setNum, + uint64_t compareCode) + : _Rep(std::string(str), setNum, compareCode) {} + + explicit _Rep(char const *str, + unsigned setNum, + uint64_t compareCode) + : _Rep(std::string(str), setNum, compareCode) {} // Make sure we reacquire _cstr from _str on copy and assignment // to avoid holding on to a dangling pointer. However, if rhs' // _cstr member doesn't come from its _str, just copy it directly // over. 
This is to support lightweight _Rep objects used for // internal lookups. - _Rep(_Rep const &rhs) : _str(rhs._str), - _cstr(rhs._str.c_str() != rhs._cstr ? - rhs._cstr : _str.c_str()), - _compareCode(rhs._compareCode), - _refCount(rhs._refCount.load()), - _isCounted(rhs._isCounted), - _setNum(rhs._setNum) {} - _Rep& operator=(_Rep const &rhs) { - _str = rhs._str; - _cstr = (rhs._str.c_str() != rhs._cstr ? rhs._cstr : _str.c_str()); - _compareCode = rhs._compareCode; - _refCount = rhs._refCount.load(); - _isCounted = rhs._isCounted; + _Rep(_Rep const &rhs) + : _refCount(rhs._refCount.load(std::memory_order_relaxed)) + , _setNum(rhs._setNum) + , _compareCode(rhs._compareCode) + , _str(rhs._str) + , _cstr(rhs._str.c_str() == rhs._cstr ? _str.c_str() : rhs._cstr) {} + + _Rep &operator=(_Rep const &rhs) { + _refCount = rhs._refCount.load(std::memory_order_relaxed); _setNum = rhs._setNum; + _compareCode = rhs._compareCode; + _str = rhs._str; + _cstr = (rhs._str.c_str() == rhs._cstr ? _str.c_str() : rhs._cstr); return *this; } - inline bool IncrementIfCounted() const { - const bool isCounted = _isCounted; - if (isCounted) { - _refCount.fetch_add(1, std::memory_order_relaxed); - } - return isCounted; + inline bool IncrementAndCheckCounted() const { + // Refcounts are manipulated by add/sub 2, since the lowest-order + // bit indicates whether or not the rep is counted. + return _refCount.fetch_add(2, std::memory_order_relaxed) & 1; + } + + inline bool DecrementAndCheckCounted() const { + // Refcounts are manipulated by add/sub 2, since the lowest-order + // bit indicates whether or not the rep is counted. + return _refCount.fetch_sub(2, std::memory_order_release) & 1; } + mutable std::atomic_uint _refCount; + unsigned _setNum; + uint64_t _compareCode; std::string _str; char const *_cstr; - mutable uint64_t _compareCode; - mutable std::atomic_int _refCount; - mutable bool _isCounted; - mutable unsigned char _setNum; }; friend struct TfTokenFastArbitraryLessThan;
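
The key change in _Rep above is that the old separate _isCounted flag now lives in bit 0 of the atomic _refCount, and real references are counted in steps of 2. The following standalone sketch shows just that encoding; the class and method names are mine for illustration, not the TfToken API.

// Sketch of the refcount encoding: bit 0 records whether the object is
// reference-counted (mortal); actual references are counted in steps of 2.
// Illustrative only, not TfToken::_Rep.
#include <atomic>
#include <cassert>

struct CountedOrImmortal {
    // 0       : immortal
    // 2*n + 1 : mortal with n outstanding references
    mutable std::atomic<unsigned> refCount{0};

    bool IsCounted() const {
        return refCount.load(std::memory_order_relaxed) & 1;
    }
    // Returns true if the object was counted at the time of the increment.
    bool IncrementIfCounted() const {
        return refCount.fetch_add(2, std::memory_order_relaxed) & 1;
    }
    // Returns true if the object was counted at the time of the decrement.
    bool DecrementIfCounted() const {
        return refCount.fetch_sub(2, std::memory_order_release) & 1;
    }
    // A single atomic op makes the object immortal forever: clear bit 0.
    void MakeImmortal() const {
        refCount.fetch_and(~1u, std::memory_order_relaxed);
    }
};

int main() {
    CountedOrImmortal rep;
    // A newly created mortal object with one reference stores 3:
    // 1 (counted bit) plus 2 (one reference).
    rep.refCount.store(3, std::memory_order_relaxed);
    assert(rep.IsCounted());

    rep.IncrementIfCounted();           // two references, value is now 5
    assert(rep.refCount.load() == 5);

    rep.MakeImmortal();                 // clears bit 0, value is now 4
    assert(!rep.IsCounted());

    // Later increments still happen, but the low bit stays 0, so callers
    // observe that the object is no longer counted and stop adjusting.
    assert(!rep.IncrementIfCounted());
    return 0;
}

Because the flag and the count share one atomic word, a thread racing with immortalization either observes bit 0 set and adjusts the count harmlessly by 2, or observes it clear and stops adjusting; there is no window in which a separate flag and counter can disagree, which is the thread-safety problem the commit message calls out.
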