Skip to content

Commit

Permalink
[ML] Always use Boost unordered maps/sets (#1647)
Browse files Browse the repository at this point in the history
The memory instrumentation in core::CMemory exists for
boost::unordered_set and boost::unordered_map, but not
for the std equivalents.  If we ever move away from the
Boost containers, it is better that we switch all uses
in the codebase at the same time (or even switch to
completely different hash container implementations).

This change switches the few cases of std::unordered_map
and std::unordered_set to the boost versions.  This may
increase reported memory usage where these containers
are in memory-instrumented classes.  It won't increase
actual memory usage (at least not significantly); it
just makes the reporting more accurate.
  • Loading branch information
droberts195 authored Jan 8, 2021
1 parent daa5f00 commit d2b8d9b
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 18 deletions.
8 changes: 5 additions & 3 deletions include/api/CBoostedTreeInferenceModelBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#include <rapidjson/document.h>

#include <boost/unordered_map.hpp>

#include <string>

namespace ml {
Expand All @@ -25,7 +27,7 @@ class API_EXPORT CBoostedTreeInferenceModelBuilder : public maths::CBoostedTree:
using TDoubleVec = std::vector<double>;
using TStrVec = std::vector<std::string>;
using TStrVecVec = std::vector<TStrVec>;
using TSizeStringUMap = std::unordered_map<std::size_t, std::string>;
using TSizeStringUMap = boost::unordered_map<std::size_t, std::string>;
using TSizeStringUMapVec = std::vector<TSizeStringUMap>;
using TVector = maths::CBoostedTreeNode::TVector;
using TApiCustomEncodingUPtr = std::unique_ptr<api::CCustomEncoding>;
Expand Down Expand Up @@ -59,8 +61,8 @@ class API_EXPORT CBoostedTreeInferenceModelBuilder : public maths::CBoostedTree:

private:
using TOneHotEncodingUPtr = std::unique_ptr<COneHotEncoding>;
using TOneHotEncodingUMap = std::unordered_map<std::string, TOneHotEncodingUPtr>;
using TStringDoubleUMap = std::unordered_map<std::string, double>;
using TOneHotEncodingUMap = boost::unordered_map<std::string, TOneHotEncodingUPtr>;
using TStringDoubleUMap = boost::unordered_map<std::string, double>;

private:
virtual void setTargetType() = 0;
Expand Down
1 change: 0 additions & 1 deletion include/api/CDataFrameAnalysisInstrumentation.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <cstdint>
#include <memory>
#include <mutex>
#include <unordered_map>

namespace ml {
namespace api {
Expand Down
10 changes: 5 additions & 5 deletions include/api/CInferenceModelDefinition.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
#include <rapidjson/ostreamwrapper.h>

#include <boost/optional.hpp>
#include <boost/unordered_map.hpp>

#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

namespace ml {
Expand Down Expand Up @@ -354,7 +354,7 @@ class API_EXPORT CFrequencyEncoding final : public CEncoding {
private:
const CFrequencyEncoding& m_Encoding;
};
using TStringDoubleUMap = const std::unordered_map<std::string, double>;
using TStringDoubleUMap = const boost::unordered_map<std::string, double>;

public:
~CFrequencyEncoding() override = default;
Expand Down Expand Up @@ -417,7 +417,7 @@ class API_EXPORT CTargetMeanEncoding final : public CEncoding {
private:
const CTargetMeanEncoding& m_Encoding;
};
using TStringDoubleUMap = std::unordered_map<std::string, double>;
using TStringDoubleUMap = boost::unordered_map<std::string, double>;

public:
~CTargetMeanEncoding() override = default;
Expand Down Expand Up @@ -462,9 +462,9 @@ class API_EXPORT CInferenceModelDefinition : public CSerializableToJsonStream {
using TApiCustomEncodingUPtr = std::unique_ptr<api::CCustomEncoding>;
using TApiCustomEncodingUPtrVec = std::vector<TApiCustomEncodingUPtr>;
using TRapidJsonWriter = core::CRapidJsonConcurrentLineWriter;
using TSizeStringUMap = std::unordered_map<std::size_t, std::string>;
using TSizeStringUMap = boost::unordered_map<std::size_t, std::string>;
using TSizeStringUMapVec = std::vector<TSizeStringUMap>;
using TStringSizeUMap = std::unordered_map<std::string, std::size_t>;
using TStringSizeUMap = boost::unordered_map<std::string, std::size_t>;
using TStringSizeUMapVec = std::vector<TStringSizeUMap>;
using TStringVec = std::vector<std::string>;
using TTrainedModelUPtr = CEnsemble::TTrainedModelUPtr;
Expand Down
6 changes: 4 additions & 2 deletions include/api/CInferenceModelMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include <api/CInferenceModelDefinition.h>
#include <api/ImportExport.h>

#include <boost/unordered_map.hpp>

#include <string>
#include <tuple>

Expand Down Expand Up @@ -83,8 +85,8 @@ class API_EXPORT CInferenceModelMetadata {
using TMeanAccumulator =
std::vector<maths::CBasicStatistics::SSampleMean<double>::TAccumulator>;
using TMinMaxAccumulator = std::vector<maths::CBasicStatistics::CMinMax<double>>;
using TSizeMeanAccumulatorUMap = std::unordered_map<std::size_t, TMeanAccumulator>;
using TSizeMinMaxAccumulatorUMap = std::unordered_map<std::size_t, TMinMaxAccumulator>;
using TSizeMeanAccumulatorUMap = boost::unordered_map<std::size_t, TMeanAccumulator>;
using TSizeMinMaxAccumulatorUMap = boost::unordered_map<std::size_t, TMinMaxAccumulator>;
using TOptionalVector = boost::optional<TVector>;
using THyperparametersVec = std::vector<SHyperparameterImportance>;

Expand Down
8 changes: 4 additions & 4 deletions lib/api/CInferenceModelDefinition.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@

#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>

#include <cmath>
#include <iterator>
#include <memory>
#include <ostream>
#include <unordered_map>
#include <unordered_set>

namespace ml {
namespace api {
Expand Down Expand Up @@ -280,7 +280,7 @@ void CEnsemble::targetType(ETargetType targetType) {
}

CTrainedModel::TStringVec CEnsemble::removeUnusedFeatures() {
std::unordered_set<std::string> set;
boost::unordered_set<std::string> set;
for (auto& trainedModel : this->trainedModels()) {
TStringVec vec(trainedModel->removeUnusedFeatures());
set.insert(vec.begin(), vec.end());
Expand Down Expand Up @@ -380,7 +380,7 @@ CTree::TTreeNodeVec& CTree::treeStructure() {
}

CTrainedModel::TStringVec CTree::removeUnusedFeatures() {
std::unordered_map<std::size_t, std::size_t> selectedFeatureIndices;
boost::unordered_map<std::size_t, std::size_t> selectedFeatureIndices;
for (auto& treeNode : m_TreeStructure) {
if (treeNode.leaf() == false) {
std::size_t adjustedIndex{selectedFeatureIndices
Expand Down
6 changes: 3 additions & 3 deletions lib/model/unittest/CEventRatePopulationModelTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@
#include <boost/test/unit_test.hpp>
#include <boost/tuple/tuple.hpp>
#include <boost/tuple/tuple_comparison.hpp>
#include <boost/unordered_map.hpp>

#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

Expand Down Expand Up @@ -279,15 +279,15 @@ BOOST_FIXTURE_TEST_CASE(testFeatures, CTestFixture) {
}

private:
using TDoubleSizeUMap = std::unordered_map<double, std::size_t>;
using TDoubleSizeUMap = boost::unordered_map<double, std::size_t>;

private:
TDoubleSizeUMap m_Uniques;
TDouble2VecVec m_Values;
TDouble2VecWeightsAryVec m_TrendWeights;
TDouble2VecWeightsAryVec m_ResidualWeights;
};
using TSizeUniqueValuesUMap = std::unordered_map<std::size_t, CUniqueValues>;
using TSizeUniqueValuesUMap = boost::unordered_map<std::size_t, CUniqueValues>;

core_t::TTime startTime = 1367280000;
const core_t::TTime bucketLength = 3600;
Expand Down

0 comments on commit d2b8d9b

Please sign in to comment.