From cd30f370b8799a4c9518828d88990cf55cce2200 Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Sat, 28 May 2016 17:36:31 +0200 Subject: [PATCH] Remove the use of atomics and use thread-local accumulation instead --- src/contractor/contractor.cpp | 38 +++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/contractor/contractor.cpp b/src/contractor/contractor.cpp index a90f4bfff13..c4adcefd762 100644 --- a/src/contractor/contractor.cpp +++ b/src/contractor/contractor.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -32,7 +33,6 @@ #include #include -#include #include #include #include @@ -516,15 +516,14 @@ std::size_t Contractor::LoadEdgeExpandedGraph( // vector to count used speeds for logging // size offset by one since index 0 is used for speeds not from external file - std::vector> segment_speeds_counters; - for (std::size_t i = 0; i < segment_speed_filenames.size() + 1; ++i) - { - segment_speeds_counters.emplace_back(); - segment_speeds_counters[i].store(0); - } + using counters_type = std::vector; + std::size_t num_counters = segment_speed_filenames.size() + 1; + tbb::enumerable_thread_specific<counters_type> segment_speeds_counters( + counters_type(num_counters, 0)); const constexpr auto LUA_SOURCE = 0; tbb::parallel_for_each(first, last, [&](const LeafNode &current_node) { + auto &counters = segment_speeds_counters.local(); for (size_t i = 0; i < current_node.object_count; i++) { const auto &leaf_object = current_node.objects[i]; @@ -571,12 +570,12 @@ std::size_t Contractor::LoadEdgeExpandedGraph( forward_speed_iter->speed_source.source; // count statistics for logging - segment_speeds_counters[forward_speed_iter->speed_source.source] += 1; + counters[forward_speed_iter->speed_source.source] += 1; } else { // count statistics for logging - segment_speeds_counters[LUA_SOURCE] += 1; + counters[LUA_SOURCE] += 1; } } if (leaf_object.reverse_packed_geometry_id != SPECIAL_EDGEID) @@ -622,30 +621,39 @@ 
std::size_t Contractor::LoadEdgeExpandedGraph( reverse_speed_iter->speed_source.source; // count statistics for logging - segment_speeds_counters[reverse_speed_iter->speed_source.source] += 1; + counters[reverse_speed_iter->speed_source.source] += 1; } else { // count statistics for logging - segment_speeds_counters[LUA_SOURCE] += 1; + counters[LUA_SOURCE] += 1; } } } }); // parallel_for_each - for (std::size_t i = 0; i < segment_speeds_counters.size(); i++) + counters_type merged_counters(num_counters, 0); + for (const auto &counters : segment_speeds_counters) + { + for (std::size_t i = 0; i < counters.size(); i++) + { + merged_counters[i] += counters[i]; + } + } + + for (std::size_t i = 0; i < merged_counters.size(); i++) { if (i == LUA_SOURCE) { - util::SimpleLogger().Write() << "Used " << segment_speeds_counters[LUA_SOURCE] + util::SimpleLogger().Write() << "Used " << merged_counters[LUA_SOURCE] << " speeds from LUA profile or input map"; } else { // segments_speeds_counters has 0 as LUA, segment_speed_filenames not, thus we need // to susbstract 1 to avoid off-by-one error - util::SimpleLogger().Write() << "Used " << segment_speeds_counters[i] - << " speeds from " << segment_speed_filenames[i - 1]; + util::SimpleLogger().Write() << "Used " << merged_counters[i] << " speeds from " + << segment_speed_filenames[i - 1]; } } }