From d80318f8eaa8e783eebf463b7c96df0c1370fb2a Mon Sep 17 00:00:00 2001 From: Daniel Patterson Date: Fri, 26 Oct 2018 23:37:36 -0700 Subject: [PATCH 1/4] Match serialized bit-packing for vector to match in-memory layout for vector_view so that data can be directly mmapped. --- include/storage/serialization.hpp | 58 ++++++++++++++++++---------- include/util/vector_view.hpp | 15 +++++++ unit_tests/storage/serialization.cpp | 46 ++++++++++++++++++---- 3 files changed, 92 insertions(+), 27 deletions(-) diff --git a/include/storage/serialization.hpp b/include/storage/serialization.hpp index 92ee9600c0d..78185f9edd4 100644 --- a/include/storage/serialization.hpp +++ b/include/storage/serialization.hpp @@ -9,6 +9,7 @@ #include "storage/shared_datatype.hpp" #include "storage/tar.hpp" +#include #include #include @@ -30,22 +31,37 @@ namespace serialization namespace detail { template -inline BlockT packBits(const T &data, std::size_t index, std::size_t count) +inline BlockT packBits(const T &data, std::size_t base_index, const std::size_t count) { static_assert(std::is_same::value, "value_type is not bool"); + static_assert(std::is_unsigned::value, "BlockT must be unsigned type"); + static_assert(std::is_integral::value, "BlockT must be an integral type"); + static_assert(CHAR_BIT == 8, "Non-8-bit bytes not supported, sorry!"); + BOOST_ASSERT(sizeof(BlockT) * CHAR_BIT >= count); + + // Note: if this packing is changed, be sure to update vector_view + // as well, so that on-disk and in-memory layouts match. BlockT value = 0; - for (std::size_t bit = 0; bit < count; ++bit, ++index) - value = (value << 1) | data[index]; + for (std::size_t bit = 0; bit < count; ++bit) + { + value |= (data[base_index + bit] ? 
BlockT{1} : BlockT{0}) << bit; + } return value; } template -inline void unpackBits(T &data, std::size_t index, std::size_t count, BlockT value) +inline void +unpackBits(T &data, const std::size_t base_index, const std::size_t count, const BlockT value) { static_assert(std::is_same::value, "value_type is not bool"); - const BlockT mask = BlockT{1} << (count - 1); - for (std::size_t bit = 0; bit < count; value <<= 1, ++bit, ++index) - data[index] = value & mask; + static_assert(std::is_unsigned::value, "BlockT must be unsigned type"); + static_assert(std::is_integral::value, "BlockT must be an integral type"); + static_assert(CHAR_BIT == 8, "Non-8-bit bytes not supported, sorry!"); + BOOST_ASSERT(sizeof(BlockT) * CHAR_BIT >= count); + for (std::size_t bit = 0; bit < count; ++bit) + { + data[base_index + bit] = value & (BlockT{1} << bit); + } } template @@ -55,15 +71,16 @@ void readBoolVector(tar::FileReader &reader, const std::string &name, VectorT &d data.resize(count); std::uint64_t index = 0; - constexpr std::uint64_t WORD_BITS = CHAR_BIT * sizeof(std::uint64_t); + using BlockType = std::uint64_t; + constexpr std::uint64_t BLOCK_BITS = CHAR_BIT * sizeof(BlockType); - const auto decode = [&](const std::uint64_t block) { - auto read_size = std::min(count - index, WORD_BITS); - unpackBits(data, index, read_size, block); - index += WORD_BITS; + const auto decode = [&](const BlockType block) { + auto read_size = std::min(count - index, BLOCK_BITS); + unpackBits(data, index, read_size, block); + index += BLOCK_BITS; }; - reader.ReadStreaming(name, boost::make_function_output_iterator(decode)); + reader.ReadStreaming(name, boost::make_function_output_iterator(decode)); } template @@ -73,19 +90,20 @@ void writeBoolVector(tar::FileWriter &writer, const std::string &name, const Vec writer.WriteElementCount64(name, count); std::uint64_t index = 0; - constexpr std::uint64_t WORD_BITS = CHAR_BIT * sizeof(std::uint64_t); + using BlockType = std::uint64_t; + constexpr 
std::uint64_t BLOCK_BITS = CHAR_BIT * sizeof(BlockType); // FIXME on old boost version the function_input_iterator does not work with lambdas // so we need to wrap it in a function here. - const std::function encode_function = [&]() -> std::uint64_t { - auto write_size = std::min(count - index, WORD_BITS); - auto packed = packBits(data, index, write_size); - index += WORD_BITS; + const std::function encode_function = [&]() -> BlockType { + auto write_size = std::min(count - index, BLOCK_BITS); + auto packed = packBits(data, index, write_size); + index += BLOCK_BITS; return packed; }; - std::uint64_t number_of_blocks = (count + WORD_BITS - 1) / WORD_BITS; - writer.WriteStreaming( + std::uint64_t number_of_blocks = (count + BLOCK_BITS - 1) / BLOCK_BITS; + writer.WriteStreaming( name, boost::make_function_input_iterator(encode_function, boost::infinite()), number_of_blocks); diff --git a/include/util/vector_view.hpp b/include/util/vector_view.hpp index ab6e91fd607..260cc511eb3 100644 --- a/include/util/vector_view.hpp +++ b/include/util/vector_view.hpp @@ -195,7 +195,10 @@ template <> class vector_view { BOOST_ASSERT_MSG(index < m_size, "invalid size"); const std::size_t bucket = index / WORD_BITS; + // Note: ordering of bits here should match packBits in storage/serialization.hpp + // so that directly mmap-ing data is possible const auto offset = index % WORD_BITS; + BOOST_ASSERT(WORD_BITS > offset); return m_ptr[bucket] & (static_cast(1) << offset); } @@ -224,11 +227,23 @@ template <> class vector_view { BOOST_ASSERT(index < m_size); const auto bucket = index / WORD_BITS; + // Note: ordering of bits here should match packBits in storage/serialization.hpp + // so that directly mmap-ing data is possible const auto offset = index % WORD_BITS; + BOOST_ASSERT(WORD_BITS > offset); return reference{m_ptr + bucket, static_cast(1) << offset}; } template friend void swap(vector_view &, vector_view &) noexcept; + + friend std::ostream &operator<<(std::ostream &os, const 
vector_view &rhs) + { + for (std::size_t i = 0; i < rhs.size(); ++i) + { + os << (i > 0 ? " " : "") << rhs.at(i); + } + return os; + } }; // Both vector_view and the vector_view specializations share this impl. diff --git a/unit_tests/storage/serialization.cpp b/unit_tests/storage/serialization.cpp index e42f8ea8e5c..760f1faeabd 100644 --- a/unit_tests/storage/serialization.cpp +++ b/unit_tests/storage/serialization.cpp @@ -1,11 +1,15 @@ #include "storage/serialization.hpp" +#include "util/vector_view.hpp" + #include "../common/range_tools.hpp" #include "../common/temporary_file.hpp" #include #include +#include + BOOST_AUTO_TEST_SUITE(serialization) using namespace osrm; @@ -15,20 +19,48 @@ BOOST_AUTO_TEST_CASE(pack_test) { std::vector v = {0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1}; - BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 0, 8), 0x2e); - BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 5, 7), 0x65); - BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 6, 8), 0x95); + BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 0, 8), 0x74); + BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 5, 7), 0x53); + BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 6, 8), 0xa9); BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 11, 1), 0x01); } +BOOST_AUTO_TEST_CASE(vector_view_pack_test) +{ + // Verifies that the packing generated by packBits matches + // what vector_view expects + + // 1. Generate a random bool vector that covers several uint64_t bytes + constexpr unsigned RANDOM_SEED = 42; + std::mt19937 g(RANDOM_SEED); + std::uniform_int_distribution<> binary_distribution(0, 1); + std::vector v(150); + for (std::size_t i = 0; i < v.size(); ++i) + v[i] = binary_distribution(g) == 1; + + // 2. 
Pack the vector into a contiguous set of bytes + std::uint64_t data[3]; + data[0] = storage::serialization::detail::packBits(v, 0, 64); + data[1] = storage::serialization::detail::packBits(v, 64, 64); + data[2] = storage::serialization::detail::packBits(v, 128, 22); + + // 3. Make a vector_view of that memory, and see if the bit sequence is + // interpreted correctly by vector_view + util::vector_view view(data, v.size()); + for (std::size_t index = 0; index < v.size(); ++index) + { + BOOST_CHECK_EQUAL(v[index], view[index]); + } +} + BOOST_AUTO_TEST_CASE(unpack_test) { std::vector v(14), expected = {0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1}; - storage::serialization::detail::unpackBits(v, 0, 8, 0x2e); - storage::serialization::detail::unpackBits(v, 5, 7, 0x65); - storage::serialization::detail::unpackBits(v, 6, 8, 0x95); - storage::serialization::detail::unpackBits(v, 11, 1, 0x01); + storage::serialization::detail::unpackBits(v, 0, 8, 0x74u); + storage::serialization::detail::unpackBits(v, 5, 7, 0x53u); + storage::serialization::detail::unpackBits(v, 6, 8, 0xa9u); + storage::serialization::detail::unpackBits(v, 11, 1, 0x01u); BOOST_CHECK_EQUAL_COLLECTIONS(v.begin(), v.end(), expected.begin(), expected.end()); } From 2f9cb44368132ae910b0ac37ef750f698915de83 Mon Sep 17 00:00:00 2001 From: Daniel Patterson Date: Fri, 26 Oct 2018 23:48:51 -0700 Subject: [PATCH 2/4] `mmap` tarfiles directly when mmapping is enabled, instead of copying data into separate mmapped block Co-authored-by: Kajari Ghosh --- .../datafacade/mmap_memory_allocator.hpp | 8 +- include/engine/datafacade_provider.hpp | 5 +- include/storage/block.hpp | 11 +- include/storage/serialization.hpp | 4 +- include/storage/shared_data_index.hpp | 35 ++-- include/storage/shared_datatype.hpp | 98 +++++++---- include/storage/storage.hpp | 12 +- include/util/mmap_file.hpp | 33 ++-- .../datafacade/mmap_memory_allocator.cpp | 71 ++++---- .../datafacade/process_memory_allocator.cpp | 16 +- 
.../datafacade/shared_memory_allocator.cpp | 5 +- src/storage/storage.cpp | 157 ++++++++++-------- src/tools/store.cpp | 8 +- unit_tests/storage/data_layout.cpp | 67 ++++---- 14 files changed, 310 insertions(+), 220 deletions(-) diff --git a/include/engine/datafacade/mmap_memory_allocator.hpp b/include/engine/datafacade/mmap_memory_allocator.hpp index 4a9742a4422..60df71e90b5 100644 --- a/include/engine/datafacade/mmap_memory_allocator.hpp +++ b/include/engine/datafacade/mmap_memory_allocator.hpp @@ -10,6 +10,7 @@ #include #include +#include namespace osrm { @@ -24,8 +25,7 @@ namespace datafacade class MMapMemoryAllocator : public ContiguousBlockAllocator { public: - explicit MMapMemoryAllocator(const storage::StorageConfig &config, - const boost::filesystem::path &memory_file); + explicit MMapMemoryAllocator(const storage::StorageConfig &config); ~MMapMemoryAllocator() override final; // interface to give access to the datafacades @@ -33,8 +33,8 @@ class MMapMemoryAllocator : public ContiguousBlockAllocator private: storage::SharedDataIndex index; - util::vector_view mapped_memory; - boost::iostreams::mapped_file mapped_memory_file; + std::vector mapped_memory_files; + std::string rtree_filename; }; } // namespace datafacade diff --git a/include/engine/datafacade_provider.hpp b/include/engine/datafacade_provider.hpp index a624ee009e6..0f1441f6a39 100644 --- a/include/engine/datafacade_provider.hpp +++ b/include/engine/datafacade_provider.hpp @@ -32,9 +32,8 @@ class ExternalProvider final : public DataFacadeProvider public: using Facade = typename DataFacadeProvider::Facade; - ExternalProvider(const storage::StorageConfig &config, - const boost::filesystem::path &memory_file) - : facade_factory(std::make_shared(config, memory_file)) + ExternalProvider(const storage::StorageConfig &config) + : facade_factory(std::make_shared(config)) { } diff --git a/include/storage/block.hpp b/include/storage/block.hpp index 47478e5eb18..60271f4eaa0 100644 --- 
a/include/storage/block.hpp +++ b/include/storage/block.hpp @@ -16,10 +16,15 @@ struct Block { std::uint64_t num_entries; std::uint64_t byte_size; + std::uint64_t offset; - Block() : num_entries(0), byte_size(0) {} + Block() : num_entries(0), byte_size(0), offset(0) {} + Block(std::uint64_t num_entries, std::uint64_t byte_size, std::uint64_t offset) + : num_entries(num_entries), byte_size(byte_size), offset(offset) + { + } Block(std::uint64_t num_entries, std::uint64_t byte_size) - : num_entries(num_entries), byte_size(byte_size) + : num_entries(num_entries), byte_size(byte_size), offset(0) { } }; @@ -29,7 +34,7 @@ using NamedBlock = std::tuple; template Block make_block(uint64_t num_entries) { static_assert(sizeof(T) % alignof(T) == 0, "aligned T* can't be used as an array pointer"); - return Block{num_entries, sizeof(T) * num_entries}; + return Block{num_entries, sizeof(T) * num_entries, 0}; } } } diff --git a/include/storage/serialization.hpp b/include/storage/serialization.hpp index 78185f9edd4..97cba94efe4 100644 --- a/include/storage/serialization.hpp +++ b/include/storage/serialization.hpp @@ -284,9 +284,9 @@ template void write(io::BufferWriter &writer, const std } } -inline void read(io::BufferReader &reader, DataLayout &layout) { read(reader, layout.blocks); } +inline void read(io::BufferReader &reader, BaseDataLayout &layout) { read(reader, layout.blocks); } -inline void write(io::BufferWriter &writer, const DataLayout &layout) +inline void write(io::BufferWriter &writer, const BaseDataLayout &layout) { write(writer, layout.blocks); } diff --git a/include/storage/shared_data_index.hpp b/include/storage/shared_data_index.hpp index 94f78182ac7..9041bc100b4 100644 --- a/include/storage/shared_data_index.hpp +++ b/include/storage/shared_data_index.hpp @@ -5,6 +5,7 @@ #include +#include #include namespace osrm @@ -19,8 +20,8 @@ class SharedDataIndex public: struct AllocatedRegion { - char *memory_ptr; - DataLayout layout; + void *memory_ptr; + std::unique_ptr 
layout; }; SharedDataIndex() = default; @@ -29,10 +30,10 @@ class SharedDataIndex // Build mapping from block name to region for (auto index : util::irange(0, regions.size())) { - regions[index].layout.List("", - boost::make_function_output_iterator([&](const auto &name) { - block_to_region[name] = index; - })); + regions[index].layout->List("", + boost::make_function_output_iterator([&](const auto &name) { + block_to_region[name] = index; + })); } } @@ -40,32 +41,44 @@ class SharedDataIndex { for (const auto &region : regions) { - region.layout.List(name_prefix, out); + region.layout->List(name_prefix, out); } } template auto GetBlockPtr(const std::string &name) const { +#if !defined(__GNUC__) || (__GNUC__ > 4) + // is_trivially_copyable only exists in GCC >=5 + static_assert(std::is_trivially_copyable::value, + "Block-based data must be a trivially copyable type"); + static_assert(sizeof(T) % alignof(T) == 0, "aligned T* can't be used as an array pointer"); +#endif const auto &region = GetBlockRegion(name); - return region.layout.GetBlockPtr(region.memory_ptr, name); + return reinterpret_cast(region.layout->GetBlockPtr(region.memory_ptr, name)); } template auto GetBlockPtr(const std::string &name) { +#if !defined(__GNUC__) || (__GNUC__ > 4) + // is_trivially_copyable only exists in GCC >=5 + static_assert(std::is_trivially_copyable::value, + "Block-based data must be a trivially copyable type"); + static_assert(sizeof(T) % alignof(T) == 0, "aligned T* can't be used as an array pointer"); +#endif const auto &region = GetBlockRegion(name); - return region.layout.GetBlockPtr(region.memory_ptr, name); + return reinterpret_cast(region.layout->GetBlockPtr(region.memory_ptr, name)); } std::size_t GetBlockEntries(const std::string &name) const { const auto &region = GetBlockRegion(name); - return region.layout.GetBlockEntries(name); + return region.layout->GetBlockEntries(name); } std::size_t GetBlockSize(const std::string &name) const { const auto &region = GetBlockRegion(name); - return region.layout.GetBlockSize(name); + 
return region.layout.GetBlockSize(name); + return region.layout->GetBlockSize(name); } private: diff --git a/include/storage/shared_datatype.hpp b/include/storage/shared_datatype.hpp index 926ca1deddf..65addcffce1 100644 --- a/include/storage/shared_datatype.hpp +++ b/include/storage/shared_datatype.hpp @@ -20,12 +20,12 @@ namespace osrm namespace storage { -class DataLayout; +class BaseDataLayout; namespace serialization { -inline void read(io::BufferReader &reader, DataLayout &layout); +inline void read(io::BufferReader &reader, BaseDataLayout &layout); -inline void write(io::BufferWriter &writer, const DataLayout &layout); +inline void write(io::BufferWriter &writer, const BaseDataLayout &layout); } // namespace serialization namespace detail @@ -54,42 +54,26 @@ inline std::string trimName(const std::string &name_prefix, const std::string &n } } // namespace detail -class DataLayout +class BaseDataLayout { public: - DataLayout() : blocks{} {} + virtual ~BaseDataLayout() = default; inline void SetBlock(const std::string &name, Block block) { blocks[name] = std::move(block); } - inline uint64_t GetBlockEntries(const std::string &name) const + inline std::uint64_t GetBlockEntries(const std::string &name) const { return GetBlock(name).num_entries; } - inline uint64_t GetBlockSize(const std::string &name) const { return GetBlock(name).byte_size; } - - inline bool HasBlock(const std::string &name) const + inline std::uint64_t GetBlockSize(const std::string &name) const { - return blocks.find(name) != blocks.end(); + return GetBlock(name).byte_size; } - inline uint64_t GetSizeOfLayout() const - { - uint64_t result = 0; - for (const auto &name_and_block : blocks) - { - result += GetBlockSize(name_and_block.first) + BLOCK_ALIGNMENT; - } - return result; - } - - template inline T *GetBlockPtr(char *shared_memory, const std::string &name) const + inline bool HasBlock(const std::string &name) const { - static_assert(BLOCK_ALIGNMENT % std::alignment_of::value == 0, - 
"Datatype does not fit alignment constraints."); - - char *ptr = (char *)GetAlignedBlockPtr(shared_memory, name); - return (T *)ptr; + return blocks.find(name) != blocks.end(); } // Depending on the name prefix this function either lists all blocks with the same prefix @@ -115,10 +99,10 @@ class DataLayout } } - private: - friend void serialization::read(io::BufferReader &reader, DataLayout &layout); - friend void serialization::write(io::BufferWriter &writer, const DataLayout &layout); + virtual inline void *GetBlockPtr(void *base_ptr, const std::string &name) const = 0; + virtual inline std::uint64_t GetSizeOfLayout() const = 0; + protected: const Block &GetBlock(const std::string &name) const { auto iter = blocks.find(name); @@ -130,10 +114,42 @@ class DataLayout return iter->second; } + friend void serialization::read(io::BufferReader &reader, BaseDataLayout &layout); + friend void serialization::write(io::BufferWriter &writer, const BaseDataLayout &layout); + + std::map blocks; +}; + +class ContiguousDataLayout final : public BaseDataLayout +{ + public: + inline std::uint64_t GetSizeOfLayout() const override final + { + std::uint64_t result = 0; + for (const auto &name_and_block : blocks) + { + result += GetBlockSize(name_and_block.first) + BLOCK_ALIGNMENT; + } + return result; + } + + inline void *GetBlockPtr(void *base_ptr, const std::string &name) const override final + { + // TODO: re-enable this alignment checking somehow + // static_assert(BLOCK_ALIGNMENT % std::alignment_of::value == 0, + // "Datatype does not fit alignment constraints."); + + return GetAlignedBlockPtr(base_ptr, name); + } + + private: + friend void serialization::read(io::BufferReader &reader, BaseDataLayout &layout); + friend void serialization::write(io::BufferWriter &writer, const BaseDataLayout &layout); + // Fit aligned storage in buffer to 64 bytes to conform with AVX 512 types inline void *align(void *&ptr) const noexcept { - const auto intptr = reinterpret_cast(ptr); + const 
auto intptr = reinterpret_cast(ptr); const auto aligned = (intptr - 1u + BLOCK_ALIGNMENT) & -BLOCK_ALIGNMENT; return ptr = reinterpret_cast(aligned); } @@ -157,7 +173,27 @@ class DataLayout } static constexpr std::size_t BLOCK_ALIGNMENT = 64; - std::map blocks; +}; + +class TarDataLayout final : public BaseDataLayout +{ + public: + inline std::uint64_t GetSizeOfLayout() const override final + { + std::uint64_t result = 0; + for (const auto &name_and_block : blocks) + { + result += GetBlockSize(name_and_block.first); + } + return result; + } + + inline void *GetBlockPtr(void *base_ptr, const std::string &name) const override final + { + auto offset = GetBlock(name).offset; + const auto offset_address = reinterpret_cast(base_ptr) + offset; + return reinterpret_cast(offset_address); + } }; struct SharedRegion diff --git a/include/storage/storage.hpp b/include/storage/storage.hpp index 2c8ae02bc61..b03669db05f 100644 --- a/include/storage/storage.hpp +++ b/include/storage/storage.hpp @@ -35,22 +35,28 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include namespace osrm { namespace storage { + +void populateLayoutFromFile(const boost::filesystem::path &path, storage::BaseDataLayout &layout); + class Storage { public: Storage(StorageConfig config); int Run(int max_wait, const std::string &name, bool only_metric); - - void PopulateStaticLayout(DataLayout &layout); - void PopulateUpdatableLayout(DataLayout &layout); void PopulateStaticData(const SharedDataIndex &index); void PopulateUpdatableData(const SharedDataIndex &index); + void PopulateLayout(storage::BaseDataLayout &layout, + const std::vector> &files); + std::string PopulateLayoutWithRTree(storage::BaseDataLayout &layout); + std::vector> GetUpdatableFiles(); + std::vector> GetStaticFiles(); private: StorageConfig config; diff --git a/include/util/mmap_file.hpp b/include/util/mmap_file.hpp index 46984d94848..d55203d7228 100644 --- a/include/util/mmap_file.hpp +++ b/include/util/mmap_file.hpp @@ -15,14 +15,14 @@ namespace util namespace detail { -template -util::vector_view mmapFile(const boost::filesystem::path &file, RegionT &region) +template +util::vector_view mmapFile(const boost::filesystem::path &file, MmapContainerT &mmap_container) { try { - region.open(file); - std::size_t num_objects = region.size() / sizeof(T); - auto data_ptr = region.data(); + mmap_container.open(file); + std::size_t num_objects = mmap_container.size() / sizeof(T); + auto data_ptr = mmap_container.data(); BOOST_ASSERT(reinterpret_cast(data_ptr) % alignof(T) == 0); return util::vector_view(reinterpret_cast(data_ptr), num_objects); } @@ -34,9 +34,10 @@ util::vector_view mmapFile(const boost::filesystem::path &file, RegionT &regi } } -template -util::vector_view -mmapFile(const boost::filesystem::path &file, RegionT &region, const std::size_t size) +template +util::vector_view mmapFile(const boost::filesystem::path &file, + MmapContainerT &mmap_container, + const std::size_t size) { try { @@ -45,10 +46,10 @@ mmapFile(const boost::filesystem::path &file, RegionT &region, 
const std::size_t params.path = file.string(); params.flags = boost::iostreams::mapped_file::readwrite; params.new_file_size = size; - region.open(params); + mmap_container.open(params); std::size_t num_objects = size / sizeof(T); - auto data_ptr = region.data(); + auto data_ptr = mmap_container.data(); BOOST_ASSERT(reinterpret_cast(data_ptr) % alignof(T) == 0); return util::vector_view(reinterpret_cast(data_ptr), num_objects); } @@ -63,24 +64,24 @@ mmapFile(const boost::filesystem::path &file, RegionT &region, const std::size_t template util::vector_view mmapFile(const boost::filesystem::path &file, - boost::iostreams::mapped_file_source &region) + boost::iostreams::mapped_file_source &mmap_container) { - return detail::mmapFile(file, region); + return detail::mmapFile(file, mmap_container); } template util::vector_view mmapFile(const boost::filesystem::path &file, - boost::iostreams::mapped_file &region) + boost::iostreams::mapped_file &mmap_container) { - return detail::mmapFile(file, region); + return detail::mmapFile(file, mmap_container); } template util::vector_view mmapFile(const boost::filesystem::path &file, - boost::iostreams::mapped_file &region, + boost::iostreams::mapped_file &mmap_container, std::size_t size) { - return detail::mmapFile(file, region, size); + return detail::mmapFile(file, mmap_container, size); } } } diff --git a/src/engine/datafacade/mmap_memory_allocator.cpp b/src/engine/datafacade/mmap_memory_allocator.cpp index 9b0902fd390..fd87cc42ab0 100644 --- a/src/engine/datafacade/mmap_memory_allocator.cpp +++ b/src/engine/datafacade/mmap_memory_allocator.cpp @@ -1,5 +1,6 @@ #include "engine/datafacade/mmap_memory_allocator.hpp" +#include "storage/block.hpp" #include "storage/io.hpp" #include "storage/serialization.hpp" #include "storage/storage.hpp" @@ -7,7 +8,7 @@ #include "util/log.hpp" #include "util/mmap_file.hpp" -#include "boost/assert.hpp" +#include namespace osrm { @@ -16,46 +17,50 @@ namespace engine namespace datafacade { 
-MMapMemoryAllocator::MMapMemoryAllocator(const storage::StorageConfig &config, - const boost::filesystem::path &memory_file) +MMapMemoryAllocator::MMapMemoryAllocator(const storage::StorageConfig &config) { storage::Storage storage(config); + std::vector allocated_regions; - if (!boost::filesystem::exists(memory_file)) { - storage::DataLayout initial_layout; - storage.PopulateStaticLayout(initial_layout); - storage.PopulateUpdatableLayout(initial_layout); - - auto data_size = initial_layout.GetSizeOfLayout(); - - storage::io::BufferWriter writer; - storage::serialization::write(writer, initial_layout); - auto encoded_layout = writer.GetBuffer(); - - auto total_size = data_size + encoded_layout.size(); - - mapped_memory = util::mmapFile(memory_file, mapped_memory_file, total_size); - - std::copy(encoded_layout.begin(), encoded_layout.end(), mapped_memory.data()); - - index = storage::SharedDataIndex( - {{mapped_memory.data() + encoded_layout.size(), std::move(initial_layout)}}); - - storage.PopulateStaticData(index); - storage.PopulateUpdatableData(index); + std::unique_ptr fake_layout = + std::make_unique(); + + // Convert the boost::filesystem::path object into a plain string + // that's stored as a member of this allocator object + rtree_filename = storage.PopulateLayoutWithRTree(*fake_layout); + + // Now, we add one more AllocatedRegion, with it's start address as the start + // of the rtree_filename string we've saved. In the fake_layout, we've + // stated that the data is at offset 0, which is where the string starts + // at it's own memory address. + // The syntax &(rtree_filename[0]) gets the memory address of the first char. 
+ // We can't use the convenient `.data()` or `.c_str()` methods, because + // prior to C++17 (which we're not using), those return a `const char *`, + // which isn't compatible with the `char *` that AllocatedRegion expects + // for it's memory_ptr + allocated_regions.push_back({&(rtree_filename[0]), std::move(fake_layout)}); } - else - { - mapped_memory = util::mmapFile(memory_file, mapped_memory_file); - storage::DataLayout layout; - storage::io::BufferReader reader(mapped_memory.data(), mapped_memory.size()); - storage::serialization::read(reader, layout); - auto layout_size = reader.GetPosition(); + auto files = storage.GetStaticFiles(); + auto updatable_files = storage.GetUpdatableFiles(); + files.insert(files.end(), updatable_files.begin(), updatable_files.end()); - index = storage::SharedDataIndex({{mapped_memory.data() + layout_size, std::move(layout)}}); + for (const auto &file : files) + { + if (boost::filesystem::exists(file.second)) + { + std::unique_ptr layout = + std::make_unique(); + boost::iostreams::mapped_file mapped_memory_file; + util::mmapFile(file.second, mapped_memory_file); + mapped_memory_files.push_back(std::move(mapped_memory_file)); + storage::populateLayoutFromFile(file.second, *layout); + allocated_regions.push_back({mapped_memory_file.data(), std::move(layout)}); + } } + + index = storage::SharedDataIndex{std::move(allocated_regions)}; } MMapMemoryAllocator::~MMapMemoryAllocator() {} diff --git a/src/engine/datafacade/process_memory_allocator.cpp b/src/engine/datafacade/process_memory_allocator.cpp index 4e20ee8b6fe..ed07d669fbe 100644 --- a/src/engine/datafacade/process_memory_allocator.cpp +++ b/src/engine/datafacade/process_memory_allocator.cpp @@ -15,14 +15,20 @@ ProcessMemoryAllocator::ProcessMemoryAllocator(const storage::StorageConfig &con storage::Storage storage(config); // Calculate the layout/size of the memory block - storage::DataLayout layout; - storage.PopulateStaticLayout(layout); - 
storage.PopulateUpdatableLayout(layout); + auto static_files = storage.GetStaticFiles(); + auto updatable_files = storage.GetUpdatableFiles(); + std::unique_ptr layout = + std::make_unique(); + storage.PopulateLayoutWithRTree(*layout); + storage.PopulateLayout(*layout, static_files); + storage.PopulateLayout(*layout, updatable_files); // Allocate the memory block, then load data from files into it - internal_memory = std::make_unique(layout.GetSizeOfLayout()); + internal_memory = std::make_unique(layout->GetSizeOfLayout()); - index = storage::SharedDataIndex({{internal_memory.get(), std::move(layout)}}); + std::vector regions; + regions.push_back({internal_memory.get(), std::move(layout)}); + index = {std::move(regions)}; storage.PopulateStaticData(index); storage.PopulateUpdatableData(index); diff --git a/src/engine/datafacade/shared_memory_allocator.cpp b/src/engine/datafacade/shared_memory_allocator.cpp index 6105034983a..d90302bf9d2 100644 --- a/src/engine/datafacade/shared_memory_allocator.cpp +++ b/src/engine/datafacade/shared_memory_allocator.cpp @@ -25,8 +25,9 @@ SharedMemoryAllocator::SharedMemoryAllocator( auto mem = storage::makeSharedMemory(shm_key); storage::io::BufferReader reader(reinterpret_cast(mem->Ptr()), mem->Size()); - storage::DataLayout layout; - storage::serialization::read(reader, layout); + std::unique_ptr layout = + std::make_unique(); + storage::serialization::read(reader, *layout); auto layout_size = reader.GetPosition(); regions.push_back({reinterpret_cast(mem->Ptr()) + layout_size, std::move(layout)}); diff --git a/src/storage/storage.cpp b/src/storage/storage.cpp index 213217425f8..503b1c10ef4 100644 --- a/src/storage/storage.cpp +++ b/src/storage/storage.cpp @@ -44,24 +44,6 @@ namespace { using Monitor = SharedMonitor; -void readBlocks(const boost::filesystem::path &path, DataLayout &layout) -{ - tar::FileReader reader(path, tar::FileReader::VerifyFingerprint); - - std::vector entries; - reader.List(std::back_inserter(entries)); - - 
for (const auto &entry : entries) - { - const auto name_end = entry.name.rfind(".meta"); - if (name_end == std::string::npos) - { - auto number_of_elements = reader.ReadElementCount64(entry.name); - layout.SetBlock(entry.name, Block{number_of_elements, entry.size}); - } - } -} - struct RegionHandle { std::unique_ptr memory; @@ -69,7 +51,8 @@ struct RegionHandle std::uint16_t shm_key; }; -auto setupRegion(SharedRegionRegister &shared_register, const DataLayout &layout) +RegionHandle setupRegion(SharedRegionRegister &shared_register, + const storage::BaseDataLayout &layout) { // This is safe because we have an exclusive lock for all osrm-datastore processes. auto shm_key = shared_register.ReserveKey(); @@ -184,6 +167,24 @@ bool swapData(Monitor &monitor, } } +void populateLayoutFromFile(const boost::filesystem::path &path, storage::BaseDataLayout &layout) +{ + tar::FileReader reader(path, tar::FileReader::VerifyFingerprint); + + std::vector entries; + reader.List(std::back_inserter(entries)); + + for (const auto &entry : entries) + { + const auto name_end = entry.name.rfind(".meta"); + if (name_end == std::string::npos) + { + auto number_of_elements = reader.ReadElementCount64(entry.name); + layout.SetBlock(entry.name, Block{number_of_elements, entry.size, entry.offset}); + } + } +} + Storage::Storage(StorageConfig config_) : config(std::move(config_)) {} int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric) @@ -243,29 +244,35 @@ int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric auto static_region = shared_register.GetRegion(region_id); auto static_memory = makeSharedMemory(static_region.shm_key); - DataLayout static_layout; + std::unique_ptr static_layout = + std::make_unique(); io::BufferReader reader(reinterpret_cast(static_memory->Ptr()), static_memory->Size()); - serialization::read(reader, static_layout); + serialization::read(reader, *static_layout); auto layout_size = reader.GetPosition(); auto 
*data_ptr = reinterpret_cast(static_memory->Ptr()) + layout_size; - regions.push_back({data_ptr, static_layout}); + regions.push_back({data_ptr, std::move(static_layout)}); readonly_handles.push_back({std::move(static_memory), data_ptr, static_region.shm_key}); } else { - DataLayout static_layout; - PopulateStaticLayout(static_layout); - auto static_handle = setupRegion(shared_register, static_layout); - regions.push_back({static_handle.data_ptr, static_layout}); + std::unique_ptr static_layout = + std::make_unique(); + Storage::PopulateLayoutWithRTree(*static_layout); + std::vector> files = Storage::GetStaticFiles(); + Storage::PopulateLayout(*static_layout, files); + auto static_handle = setupRegion(shared_register, *static_layout); + regions.push_back({static_handle.data_ptr, std::move(static_layout)}); handles[dataset_name + "/static"] = std::move(static_handle); } - DataLayout updatable_layout; - PopulateUpdatableLayout(updatable_layout); - auto updatable_handle = setupRegion(shared_register, updatable_layout); - regions.push_back({updatable_handle.data_ptr, updatable_layout}); + std::unique_ptr updatable_layout = + std::make_unique(); + std::vector> files = Storage::GetUpdatableFiles(); + Storage::PopulateLayout(*updatable_layout, files); + auto updatable_handle = setupRegion(shared_register, *updatable_layout); + regions.push_back({updatable_handle.data_ptr, std::move(updatable_layout)}); handles[dataset_name + "/updatable"] = std::move(updatable_handle); SharedDataIndex index{std::move(regions)}; @@ -281,24 +288,12 @@ int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric return EXIT_SUCCESS; } -/** - * This function examines all our data files and figures out how much - * memory needs to be allocated, and the position of each data structure - * in that big block. It updates the fields in the DataLayout parameter. 
- */ -void Storage::PopulateStaticLayout(DataLayout &static_layout) +std::vector> Storage::GetStaticFiles() { - { - auto absolute_file_index_path = - boost::filesystem::absolute(config.GetPath(".osrm.fileIndex")); - - static_layout.SetBlock("/common/rtree/file_index_path", - make_block(absolute_file_index_path.string().length() + 1)); - } - constexpr bool REQUIRED = true; constexpr bool OPTIONAL = false; - std::vector> tar_files = { + + std::vector> files = { {OPTIONAL, config.GetPath(".osrm.cells")}, {OPTIONAL, config.GetPath(".osrm.partition")}, {REQUIRED, config.GetPath(".osrm.icd")}, @@ -310,53 +305,73 @@ void Storage::PopulateStaticLayout(DataLayout &static_layout) {REQUIRED, config.GetPath(".osrm.maneuver_overrides")}, {REQUIRED, config.GetPath(".osrm.edges")}, {REQUIRED, config.GetPath(".osrm.names")}, - {REQUIRED, config.GetPath(".osrm.ramIndex")}, - }; + {REQUIRED, config.GetPath(".osrm.ramIndex")}}; - for (const auto &file : tar_files) + for (const auto &file : files) { - if (boost::filesystem::exists(file.second)) - { - readBlocks(file.second, static_layout); - } - else + if (file.first == REQUIRED && !boost::filesystem::exists(file.second)) { - if (file.first == REQUIRED) - { - throw util::exception("Could not find required filed: " + - std::get<1>(file).string()); - } + throw util::exception("Could not find required filed: " + std::get<1>(file).string()); } } + + return files; } -void Storage::PopulateUpdatableLayout(DataLayout &updatable_layout) +std::vector> Storage::GetUpdatableFiles() { constexpr bool REQUIRED = true; constexpr bool OPTIONAL = false; - std::vector> tar_files = { + + std::vector> files = { {OPTIONAL, config.GetPath(".osrm.mldgr")}, {OPTIONAL, config.GetPath(".osrm.cell_metrics")}, {OPTIONAL, config.GetPath(".osrm.hsgr")}, {REQUIRED, config.GetPath(".osrm.datasource_names")}, {REQUIRED, config.GetPath(".osrm.geometry")}, {REQUIRED, config.GetPath(".osrm.turn_weight_penalties")}, - {REQUIRED, 
config.GetPath(".osrm.turn_duration_penalties")}, - }; + {REQUIRED, config.GetPath(".osrm.turn_duration_penalties")}}; - for (const auto &file : tar_files) + for (const auto &file : files) { - if (boost::filesystem::exists(file.second)) + if (file.first == REQUIRED && !boost::filesystem::exists(file.second)) { - readBlocks(file.second, updatable_layout); + throw util::exception("Could not find required filed: " + std::get<1>(file).string()); } - else + } + + return files; +} + +std::string Storage::PopulateLayoutWithRTree(storage::BaseDataLayout &layout) +{ + // Figure out the path to the rtree file (it's not a tar file) + auto absolute_file_index_path = boost::filesystem::absolute(config.GetPath(".osrm.fileIndex")); + + // Convert the boost::filesystem::path object into a plain string + // that can then be stored as a member of an allocator object + auto rtree_filename = absolute_file_index_path.string(); + + // Here, we hardcode the special file_index_path block name. + // The important bit here is that the "offset" is set to zero + layout.SetBlock("/common/rtree/file_index_path", make_block(rtree_filename.length() + 1)); + + return rtree_filename; +} + +/** + * This function examines all our data files and figures out how much + * memory needs to be allocated, and the position of each data structure + * in that big block. It updates the fields in the layout parameter. 
+ */ +void Storage::PopulateLayout(storage::BaseDataLayout &layout, + const std::vector> &files) +{ + for (const auto &file : files) + { + if (boost::filesystem::exists(file.second)) { - if (file.first == REQUIRED) - { - throw util::exception("Could not find required filed: " + - std::get<1>(file).string()); - } + populateLayoutFromFile(file.second, layout); } } } diff --git a/src/tools/store.cpp b/src/tools/store.cpp index bd21c9b4b1d..533f0d01026 100644 --- a/src/tools/store.cpp +++ b/src/tools/store.cpp @@ -52,14 +52,14 @@ void listRegions(bool show_blocks) auto memory = makeSharedMemory(region.shm_key); io::BufferReader reader(reinterpret_cast(memory->Ptr()), memory->Size()); - DataLayout layout; - serialization::read(reader, layout); + std::unique_ptr layout = std::make_unique(); + serialization::read(reader, *layout); std::vector block_names; - layout.List("", std::back_inserter(block_names)); + layout->List("", std::back_inserter(block_names)); for (auto &name : block_names) { - osrm::util::Log() << " " << name << " " << layout.GetBlockSize(name); + osrm::util::Log() << " " << name << " " << layout->GetBlockSize(name); } } } diff --git a/unit_tests/storage/data_layout.cpp b/unit_tests/storage/data_layout.cpp index 2aa0b40ee4b..b9ef8e8ba45 100644 --- a/unit_tests/storage/data_layout.cpp +++ b/unit_tests/storage/data_layout.cpp @@ -15,86 +15,89 @@ using namespace osrm::storage; BOOST_AUTO_TEST_CASE(layout_write_test) { - DataLayout layout; + std::unique_ptr layout = std::make_unique(); Block block_1{20, 8 * 20}; Block block_2{1, 4 * 1}; Block block_3{100, static_cast(std::ceil(100 / 64.))}; - layout.SetBlock("block1", block_1); - layout.SetBlock("block2", block_2); - layout.SetBlock("block3", block_3); + layout->SetBlock("block1", block_1); + layout->SetBlock("block2", block_2); + layout->SetBlock("block3", block_3); // Canary and alignment change layout size - BOOST_CHECK_GT(layout.GetSizeOfLayout(), + BOOST_CHECK_GT(layout->GetSizeOfLayout(), 
block_1.byte_size + block_2.byte_size + block_3.byte_size); - BOOST_CHECK_EQUAL(layout.GetBlockSize("block1"), block_1.byte_size); - BOOST_CHECK_EQUAL(layout.GetBlockSize("block2"), block_2.byte_size); - BOOST_CHECK_EQUAL(layout.GetBlockSize("block3"), block_3.byte_size); + BOOST_CHECK_EQUAL(layout->GetBlockSize("block1"), block_1.byte_size); + BOOST_CHECK_EQUAL(layout->GetBlockSize("block2"), block_2.byte_size); + BOOST_CHECK_EQUAL(layout->GetBlockSize("block3"), block_3.byte_size); - std::vector buffer(layout.GetSizeOfLayout()); + std::vector buffer(layout->GetSizeOfLayout()); auto smallest_addr = buffer.data(); auto biggest_addr = buffer.data() + buffer.size(); { - auto block_1_ptr = layout.GetBlockPtr(buffer.data(), "block1"); - auto block_2_ptr = layout.GetBlockPtr(buffer.data(), "block2"); - auto block_3_ptr = layout.GetBlockPtr(buffer.data(), "block3"); - - BOOST_CHECK_LT(reinterpret_cast(smallest_addr), + auto block_1_ptr = + reinterpret_cast(layout->GetBlockPtr(buffer.data(), "block1")); + auto block_2_ptr = + reinterpret_cast(layout->GetBlockPtr(buffer.data(), "block2")); + auto block_3_ptr = + reinterpret_cast(layout->GetBlockPtr(buffer.data(), "block3")); + + BOOST_CHECK_LE(reinterpret_cast(smallest_addr), reinterpret_cast(block_1_ptr)); BOOST_CHECK_GT( reinterpret_cast(biggest_addr), - reinterpret_cast(block_1_ptr + layout.GetBlockEntries("block1"))); + reinterpret_cast(block_1_ptr + layout->GetBlockEntries("block1"))); BOOST_CHECK_LT(reinterpret_cast(smallest_addr), reinterpret_cast(block_2_ptr)); BOOST_CHECK_GT( reinterpret_cast(biggest_addr), - reinterpret_cast(block_2_ptr + layout.GetBlockEntries("block2"))); + reinterpret_cast(block_2_ptr + layout->GetBlockEntries("block2"))); BOOST_CHECK_LT(reinterpret_cast(smallest_addr), reinterpret_cast(block_3_ptr)); BOOST_CHECK_GT(reinterpret_cast(biggest_addr), reinterpret_cast( block_3_ptr + static_cast( - std::ceil(layout.GetBlockEntries("block3") / 64)))); + std::ceil(layout->GetBlockEntries("block3") / 
64)))); } } BOOST_AUTO_TEST_CASE(layout_list_test) { - DataLayout layout; + std::unique_ptr layout = std::make_unique(); Block block_1{20, 8 * 20}; Block block_2{1, 4 * 1}; Block block_3{100, static_cast(std::ceil(100 / 64.))}; - layout.SetBlock("/ch/edge_filter/block1", block_1); - layout.SetBlock("/ch/edge_filter/block2", block_2); - layout.SetBlock("/ch/edge_filter/block3", block_3); - layout.SetBlock("/mld/metrics/0/durations", block_2); - layout.SetBlock("/mld/metrics/0/weights", block_3); - layout.SetBlock("/mld/metrics/1/durations", block_2); - layout.SetBlock("/mld/metrics/1/weights", block_3); + layout->SetBlock("/ch/edge_filter/block1", block_1); + layout->SetBlock("/ch/edge_filter/block2", block_2); + layout->SetBlock("/ch/edge_filter/block3", block_3); + layout->SetBlock("/mld/metrics/0/durations", block_2); + layout->SetBlock("/mld/metrics/0/weights", block_3); + layout->SetBlock("/mld/metrics/1/durations", block_2); + layout->SetBlock("/mld/metrics/1/weights", block_3); std::vector results_1; std::vector results_2; std::vector results_3; - layout.List("/ch/edge_filter", std::back_inserter(results_1)); - layout.List("/ch/edge_filter/", std::back_inserter(results_2)); - layout.List("/ch/", std::back_inserter(results_3)); + layout->List("/ch/edge_filter", std::back_inserter(results_1)); + layout->List("/ch/edge_filter/", std::back_inserter(results_2)); + layout->List("/ch/", std::back_inserter(results_3)); std::vector results_4; std::vector results_5; std::vector results_6; - layout.List("/mld/metrics", std::back_inserter(results_4)); - layout.List("/mld/metrics/", std::back_inserter(results_5)); - layout.List("/mld/", std::back_inserter(results_6)); + layout->List("/mld/metrics", std::back_inserter(results_4)); + layout->List("/mld/metrics/", std::back_inserter(results_5)); + layout->List("/mld/", std::back_inserter(results_6)); std::vector results_7; - layout.List("", std::back_inserter(results_7)); + layout->List("", std::back_inserter(results_7)); 
BOOST_CHECK_EQUAL(results_7.size(), 7); CHECK_EQUAL_RANGE( From b7e7d32361ec792f0c8ed335a36fd1c2279ce2f6 Mon Sep 17 00:00:00 2001 From: Daniel Patterson Date: Fri, 26 Oct 2018 23:50:46 -0700 Subject: [PATCH 3/4] Expose new `--mmap` switch (`mmap_memory: true` in NodeJS), and run test suite in this mode, as well as shared memory mode. --- CHANGELOG.md | 3 ++ features/lib/osrm_loader.js | 56 ++++++++++++++++++++++++++-- features/support/env.js | 2 +- include/engine/engine.hpp | 14 ++++--- include/engine/engine_config.hpp | 1 + include/nodejs/node_osrm_support.hpp | 16 ++++++++ package.json | 2 +- src/engine/engine_config.cpp | 4 +- src/tools/routed.cpp | 5 ++- 9 files changed, 91 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 078e95dd4dc..d9aca267d55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # UNRELEASED - Changes from 5.19.0: + - Features: + - ADDED: direct mmapping of datafiles is now supported via the `--mmap` switch. [#5242](https://github.com/Project-OSRM/osrm-backend/pull/5242) + - REMOVED: the previous `--memory_file` switch is now deprecated and will fall back to `--mmap` [#5242](https://github.com/Project-OSRM/osrm-backend/pull/5242) # 5.19.0 - Changes from 5.18.0: diff --git a/features/lib/osrm_loader.js b/features/lib/osrm_loader.js index 51410d22875..a29d53b8e33 100644 --- a/features/lib/osrm_loader.js +++ b/features/lib/osrm_loader.js @@ -84,7 +84,47 @@ class OSRMDirectLoader extends OSRMBaseLoader { throw new Error(util.format('osrm-routed %s: %s', errorReason(err), err.cmd)); } }); - callback(); + + this.child.readyFunc = (data) => { + if (/running and waiting for requests/.test(data)) { + this.child.stdout.removeListener('data', this.child.readyFunc); + callback(); + } + }; + this.child.stdout.on('data',this.child.readyFunc); + } +}; + +class OSRMmmapLoader extends OSRMBaseLoader { + constructor (scope) { + super(scope); + } + + load (inputFile, callback) { + this.inputFile = inputFile; +
this.shutdown(() => { + this.launch(callback); + }); + } + + osrmUp (callback) { + if (this.osrmIsRunning()) return callback(new Error("osrm-routed already running!")); + + const command_arguments = util.format('%s -p %d -i %s -a %s --mmap', this.inputFile, this.scope.OSRM_PORT, this.scope.OSRM_IP, this.scope.ROUTING_ALGORITHM); + this.child = this.scope.runBin('osrm-routed', command_arguments, this.scope.environment, (err) => { + if (err && err.signal !== 'SIGINT') { + this.child = null; + throw new Error(util.format('osrm-routed %s: %s', errorReason(err), err.cmd)); + } + }); + + this.child.readyFunc = (data) => { + if (/running and waiting for requests/.test(data)) { + this.child.stdout.removeListener('data', this.child.readyFunc); + callback(); + } + }; + this.child.stdout.on('data',this.child.readyFunc); } }; @@ -135,22 +175,32 @@ class OSRMLoader { this.scope = scope; this.sharedLoader = new OSRMDatastoreLoader(this.scope); this.directLoader = new OSRMDirectLoader(this.scope); + this.mmapLoader = new OSRMmmapLoader(this.scope); this.method = scope.DEFAULT_LOAD_METHOD; } load (inputFile, callback) { + if (!this.loader) { + this.loader = {shutdown: (cb) => cb() }; + } if (this.method === 'datastore') { - this.directLoader.shutdown((err) => { + this.loader.shutdown((err) => { if (err) return callback(err); this.loader = this.sharedLoader; this.sharedLoader.load(inputFile, callback); }); } else if (this.method === 'directly') { - this.sharedLoader.shutdown((err) => { + this.loader.shutdown((err) => { if (err) return callback(err); this.loader = this.directLoader; this.directLoader.load(inputFile, callback); }); + } else if (this.method === 'mmap') { + this.loader.shutdown((err) => { + if (err) return callback(err); + this.loader = this.mmapLoader; + this.mmapLoader.load(inputFile, callback); + }); } else { callback(new Error('*** Unknown load method ' + method)); } diff --git a/features/support/env.js b/features/support/env.js index 7866e0756c3..b1fd3ef951d 
100644 --- a/features/support/env.js +++ b/features/support/env.js @@ -32,7 +32,7 @@ module.exports = function () { this.DEFAULT_ENVIRONMENT = Object.assign({STXXLCFG: stxxl_config}, process.env); this.DEFAULT_PROFILE = 'bicycle'; this.DEFAULT_INPUT_FORMAT = 'osm'; - this.DEFAULT_LOAD_METHOD = 'datastore'; + this.DEFAULT_LOAD_METHOD = process.argv[process.argv.indexOf('-m') +1].match('mmap') ? 'mmap' : 'datastore'; this.DEFAULT_ORIGIN = [1,1]; this.OSM_USER = 'osrm'; this.OSM_UID = 1; diff --git a/include/engine/engine.hpp b/include/engine/engine.hpp index 025dc2deb96..9121c7b0e0a 100644 --- a/include/engine/engine.hpp +++ b/include/engine/engine.hpp @@ -63,12 +63,16 @@ template class Engine final : public EngineInterface << "\" with algorithm " << routing_algorithms::name(); facade_provider = std::make_unique>(config.dataset_name); } - else if (!config.memory_file.empty()) + else if (!config.memory_file.empty() || config.use_mmap) { - util::Log(logDEBUG) << "Using memory mapped filed at " << config.memory_file - << " with algorithm " << routing_algorithms::name(); - facade_provider = std::make_unique>(config.storage_config, - config.memory_file); + if (!config.memory_file.empty()) + { + util::Log(logWARNING) + << "The 'memory_file' option is DEPRECATED - using direct mmaping instead"; + } + util::Log(logDEBUG) << "Using direct memory mapping with algorithm " + << routing_algorithms::name(); + facade_provider = std::make_unique>(config.storage_config); } else { diff --git a/include/engine/engine_config.hpp b/include/engine/engine_config.hpp index 149cdbd390c..cc0cc4a932b 100644 --- a/include/engine/engine_config.hpp +++ b/include/engine/engine_config.hpp @@ -89,6 +89,7 @@ struct EngineConfig final int max_alternatives = 3; // set an arbitrary upper bound; can be adjusted by user bool use_shared_memory = true; boost::filesystem::path memory_file; + bool use_mmap = true; Algorithm algorithm = Algorithm::CH; std::string verbosity; std::string dataset_name; diff --git 
a/include/nodejs/node_osrm_support.hpp b/include/nodejs/node_osrm_support.hpp index 4a93eb7a74c..27752e431bd 100644 --- a/include/nodejs/node_osrm_support.hpp +++ b/include/nodejs/node_osrm_support.hpp @@ -142,6 +142,10 @@ inline engine_config_ptr argumentsToEngineConfig(const Nan::FunctionCallbackInfo if (shared_memory.IsEmpty()) return engine_config_ptr(); + auto mmap_memory = params->Get(Nan::New("mmap_memory").ToLocalChecked()); + if (mmap_memory.IsEmpty()) + return engine_config_ptr(); + if (!memory_file->IsUndefined()) { if (path->IsUndefined()) @@ -190,6 +194,18 @@ inline engine_config_ptr argumentsToEngineConfig(const Nan::FunctionCallbackInfo return engine_config_ptr(); } } + if (!mmap_memory->IsUndefined()) + { + if (mmap_memory->IsBoolean()) + { + engine_config->use_mmap = Nan::To(mmap_memory).FromJust(); + } + else + { + Nan::ThrowError("mmap_memory option must be a boolean"); + return engine_config_ptr(); + } + } if (path->IsUndefined() && !engine_config->use_shared_memory) { diff --git a/package.json b/package.json index d56bc5afc9f..7c0f7001be0 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ }, "scripts": { "lint": "node ./node_modules/eslint/bin/eslint.js -c ./.eslintrc features/step_definitions/ features/support/", - "test": "npm run lint && node ./node_modules/cucumber/bin/cucumber.js features/ -p verify && node ./node_modules/cucumber/bin/cucumber.js features/ -p mld", + "test": "npm run lint && node ./node_modules/cucumber/bin/cucumber.js features/ -p verify && node ./node_modules/cucumber/bin/cucumber.js features/ -p verify -m mmap && node ./node_modules/cucumber/bin/cucumber.js features/ -p mld && node ./node_modules/cucumber/bin/cucumber.js features/ -p mld -m mmap", "clean": "rm -rf test/cache", "docs": "./scripts/build_api_docs.sh", "install": "node-pre-gyp install --fallback-to-build=false || ./scripts/node_install.sh", diff --git a/src/engine/engine_config.cpp b/src/engine/engine_config.cpp index 94f1f0a56ff..ac7723ba92a 
100644 --- a/src/engine/engine_config.cpp +++ b/src/engine/engine_config.cpp @@ -23,7 +23,9 @@ bool EngineConfig::IsValid() const unlimited_or_more_than(max_results_nearest, 0) && max_alternatives >= 0; - return ((use_shared_memory && all_path_are_empty) || storage_config.IsValid()) && limits_valid; + return ((use_shared_memory && all_path_are_empty) || (use_mmap && storage_config.IsValid()) || + storage_config.IsValid()) && + limits_valid; } } } diff --git a/src/tools/routed.cpp b/src/tools/routed.cpp index 25a62b03aae..718e3e4d09a 100644 --- a/src/tools/routed.cpp +++ b/src/tools/routed.cpp @@ -119,7 +119,10 @@ inline unsigned generateServerProgramOptions(const int argc, "Load data from shared memory") // ("memory_file", value(&config.memory_file), - "Store data in a memory mapped file rather than in process memory.") // + "DEPRECATED: Will behave the same as --mmap.")( + "mmap,m", + value(&config.use_mmap)->implicit_value(true)->default_value(false), + "Map datafiles directly, do not use any additional memory.") // ("dataset-name", value(&config.dataset_name), "Name of the shared memory dataset to connect to.") // From 96c7b47afeeaf2e0dd3fc8aaf85ec7ca65ba48ed Mon Sep 17 00:00:00 2001 From: Daniel Patterson Date: Sat, 27 Oct 2018 00:12:17 -0700 Subject: [PATCH 4/4] Document new mmap_memory option in NodeJS API --- docs/nodejs/api.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/nodejs/api.md b/docs/nodejs/api.md index 9372488e8af..49cad3920fd 100644 --- a/docs/nodejs/api.md +++ b/docs/nodejs/api.md @@ -25,7 +25,9 @@ var osrm = new OSRM('network.osrm'); Make sure you prepared the dataset with the correct toolchain. - `options.shared_memory` **[Boolean](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Boolean)?** Connects to the persistent shared memory datastore. This requires you to run `osrm-datastore` prior to creating an `OSRM` object. 
- - `options.memory_file` **[String](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String)?** Path to a file on disk to store the memory using mmap. + - `options.memory_file` **[String](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String)?** *DEPRECATED* + Old behaviour: Path to a file on disk to store the memory using mmap. Current behaviour: setting this value is the same as setting `mmap_memory: true`. + - `options.mmap_memory` **[Boolean](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Boolean)?** Map on-disk files to virtual memory addresses (mmap), rather than loading into RAM. - `options.path` **[String](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String)?** The path to the `.osrm` files. This is mutually exclusive with setting {options.shared_memory} to true. - `options.max_locations_trip` **[Number](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number)?** Max. locations supported in trip query (default: unlimited). - `options.max_locations_viaroute` **[Number](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number)?** Max. locations supported in viaroute query (default: unlimited).