From 0861ecd6068047b07ae5f13bace04bf1dab2df04 Mon Sep 17 00:00:00 2001
From: Andrew Bell
Date: Wed, 6 Oct 2021 09:09:38 -0400
Subject: [PATCH] Merge small files. (#84)

---
 bu/FileInfo.hpp   |  2 +-
 bu/OctantInfo.cpp | 89 +++++++++++++++++++++++++++++++++++++++++++++++
 bu/OctantInfo.hpp |  2 ++
 bu/Processor.cpp  | 12 ++++++++-
 4 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100644 bu/OctantInfo.cpp

diff --git a/bu/FileInfo.hpp b/bu/FileInfo.hpp
index e1a8641..d9d4733 100644
--- a/bu/FileInfo.hpp
+++ b/bu/FileInfo.hpp
@@ -24,7 +24,7 @@ namespace bu
 class FileInfo
 {
 public:
-    FileInfo(const std::string& filename, size_t numPoints) :
+    FileInfo(const std::string& filename, int numPoints) :
         m_filename(filename), m_numPoints(numPoints)
     {}
 
diff --git a/bu/OctantInfo.cpp b/bu/OctantInfo.cpp
new file mode 100644
index 0000000..ade6acb
--- /dev/null
+++ b/bu/OctantInfo.cpp
@@ -0,0 +1,89 @@
+/*****************************************************************************
+ *   Copyright (c) 2021, Hobu, Inc. (info@hobu.co)                           *
+ *                                                                           *
+ *   All rights reserved.                                                    *
+ *                                                                           *
+ *   This program is free software; you can redistribute it and/or modify    *
+ *   it under the terms of the GNU General Public License as published by    *
+ *   the Free Software Foundation; either version 3 of the License, or       *
+ *   (at your option) any later version.                                     *
+ *                                                                           *
+ ****************************************************************************/
+
+#include <fstream>
+#include <vector>
+
+#include "OctantInfo.hpp"
+#include "../untwine/Common.hpp"
+
+namespace untwine
+{
+namespace bu
+{
+
+namespace
+{
+
+// Files holding fewer points than this are folded into the octant's merge file.
+const int SmallFilePoints = 1500;
+
+} // unnamed namespace
+
+// Combine the octant's small point files (fewer than SmallFilePoints points each) into a
+// single file, "<key>_merge.bin", so that later stages have fewer files to open.
+//
+// tempDir    Directory that holds the octant's files and receives the merge file.
+// pointSize  Size in bytes of one point record.
+//
+// Merged entries are removed from the file info list and replaced by one entry for the
+// merge file. Calls fatal() if a file can't be opened, read or written.
+void OctantInfo::mergeSmallFiles(const std::string& tempDir, size_t pointSize)
+{
+    const std::string baseFilename = key().toString() + "_merge.bin";
+    const std::string mergeFilename = tempDir + "/" + baseFilename;
+
+    std::ofstream out(mergeFilename, std::ios::binary | std::ios::trunc);
+    if (!out)
+        fatal("Couldn't open temporary merge file '" + mergeFilename + "'.");
+
+    // One buffer, big enough for the largest small file, is shared by all the copies.
+    std::vector<char> buf(SmallFilePoints * pointSize);
+
+    int totalPoints = 0;
+    auto it = m_fileInfos.begin();
+    while (it != m_fileInfos.end())
+    {
+        const int numPoints = it->numPoints();
+        if (numPoints >= SmallFilePoints)
+        {
+            ++it;
+            continue;
+        }
+
+        const size_t bytes = static_cast<size_t>(numPoints) * pointSize;
+        const std::string filename = tempDir + "/" + it->filename();
+        std::ifstream in(filename, std::ios::binary);
+        if (!in)
+            fatal("Couldn't open file '" + filename + "' to merge.");
+        if (!in.read(buf.data(), bytes))
+            fatal("Couldn't read file '" + filename + "' to merge.");
+        if (!out.write(buf.data(), bytes))
+            fatal("Couldn't write temporary merge file '" + mergeFilename + "'.");
+        totalPoints += numPoints;
+        it = m_fileInfos.erase(it);
+    }
+
+    // Close explicitly so that a failure to flush buffered data is reported here.
+    out.close();
+    if (!out)
+        fatal("Couldn't write temporary merge file '" + mergeFilename + "'.");
+
+    // Stick a new file info for the merge file on the list.
+    // If there were no file infos to merge, then don't add the file because we'll end up
+    // with a 0-sized file that we try to map and that will blow up.
+    if (totalPoints > 0)
+        m_fileInfos.emplace_back(baseFilename, totalPoints);
+}
+
+} // namespace bu
+} // namespace untwine
diff --git a/bu/OctantInfo.hpp b/bu/OctantInfo.hpp
index 98e8286..e29080d 100644
--- a/bu/OctantInfo.hpp
+++ b/bu/OctantInfo.hpp
@@ -56,6 +56,8 @@ class OctantInfo
         return false;
     }
 
+    void mergeSmallFiles(const std::string& tempDir, size_t pointSize);
+
     std::list<FileInfo>& fileInfos()
         { return m_fileInfos; }
     const std::list<FileInfo>& fileInfos() const
diff --git a/bu/Processor.cpp b/bu/Processor.cpp
index 89dd8d5..b60a9e1 100644
--- a/bu/Processor.cpp
+++ b/bu/Processor.cpp
@@ -42,17 +42,26 @@ Processor::Processor(PyramidManager& manager, const VoxelInfo& v, const BaseInfo
 
 void Processor::run()
 {
+    // If we don't merge small files into one, we'll end up trying to deal with too many
+    // open files later and run out of file descriptors.
+    for (int i = 0; i < 8; ++i)
+    {
+        OctantInfo& child = m_vi[i];
+        if (child.fileInfos().size() >= 4)
+            child.mergeSmallFiles(m_b.opts.tempDir, m_b.pointSize);
+    }
+
     size_t totalPoints = 0;
     size_t totalFileInfos = 0;
     for (int i = 0; i < 8; ++i)
     {
         OctantInfo& child = m_vi[i];
 
-        totalFileInfos += child.fileInfos().size();
         totalPoints += child.numPoints();
         if (child.numPoints() < MinimumPoints)
             m_vi.octant().appendFileInfos(child);
     }
+
     // It's possible that all the file infos have been moved above, but this is cheap.
     if (totalPoints < MinimumTotalPoints)
         for (int i = 0; i < 8; ++i)
@@ -332,6 +341,7 @@ Processor::writeOctantCompressed(const OctantInfo& o, Index& index, IndexIter po
     auto fii = o.fileInfos().begin();
     auto fiiEnd = o.fileInfos().end();
     size_t count = 0;
+
     if (fii != fiiEnd)
     {
         // We're trying to find the range of points that come from a single FileInfo.