Skip to content

Commit

Permalink
Merge small files. (#84)
Browse files Browse the repository at this point in the history
  • Loading branch information
abellgithub authored Oct 6, 2021
1 parent 5210bfe commit 0861ecd
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 2 deletions.
2 changes: 1 addition & 1 deletion bu/FileInfo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace bu
class FileInfo
{
public:
FileInfo(const std::string& filename, size_t numPoints) :
FileInfo(const std::string& filename, int numPoints) :
m_filename(filename), m_numPoints(numPoints)
{}

Expand Down
63 changes: 63 additions & 0 deletions bu/OctantInfo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*****************************************************************************
* Copyright (c) 2021, Hobu, Inc. ([email protected]) *
* *
* All rights reserved. *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 3 of the License, or *
* (at your option) any later version. *
* *
****************************************************************************/

#include <fstream>
#include <vector>

#include "OctantInfo.hpp"
#include "../untwine/Common.hpp"

namespace untwine
{
namespace bu
{

// Merge every file of this octant that holds fewer than 1500 points into a single
// temporary file named "<key>_merge.bin" in `tempDir`.
//
// Each merged FileInfo is removed from m_fileInfos. If at least one point was merged,
// a FileInfo describing the merge file is appended to m_fileInfos.
//
// \param tempDir    Directory containing the octant's files; the merge file is created here.
// \param pointSize  Size in bytes of a single point record.
//
// Calls fatal() if the merge file or an input file can't be opened, if an input file
// holds fewer bytes than its FileInfo claims, or if writing the merge file fails.
void OctantInfo::mergeSmallFiles(const std::string tempDir, size_t pointSize)
{
    // Files with fewer points than this are considered "small" and get merged.
    constexpr int SmallFileLimit = 1500;

    const std::string baseFilename = key().toString() + "_merge.bin";
    const std::string mergeFilename = tempDir + "/" + baseFilename;

    std::ofstream out(mergeFilename, std::ios::binary | std::ios::trunc);
    if (!out)
        fatal("Couldn't open temporary merge file '" + mergeFilename + "'.");

    // One scratch buffer, large enough for the biggest file we'll merge, shared by all
    // iterations rather than being reallocated for every file info.
    std::vector<char> buf(SmallFileLimit * pointSize);

    int totalPoints = 0;
    auto it = m_fileInfos.begin();
    while (it != m_fileInfos.end())
    {
        const int numPoints = it->numPoints();
        if (numPoints >= SmallFileLimit)
        {
            ++it;
            continue;
        }

        const size_t bytes = numPoints * pointSize;
        const std::string inFilename = tempDir + "/" + it->filename();
        std::ifstream in(inFilename, std::ios::binary);
        if (!in)
            fatal("Couldn't open file '" + inFilename + "' to merge.");

        // A short read would otherwise copy stale buffer contents into the merge file.
        if (!in.read(buf.data(), bytes))
            fatal("Couldn't read file '" + inFilename + "' to merge.");
        out.write(buf.data(), bytes);

        totalPoints += numPoints;
        it = m_fileInfos.erase(it);
    }

    // Flush and close now so that write errors are detected before the merge file is
    // registered for later use.
    out.close();
    if (!out)
        fatal("Couldn't write temporary merge file '" + mergeFilename + "'.");

    // Stick a new file info for the merge file on the list.
    // If there were no file infos to merge, then don't add the file because we'll end up
    // with a 0-sized file that we try to map and that will blow up.
    if (totalPoints > 0)
        m_fileInfos.emplace_back(baseFilename, totalPoints);
}

} // namespace bu
} // namespace untwine
2 changes: 2 additions & 0 deletions bu/OctantInfo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class OctantInfo
return false;
}

void mergeSmallFiles(const std::string tempDir, size_t pointSize);

std::list<FileInfo>& fileInfos()
{ return m_fileInfos; }
const std::list<FileInfo>& fileInfos() const
Expand Down
12 changes: 11 additions & 1 deletion bu/Processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,26 @@ Processor::Processor(PyramidManager& manager, const VoxelInfo& v, const BaseInfo

void Processor::run()
{
// If we don't merge small files into one, we'll end up trying to deal with too many
// open files later and run out of file descriptors.
for (int i = 0; i < 8; ++i)
{
OctantInfo& child = m_vi[i];
if (child.fileInfos().size() >= 4)
child.mergeSmallFiles(m_b.opts.tempDir, m_b.pointSize);
}

size_t totalPoints = 0;
size_t totalFileInfos = 0;
for (int i = 0; i < 8; ++i)
{
OctantInfo& child = m_vi[i];

totalFileInfos += child.fileInfos().size();
totalPoints += child.numPoints();
if (child.numPoints() < MinimumPoints)
m_vi.octant().appendFileInfos(child);
}

// It's possible that all the file infos have been moved above, but this is cheap.
if (totalPoints < MinimumTotalPoints)
for (int i = 0; i < 8; ++i)
Expand Down Expand Up @@ -332,6 +341,7 @@ Processor::writeOctantCompressed(const OctantInfo& o, Index& index, IndexIter po
auto fii = o.fileInfos().begin();
auto fiiEnd = o.fileInfos().end();
size_t count = 0;

if (fii != fiiEnd)
{
// We're trying to find the range of points that come from a single FileInfo.
Expand Down

0 comments on commit 0861ecd

Please sign in to comment.