From be5a392044f8668f5140b87fb0c540a076e5ffda Mon Sep 17 00:00:00 2001 From: Andrew Bell Date: Tue, 19 Oct 2021 15:54:06 -0400 Subject: [PATCH] Documentation and check on buffer count. --- epf/Epf.cpp | 1 + epf/Writer.hpp | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/epf/Epf.cpp b/epf/Epf.cpp index 98199b7..1610116 100644 --- a/epf/Epf.cpp +++ b/epf/Epf.cpp @@ -37,6 +37,7 @@ namespace epf /// Epf +static_assert(MaxBuffers > NumFileProcessors, "MaxBuffers must be greater than NumFileProcessors."); Epf::Epf(BaseInfo& common) : m_b(common), m_pool(NumFileProcessors) {} diff --git a/epf/Writer.hpp b/epf/Writer.hpp index 05cc425..1c8dde2 100644 --- a/epf/Writer.hpp +++ b/epf/Writer.hpp @@ -28,6 +28,36 @@ namespace untwine namespace epf { +// The writer has some number of threads that actually write data to the files for tiles. When +// a processor has a full tile (or a partial that it needs to discard/finish), it sticks it +// on the queue for one of the writer threads to pick up and process. +// +// We can't have multiple writer threads write to the same file simultaneously, so rather than +// lock (which might stall threads that could otherwise be working), we make sure that only +// one writer thread is working on a file at a time by sticking +// the key of the thread in an "active" list. A writer thread looking for work will ignore +// any buffer on the queue that's for a file currently being handled by another writer thread. +// +// The writer owns a buffer cache. The cache manages the actual data buffers that are filled +// by the file processors and written by a writer thread. The buffers are created as needed +// until some predefined number of buffers is hit in order to limit memory use. +// Once a writer is done with a buffer, it sticks it back on the cache +// and then notifies some processor that a buffer is available in case the processor +// is waiting for a free buffer. 
+// +// Since processors try to hold onto buffers until they are full, there can be times at +// which the buffers are exhausted and no more are available, but none are ready to be +// written. In this case, the buffers for the processor needing a new buffer are flushed to +// the queue even if they aren't full so that they can be reused. The active buffer for a +// flushing processor is reserved, so there needs to be at least one more buffer than the +// number of file processors, though typically there are many more buffers than file processors. +// +// Buffers containing no points are never queued, but if a processor flush occurs, they are +// returned to the buffer cache for reuse. Empty buffers can happen because if a cell has had +// its buffer written, it immediately grabs a new buffer even if it hasn't seen a point +// destined for that cell - we don't want to tear down the cell just to recreate it. +// The thinking is that if we've filled a buffer for a cell, there's +// probably at least one more point going to that cell from the source. class Writer { struct WriteData