Skip to content

Commit

Permalink
prepare rendering loop for multithreading
Browse files Browse the repository at this point in the history
The goal of this PR is to get closer to being able to run all
FrameGraph passes in parallel. To achieve this we need all data
consumed by the "execute" closure of the FrameGraph passes to be
immutable or thread-safe. We also need the passes to never use the
Engine's global `DriverApi&` object.

Specifically in this PR, we make as many objects `const` as possible
without major changes, and we pass the `DriverApi&` object as a parameter
to render passes.

This work is far from complete, so we also annotate with FIXMEs
all the places we can identify as problematic (there are probably
others).

The main remaining issues are:
- main allocator is not thread-safe
- some places take a non-const View, Scene or Engine
- lazy allocation of materials and material instance usages are not
  thread-safe.

This PR shouldn't change any behavior.
  • Loading branch information
pixelflinger committed Oct 28, 2024
1 parent f2f9c54 commit f4d87ad
Show file tree
Hide file tree
Showing 15 changed files with 115 additions and 112 deletions.
15 changes: 9 additions & 6 deletions filament/src/PostProcessManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,16 +519,17 @@ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph
Variant structureVariant(Variant::DEPTH_VARIANT);
structureVariant.setPicking(config.picking);

auto out = resources.getRenderPassInfo();

bindPostProcessDescriptorSet(driver);

passBuilder.renderFlags(structureRenderFlags);
passBuilder.variant(structureVariant);
passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::SSAO);

RenderPass const pass{ passBuilder.build(mEngine) };
RenderPass::execute(pass, mEngine, resources.getPassName(), out.target, out.params);
RenderPass const pass{ passBuilder.build(mEngine, driver) };
auto out = resources.getRenderPassInfo();
driver.beginRenderPass(out.target, out.params);
pass.getExecutor().execute(mEngine, driver);
driver.endRenderPass();
});

auto depth = structurePass->depth;
Expand Down Expand Up @@ -678,8 +679,10 @@ FrameGraphId<FrameGraphTexture> PostProcessManager::ssr(FrameGraph& fg,
// generate all our drawing commands, except blended objects.
passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::SCREEN_SPACE_REFLECTIONS);

RenderPass const pass{ passBuilder.build(mEngine) };
RenderPass::execute(pass, mEngine, resources.getPassName(), out.target, out.params);
RenderPass const pass{ passBuilder.build(mEngine, driver) };
driver.beginRenderPass(out.target, out.params);
pass.getExecutor().execute(mEngine, driver);
driver.endRenderPass();
});

return ssrPass->reflections;
Expand Down
61 changes: 26 additions & 35 deletions filament/src/RenderPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,25 +68,24 @@ namespace filament {
using namespace backend;

RenderPassBuilder& RenderPassBuilder::customCommand(
FEngine& engine,
uint8_t channel,
RenderPass::Pass pass,
RenderPass::CustomCommand custom,
uint32_t order,
RenderPass::Executor::CustomCommandFn const& command) {
if (!mCustomCommands.has_value()) {
// construct the vector the first time
mCustomCommands.emplace(engine.getPerRenderPassArena());
mCustomCommands.emplace();
}
mCustomCommands->emplace_back(channel, pass, custom, order, command);
return *this;
}

RenderPass RenderPassBuilder::build(FEngine& engine) {
RenderPass RenderPassBuilder::build(FEngine const& engine, backend::DriverApi& driver) const {
assert_invariant(mRenderableSoa);
assert_invariant(mScissorViewport.width <= std::numeric_limits<int32_t>::max());
assert_invariant(mScissorViewport.height <= std::numeric_limits<int32_t>::max());
return RenderPass{ engine, *this };
return RenderPass{ engine, driver, *this };
}

// ------------------------------------------------------------------------------------------------
Expand All @@ -107,11 +106,11 @@ void RenderPass::DescriptorSetHandleDeleter::operator()(

// ------------------------------------------------------------------------------------------------

RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexcept
RenderPass::RenderPass(FEngine const& engine, backend::DriverApi& driver,
RenderPassBuilder const& builder) noexcept
: mRenderableSoa(*builder.mRenderableSoa),
mColorPassDescriptorSet(builder.mColorPassDescriptorSet),
mScissorViewport(builder.mScissorViewport),
mCustomCommands(engine.getPerRenderPassArena()) {
mScissorViewport(builder.mScissorViewport) {

// compute the number of commands we need
updateSummedPrimitiveCounts(
Expand All @@ -127,10 +126,12 @@ RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexce
uint32_t const customCommandCount =
builder.mCustomCommands.has_value() ? builder.mCustomCommands->size() : 0;

// FIXME: builder.mArena must be thread safe eventually
Command* const commandBegin = builder.mArena.alloc<Command>(commandCount + customCommandCount);
Command* commandEnd = commandBegin + (commandCount + customCommandCount);
assert_invariant(commandBegin);

// FIXME: builder.mArena must be thread safe eventually
if (UTILS_UNLIKELY(builder.mArena.getAllocator().isHeapAllocation(commandBegin))) {
static bool sLogOnce = true;
if (UTILS_UNLIKELY(sLogOnce)) {
Expand All @@ -150,6 +151,7 @@ RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexce
builder.mCameraForwardVector);

if (builder.mCustomCommands.has_value()) {
mCustomCommands.reserve(customCommandCount);
Command* p = commandBegin + commandCount;
for (auto const& [channel, passId, command, order, fn]: builder.mCustomCommands.value()) {
appendCustomCommand(p++, channel, passId, command, order, fn);
Expand All @@ -166,7 +168,9 @@ RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexce
stereoscopicEyeCount *= engine.getConfig().stereoscopicEyeCount;
}
commandEnd = resize(builder.mArena,
instanceify(engine, commandBegin, commandEnd, stereoscopicEyeCount));
instanceify(driver,
engine.getPerRenderableDescriptorSetLayout().getHandle(),
commandBegin, commandEnd, stereoscopicEyeCount));
}

// these are `const` from this point on...
Expand All @@ -182,7 +186,7 @@ RenderPass::Command* RenderPass::resize(Arena& arena, Command* const last) noexc
return last;
}

void RenderPass::appendCommands(FEngine& engine,
void RenderPass::appendCommands(FEngine const& engine,
Slice<Command> commands,
utils::Range<uint32_t> const vr,
CommandTypeFlags const commandTypeFlags,
Expand Down Expand Up @@ -283,17 +287,8 @@ RenderPass::Command* RenderPass::sortCommands(
return last;
}

void RenderPass::execute(RenderPass const& pass,
FEngine& engine, const char* name,
backend::Handle<backend::HwRenderTarget> renderTarget,
backend::RenderPassParams params) noexcept {
DriverApi& driver = engine.getDriverApi();
driver.beginRenderPass(renderTarget, params);
pass.getExecutor().execute(engine, name);
driver.endRenderPass();
}

RenderPass::Command* RenderPass::instanceify(FEngine& engine,
RenderPass::Command* RenderPass::instanceify(backend::DriverApi& driver,
DescriptorSetLayoutHandle perRenderableDescriptorSetLayoutHandle,
Command* curr, Command* const last,
int32_t eyeCount) const noexcept {
SYSTRACE_NAME("instanceify");
Expand Down Expand Up @@ -352,8 +347,6 @@ RenderPass::Command* RenderPass::instanceify(FEngine& engine,
if (UTILS_UNLIKELY(instanceCount > 1)) {
drawCallsSavedCount += instanceCount - 1;

auto& driver = engine.getDriverApi();

// allocate our staging buffer only if needed
if (UTILS_UNLIKELY(!stagingBuffer)) {
// Create a temporary UBO for holding the per-renderable data of each primitive,
Expand Down Expand Up @@ -382,8 +375,7 @@ RenderPass::Command* RenderPass::instanceify(FEngine& engine,
// in this case we would need to preserve the default descriptor-set content).
// This has the same lifetime as the UBO (see above).
mInstancedDescriptorSetHandle = DescriptorSetSharedHandle{
driver.createDescriptorSet(
engine.getPerRenderableDescriptorSetLayout().getHandle()),
driver.createDescriptorSet(perRenderableDescriptorSetLayoutHandle),
driver
};
driver.updateDescriptorSetBuffer(mInstancedDescriptorSetHandle,
Expand Down Expand Up @@ -418,10 +410,7 @@ RenderPass::Command* RenderPass::instanceify(FEngine& engine,
if (UTILS_UNLIKELY(firstSentinel)) {
//slog.d << "auto-instancing, saving " << drawCallsSavedCount << " draw calls, out of "
// << count << io::endl;

// we have instanced primitives
DriverApi& driver = engine.getDriverApi();

// copy our instanced ubo data
driver.updateBufferObjectUnsynchronized(mInstancedUboHandle, {
stagingBuffer, sizeof(PerRenderableData) * instancedPrimitiveOffset,
Expand Down Expand Up @@ -866,8 +855,8 @@ void RenderPass::Executor::overrideScissor(backend::Viewport const& scissor) noe
mScissor = scissor;
}

void RenderPass::Executor::execute(FEngine& engine, const char*) const noexcept {
execute(engine, mCommands.begin(), mCommands.end());
void RenderPass::Executor::execute(FEngine const& engine, backend::DriverApi& driver) const noexcept {
execute(engine, driver, mCommands.begin(), mCommands.end());
}

UTILS_NOINLINE // no need to be inlined
Expand All @@ -893,14 +882,12 @@ backend::Viewport RenderPass::Executor::applyScissorViewport(
}

UTILS_NOINLINE // no need to be inlined
void RenderPass::Executor::execute(FEngine& engine,
const Command* first, const Command* last) const noexcept {
void RenderPass::Executor::execute(FEngine const& engine, backend::DriverApi& driver,
Command const* first, Command const* last) const noexcept {

SYSTRACE_CALL();
SYSTRACE_CONTEXT();

DriverApi& driver = engine.getDriverApi();

size_t const capacity = engine.getMinCommandBufferSize();
CircularBuffer const& circularBuffer = driver.getCircularBuffer();

Expand Down Expand Up @@ -965,7 +952,9 @@ void RenderPass::Executor::execute(FEngine& engine,
// check we have enough capacity to write these commandCount commands, if not,
// request a new CircularBuffer allocation of `capacity` bytes.
if (UTILS_UNLIKELY(circularBuffer.getUsed() > capacity - commandSizeInBytes)) {
engine.flush(); // TODO: we should use a "fast" flush if possible
// FIXME: eventually we can't flush here because this will be a secondary
// command buffer. We will need another solution for overflows.
const_cast<FEngine&>(engine).flush();
}

first--;
Expand Down Expand Up @@ -1093,7 +1082,9 @@ void RenderPass::Executor::execute(FEngine& engine,
// If the remaining space is less than half the capacity, we flush right away to
// allow some headroom for commands that might come later.
if (UTILS_UNLIKELY(circularBuffer.getUsed() > capacity / 2)) {
engine.flush();
// FIXME: eventually we can't flush here because this will be a secondary
// command buffer.
const_cast<FEngine&>(engine).flush();
}
}
}
Expand Down
33 changes: 14 additions & 19 deletions filament/src/RenderPass.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#include <utils/Allocator.h>
#include <utils/BitmaskEnum.h>
#include <utils/FixedCapacityVector.h>
#include <utils/Range.h>
#include <utils/Slice.h>
#include <utils/architecture.h>
Expand Down Expand Up @@ -309,13 +310,6 @@ class RenderPass {
Command const* end() const noexcept { return mCommandEnd; }
bool empty() const noexcept { return begin() == end(); }

// Helper to execute all the commands generated by this RenderPass
static void execute(RenderPass const& pass,
FEngine& engine, const char* name,
backend::Handle<backend::HwRenderTarget> renderTarget,
backend::RenderPassParams params) noexcept;


class BufferObjectHandleDeleter {
std::reference_wrapper<backend::DriverApi> driver;
public:
Expand Down Expand Up @@ -364,7 +358,8 @@ class RenderPass {

Executor(RenderPass const& pass, Command const* b, Command const* e) noexcept;

void execute(FEngine& engine, const Command* first, const Command* last) const noexcept;
void execute(FEngine const& engine, backend::DriverApi& driver,
Command const* first, Command const* last) const noexcept;

static backend::Viewport applyScissorViewport(
backend::Viewport const& scissorViewport,
Expand All @@ -389,7 +384,7 @@ class RenderPass {

void overrideScissor(backend::Viewport const& scissor) noexcept;

void execute(FEngine& engine, const char* name) const noexcept;
void execute(FEngine const& engine, backend::DriverApi& driver) const noexcept;
};

// returns a new executor for this pass
Expand All @@ -404,11 +399,12 @@ class RenderPass {
private:
friend class FRenderer;
friend class RenderPassBuilder;
RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexcept;
RenderPass(FEngine const& engine, backend::DriverApi& driver,
RenderPassBuilder const& builder) noexcept;

// This is the main function of this class, this appends commands to the pass using
// the current camera, geometry and flags set. This can be called multiple times if needed.
void appendCommands(FEngine& engine,
void appendCommands(FEngine const& engine,
utils::Slice<Command> commands,
utils::Range<uint32_t> visibleRenderables,
CommandTypeFlags commandTypeFlags,
Expand All @@ -430,7 +426,8 @@ class RenderPass {
Command* begin, Command* end) noexcept;

// instanceify commands then trims sentinels
RenderPass::Command* instanceify(FEngine& engine,
RenderPass::Command* instanceify(backend::DriverApi& driver,
backend::DescriptorSetLayoutHandle perRenderableDescriptorSetLayoutHandle,
Command* begin, Command* end,
int32_t eyeCount) const noexcept;

Expand Down Expand Up @@ -470,8 +467,7 @@ class RenderPass {
mutable BufferObjectSharedHandle mInstancedUboHandle; // ubo for instanced primitives
mutable DescriptorSetSharedHandle mInstancedDescriptorSetHandle; // a descriptor-set to hold the ubo
// a vector for our custom commands
using CustomCommandVector = std::vector<Executor::CustomCommandFn,
utils::STLAllocator<Executor::CustomCommandFn, LinearAllocatorArena>>;
using CustomCommandVector = utils::FixedCapacityVector<Executor::CustomCommandFn>;
mutable CustomCommandVector mCustomCommands;
};

Expand All @@ -497,11 +493,10 @@ class RenderPassBuilder {
uint32_t,
RenderPass::Executor::CustomCommandFn>;

using CustomCommandContainer = std::vector<CustomCommandRecord,
utils::STLAllocator<CustomCommandRecord, LinearAllocatorArena>>;
using CustomCommandContainer = std::vector<CustomCommandRecord>;

// we make this optional because it's not used often, and we don't want to have
// to construct it by default.
// to construct it by default. For the same reason we use a std::vector<>
std::optional<CustomCommandContainer> mCustomCommands;

public:
Expand Down Expand Up @@ -569,14 +564,14 @@ class RenderPassBuilder {
return *this;
}

RenderPassBuilder& customCommand(FEngine& engine,
RenderPassBuilder& customCommand(
uint8_t channel,
RenderPass::Pass pass,
RenderPass::CustomCommand custom,
uint32_t order,
const RenderPass::Executor::CustomCommandFn& command);

RenderPass build(FEngine& engine);
RenderPass build(FEngine const& engine, backend::DriverApi& driver) const;
};


Expand Down
2 changes: 1 addition & 1 deletion filament/src/RendererUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ RendererUtils::ColorPassOutput RendererUtils::colorPass(
}

driver.beginRenderPass(out.target, out.params);
passExecutor.execute(engine, resources.getPassName());
passExecutor.execute(engine, driver);
driver.endRenderPass();

// color pass is typically heavy, and we don't have much CPU work left after
Expand Down
6 changes: 3 additions & 3 deletions filament/src/ShadowMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1361,8 +1361,8 @@ math::float4 ShadowMap::getClampToEdgeCoords(ShadowMapInfo const& shadowMapInfo)
// ------------------------------------------------------------------------------------------------

void ShadowMap::prepareCamera(Transaction const& transaction,
FEngine& engine, const CameraInfo& cameraInfo) noexcept {
ShadowMapDescriptorSet::prepareCamera(transaction, engine, cameraInfo);
backend::DriverApi& driver, const CameraInfo& cameraInfo) noexcept {
ShadowMapDescriptorSet::prepareCamera(transaction, driver, cameraInfo);
ShadowMapDescriptorSet::prepareLodBias(transaction, 0.0f);
}

Expand All @@ -1372,7 +1372,7 @@ void ShadowMap::prepareViewport(Transaction const& transaction,
}

void ShadowMap::prepareTime(Transaction const& transaction,
FEngine& engine, math::float4 const& userTime) noexcept {
FEngine const& engine, math::float4 const& userTime) noexcept {
ShadowMapDescriptorSet::prepareTime(transaction, engine, userTime);
}

Expand Down
4 changes: 2 additions & 2 deletions filament/src/ShadowMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,11 @@ class ShadowMap {
using Transaction = ShadowMapDescriptorSet::Transaction;

static void prepareCamera(Transaction const& transaction,
FEngine& engine, const CameraInfo& cameraInfo) noexcept;
backend::DriverApi& driver, const CameraInfo& cameraInfo) noexcept;
static void prepareViewport(Transaction const& transaction,
backend::Viewport const& viewport) noexcept;
static void prepareTime(Transaction const& transaction,
FEngine& engine, math::float4 const& userTime) noexcept;
FEngine const& engine, math::float4 const& userTime) noexcept;
static void prepareShadowMapping(Transaction const& transaction,
bool highPrecision) noexcept;
static ShadowMapDescriptorSet::Transaction open(backend::DriverApi& driver) noexcept;
Expand Down
Loading

0 comments on commit f4d87ad

Please sign in to comment.