From f73b10408ca09d84111968bbdd76ed43bed7eff5 Mon Sep 17 00:00:00 2001 From: Jan Date: Sun, 15 Sep 2024 11:31:33 +0200 Subject: [PATCH] Fix release mode OOM crash - enable LAA in release mode, not just debug - optimize memory usage so a big world (GRM) can be loaded in release mode without LAA: - remove big vector copy during loadZen return - split batches with very high vertCount along chunk boundaries - new constant vertCountPerBatch tries to maintain max vert count per batch (can be higher if a single chunk's vertCount goes over vertCountPerBatch) --- ZenRen/ZenRen.vcxproj | 1 + ZenRen/src/assets/ZenLoader.cpp | 24 ++++------ ZenRen/src/assets/ZenLoader.h | 2 +- ZenRen/src/render/Renderer.h | 2 +- ZenRen/src/render/pass/PassWorldLoader.cpp | 52 +++++++++++++++++++--- 5 files changed, 59 insertions(+), 22 deletions(-) diff --git a/ZenRen/ZenRen.vcxproj b/ZenRen/ZenRen.vcxproj index ef845dc..559862b 100644 --- a/ZenRen/ZenRen.vcxproj +++ b/ZenRen/ZenRen.vcxproj @@ -237,6 +237,7 @@ true ntdll.lib;%(AdditionalDependencies) Windows + true true diff --git a/ZenRen/src/assets/ZenLoader.cpp b/ZenRen/src/assets/ZenLoader.cpp index b2e22d1..3dc9f3e 100644 --- a/ZenRen/src/assets/ZenLoader.cpp +++ b/ZenRen/src/assets/ZenLoader.cpp @@ -373,7 +373,7 @@ namespace assets return statics; } - RenderData loadZen(string& zenFilename, VDFS::FileIndex* vdf) + void loadZen(render::RenderData& out, string& zenFilename, VDFS::FileIndex* vdf) { const auto now = std::chrono::high_resolution_clock::now(); @@ -399,8 +399,7 @@ namespace assets vector lightmaps = loadZenLightmaps(worldMesh); - VERT_CHUNKS_BY_MAT worldMeshData; - loadWorldMesh(worldMeshData, parser.getWorldMesh()); + loadWorldMesh(out.worldMesh, parser.getWorldMesh()); LOG(INFO) << "Zen parsed!"; @@ -410,7 +409,7 @@ namespace assets bool isOutdoorLevel = world.bspTree.mode == ZenLoad::zCBspTreeData::TreeMode::Outdoor; vector vobs; if (loadStaticMeshes) { - vobs = loadVobs(world.rootVobs, worldMeshData, lightsStatic, 
isOutdoorLevel); + vobs = loadVobs(world.rootVobs, out.worldMesh, lightsStatic, isOutdoorLevel); LOG(INFO) << "VOBs loaded!"; } else { @@ -421,34 +420,29 @@ namespace assets for (auto& vob : vobs) { auto& visualname = vob.meshName; - bool loaded = loadInstanceMesh(staticMeshData, *vdf, vob); + bool loaded = loadInstanceMesh(out.staticMeshes, *vdf, vob); } if (debugStaticLights) { for (auto& light : lightsStatic) { float scale = light.range / 10.f; - loadPointDebugVisual(staticMeshData, light.pos, { scale, scale, scale }); + loadPointDebugVisual(out.staticMeshes, light.pos, { scale, scale, scale }); } } if (debugStaticLightRays) { for (auto& ray : debugLightToVobRays) { - loadLineDebugVisual(staticMeshData, ray.posStart, ray.posEnd, ray.color); + loadLineDebugVisual(out.staticMeshes, ray.posStart, ray.posEnd, ray.color); } } - VERTEX_DATA_BY_MAT dynamicMeshData; + //VERTEX_DATA_BY_MAT dynamicMeshData; LOG(INFO) << "Meshes loaded!"; const auto duration = std::chrono::high_resolution_clock::now() - now; LOG(INFO) << "Loading finished in: " << duration / std::chrono::milliseconds(1) << " ms."; - return RenderData { - .isOutdoorLevel = isOutdoorLevel, - .worldMesh = worldMeshData, - .staticMeshes = staticMeshData, - .dynamicMeshes = dynamicMeshData, - .worldMeshLightmaps = lightmaps - }; + out.isOutdoorLevel = isOutdoorLevel; + out.worldMeshLightmaps = lightmaps; } } diff --git a/ZenRen/src/assets/ZenLoader.h b/ZenRen/src/assets/ZenLoader.h index ffeea2b..b5af4c8 100644 --- a/ZenRen/src/assets/ZenLoader.h +++ b/ZenRen/src/assets/ZenLoader.h @@ -6,6 +6,6 @@ namespace assets { - render::RenderData loadZen(std::string& zenFilename, ZenLib::VDFS::FileIndex* vdf); + void loadZen(render::RenderData& out, std::string& zenFilename, ZenLib::VDFS::FileIndex* vdf); } diff --git a/ZenRen/src/render/Renderer.h b/ZenRen/src/render/Renderer.h index e968fcc..4f425ad 100644 --- a/ZenRen/src/render/Renderer.h +++ b/ZenRen/src/render/Renderer.h @@ -31,7 +31,7 @@ namespace render }; struct 
RenderData { - bool isOutdoorLevel; + bool isOutdoorLevel = false; VERT_CHUNKS_BY_MAT worldMesh; VERT_CHUNKS_BY_MAT staticMeshes; VERTEX_DATA_BY_MAT dynamicMeshes; diff --git a/ZenRen/src/render/pass/PassWorldLoader.cpp b/ZenRen/src/render/pass/PassWorldLoader.cpp index 447b6b5..52ff208 100644 --- a/ZenRen/src/render/pass/PassWorldLoader.cpp +++ b/ZenRen/src/render/pass/PassWorldLoader.cpp @@ -36,6 +36,7 @@ namespace render::pass::world }; const TEX_INDEX texturesPerBatch = 512; + const uint32_t vertCountPerBatch = (20 * 1024 * 1024) / sizeof(VERTEX_OTHER);// 20 MB divided by biggest buffer element size World world; @@ -164,7 +165,7 @@ namespace render::pass::world target.push_back(batch); } - vector>>> groupByTexId(D3d d3d, const VERT_CHUNKS_BY_MAT& meshData, TEX_INDEX maxTexturesPerBatch) + vector>>> groupByTexId(D3d d3d, const VERT_CHUNKS_BY_MAT& meshData, TEX_INDEX maxTexturesPerBatch) { // load and bucket all materials so textures that are texture-array-compatible are grouped in a single bucket unordered_map> texBuckets; @@ -245,6 +246,38 @@ namespace render::pass::world return result; } + vector>>>>> splitByVertCount( + const vector>>>& batchData, uint32_t maxVertCount) + { + vector>>>>> result; + + vector>>> currentBatch; + uint32_t currentBatchVertCount = 0; + + for (const auto& [chunkIndex, vertDataByMat] : batchData) { + + uint32_t chunkVertCount = 0; + for (const auto& [material, vertData] : vertDataByMat) { + chunkVertCount += vertData.vecPos.size(); + } + + // we never split a single chunk, so if the first chunk of a batch has more than maxVertCount verts we accept that + if (currentBatchVertCount != 0 && (currentBatchVertCount + chunkVertCount) > maxVertCount) { + result.push_back({ currentBatchVertCount, currentBatch }); + currentBatch.clear(); + currentBatchVertCount = 0; + } + currentBatch.push_back({ chunkIndex, vertDataByMat }); + currentBatchVertCount += chunkVertCount; + } + + if (currentBatchVertCount != 0) { + result.push_back({ 
currentBatchVertCount, currentBatch }); + } + + return result; + } + pair flattenIntoBatch(const vector>>>& batchData) { LoadResult result; @@ -302,11 +335,20 @@ namespace render::pass::world for (const auto& [texInfo, batchData] : batchedMeshData) { vector>>> batchDataByChunk = groupAndSortByChunkIndex(batchData); + + // split current batch into multiple smaller batches along chunk boundaries if it contains too many verts to prevent OOM crashes + vector>>>>> batchDataSplit = + splitByVertCount(batchDataByChunk, vertCountPerBatch); - const auto [batchDataFlat, batchLoadResult] = flattenIntoBatch(batchDataByChunk); - result += batchLoadResult; + batchDataByChunk.clear();// frees lots of memory that is no longer needed - loadRenderBatch(d3d, target, texInfo, batchDataFlat); + for (const auto& [vertCount, batchData] : batchDataSplit) { + const auto [batchDataFlat, batchLoadResult] = flattenIntoBatch(batchData); + + assert(vertCount == batchLoadResult.verts); + result += batchLoadResult; + loadRenderBatch(d3d, target, texInfo, batchDataFlat); + } } return result; @@ -400,7 +442,7 @@ namespace render::pass::world } else if (optionalVdfIndex.has_value()) { if (::util::endsWith(level, ".zen")) { - data = assets::loadZen(level, optionalVdfIndex.value()); + assets::loadZen(data, level, optionalVdfIndex.value()); levelDataFound = true; } else {