diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 0a267bf75ab..6f2422b1b6a 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -206,8 +206,7 @@ jobs:
           TAG: ${{ steps.git_ref.outputs.tag }}
         run: |
           build\windows\build-github.bat release
-          cd ..\..
-          move out\filament-windows.tgz out\filament-%TAG%-windows.tgz
+          move out\filament-windows.tgz out\filament-%TAG%-windows.tgz
         shell: cmd
       - uses: actions/github-script@v6
         env:
diff --git a/README.md b/README.md
index 1f06eefc7b8..b8b5a10646b 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ repositories {
 }
 
 dependencies {
-    implementation 'com.google.android.filament:filament-android:1.43.0'
+    implementation 'com.google.android.filament:filament-android:1.43.1'
 }
 ```
 
@@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`:
 iOS projects can use CocoaPods to install the latest release:
 
 ```shell
-pod 'Filament', '~> 1.43.0'
+pod 'Filament', '~> 1.43.1'
 ```
 
 ### Snapshots
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 16bc2a619f1..14bb3bd3e3b 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -7,6 +7,8 @@ A new header is inserted each time a *tag* is created.
 Instead, if you are authoring a PR for the main branch, add your release note to
 [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md).
 
+## v1.43.1
+
 ## v1.43.0
 
 - gltfio: Fix possible change of scale sign when decomposing transform matrix for animation
diff --git a/android/filament-android/src/main/java/com/google/android/filament/View.java b/android/filament-android/src/main/java/com/google/android/filament/View.java
index 5d37d0ae804..f1338796eb1 100644
--- a/android/filament-android/src/main/java/com/google/android/filament/View.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/View.java
@@ -1418,6 +1418,17 @@ public enum BlendMode {
          * limit highlights to this value before bloom [10, +inf]
          */
         public float highlight = 1000.0f;
+        /**
+         * Bloom quality level.
+         * LOW (default): use a more optimized down-sampling filter, however there can be artifacts
+         *      with dynamic resolution, this can be alleviated by using the homogeneous mode.
+         * MEDIUM: Good balance between quality and performance.
+         * HIGH: In this mode the bloom resolution is automatically increased to avoid artifacts.
+         *      This mode can be significantly slower on mobile, especially at high resolution.
+         *      This mode greatly improves the anamorphic bloom.
+         */
+        @NonNull
+        public QualityLevel quality = QualityLevel.LOW;
         /**
          * enable screen-space lens flare
          */
diff --git a/android/gradle.properties b/android/gradle.properties
index 026f77ea0b8..5f2a461dff3 100644
--- a/android/gradle.properties
+++ b/android/gradle.properties
@@ -1,5 +1,5 @@
 GROUP=com.google.android.filament
-VERSION_NAME=1.43.0
+VERSION_NAME=1.43.1
 
 POM_DESCRIPTION=Real-time physically based rendering engine for Android.
 
diff --git a/filament/CMakeLists.txt b/filament/CMakeLists.txt
index ea2543eadc5..e417eb06dc8 100644
--- a/filament/CMakeLists.txt
+++ b/filament/CMakeLists.txt
@@ -230,6 +230,8 @@ set(MATERIAL_SRCS
         src/materials/flare/flare.mat
         src/materials/blitLow.mat
         src/materials/bloom/bloomDownsample.mat
+        src/materials/bloom/bloomDownsample2x.mat
+        src/materials/bloom/bloomDownsample9.mat
         src/materials/bloom/bloomUpsample.mat
         src/materials/ssao/bilateralBlur.mat
         src/materials/ssao/bilateralBlurBentNormals.mat
diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm
index 9fbba163bcd..d14d9fb900c 100644
--- a/filament/backend/src/metal/MetalDriver.mm
+++ b/filament/backend/src/metal/MetalDriver.mm
@@ -911,19 +911,15 @@
     // textures, which is required by Metal.
     for (size_t s = 0; s < data.size / sizeof(SamplerDescriptor); s++) {
         if (!samplers[s].t) {
-            // Assign a default texture / sampler to empty slots.
+            // Assign a default sampler to empty slots.
             // Metal requires all samplers referenced in shaders to be bound.
-            id<MTLTexture> empty = getOrCreateEmptyTexture(mContext);
-            sb->setFinalizedTexture(s, empty);
-
+            // An empty texture will be assigned inside finalizeSamplerGroup.
             id<MTLSamplerState> sampler = mContext->samplerStateCache.getOrCreateState({});
             sb->setFinalizedSampler(s, sampler);
-
             continue;
         }
 
-        // First, bind the sampler state. We always know the full sampler state at
-        // updateSamplerGroup time.
+        // Bind the sampler state. We always know the full sampler state at updateSamplerGroup time.
         SamplerState samplerState {
                 .samplerParams = samplers[s].s,
         };
@@ -1379,28 +1375,38 @@
     }
 #endif
 
+    utils::FixedCapacityVector<id<MTLTexture>> newTextures(samplerGroup->size, nil);
     for (size_t binding = 0; binding < samplerGroup->size; binding++) {
-        auto [th, t] = samplerGroup->getFinalizedTexture(binding);
-
-        // This may be an external texture, in which case we can't cache the id<MTLTexture>, we
-        // need to refetch it in case the external image has changed.
-        bool isExternalImage = false;
-        if (th) {
-            auto* texture = handle_cast<MetalTexture>(th);
-            isExternalImage = texture->target == SamplerType::SAMPLER_EXTERNAL;
-        }
+        auto [th, _] = samplerGroup->getFinalizedTexture(binding);
 
-        // If t is non-nil, then we've already finalized this texture.
-        if (t && !isExternalImage) {
+        if (!th) {
+            // Bind an empty texture.
+            newTextures[binding] = getOrCreateEmptyTexture(mContext);
             continue;
         }
 
-        // It's possible that some texture handles are null, but we should have already handled
-        // these inside updateSamplerGroup by binding an "empty" texture.
         assert_invariant(th);
         auto* texture = handle_cast<MetalTexture>(th);
 
-        // Determine if this SamplerGroup needs mutation.
+        // External images
+        if (texture->target == SamplerType::SAMPLER_EXTERNAL) {
+            if (texture->externalImage.isValid()) {
+                id<MTLTexture> mtlTexture = texture->externalImage.getMetalTextureForDraw();
+                assert_invariant(mtlTexture);
+                newTextures[binding] = mtlTexture;
+            } else {
+                // Bind an empty texture.
+                newTextures[binding] = getOrCreateEmptyTexture(mContext);
+            }
+            continue;
+        }
+
+        newTextures[binding] = texture->getMtlTextureForRead();
+    }
+
+    if (!std::equal(newTextures.begin(), newTextures.end(), samplerGroup->textures.begin())) {
+        // One or more of the id<MTLTexture>s has changed.
+        // First, determine if this SamplerGroup needs mutation.
         // We can't just simply mutate the SamplerGroup, since it could currently be in use by the
         // GPU from a prior render pass.
         // If the SamplerGroup does need mutation, then there's two cases:
@@ -1408,30 +1414,18 @@
         //    draw call). We're free to mutate it.
         // 2. The SamplerGroup is finalized. We must call mutate(), which will create a new argument
         //    buffer that we can then mutate freely.
-        // TODO: don't just always call mutate, check to see if the texture is actually different.
 
         if (samplerGroup->isFinalized()) {
             samplerGroup->mutate(cmdBuffer);
         }
 
-        // External images
-        if (texture->target == SamplerType::SAMPLER_EXTERNAL) {
-            if (texture->externalImage.isValid()) {
-                id<MTLTexture> mtlTexture = texture->externalImage.getMetalTextureForDraw();
-                assert_invariant(mtlTexture);
-                samplerGroup->setFinalizedTexture(binding, mtlTexture);
-            } else {
-                // Bind an empty texture.
-                samplerGroup->setFinalizedTexture(binding, getOrCreateEmptyTexture(mContext));
-            }
-            continue;
+        for (size_t binding = 0; binding < samplerGroup->size; binding++) {
+            samplerGroup->setFinalizedTexture(binding, newTextures[binding]);
         }
 
-        samplerGroup->setFinalizedTexture(binding, texture->getMtlTextureForRead());
+        samplerGroup->finalize();
     }
 
-    samplerGroup->finalize();
-
     // At this point, all the id<MTLTextures> should be set to valid textures. Some of them will be
     // the "empty" texture. Per Apple documentation, the useResource method must be called once per
     // render pass.
diff --git a/filament/backend/src/vulkan/VulkanBlitter.cpp b/filament/backend/src/vulkan/VulkanBlitter.cpp
index e9e4062ac6e..a880fd6d009 100644
--- a/filament/backend/src/vulkan/VulkanBlitter.cpp
+++ b/filament/backend/src/vulkan/VulkanBlitter.cpp
@@ -28,8 +28,6 @@
 
 #include "generated/vkshaders/vkshaders.h"
 
-#define FILAMENT_VULKAN_CHECK_BLIT_FORMAT 0
-
 using namespace bluevk;
 using namespace utils;
 
@@ -69,7 +67,7 @@ inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect,
             .layerCount = 1,
     };
 
-    if constexpr (FILAMENT_VULKAN_VERBOSE) {
+    if constexpr (FVK_ENABLED_BOOL(FVK_DEBUG_BLITTER)) {
         utils::slog.d << "Fast blit from=" << src.texture->getVkImage() << ",level=" << (int) src.level
                       << "layout=" << src.getLayout()
                       << " to=" << dst.texture->getVkImage() << ",level=" << (int) dst.level
@@ -132,7 +130,7 @@ void VulkanBlitter::blitColor(BlitArgs args) {
     const VulkanAttachment dst = args.dstTarget->getColor(0);
     const VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT;
 
-#if FILAMENT_VULKAN_CHECK_BLIT_FORMAT
+#if FVK_ENABLED(FVK_DEBUG_BLIT_FORMAT)
     VkPhysicalDevice const gpu = mPhysicalDevice;
     VkFormatProperties info;
     vkGetPhysicalDeviceFormatProperties(gpu, src.getFormat(), &info);
@@ -160,7 +158,7 @@ void VulkanBlitter::blitDepth(BlitArgs args) {
     const VulkanAttachment dst = args.dstTarget->getDepth();
     const VkImageAspectFlags aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
 
-#if FILAMENT_VULKAN_CHECK_BLIT_FORMAT
+#if FVK_ENABLED(FVK_DEBUG_BLIT_FORMAT)
     VkPhysicalDevice const gpu = mPhysicalDevice;
     VkFormatProperties info;
     vkGetPhysicalDeviceFormatProperties(gpu, src.getFormat(), &info);
@@ -241,7 +239,7 @@ void VulkanBlitter::lazyInit() noexcept {
     mDepthResolveProgram->samplerGroupInfo[0].samplers.reserve(1);
     mDepthResolveProgram->samplerGroupInfo[0].samplers.resize(1);
 
-    if constexpr (FILAMENT_VULKAN_VERBOSE) {
+    if constexpr (FVK_ENABLED_BOOL(FVK_DEBUG_BLITTER)) {
         utils::slog.d << "Created Shader Module for VulkanBlitter "
                     << "shaders = (" << vertexShader << ", " << fragmentShader << ")"
                     << utils::io::endl;
diff --git a/filament/backend/src/vulkan/VulkanCommands.cpp b/filament/backend/src/vulkan/VulkanCommands.cpp
index 45e55ac3f54..39674fc0b44 100644
--- a/filament/backend/src/vulkan/VulkanCommands.cpp
+++ b/filament/backend/src/vulkan/VulkanCommands.cpp
@@ -33,7 +33,9 @@ using namespace utils;
 
 namespace filament::backend {
 
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
 using Timestamp = VulkanGroupMarkers::Timestamp;
+#endif
 
 VulkanCmdFence::VulkanCmdFence(VkFence ifence)
     : fence(ifence) {
@@ -63,20 +65,21 @@ CommandBufferObserver::~CommandBufferObserver() {}
 
 static VkCommandPool createPool(VkDevice device, uint32_t queueFamilyIndex) {
     VkCommandPoolCreateInfo createInfo = {
-    .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
-    .flags =
-        VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT | VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
-    .queueFamilyIndex = queueFamilyIndex,
+            .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+            .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT
+                     | VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
+            .queueFamilyIndex = queueFamilyIndex,
     };
     VkCommandPool pool;
     vkCreateCommandPool(device, &createInfo, VKALLOC, &pool);
     return pool;
-
 }
 
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
 void VulkanGroupMarkers::push(std::string const& marker, Timestamp start) noexcept {
     mMarkers.push_back(marker);
-#if FILAMENT_VULKAN_VERBOSE
+
+#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
     mTimestamps.push_back(start.time_since_epoch().count() > 0.0
                                   ? start
                                   : std::chrono::high_resolution_clock::now());
@@ -87,7 +90,7 @@ std::pair<std::string, Timestamp> VulkanGroupMarkers::pop() noexcept {
     auto const marker = mMarkers.back();
     mMarkers.pop_back();
 
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
     auto const timestamp = mTimestamps.back();
     mTimestamps.pop_back();
     return std::make_pair(marker, timestamp);
@@ -100,7 +103,7 @@ std::pair<std::string, Timestamp> VulkanGroupMarkers::pop_bottom() noexcept {
     auto const marker = mMarkers.front();
     mMarkers.pop_front();
 
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
     auto const timestamp = mTimestamps.front();
     mTimestamps.pop_front();
     return std::make_pair(marker, timestamp);
@@ -112,7 +115,7 @@ std::pair<std::string, Timestamp> VulkanGroupMarkers::pop_bottom() noexcept {
 std::pair<std::string, Timestamp> VulkanGroupMarkers::top() const {
     assert_invariant(!empty());
     auto const marker = mMarkers.back();
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
     auto const topTimestamp = mTimestamps.front();
     return std::make_pair(marker, topTimestamp);
 #else
@@ -124,6 +127,8 @@ bool VulkanGroupMarkers::empty() const noexcept {
     return mMarkers.empty();
 }
 
+#endif // FVK_DEBUG_GROUP_MARKERS
+
 VulkanCommands::VulkanCommands(VkDevice device, VkQueue queue, uint32_t queueFamilyIndex,
         VulkanContext* context, VulkanResourceAllocator* allocator)
     : mDevice(device),
@@ -144,6 +149,10 @@ VulkanCommands::VulkanCommands(VkDevice device, VkQueue queue, uint32_t queueFam
     for (size_t i = 0; i < CAPACITY; ++i) {
         mStorage[i] = std::make_unique<VulkanCommandBuffer>(allocator, mDevice, mPool);
     }
+
+#if !FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
+    (void) mContext;
+#endif
 }
 
 VulkanCommands::~VulkanCommands() {
@@ -167,7 +176,7 @@ VulkanCommandBuffer& VulkanCommands::get() {
     // It occurs only when Filament invokes commit() or endFrame() a large number of times without
     // presenting the swap chain or waiting on a fence.
     while (mAvailableBufferCount == 0) {
-#if VK_REPORT_STALLS
+#if FVK_ENABLED(FVK_DEBUG_COMMAND_BUFFER)
         slog.i << "VulkanCommands has stalled. "
                << "If this occurs frequently, consider increasing VK_MAX_COMMAND_BUFFERS."
                << io::endl;
@@ -207,12 +216,14 @@ VulkanCommandBuffer& VulkanCommands::get() {
         mObserver->onCommandBuffer(*currentbuf);
     }
 
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
     // We push the current markers onto a temporary stack. This must be placed after currentbuf is
     // set to the new command buffer since pushGroupMarker also calls get().
     while (mCarriedOverMarkers && !mCarriedOverMarkers->empty()) {
         auto [marker, time] = mCarriedOverMarkers->pop();
         pushGroupMarker(marker.c_str(), time);
     }
+#endif
     return *currentbuf;
 }
 
@@ -224,6 +235,7 @@ bool VulkanCommands::flush() {
 
     // Before actually submitting, we need to pop any leftover group markers.
     // Note that this needs to occur before vkEndCommandBuffer.
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
     while (mGroupMarkers && !mGroupMarkers->empty()) {
         if (!mCarriedOverMarkers) {
             mCarriedOverMarkers = std::make_unique<VulkanGroupMarkers>();
@@ -233,7 +245,7 @@ bool VulkanCommands::flush() {
         // We still need to call through to vkCmdEndDebugUtilsLabelEXT.
         popGroupMarker();
     }
-
+#endif
 
     int8_t const index = mCurrentCommandBufferIndex;
     VulkanCommandBuffer const* currentbuf = mStorage[index].get();
@@ -283,7 +295,7 @@ bool VulkanCommands::flush() {
         submitInfo.pWaitSemaphores = VK_NULL_HANDLE;
     }
 
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_COMMAND_BUFFER)
     slog.i << "Submitting cmdbuffer=" << cmdbuffer
            << " wait=(" << signals[0] << ", " << signals[1] << ") "
            << " signal=" << renderingFinished
@@ -298,7 +310,7 @@ bool VulkanCommands::flush() {
     cmdfence->condition.notify_all();
     lock.unlock();
 
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_COMMAND_BUFFER)
     if (result != VK_SUCCESS) {
         utils::slog.d << "Failed command buffer submission result: " << result << utils::io::endl;
     }
@@ -314,8 +326,8 @@ bool VulkanCommands::flush() {
 VkSemaphore VulkanCommands::acquireFinishedSignal() {
     VkSemaphore semaphore = mSubmissionSignal;
     mSubmissionSignal = VK_NULL_HANDLE;
-#if FILAMENT_VULKAN_VERBOSE
-     slog.i << "Acquiring " << semaphore << " (e.g. for vkQueuePresentKHR)" << io::endl;
+#if FVK_ENABLED(FVK_DEBUG_COMMAND_BUFFER)
+    slog.i << "Acquiring " << semaphore << " (e.g. for vkQueuePresentKHR)" << io::endl;
 #endif
     return semaphore;
 }
@@ -323,7 +335,7 @@ VkSemaphore VulkanCommands::acquireFinishedSignal() {
 void VulkanCommands::injectDependency(VkSemaphore next) {
     assert_invariant(mInjectedSignal == VK_NULL_HANDLE);
     mInjectedSignal = next;
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_COMMAND_BUFFER)
     slog.i << "Injecting " << next << " (e.g. due to vkAcquireNextImageKHR)" << io::endl;
 #endif
 }
@@ -345,6 +357,9 @@ void VulkanCommands::wait() {
 }
 
 void VulkanCommands::gc() {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("commands::gc");
+
     VkFence fences[CAPACITY];
     size_t count = 0;
 
@@ -366,6 +381,7 @@ void VulkanCommands::gc() {
     if (count > 0) {
         vkResetFences(mDevice, count, fences);
     }
+    FVK_SYSTRACE_END();
 }
 
 void VulkanCommands::updateFences() {
@@ -382,8 +398,10 @@ void VulkanCommands::updateFences() {
     }
 }
 
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
+
 void VulkanCommands::pushGroupMarker(char const* str, VulkanGroupMarkers::Timestamp timestamp) {
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
     // If the timestamp is not 0, then we are carrying over a marker across buffer submits.
     // If it is 0, then this is a normal marker push and we should just print debug line as usual.
     if (timestamp.time_since_epoch().count() == 0.0) {
@@ -421,7 +439,7 @@ void VulkanCommands::popGroupMarker() {
 
     if (!mGroupMarkers->empty()) {
         VkCommandBuffer const cmdbuffer = get().buffer();
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
         auto const [marker, startTime] = mGroupMarkers->pop();
         auto const endTime = std::chrono::high_resolution_clock::now();
         std::chrono::duration<double> diff = endTime - startTime;
@@ -430,7 +448,6 @@ void VulkanCommands::popGroupMarker() {
 #else
         mGroupMarkers->pop();
 #endif
-
         if (mContext->isDebugUtilsSupported()) {
             vkCmdEndDebugUtilsLabelEXT(cmdbuffer);
         } else if (mContext->isDebugMarkersSupported()) {
@@ -469,8 +486,9 @@ std::string VulkanCommands::getTopGroupMarker() const {
     }
     return std::get<0>(mGroupMarkers->top());
 }
+#endif // FVK_DEBUG_GROUP_MARKERS
 
-}// namespace filament::backend
+} // namespace filament::backend
 
 #if defined(_MSC_VER)
 #pragma warning( pop )
diff --git a/filament/backend/src/vulkan/VulkanCommands.h b/filament/backend/src/vulkan/VulkanCommands.h
index 15605cfef84..660e3e106c5 100644
--- a/filament/backend/src/vulkan/VulkanCommands.h
+++ b/filament/backend/src/vulkan/VulkanCommands.h
@@ -39,6 +39,7 @@ namespace filament::backend {
 
 struct VulkanContext;
 
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
 class VulkanGroupMarkers {
 public:
     using Timestamp = std::chrono::time_point<std::chrono::high_resolution_clock>;
@@ -51,11 +52,13 @@ class VulkanGroupMarkers {
 
 private:
     std::list<std::string> mMarkers;
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
     std::list<Timestamp> mTimestamps;
 #endif
 };
 
+#endif // FVK_DEBUG_GROUP_MARKERS
+
 // Wrapper to enable use of shared_ptr for implementing shared ownership of low-level Vulkan fences.
 struct VulkanCmdFence {
     VulkanCmdFence(VkFence ifence);
@@ -170,6 +173,7 @@ class VulkanCommands {
         // The observer's event handler can only be called during get().
         void setObserver(CommandBufferObserver* observer) { mObserver = observer; }
 
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
         void pushGroupMarker(char const* str, VulkanGroupMarkers::Timestamp timestamp = {});
 
         void popGroupMarker();
@@ -177,9 +181,10 @@ class VulkanCommands {
         void insertEventMarker(char const* string, uint32_t len);
 
         std::string getTopGroupMarker() const;
+#endif
 
     private:
-        static constexpr int CAPACITY = VK_MAX_COMMAND_BUFFERS;
+        static constexpr int CAPACITY = FVK_MAX_COMMAND_BUFFERS;
         VkDevice const mDevice;
         VkQueue const mQueue;
         VkCommandPool const mPool;
@@ -196,8 +201,10 @@ class VulkanCommands {
         uint8_t mAvailableBufferCount = CAPACITY;
         CommandBufferObserver* mObserver = nullptr;
 
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
         std::unique_ptr<VulkanGroupMarkers> mGroupMarkers;
         std::unique_ptr<VulkanGroupMarkers> mCarriedOverMarkers;
+#endif
 };
 
 } // namespace filament::backend
diff --git a/filament/backend/src/vulkan/VulkanConstants.h b/filament/backend/src/vulkan/VulkanConstants.h
index 7692366fd34..429060535d3 100644
--- a/filament/backend/src/vulkan/VulkanConstants.h
+++ b/filament/backend/src/vulkan/VulkanConstants.h
@@ -19,13 +19,6 @@
 
 #include <stdint.h>
 
-#define FILAMENT_VULKAN_VERBOSE 0
-#define FILAMENT_VULKAN_DUMP_API 0
-
-#ifndef FILAMENT_VULKAN_HANDLE_ARENA_SIZE_IN_MB
-#define FILAMENT_VULKAN_HANDLE_ARENA_SIZE_IN_MB 8
-#endif
-
 // In debug builds, we enable validation layers and set up a debug callback.
 //
 // To enable validation layers in Android, also be sure to set the jniLibs property in the gradle
@@ -43,21 +36,80 @@
 //     ndkVersion "23.1.7779620"
 //
 // Also consider changing the root `gradle.properties` to point to a debug build, although this is
-// not required for validation if you change the definition of VK_ENABLE_VALIDATION below.
-#if defined(NDEBUG)
-#define VK_ENABLE_VALIDATION 0
+// not required for validation.
+
+// FVK is short for Filament Vulkan
+#define FVK_DEBUG_SYSTRACE                0x00000001
+#define FVK_DEBUG_GROUP_MARKERS           0x00000002
+#define FVK_DEBUG_TEXTURE                 0x00000004
+#define FVK_DEBUG_LAYOUT_TRANSITION       0x00000008
+#define FVK_DEBUG_COMMAND_BUFFER          0x00000010
+#define FVK_DEBUG_DUMP_API                0x00000020
+#define FVK_DEBUG_VALIDATION              0x00000040
+#define FVK_DEBUG_PRINT_GROUP_MARKERS     0x00000080
+#define FVK_DEBUG_BLIT_FORMAT             0x00000100
+#define FVK_DEBUG_BLITTER                 0x00000200
+#define FVK_DEBUG_FBO_CACHE               0x00000400
+#define FVK_DEBUG_SHADER_MODULE           0x00000800
+#define FVK_DEBUG_READ_PIXELS             0x00001000
+#define FVK_DEBUG_PIPELINE_CACHE          0x00002000
+#define FVK_DEBUG_ALLOCATION              0x00004000
+
+// Useful default combinations
+#define FVK_DEBUG_EVERYTHING              0xFFFFFFFF
+#define FVK_DEBUG_PERFORMANCE     \
+    (FVK_DEBUG_SYSTRACE |         \
+     FVK_DEBUG_GROUP_MARKERS)
+
+#define FVK_DEBUG_CORRECTNESS     \
+    (FVK_DEBUG_VALIDATION |       \
+     FVK_DEBUG_SHADER_MODULE |    \
+     FVK_DEBUG_TEXTURE |          \
+     FVK_DEBUG_LAYOUT_TRANSITION)
+
+#define FVK_DEBUG_RENDER_PASSES   \
+    (FVK_DEBUG_GROUP_MARKERS |    \
+     FVK_DEBUG_PRINT_GROUP_MARKERS)
+
+#ifndef NDEBUG
+#define FVK_DEBUG_FLAGS FVK_DEBUG_PERFORMANCE
+#else
+#define FVK_DEBUG_FLAGS 0
+#endif
+
+#define FVK_ENABLED(flags) ((FVK_DEBUG_FLAGS) & (flags))
+#define FVK_ENABLED_BOOL(flags) ((bool) FVK_ENABLED(flags))
+
+// Ensure dependencies are met between debug options
+#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
+static_assert(FVK_ENABLED_BOOL(FVK_DEBUG_GROUP_MARKERS));
+#endif
+
+// end dependency checks
+
+#if FVK_ENABLED(FVK_DEBUG_SYSTRACE)
+
+#include <utils/Systrace.h>
+
+#define FVK_SYSTRACE_CONTEXT()      SYSTRACE_CONTEXT()
+#define FVK_SYSTRACE_START(marker)  SYSTRACE_NAME_BEGIN(marker)
+#define FVK_SYSTRACE_END()          SYSTRACE_NAME_END()
 #else
-#define VK_ENABLE_VALIDATION 1
+#define FVK_SYSTRACE_CONTEXT()
+#define FVK_SYSTRACE_START(marker)
+#define FVK_SYSTRACE_END()
 #endif
 
-#define VK_REPORT_STALLS 0
+#ifndef FVK_HANDLE_ARENA_SIZE_IN_MB
+#define FVK_HANDLE_ARENA_SIZE_IN_MB 8
+#endif
 
 // All vkCreate* functions take an optional allocator. For now we select the default allocator by
 // passing in a null pointer, and we highlight the argument by using the VKALLOC constant.
 constexpr struct VkAllocationCallbacks* VKALLOC = nullptr;
 
-constexpr static const int VK_REQUIRED_VERSION_MAJOR = 1;
-constexpr static const int VK_REQUIRED_VERSION_MINOR = 1;
+constexpr static const int FVK_REQUIRED_VERSION_MAJOR = 1;
+constexpr static const int FVK_REQUIRED_VERSION_MINOR = 1;
 
 // Maximum number of VkCommandBuffer handles managed simultaneously by VulkanCommands.
 //
@@ -65,18 +117,18 @@ constexpr static const int VK_REQUIRED_VERSION_MINOR = 1;
 // buffers that have been submitted but have not yet finished rendering. Note that Filament can
 // issue multiple commit calls in a single frame, and that we use a triple buffered swap chain on
 // some platforms.
-constexpr static const int VK_MAX_COMMAND_BUFFERS = 10;
+constexpr static const int FVK_MAX_COMMAND_BUFFERS = 10;
 
 // Number of command buffer submissions that should occur before an unused pipeline is removed
 // from the cache.
 //
 // If this number is low, VkPipeline construction will occur frequently, which can
 // be extremely slow. If this number is high, the memory footprint will be large.
-constexpr static const int VK_MAX_PIPELINE_AGE = 10;
+constexpr static const int FVK_MAX_PIPELINE_AGE = 10;
 
 // VulkanPipelineCache does not track which command buffers contain references to which pipelines,
-// instead it simply waits for at least VK_MAX_COMMAND_BUFFERS submissions to occur before
+// instead it simply waits for at least FVK_MAX_COMMAND_BUFFERS submissions to occur before
 // destroying any unused pipeline object.
-static_assert(VK_MAX_PIPELINE_AGE >= VK_MAX_COMMAND_BUFFERS);
+static_assert(FVK_MAX_PIPELINE_AGE >= FVK_MAX_COMMAND_BUFFERS);
 
 #endif
diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp
index 40cb457c8c9..116451763e6 100644
--- a/filament/backend/src/vulkan/VulkanDriver.cpp
+++ b/filament/backend/src/vulkan/VulkanDriver.cpp
@@ -30,10 +30,9 @@
 #include <utils/CString.h>
 #include <utils/FixedCapacityVector.h>
 #include <utils/Panic.h>
-#include <utils/Systrace.h>
 
 #ifndef NDEBUG
-#include <set>
+#include <set>  // For VulkanDriver::debugCommandBegin
 #endif
 
 using namespace bluevk;
@@ -101,7 +100,7 @@ VulkanTexture* createEmptyTexture(VkDevice device, VkPhysicalDevice physicalDevi
     return emptyTexture;
 }
 
-#if VK_ENABLE_VALIDATION
+#if FVK_ENABLED(FVK_DEBUG_VALIDATION)
 VKAPI_ATTR VkBool32 VKAPI_CALL debugReportCallback(VkDebugReportFlagsEXT flags,
         VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location,
         int32_t messageCode, const char* pLayerPrefix, const char* pMessage, void* pUserData) {
@@ -136,7 +135,7 @@ VKAPI_ATTR VkBool32 VKAPI_CALL debugUtilsCallback(VkDebugUtilsMessageSeverityFla
     utils::slog.e << utils::io::endl;
     return VK_FALSE;
 }
-#endif// VK_ENABLE_VALIDATION
+#endif // FVK_ENABLED(FVK_DEBUG_VALIDATION)
 
 } // anonymous namespace
 
@@ -159,7 +158,7 @@ VulkanDriver::VulkanDriver(VulkanPlatform* platform, VulkanContext const& contex
       mBlitter(mStagePool, mPipelineCache, mFramebufferCache, mSamplerCache),
       mReadPixels(mPlatform->getDevice()) {
 
-#if VK_ENABLE_VALIDATION
+#if FVK_ENABLED(FVK_DEBUG_VALIDATION)
     UTILS_UNUSED const PFN_vkCreateDebugReportCallbackEXT createDebugReportCallback
             = vkCreateDebugReportCallbackEXT;
     VkResult result;
@@ -214,7 +213,7 @@ UTILS_NOINLINE
 Driver* VulkanDriver::create(VulkanPlatform* platform, VulkanContext const& context,
          Platform::DriverConfig const& driverConfig) noexcept {
     assert_invariant(platform);
-    size_t defaultSize = FILAMENT_VULKAN_HANDLE_ARENA_SIZE_IN_MB * 1024U * 1024U;
+    size_t defaultSize = FVK_HANDLE_ARENA_SIZE_IN_MB * 1024U * 1024U;
     Platform::DriverConfig validConfig{
             .handleArenaSize = std::max(driverConfig.handleArenaSize, defaultSize)};
     return new VulkanDriver(platform, context, validConfig);
@@ -266,24 +265,15 @@ void VulkanDriver::tick(int) {
 // rather than the wall clock, because we must wait 3 frames after a DriverAPI-level resource has
 // been destroyed for safe destruction, due to outstanding command buffers and triple buffering.
 void VulkanDriver::collectGarbage() {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("gc");
     // Command buffers need to be submitted and completed before other resources can be gc'd. And
     // its gc() function carrys out the *wait*.
-    SYSTRACE_CONTEXT();
-    SYSTRACE_NAME_BEGIN("gc");
-
     mCommands->gc();
-
-    SYSTRACE_NAME_BEGIN("stagepool-gc");
     mStagePool.gc();
-    SYSTRACE_NAME_END();
-
-    SYSTRACE_NAME_BEGIN("framebuffercache-gc");
     mFramebufferCache.gc();
-    SYSTRACE_NAME_END();
-
-    SYSTRACE_NAME_END(); // "gc"
+    FVK_SYSTRACE_END();
 }
-
 void VulkanDriver::beginFrame(int64_t monotonic_clock_ns, uint32_t frameId) {
     // Do nothing.
 }
@@ -306,14 +296,21 @@ void VulkanDriver::endFrame(uint32_t frameId) {
 }
 
 void VulkanDriver::flush(int) {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("flush");
     mCommands->flush();
+    FVK_SYSTRACE_END();
 }
 
 void VulkanDriver::finish(int dummy) {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("finish");
+
     mCommands->flush();
     mCommands->wait();
 
     mReadPixels.runUntilComplete();
+    FVK_SYSTRACE_END();
 }
 
 void VulkanDriver::createSamplerGroupR(Handle<HwSamplerGroup> sbh, uint32_t count) {
@@ -990,6 +987,9 @@ void VulkanDriver::compilePrograms(CompilerPriorityQueue priority,
 }
 
 void VulkanDriver::beginRenderPass(Handle<HwRenderTarget> rth, const RenderPassParams& params) {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("beginRenderPass");
+
     VulkanRenderTarget* const rt = mResourceAllocator.handle_cast<VulkanRenderTarget*>(rth);
     const VkExtent2D extent = rt->getExtent();
     assert_invariant(extent.width > 0 && extent.height > 0);
@@ -1008,7 +1008,8 @@ void VulkanDriver::beginRenderPass(Handle<HwRenderTarget> rth, const RenderPassP
     }
 
     VulkanAttachment depth = rt->getSamples() == 1 ? rt->getDepth() : rt->getMsaaDepth();
-#if FILAMENT_VULKAN_VERBOSE
+
+#if FVK_ENABLED(FVK_DEBUG_TEXTURE)
     if (depth.texture) {
         depth.texture->print();
     }
@@ -1153,6 +1154,7 @@ void VulkanDriver::beginRenderPass(Handle<HwRenderTarget> rth, const RenderPassP
     VkFramebuffer vkfb = mFramebufferCache.getFramebuffer(fbkey);
 
     // Assign a label to the framebuffer for debugging purposes.
+    #if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
     if (UTILS_UNLIKELY(mContext.isDebugUtilsSupported())) {
         auto const topMarker = mCommands->getTopGroupMarker();
         if (!topMarker.empty()) {
@@ -1166,6 +1168,7 @@ void VulkanDriver::beginRenderPass(Handle<HwRenderTarget> rth, const RenderPassP
             vkSetDebugUtilsObjectNameEXT(mPlatform->getDevice(), &info);
         }
     }
+    #endif
 
     // The current command buffer now owns a reference to the render target and its attachments.
     // Note that we must acquire parent textures, not sidecars.
@@ -1242,9 +1245,13 @@ void VulkanDriver::beginRenderPass(Handle<HwRenderTarget> rth, const RenderPassP
         .params = params,
         .currentSubpass = 0,
     };
+    FVK_SYSTRACE_END();
 }
 
 void VulkanDriver::endRenderPass(int) {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("endRenderPass");
+
     VulkanCommandBuffer& commands = mCommands->get();
     VkCommandBuffer cmdbuffer = commands.buffer();
     vkCmdEndRenderPass(cmdbuffer);
@@ -1288,6 +1295,7 @@ void VulkanDriver::endRenderPass(int) {
     }
     mCurrentRenderPass.renderTarget = nullptr;
     mCurrentRenderPass.renderPass = VK_NULL_HANDLE;
+    FVK_SYSTRACE_END();
 }
 
 void VulkanDriver::nextSubpass(int) {
@@ -1334,6 +1342,9 @@ void VulkanDriver::setRenderPrimitiveRange(Handle<HwRenderPrimitive> rph,
 }
 
 void VulkanDriver::makeCurrent(Handle<HwSwapChain> drawSch, Handle<HwSwapChain> readSch) {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("makeCurrent");
+
     ASSERT_PRECONDITION_NON_FATAL(drawSch == readSch,
             "Vulkan driver does not support distinct draw/read swap chains.");
     VulkanSwapChain* swapChain = mCurrentSwapChain
@@ -1349,9 +1360,14 @@ void VulkanDriver::makeCurrent(Handle<HwSwapChain> drawSch, Handle<HwSwapChain>
     if (UTILS_LIKELY(mDefaultRenderTarget)) {
         mDefaultRenderTarget->bindToSwapChain(*swapChain);
     }
+
+    FVK_SYSTRACE_END();
 }
 
 void VulkanDriver::commit(Handle<HwSwapChain> sch) {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("commit");
+
     VulkanSwapChain* swapChain = mResourceAllocator.handle_cast<VulkanSwapChain*>(sch);
 
     if (mCommands->flush()) {
@@ -1360,6 +1376,7 @@ void VulkanDriver::commit(Handle<HwSwapChain> sch) {
 
     // Present the backbuffer after the most recent command buffer submission has finished.
     swapChain->present();
+    FVK_SYSTRACE_END();
 }
 
 void VulkanDriver::bindUniformBuffer(uint32_t index, Handle<HwBufferObject> boh) {
@@ -1391,24 +1408,26 @@ void VulkanDriver::bindSamplers(uint32_t index, Handle<HwSamplerGroup> sbh) {
 }
 
 void VulkanDriver::insertEventMarker(char const* string, uint32_t len) {
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
     mCommands->insertEventMarker(string, len);
+#endif
 }
 
 void VulkanDriver::pushGroupMarker(char const* string, uint32_t) {
     // Turns out all the markers are 0-terminated, so we can just pass it without len.
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
     mCommands->pushGroupMarker(string);
-    {
-        SYSTRACE_CONTEXT();
-        SYSTRACE_NAME_BEGIN(string);
-    }
+#endif
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START(string);
 }
 
 void VulkanDriver::popGroupMarker(int) {
+#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
     mCommands->popGroupMarker();
-    {
-        SYSTRACE_CONTEXT();
-        SYSTRACE_NAME_END();
-    }
+#endif
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_END();
 }
 
 void VulkanDriver::startCapture(int) {}
@@ -1439,6 +1458,9 @@ void VulkanDriver::readBufferSubData(backend::BufferObjectHandle boh,
 
 void VulkanDriver::blit(TargetBufferFlags buffers, Handle<HwRenderTarget> dst, Viewport dstRect,
         Handle<HwRenderTarget> src, Viewport srcRect, SamplerMagFilter filter) {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("blit");
+
     assert_invariant(mCurrentRenderPass.renderPass == VK_NULL_HANDLE);
 
     // blit operation only support COLOR0 color buffer
@@ -1478,10 +1500,14 @@ void VulkanDriver::blit(TargetBufferFlags buffers, Handle<HwRenderTarget> dst, V
     if (any(buffers & TargetBufferFlags::COLOR0)) {
         mBlitter.blitColor({ dstTarget, dstOffsets, srcTarget, srcOffsets, vkfilter, int(0) });
     }
+    FVK_SYSTRACE_END();
 }
 
 void VulkanDriver::draw(PipelineState pipelineState, Handle<HwRenderPrimitive> rph,
         const uint32_t instanceCount) {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("draw");
+
     VulkanCommandBuffer* commands = &mCommands->get();
     VkCommandBuffer cmdbuffer = commands->buffer();
     const VulkanRenderPrimitive& prim = *mResourceAllocator.handle_cast<VulkanRenderPrimitive*>(rph);
@@ -1497,7 +1523,7 @@ void VulkanDriver::draw(PipelineState pipelineState, Handle<HwRenderPrimitive> r
     commands->acquire(prim.vertexBuffer);
 
     // If this is a debug build, validate the current shader.
-#if !defined(NDEBUG)
+#if FVK_ENABLED(FVK_DEBUG_SHADER_MODULE)
     if (program->bundle.vertex == VK_NULL_HANDLE || program->bundle.fragment == VK_NULL_HANDLE) {
         utils::slog.e << "Binding missing shader: " << program->name.c_str() << utils::io::endl;
     }
@@ -1613,7 +1639,7 @@ void VulkanDriver::draw(PipelineState pipelineState, Handle<HwRenderPrimitive> r
                 // This fallback path is very flaky because the dummy texture might not have
                 // matching characteristics. (e.g. if the missing texture is a 3D texture)
                 if (UTILS_UNLIKELY(texture->getPrimaryImageLayout() == VulkanLayout::UNDEFINED)) {
-#ifndef NDEBUG
+#if FVK_ENABLED(FVK_DEBUG_TEXTURE)
                     utils::slog.w << "Uninitialized texture bound to '" << sampler.name.c_str() << "'";
                     utils::slog.w << " in material '" << program->name.c_str() << "'";
                     utils::slog.w << " at binding point " << +sampler.binding << utils::io::endl;
@@ -1687,6 +1713,7 @@ void VulkanDriver::draw(PipelineState pipelineState, Handle<HwRenderPrimitive> r
     const uint32_t firstInstId = 0;
 
     vkCmdDrawIndexed(cmdbuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstId);
+    FVK_SYSTRACE_END();
 }
 
 void VulkanDriver::dispatchCompute(Handle<HwProgram> program, math::uint3 workGroupCount) {
diff --git a/filament/backend/src/vulkan/VulkanFboCache.cpp b/filament/backend/src/vulkan/VulkanFboCache.cpp
index 1943dce87a4..0546ffd04fb 100644
--- a/filament/backend/src/vulkan/VulkanFboCache.cpp
+++ b/filament/backend/src/vulkan/VulkanFboCache.cpp
@@ -23,7 +23,7 @@
 
 // If any VkRenderPass or VkFramebuffer is unused for more than TIME_BEFORE_EVICTION frames, it
 // is evicted from the cache.
-static constexpr uint32_t TIME_BEFORE_EVICTION = VK_MAX_COMMAND_BUFFERS;
+static constexpr uint32_t TIME_BEFORE_EVICTION = FVK_MAX_COMMAND_BUFFERS;
 
 using namespace bluevk;
 
@@ -97,7 +97,7 @@ VkFramebuffer VulkanFboCache::getFramebuffer(FboKey config) noexcept {
         attachments[attachmentCount++] = config.depth;
     }
 
-    #if FILAMENT_VULKAN_VERBOSE
+    #if FVK_ENABLED(FVK_DEBUG_FBO_CACHE)
     utils::slog.d << "Creating framebuffer " << config.width << "x" << config.height << " "
         << "for render pass " << config.renderPass << ", "
         << "samples = " << int(config.samples) << ", "
@@ -312,7 +312,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept {
     ASSERT_POSTCONDITION(!error, "Unable to create render pass.");
     mRenderPassCache[config] = {renderPass, mCurrentTime};
 
-    #if FILAMENT_VULKAN_VERBOSE
+    #if FVK_ENABLED(FVK_DEBUG_FBO_CACHE)
     utils::slog.d << "Created render pass " << renderPass << " with "
         << "samples = " << int(config.samples) << ", "
         << "depth = " << (hasDepth ? 1 : 0) << ", "
@@ -338,6 +338,9 @@ void VulkanFboCache::reset() noexcept {
 // Frees up old framebuffers and render passes, then nulls out their key.  Doesn't bother removing
 // the actual map entry since it is fairly small.
 void VulkanFboCache::gc() noexcept {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("fbocache::gc");
+
     // If this is one of the first few frames, return early to avoid wrapping unsigned integers.
     if (++mCurrentTime <= TIME_BEFORE_EVICTION) {
         return;
@@ -359,6 +362,7 @@ void VulkanFboCache::gc() noexcept {
             iter.value().handle = VK_NULL_HANDLE;
         }
     }
+    FVK_SYSTRACE_END();
 }
 
 } // namespace filament::backend
diff --git a/filament/backend/src/vulkan/VulkanHandles.cpp b/filament/backend/src/vulkan/VulkanHandles.cpp
index 3d8d5b6da95..5ab3365d027 100644
--- a/filament/backend/src/vulkan/VulkanHandles.cpp
+++ b/filament/backend/src/vulkan/VulkanHandles.cpp
@@ -114,10 +114,10 @@ VulkanProgram::VulkanProgram(VkDevice device, const Program& builder) noexcept
 
     // Make a copy of the binding map
     samplerGroupInfo = builder.getSamplerGroupInfo();
-    if constexpr (FILAMENT_VULKAN_VERBOSE) {
+    #if FVK_ENABLED(FVK_DEBUG_SHADER_MODULE)
         utils::slog.d << "Created VulkanProgram " << builder << ", shaders = (" << bundle.vertex
                       << ", " << bundle.fragment << ")" << utils::io::endl;
-    }
+    #endif
 }
 
 VulkanProgram::VulkanProgram(VkDevice device, VkShaderModule vs, VkShaderModule fs) noexcept
diff --git a/filament/backend/src/vulkan/VulkanImageUtility.cpp b/filament/backend/src/vulkan/VulkanImageUtility.cpp
index 5615b889e74..186771778c3 100644
--- a/filament/backend/src/vulkan/VulkanImageUtility.cpp
+++ b/filament/backend/src/vulkan/VulkanImageUtility.cpp
@@ -218,7 +218,7 @@ bool operator<(const VkImageSubresourceRange& a, const VkImageSubresourceRange&
     return false;
 }
 
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_LAYOUT_TRANSITION | FVK_DEBUG_TEXTURE)
 #define CASE(VALUE)                                                                                \
     case filament::backend::VulkanLayout::VALUE: {                                                 \
         out << #VALUE;                                                                             \
diff --git a/filament/backend/src/vulkan/VulkanPipelineCache.cpp b/filament/backend/src/vulkan/VulkanPipelineCache.cpp
index e69cfae9a53..233bef3a32f 100644
--- a/filament/backend/src/vulkan/VulkanPipelineCache.cpp
+++ b/filament/backend/src/vulkan/VulkanPipelineCache.cpp
@@ -481,10 +481,10 @@ VulkanPipelineCache::PipelineCacheEntry* VulkanPipelineCache::createPipeline() n
 
     PipelineCacheEntry cacheEntry = {};
 
-    if constexpr (FILAMENT_VULKAN_VERBOSE) {
+    #if FVK_ENABLED(FVK_DEBUG_SHADER_MODULE)
         utils::slog.d << "vkCreateGraphicsPipelines with shaders = ("
                 << shaderStages[0].module << ", " << shaderStages[1].module << ")" << utils::io::endl;
-    }
+    #endif
     VkResult error = vkCreateGraphicsPipelines(mDevice, VK_NULL_HANDLE, 1, &pipelineCreateInfo,
             VKALLOC, &cacheEntry.handle);
     assert_invariant(error == VK_SUCCESS);
@@ -681,7 +681,7 @@ void VulkanPipelineCache::terminate() noexcept {
 void VulkanPipelineCache::onCommandBuffer(const VulkanCommandBuffer& commands) {
     // The timestamp associated with a given cache entry represents "time" as a count of flush
     // events since the cache was constructed. If any cache entry was most recently used over
-    // VK_MAX_PIPELINE_AGE flush events in the past, then we can be sure that it is no longer
+    // FVK_MAX_PIPELINE_AGE flush events in the past, then we can be sure that it is no longer
     // being used by the GPU, and is therefore safe to destroy or reclaim.
     ++mCurrentTime;
 
@@ -698,7 +698,7 @@ void VulkanPipelineCache::onCommandBuffer(const VulkanCommandBuffer& commands) {
     using ConstDescIterator = decltype(mDescriptorSets)::const_iterator;
     for (ConstDescIterator iter = mDescriptorSets.begin(); iter != mDescriptorSets.end();) {
         const DescriptorCacheEntry& cacheEntry = iter.value();
-        if (cacheEntry.lastUsed + VK_MAX_PIPELINE_AGE < mCurrentTime) {
+        if (cacheEntry.lastUsed + FVK_MAX_PIPELINE_AGE < mCurrentTime) {
             auto& arenas = mPipelineLayouts[cacheEntry.pipelineLayout].descriptorSetArenas;
             for (uint32_t i = 0; i < DESCRIPTOR_TYPE_COUNT; ++i) {
                 arenas[i].push_back(cacheEntry.handles[i]);
@@ -712,11 +712,11 @@ void VulkanPipelineCache::onCommandBuffer(const VulkanCommandBuffer& commands) {
     }
 
     // Evict any pipelines that have not been used in a while.
-    // Any pipeline older than VK_MAX_COMMAND_BUFFERS can be safely destroyed.
+    // Any pipeline older than FVK_MAX_PIPELINE_AGE can be safely destroyed.
     using ConstPipeIterator = decltype(mPipelines)::const_iterator;
     for (ConstPipeIterator iter = mPipelines.begin(); iter != mPipelines.end();) {
         const PipelineCacheEntry& cacheEntry = iter.value();
-        if (cacheEntry.lastUsed + VK_MAX_PIPELINE_AGE < mCurrentTime) {
+        if (cacheEntry.lastUsed + FVK_MAX_PIPELINE_AGE < mCurrentTime) {
             vkDestroyPipeline(mDevice, iter->second.handle, VKALLOC);
             iter = mPipelines.erase(iter);
         } else {
@@ -728,15 +728,15 @@ void VulkanPipelineCache::onCommandBuffer(const VulkanCommandBuffer& commands) {
     using ConstLayoutIterator = decltype(mPipelineLayouts)::const_iterator;
     for (ConstLayoutIterator iter = mPipelineLayouts.begin(); iter != mPipelineLayouts.end();) {
         const PipelineLayoutCacheEntry& cacheEntry = iter.value();
-        if (cacheEntry.lastUsed + VK_MAX_PIPELINE_AGE < mCurrentTime) {
+        if (cacheEntry.lastUsed + FVK_MAX_PIPELINE_AGE < mCurrentTime) {
             vkDestroyPipelineLayout(mDevice, iter->second.handle, VKALLOC);
             for (auto setLayout : iter->second.descriptorSetLayouts) {
-#ifndef NDEBUG
+                #if FVK_ENABLED(FVK_DEBUG_PIPELINE_CACHE)
                 PipelineLayoutKey key = iter.key();
                 for (auto& pair : mDescriptorSets) {
                     assert_invariant(pair.second.pipelineLayout != key);
                 }
-#endif
+                #endif
                 vkDestroyDescriptorSetLayout(mDevice, setLayout, VKALLOC);
             }
             auto& arenas = iter->second.descriptorSetArenas;
@@ -755,7 +755,7 @@ void VulkanPipelineCache::onCommandBuffer(const VulkanCommandBuffer& commands) {
     // destroy the extinct pools, which implicitly frees their associated descriptor sets.
     bool canPurgeExtinctPools = true;
     for (auto& bundle : mExtinctDescriptorBundles) {
-        if (bundle.lastUsed + VK_MAX_PIPELINE_AGE >= mCurrentTime) {
+        if (bundle.lastUsed + FVK_MAX_PIPELINE_AGE >= mCurrentTime) {
             canPurgeExtinctPools = false;
             break;
         }
@@ -799,7 +799,7 @@ VkDescriptorPool VulkanPipelineCache::createDescriptorPool(uint32_t size) const
 void VulkanPipelineCache::destroyLayoutsAndDescriptors() noexcept {
     // Our current descriptor set strategy can cause the # of descriptor sets to explode in certain
     // situations, so it's interesting to report the number that get stuffed into the cache.
-    #ifndef NDEBUG
+    #if FVK_ENABLED(FVK_DEBUG_PIPELINE_CACHE)
     utils::slog.d << "Destroying " << mDescriptorSets.size() << " bundles of descriptor sets."
             << utils::io::endl;
     #endif
@@ -808,7 +808,7 @@ void VulkanPipelineCache::destroyLayoutsAndDescriptors() noexcept {
 
     // Our current layout bundle strategy can cause the # of layout bundles to explode in certain
     // situations, so it's interesting to report the number that get stuffed into the cache.
-    #ifndef NDEBUG
+    #if FVK_ENABLED(FVK_DEBUG_PIPELINE_CACHE)
     utils::slog.d << "Destroying " << mPipelineLayouts.size() << " pipeline layouts."
                   << utils::io::endl;
     #endif
diff --git a/filament/backend/src/vulkan/VulkanPipelineCache.h b/filament/backend/src/vulkan/VulkanPipelineCache.h
index a54e6bccd13..2c9bf09350e 100644
--- a/filament/backend/src/vulkan/VulkanPipelineCache.h
+++ b/filament/backend/src/vulkan/VulkanPipelineCache.h
@@ -340,7 +340,7 @@ class VulkanPipelineCache : public CommandBufferObserver {
 
     // The timestamp associated with a given cache entry represents time as a count of flush
     // events since the cache was constructed. If any cache entry was most recently used over
-    // VK_MAX_PIPELINE_AGE flushes in the past, then we can be sure that it is no longer
+    // FVK_MAX_PIPELINE_AGE flushes in the past, then we can be sure that it is no longer
     // being used by the GPU, and is therefore safe to destroy or reclaim.
     using Timestamp = uint64_t;
     Timestamp mCurrentTime = 0;
diff --git a/filament/backend/src/vulkan/VulkanReadPixels.cpp b/filament/backend/src/vulkan/VulkanReadPixels.cpp
index fd51344b830..83e341c2768 100644
--- a/filament/backend/src/vulkan/VulkanReadPixels.cpp
+++ b/filament/backend/src/vulkan/VulkanReadPixels.cpp
@@ -167,7 +167,7 @@ void VulkanReadPixels::run(VulkanRenderTarget const* srcTarget, uint32_t const x
     VkImage stagingImage;
     vkCreateImage(device, &imageInfo, VKALLOC, &stagingImage);
 
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_READ_PIXELS)
     utils::slog.d << "readPixels created image=" << stagingImage
                   << " to copy from image=" << srcTexture->getVkImage()
                   << " src-layout=" << srcTexture->getLayout(0, 0) << utils::io::endl;
diff --git a/filament/backend/src/vulkan/VulkanStagePool.cpp b/filament/backend/src/vulkan/VulkanStagePool.cpp
index a8934d242b4..93f434d4afb 100644
--- a/filament/backend/src/vulkan/VulkanStagePool.cpp
+++ b/filament/backend/src/vulkan/VulkanStagePool.cpp
@@ -22,7 +22,7 @@
 
 #include <utils/Panic.h>
 
-static constexpr uint32_t TIME_BEFORE_EVICTION = VK_MAX_COMMAND_BUFFERS;
+static constexpr uint32_t TIME_BEFORE_EVICTION = FVK_MAX_COMMAND_BUFFERS;
 
 namespace filament::backend {
 
@@ -59,7 +59,7 @@ VulkanStage const* VulkanStagePool::acquireStage(uint32_t numBytes) {
     UTILS_UNUSED_IN_RELEASE VkResult result = vmaCreateBuffer(mAllocator, &bufferInfo,
             &allocInfo, &stage->buffer, &stage->memory, nullptr);
 
-#ifndef NDEBUG
+#if FVK_ENABLED(FVK_DEBUG_ALLOCATION)
     if (result != VK_SUCCESS) {
         utils::slog.e << "Allocation error: " << result << utils::io::endl;
     }
@@ -129,6 +129,9 @@ VulkanStageImage const* VulkanStagePool::acquireImage(PixelDataFormat format, Pi
 }
 
 void VulkanStagePool::gc() noexcept {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("stagepool::gc");
+
     // If this is one of the first few frames, return early to avoid wrapping unsigned integers.
     if (++mCurrentFrame <= TIME_BEFORE_EVICTION) {
         return;
@@ -182,6 +185,7 @@ void VulkanStagePool::gc() noexcept {
             mUsedImages.insert(image);
         }
     }
+    FVK_SYSTRACE_END();
 }
 
 void VulkanStagePool::terminate() noexcept {
diff --git a/filament/backend/src/vulkan/VulkanTexture.cpp b/filament/backend/src/vulkan/VulkanTexture.cpp
index 140c697f87d..c40f6f779ca 100644
--- a/filament/backend/src/vulkan/VulkanTexture.cpp
+++ b/filament/backend/src/vulkan/VulkanTexture.cpp
@@ -106,7 +106,7 @@ VulkanTexture::VulkanTexture(VkDevice device, VkPhysicalDevice physicalDevice,
 
     if (any(usage & TextureUsage::SAMPLEABLE)) {
 
-#if VK_ENABLE_VALIDATION
+#if FVK_ENABLED(FVK_DEBUG_TEXTURE)
         // Validate that the format is actually sampleable.
         VkFormatProperties props;
         vkGetPhysicalDeviceFormatProperties(physicalDevice, mVkFormat, &props);
@@ -160,7 +160,7 @@ VulkanTexture::VulkanTexture(VkDevice device, VkPhysicalDevice physicalDevice,
     imageInfo.samples = (VkSampleCountFlagBits) samples;
 
     VkResult error = vkCreateImage(mDevice, &imageInfo, VKALLOC, &mTextureImage);
-    if (error || FILAMENT_VULKAN_VERBOSE) {
+    if (error || FVK_ENABLED_BOOL(FVK_DEBUG_TEXTURE)) {
         utils::slog.d << "vkCreateImage: "
             << "image = " << mTextureImage << ", "
             << "result = " << error << ", "
@@ -417,7 +417,7 @@ void VulkanTexture::transitionLayout(VkCommandBuffer cmdbuf, const VkImageSubres
         VulkanLayout newLayout) {
     VulkanLayout oldLayout = getLayout(range.baseArrayLayer, range.baseMipLevel);
 
-    #if FILAMENT_VULKAN_VERBOSE
+    #if FVK_ENABLED(FVK_DEBUG_LAYOUT_TRANSITION)
     utils::slog.i << "transition layout of " << mTextureImage << ",layer=" << range.baseArrayLayer
                   << ",level=" << range.baseMipLevel << " from=" << oldLayout << " to=" << newLayout
                   << " format=" << mVkFormat
@@ -463,7 +463,7 @@ VulkanLayout VulkanTexture::getLayout(uint32_t layer, uint32_t level) const {
     return mSubresourceLayouts.get(key);
 }
 
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_TEXTURE)
 void VulkanTexture::print() const {
     const uint32_t firstLayer = 0;
     const uint32_t lastLayer = firstLayer + mPrimaryViewRange.layerCount;
diff --git a/filament/backend/src/vulkan/VulkanTexture.h b/filament/backend/src/vulkan/VulkanTexture.h
index b4b71a9065c..d3a84067c82 100644
--- a/filament/backend/src/vulkan/VulkanTexture.h
+++ b/filament/backend/src/vulkan/VulkanTexture.h
@@ -84,7 +84,7 @@ struct VulkanTexture : public HwTexture, VulkanResource {
     // For now this always returns either DEPTH or COLOR.
     VkImageAspectFlags getImageAspect() const;
 
-#if FILAMENT_VULKAN_VERBOSE
+#if FVK_ENABLED(FVK_DEBUG_TEXTURE)
     void print() const;
 #endif
 
diff --git a/filament/backend/src/vulkan/platform/VulkanPlatform.cpp b/filament/backend/src/vulkan/platform/VulkanPlatform.cpp
index 23c513734ce..153946a3f8b 100644
--- a/filament/backend/src/vulkan/platform/VulkanPlatform.cpp
+++ b/filament/backend/src/vulkan/platform/VulkanPlatform.cpp
@@ -45,11 +45,11 @@ constexpr uint32_t const INVALID_VK_INDEX = 0xFFFFFFFF;
 
 typedef std::unordered_set<std::string_view> ExtensionSet;
 
-#if VK_ENABLE_VALIDATION
+#if FVK_ENABLED(FVK_DEBUG_VALIDATION)
 // These strings need to be allocated outside a function stack
 const std::string_view DESIRED_LAYERS[] = {
         "VK_LAYER_KHRONOS_validation",
-#if FILAMENT_VULKAN_DUMP_API
+#if FVK_ENABLED(FVK_DEBUG_DUMP_API)
         "VK_LAYER_LUNARG_api_dump",
 #endif
 #if defined(ENABLE_RENDERDOC)
@@ -75,7 +75,7 @@ FixedCapacityVector<const char*> getEnabledLayers() {
     }
     return enabledLayers;
 }
-#endif
+#endif // FVK_ENABLED(FVK_DEBUG_VALIDATION)
 
 void printDeviceInfo(VkInstance instance, VkPhysicalDevice device) {
     // Print some driver or MoltenVK information if it is available.
@@ -129,27 +129,27 @@ void printDeviceInfo(VkInstance instance, VkPhysicalDevice device) {
                   << minor << ")" << utils::io::endl;
 }
 
+#if FVK_ENABLED(FVK_DEBUG_VALIDATION)
 void printDepthFormats(VkPhysicalDevice device) {
     // For diagnostic purposes, print useful information about available depth formats.
     // Note that Vulkan is more constrained than OpenGL ES 3.1 in this area.
-    if constexpr (VK_ENABLE_VALIDATION && FILAMENT_VULKAN_VERBOSE) {
-        const VkFormatFeatureFlags required = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT
+    const VkFormatFeatureFlags required = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT
                                               | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
-        utils::slog.i << "Sampleable depth formats: ";
-        for (VkFormat format = (VkFormat) 1;;) {
-            VkFormatProperties props;
-            vkGetPhysicalDeviceFormatProperties(device, format, &props);
-            if ((props.optimalTilingFeatures & required) == required) {
-                utils::slog.i << format << " ";
-            }
-            if (format == VK_FORMAT_ASTC_12x12_SRGB_BLOCK) {
-                utils::slog.i << utils::io::endl;
-                break;
-            }
-            format = (VkFormat) (1 + (int) format);
+    utils::slog.i << "Sampleable depth formats: ";
+    for (VkFormat format = (VkFormat) 1;;) {
+        VkFormatProperties props;
+        vkGetPhysicalDeviceFormatProperties(device, format, &props);
+        if ((props.optimalTilingFeatures & required) == required) {
+            utils::slog.i << format << " ";
+        }
+        if (format == VK_FORMAT_ASTC_12x12_SRGB_BLOCK) {
+            utils::slog.i << utils::io::endl;
+            break;
         }
+        format = (VkFormat) (1 + (int) format);
     }
 }
+#endif
 
 ExtensionSet getInstanceExtensions() {
     std::string_view const TARGET_EXTS[] = {
@@ -161,7 +161,7 @@ ExtensionSet getInstanceExtensions() {
             VK_EXT_DEBUG_UTILS_EXTENSION_NAME,
             VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME,
 
-#if VK_ENABLE_VALIDATION
+#if FVK_ENABLED(FVK_DEBUG_VALIDATION)
             VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
 #endif
     };
@@ -207,7 +207,7 @@ VkInstance createInstance(ExtensionSet const& requiredExts) {
     VkInstanceCreateInfo instanceCreateInfo = {};
     bool validationFeaturesSupported = false;
 
-#if VK_ENABLE_VALIDATION
+#if FVK_ENABLED(FVK_DEBUG_VALIDATION)
     auto const enabledLayers = getEnabledLayers();
     if (!enabledLayers.empty()) {
         // If layers are supported, Check if VK_EXT_validation_features is supported.
@@ -229,9 +229,10 @@ VkInstance createInstance(ExtensionSet const& requiredExts) {
 #else
         utils::slog.d << "Validation layer not available; did you install the Vulkan SDK?\n"
                       << "Please ensure that VK_LAYER_PATH is set correctly." << utils::io::endl;
-#endif
+#endif // __ANDROID__
+
     }
-#endif// VK_ENABLE_VALIDATION
+#endif // FVK_ENABLED(FVK_DEBUG_VALIDATION)
 
     // The Platform class can require 1 or 2 instance extensions, plus we'll request at most 5
     // instance extensions here in the common code. So that's a max of 7.
@@ -252,7 +253,7 @@ VkInstance createInstance(ExtensionSet const& requiredExts) {
     VkApplicationInfo appInfo = {};
     appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
     appInfo.apiVersion
-            = VK_MAKE_API_VERSION(0, VK_REQUIRED_VERSION_MAJOR, VK_REQUIRED_VERSION_MINOR, 0);
+            = VK_MAKE_API_VERSION(0, FVK_REQUIRED_VERSION_MAJOR, FVK_REQUIRED_VERSION_MINOR, 0);
     instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
     instanceCreateInfo.pApplicationInfo = &appInfo;
     instanceCreateInfo.enabledExtensionCount = enabledExtensionCount;
@@ -274,12 +275,8 @@ VkInstance createInstance(ExtensionSet const& requiredExts) {
     }
 
     VkResult result = vkCreateInstance(&instanceCreateInfo, VKALLOC, &instance);
-#ifndef NDEBUG
-    if (result != VK_SUCCESS) {
-        utils::slog.e << "Unable to create instance: " << result << utils::io::endl;
-    }
-#endif
-    ASSERT_POSTCONDITION(result == VK_SUCCESS, "Unable to create Vulkan instance.");
+    ASSERT_POSTCONDITION(result == VK_SUCCESS, "Unable to create Vulkan instance. Result=%d",
+            result);
     return instance;
 }
 
@@ -443,10 +440,10 @@ VkPhysicalDevice selectPhysicalDevice(VkInstance instance,
         int const minor = VK_VERSION_MINOR(targetDeviceProperties.apiVersion);
 
         // Does the device support the required Vulkan level?
-        if (major < VK_REQUIRED_VERSION_MAJOR) {
+        if (major < FVK_REQUIRED_VERSION_MAJOR) {
             continue;
         }
-        if (major == VK_REQUIRED_VERSION_MAJOR && minor < VK_REQUIRED_VERSION_MINOR) {
+        if (major == FVK_REQUIRED_VERSION_MAJOR && minor < FVK_REQUIRED_VERSION_MINOR) {
             continue;
         }
 
@@ -667,7 +664,9 @@ Driver* VulkanPlatform::createDriver(void* sharedContext,
     // just yet, since that would require a corollary change to the "aspect" flags for the VkImage.
     context.mDepthFormat = findSupportedFormat(mImpl->mPhysicalDevice);
 
+#if FVK_ENABLED(FVK_DEBUG_VALIDATION)
     printDepthFormats(mImpl->mPhysicalDevice);
+#endif
 
     // Keep a copy of context for swapchains.
     mImpl->mContext = context;
diff --git a/filament/backend/test/test_MipLevels.cpp b/filament/backend/test/test_MipLevels.cpp
index e677e904d5b..a794003ba3f 100644
--- a/filament/backend/test/test_MipLevels.cpp
+++ b/filament/backend/test/test_MipLevels.cpp
@@ -52,6 +52,18 @@ void main() {
 }
 )");
 
+std::string whiteFragment (R"(#version 450 core
+
+layout(location = 0) out vec4 fragColor;
+layout(location = 0) in vec2 uv;
+
+layout(location = 0, set = 1) uniform sampler2D backend_test_sib_tex;
+
+void main() {
+    fragColor = vec4(1.0);
+}
+)");
+
 }
 
 namespace test {
@@ -70,17 +82,30 @@ TEST_F(BackendTest, SetMinMaxLevel) {
         auto swapChain = createSwapChain();
         api.makeCurrent(swapChain, swapChain);
 
+        // Create a program that draws only white.
+        Handle<HwProgram> whiteProgram;
+        {
+            ShaderGenerator shaderGen(vertex, whiteFragment, sBackend, sIsMobilePlatform);
+            Program p = shaderGen.getProgram(api);
+            Program::Sampler sampler{utils::CString("backend_test_sib_tex"), 0};
+            p.setSamplerGroup(0, ShaderStageFlags::FRAGMENT, &sampler, 1);
+            whiteProgram = api.createProgram(std::move(p));
+        }
+
         // Create a program that samples a texture.
-        SamplerInterfaceBlock sib = filament::SamplerInterfaceBlock::Builder()
-                .name("backend_test_sib")
-                .stageFlags(backend::ShaderStageFlags::FRAGMENT)
-                .add( {{"tex", SamplerType::SAMPLER_2D, SamplerFormat::FLOAT, Precision::HIGH }} )
-                .build();
-        ShaderGenerator shaderGen(vertex, fragment, sBackend, sIsMobilePlatform, &sib);
-        Program p = shaderGen.getProgram(api);
-        Program::Sampler sampler { utils::CString("backend_test_sib_tex"), 0 };
-        p.setSamplerGroup(0, ShaderStageFlags::FRAGMENT, &sampler, 1);
-        auto program = api.createProgram(std::move(p));
+        Handle<HwProgram> textureProgram;
+        {
+            SamplerInterfaceBlock sib = filament::SamplerInterfaceBlock::Builder()
+                    .name("backend_test_sib")
+                    .stageFlags(backend::ShaderStageFlags::FRAGMENT)
+                    .add( {{"tex", SamplerType::SAMPLER_2D, SamplerFormat::FLOAT, Precision::HIGH }} )
+                    .build();
+            ShaderGenerator shaderGen(vertex, fragment, sBackend, sIsMobilePlatform, &sib);
+            Program p = shaderGen.getProgram(api);
+            Program::Sampler sampler{utils::CString("backend_test_sib_tex"), 0};
+            p.setSamplerGroup(0, ShaderStageFlags::FRAGMENT, &sampler, 1);
+            textureProgram = api.createProgram(std::move(p));
+        }
 
         // Create a texture that has 4 mip levels. Each level is a different color.
         // Level 0: 128x128 (red)
@@ -91,7 +116,7 @@ TEST_F(BackendTest, SetMinMaxLevel) {
         const size_t kMipLevels = 4;
         Handle<HwTexture> texture = api.createTexture(SamplerType::SAMPLER_2D, kMipLevels,
                 TextureFormat::RGBA8, 1, kTextureSize, kTextureSize, 1,
-                TextureUsage::SAMPLEABLE | TextureUsage::UPLOADABLE);
+                TextureUsage::SAMPLEABLE | TextureUsage::COLOR_ATTACHMENT | TextureUsage::UPLOADABLE);
 
         // Create image data.
         auto pixelFormat = PixelDataFormat::RGBA;
@@ -116,8 +141,37 @@ TEST_F(BackendTest, SetMinMaxLevel) {
                     texture, l, 0, 0, 0, mipSize, mipSize, 1, std::move(descriptor));
         }
 
+        TrianglePrimitive triangle(api);
+
+        api.beginFrame(0, 0);
+
+        // We set the base mip to 1, and the max mip to 3
+        // Level 0: 128x128 (red)
+        // Level 1:   64x64 (green)             <-- base
+        // Level 2:   32x32 (blue)              <--- white triangle rendered
+        // Level 3:   16x16 (yellow)            <-- max
         api.setMinMaxLevels(texture, 1, 3);
 
+        // Render a white triangle into level 2.
+        // We specify mip level 2, because minMaxLevels has no effect when rendering into a texture.
+        Handle<HwRenderTarget> renderTarget = api.createRenderTarget(
+                TargetBufferFlags::COLOR, 32, 32, 1,
+                {texture, 2 /* level */, 0 /* layer */}, {}, {});
+        {
+            RenderPassParams params = {};
+            fullViewport(params);
+            params.flags.clear = TargetBufferFlags::NONE;
+            params.flags.discardStart = TargetBufferFlags::NONE;
+            params.flags.discardEnd = TargetBufferFlags::NONE;
+            PipelineState ps = {};
+            ps.program = whiteProgram;
+            ps.rasterState.colorWrite = true;
+            ps.rasterState.depthWrite = false;
+            api.beginRenderPass(renderTarget, params);
+            api.draw(ps, triangle.getRenderPrimitive(), 1);
+            api.endRenderPass();
+        }
+
         backend::Handle<HwRenderTarget> defaultRenderTarget = api.createDefaultRenderTarget(0);
 
         RenderPassParams params = {};
@@ -129,14 +183,12 @@ TEST_F(BackendTest, SetMinMaxLevel) {
 
         PipelineState state;
         state.scissor = params.viewport;
-        state.program = program;
+        state.program = textureProgram;
         state.rasterState.colorWrite = true;
         state.rasterState.depthWrite = false;
         state.rasterState.depthFunc = SamplerCompareFunc::A;
         state.rasterState.culling = CullingMode::NONE;
 
-        api.beginFrame(0, 0);
-
         SamplerGroup samplers(1);
         SamplerParams samplerParams {};
         samplerParams.filterMag = SamplerMagFilter::NEAREST;
@@ -147,8 +199,26 @@ TEST_F(BackendTest, SetMinMaxLevel) {
         api.bindSamplers(0, samplerGroup);
 
         // Render a triangle to the screen, sampling from mip level 1.
-        // Because the min level is 1, the result color should be blue.
-        TrianglePrimitive triangle(api);
+        // Because the min level is 1, the result color should be the white triangle drawn in the
+        // previous pass.
+        api.beginRenderPass(defaultRenderTarget, params);
+        api.draw(state, triangle.getRenderPrimitive(), 1);
+        api.endRenderPass();
+
+        // Adjust the base mip to 2.
+        // Note that this is done without another call to updateSamplerGroup.
+        api.setMinMaxLevels(texture, 2, 3);
+
+        // Render a second, smaller, triangle, again sampling from mip level 1.
+        // This triangle should be yellow striped.
+        static filament::math::float2 vertices[3] = {
+                { -0.5, -0.5 },
+                {  0.5, -0.5 },
+                { -0.5,  0.5 }
+        };
+        triangle.updateVertices(vertices);
+        params.flags.clear = TargetBufferFlags::NONE;
+        params.flags.discardStart = TargetBufferFlags::NONE;
         api.beginRenderPass(defaultRenderTarget, params);
         api.draw(state, triangle.getRenderPrimitive(), 1);
         api.endRenderPass();
@@ -160,6 +230,10 @@ TEST_F(BackendTest, SetMinMaxLevel) {
 
         // Cleanup.
         api.destroySwapChain(swapChain);
+        api.destroyRenderTarget(renderTarget);
+        api.destroyTexture(texture);
+        api.destroyProgram(whiteProgram);
+        api.destroyProgram(textureProgram);
     }
 
     api.finish();
diff --git a/filament/include/filament/Options.h b/filament/include/filament/Options.h
index f62f2e1631a..d5fac6b3e1b 100644
--- a/filament/include/filament/Options.h
+++ b/filament/include/filament/Options.h
@@ -141,6 +141,17 @@ struct BloomOptions {
     bool enabled = false;                   //!< enable or disable bloom
     float highlight = 1000.0f;              //!< limit highlights to this value before bloom [10, +inf]
 
+    /**
+     * Bloom quality level.
+     * LOW (default): use a more optimized down-sampling filter, however there can be artifacts
+     *      with dynamic resolution; this can be alleviated by using the homogeneous mode.
+     * MEDIUM: Good balance between quality and performance.
+     * HIGH: In this mode the bloom resolution is automatically increased to avoid artifacts.
+     *      This mode can be significantly slower on mobile, especially at high resolution.
+     *      This mode greatly improves the anamorphic bloom.
+     */
+    QualityLevel quality = QualityLevel::LOW;
+
     bool lensFlare = false;                 //!< enable screen-space lens flare
     bool starburst = true;                  //!< enable starburst effect on lens flare
     float chromaticAberration = 0.005f;     //!< amount of chromatic aberration
diff --git a/filament/src/PostProcessManager.cpp b/filament/src/PostProcessManager.cpp
index a7880f414b6..4bfb70e2eaf 100644
--- a/filament/src/PostProcessManager.cpp
+++ b/filament/src/PostProcessManager.cpp
@@ -212,6 +212,8 @@ static const PostProcessManager::MaterialInfo sMaterialList[] = {
         { "bilateralBlurBentNormals",   MATERIAL(BILATERALBLURBENTNORMALS) },
         { "blitLow",                    MATERIAL(BLITLOW) },
         { "bloomDownsample",            MATERIAL(BLOOMDOWNSAMPLE) },
+        { "bloomDownsample2x",          MATERIAL(BLOOMDOWNSAMPLE2X) },
+        { "bloomDownsample9",           MATERIAL(BLOOMDOWNSAMPLE9) },
         { "bloomUpsample",              MATERIAL(BLOOMUPSAMPLE) },
         { "colorGrading",               MATERIAL(COLORGRADING) },
         { "colorGradingAsSubpass",      MATERIAL(COLORGRADINGASSUBPASS) },
@@ -450,7 +452,6 @@ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph
                 for (size_t level = 0; level < levelCount - 1; level++) {
                     auto out = resources.getRenderPassInfo(level);
                     driver.setMinMaxLevels(in, level, level);
-                    mi->setParameter("level", uint32_t(level));
                     commitAndRender(out, material, driver);
                 }
                 driver.setMinMaxLevels(in, 0, levelCount - 1);
@@ -1596,8 +1597,7 @@ FrameGraphId<FrameGraphTexture> PostProcessManager::dof(FrameGraph& fg,
                     auto const& out = resources.getRenderPassInfo(data.rp[level]);
                     driver.setMinMaxLevels(inOutColor, level, level);
                     driver.setMinMaxLevels(inOutCoc, level, level);
-                    mi->setParameter("mip", uint32_t(level));
-                    mi->setParameter("weightScale", 0.5f / float(1u<<level));   // FIXME: halfres?
+                    mi->setParameter("weightScale", 0.5f / float(1u << level));   // FIXME: halfres?
                     mi->setParameter("texelSize", float2{ 1.0f / w, 1.0f / h });
                     mi->commit(driver);
                     render(out, pipeline, driver);
@@ -1870,15 +1870,53 @@ PostProcessManager::BloomPassOutput PostProcessManager::bloom(FrameGraph& fg,
     return bloomPass(fg, input, outFormat, inoutBloomOptions, scale);
 }
 
+FrameGraphId<FrameGraphTexture> PostProcessManager::downscalePass(FrameGraph& fg,
+        FrameGraphId<FrameGraphTexture> input,
+        FrameGraphTexture::Descriptor const& outDesc,
+        bool threshold, float highlight, bool fireflies) noexcept {
+    struct DownsampleData {
+        FrameGraphId<FrameGraphTexture> input;
+        FrameGraphId<FrameGraphTexture> output;
+    };
+    auto& downsamplePass = fg.addPass<DownsampleData>("Downsample",
+            [&](FrameGraph::Builder& builder, auto& data) {
+                data.input = builder.sample(input);
+                data.output = builder.createTexture("Downsample-output", outDesc);
+                builder.declareRenderPass(data.output);
+            },
+            [=](FrameGraphResources const& resources,
+                    auto const& data, DriverApi& driver) {
+                auto const& material = getPostProcessMaterial("bloomDownsample2x");
+                auto* mi = material.getMaterialInstance(mEngine);
+                mi->setParameter("source", resources.getTexture(data.input), {
+                        .filterMag = SamplerMagFilter::LINEAR,
+                        .filterMin = SamplerMinFilter::LINEAR
+                });
+                mi->setParameter("level", 0);
+                mi->setParameter("threshold", threshold ? 1.0f : 0.0f);
+                mi->setParameter("fireflies", fireflies ? 1.0f : 0.0f);
+                mi->setParameter("invHighlight", std::isinf(highlight) ? 0.0f : 1.0f / highlight);
+                commitAndRender(resources.getRenderPassInfo(), material, driver);
+            });
+    return downsamplePass->output;
+}
+
 PostProcessManager::BloomPassOutput PostProcessManager::bloomPass(FrameGraph& fg,
         FrameGraphId<FrameGraphTexture> input, TextureFormat outFormat,
         BloomOptions& inoutBloomOptions, float2 scale) noexcept {
-    // Figure out a good size for the bloom buffer.
-    auto const& desc = fg.getDescriptor(input);
+
+    // Figure out a good size for the bloom buffer. We must use a fixed bloom buffer size so
+    // that the size/strength of the bloom doesn't vary much with the resolution, otherwise
+    // dynamic resolution would affect the bloom effect too much.
+    auto desc = fg.getDescriptor(input);
 
     // width and height after dynamic resolution upscaling
     const float aspect = (float(desc.width) * scale.y) / (float(desc.height) * scale.x);
 
+    // FIXME: don't allow inoutBloomOptions.resolution to be larger than input's resolution
+    //        (avoid upscale) but how does this affect dynamic resolution
+    // FIXME: check what happens on WebGL and intel's processors
+
     // compute the desired bloom buffer size
     float bloomHeight = float(inoutBloomOptions.resolution);
     float bloomWidth  = bloomHeight * aspect;
@@ -1893,38 +1931,73 @@ PostProcessManager::BloomPassOutput PostProcessManager::bloomPass(FrameGraph& fg
         bloomHeight *= inoutBloomOptions.anamorphism;
     }
 
-    // convert back to integer width/height
-    const uint32_t width  = std::max(1u, uint32_t(std::floor(bloomWidth)));
-    const uint32_t height = std::max(1u, uint32_t(std::floor(bloomHeight)));
-
     // we might need to adjust the max # of levels
     const uint32_t major = uint32_t(std::max(bloomWidth,  bloomHeight));
     const uint8_t maxLevels = FTexture::maxLevelCount(major);
     inoutBloomOptions.levels = std::min(inoutBloomOptions.levels, maxLevels);
     inoutBloomOptions.levels = std::min(inoutBloomOptions.levels, kMaxBloomLevels);
 
-    if (2 * width < desc.width || 2 * height < desc.height) {
-        // if we're scaling down by more than 2x, prescale the image with a blit to improve
-        // performance. This is important on mobile/tilers.
-        input = opaqueBlit(fg, input, { 0, 0, desc.width, desc.height }, {
-                .width = std::max(1u, desc.width / 2),
-                .height = std::max(1u, desc.height / 2),
-                .format = outFormat
-        });
+    if (inoutBloomOptions.quality == QualityLevel::LOW) {
+        // In low quality mode, we adjust the bloom buffer size so that both dimensions
+        // have enough exact mip levels. This can slightly affect the aspect ratio causing
+        // some artifacts:
+        // - add some anamorphism (experimentally not visible)
+        // - visible bloom size changes with dynamic resolution in non-homogeneous mode
+        // This allows us to use the 9 sample downsampling filter (instead of 13)
+        // for at least 4 levels. Sizes are floored at 16 so the masking below never yields 0.
+        uint32_t width  = std::max(16u, uint32_t(std::floor(bloomWidth)));
+        uint32_t height = std::max(16u, uint32_t(std::floor(bloomHeight)));
+        width  &= ~((1 << 4) - 1);  // at least 4 levels
+        height &= ~((1 << 4) - 1);
+        bloomWidth  = float(width);
+        bloomHeight = float(height);
+    }
+
+    bool threshold = inoutBloomOptions.threshold;
+
+    while (2 * bloomWidth < float(desc.width) || 2 * bloomHeight < float(desc.height)) {
+        if (inoutBloomOptions.quality == QualityLevel::LOW ||
+            inoutBloomOptions.quality == QualityLevel::MEDIUM) {
+            input = downscalePass(fg, input, {
+                            .width  = (desc.width  = std::max(1u, desc.width  / 2)),
+                            .height = (desc.height = std::max(1u, desc.height / 2)),
+                            .format = outFormat
+                    },
+                    threshold, inoutBloomOptions.highlight, threshold);
+            threshold = false; // we do the thresholding only once during down sampling
+        } else if (inoutBloomOptions.quality == QualityLevel::HIGH ||
+                   inoutBloomOptions.quality == QualityLevel::ULTRA) {
+            // In high quality mode, we increase the size of the bloom buffer such that the
+            // first scaling is less than 2x, and we increase the number of levels accordingly.
+            if (bloomWidth * 2.0f > 2048.0f || bloomHeight * 2.0f > 2048.0f) {
+                // but we can't scale above the h/w guaranteed minspec
+                break;
+            }
+            bloomWidth  *= 2.0f;
+            bloomHeight *= 2.0f;
+            inoutBloomOptions.levels++;
+        }
     }
 
+    // convert back to integer width/height
+    uint32_t const width  = std::max(1u, uint32_t(std::floor(bloomWidth)));
+    uint32_t const height = std::max(1u, uint32_t(std::floor(bloomHeight)));
+
+    input = downscalePass(fg, input,
+            { .width = width, .height = height, .format = outFormat },
+            threshold, inoutBloomOptions.highlight, threshold);
+
     struct BloomPassData {
-        FrameGraphId<FrameGraphTexture> in;
         FrameGraphId<FrameGraphTexture> out;
-        FrameGraphId<FrameGraphTexture> stage;
         uint32_t outRT[kMaxBloomLevels];
-        uint32_t stageRT[kMaxBloomLevels];
     };
 
-    // downsample phase
+    // Creating a mip-chain poses a "feedback" loop problem on some GPUs. We will disable
+    // Bloom on these.
+    // See: https://github.com/google/filament/issues/2338
+
     auto& bloomDownsamplePass = fg.addPass<BloomPassData>("Bloom Downsample",
             [&](FrameGraph::Builder& builder, auto& data) {
-                data.in = builder.sample(input);
                 data.out = builder.createTexture("Bloom Out Texture", {
                         .width = width,
                         .height = height,
@@ -1933,165 +2006,107 @@ PostProcessManager::BloomPassOutput PostProcessManager::bloomPass(FrameGraph& fg
                 });
                 data.out = builder.sample(data.out);
 
-                data.stage = builder.createTexture("Bloom Stage Texture", {
-                        .width = width,
-                        .height = height,
-                        .levels = inoutBloomOptions.levels,
-                        .format = outFormat
-                });
-                data.stage = builder.sample(data.stage);
-
                 for (size_t i = 0; i < inoutBloomOptions.levels; i++) {
                     auto out = builder.createSubresource(data.out, "Bloom Out Texture mip",
                             { .level = uint8_t(i) });
-                    auto stage = builder.createSubresource(data.stage,
-                            "Bloom Stage Texture mip", { .level = uint8_t(i) });
+                    if (i == 0) {
+                        // this causes the last downscale pass above to render into this mip
+                       fg.forwardResource(out, input);
+                    }
                     builder.declareRenderPass(out, &data.outRT[i]);
-                    builder.declareRenderPass(stage, &data.stageRT[i]);
                 }
             },
             [=](FrameGraphResources const& resources,
                     auto const& data, DriverApi& driver) {
 
-                auto hwIn = resources.getTexture(data.in);
+                // TODO: if downsampling is not exactly a multiple of two, use the 13 samples
+                //       filter. This is generally the accepted solution, however, the 13 samples
+                //       filter is not correct either when we don't sample at integer coordinates,
+                //       but it seems to create fewer artifacts.
+                //       A better solution might be to use the filter described in
+                //       Castaño, 2013, "Shadow Mapping Summary Part 1", which is 5x5 filter with
+                //       9 samples, but works at all coordinates.
+
                 auto hwOut = resources.getTexture(data.out);
-                auto hwStage = resources.getTexture(data.stage);
 
-                auto const& material = getPostProcessMaterial("bloomDownsample");
-                auto const* ma = material.getMaterial(mEngine);
+                auto const& material9 = getPostProcessMaterial("bloomDownsample9");
+                auto const& material13 = getPostProcessMaterial("bloomDownsample");
 
-                FMaterialInstance* mis[] = {
-                        ma->createInstance("bloomDownsample-ping"),
-                        ma->createInstance("bloomDownsample-pong"),
-                        ma->createInstance("bloomDownsample-first"),
-                };
+                auto* mi9 = material9.getMaterialInstance(mEngine);
+                auto* mi13 = material13.getMaterialInstance(mEngine);
 
-                mis[0]->setParameter("source", hwOut, {
+                mi9->setParameter("source", hwOut, {
                         .filterMag = SamplerMagFilter::LINEAR,
-                        .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST
-                });
+                        .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST });
 
-                mis[1]->setParameter("source", hwStage, {
+                mi13->setParameter("source", hwOut, {
                         .filterMag = SamplerMagFilter::LINEAR,
-                        .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST
-                });
-                mis[2]->setParameter("source", hwIn, {
-                        .filterMag = SamplerMagFilter::LINEAR,
-                        .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST
-                });
+                        .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST });
 
-                for (auto* mi : mis) {
-                    mi->setParameter("level", 0.0f);
-                    mi->setParameter("threshold", inoutBloomOptions.threshold ? 1.0f : 0.0f);
-                    mi->setParameter("invHighlight", std::isinf(inoutBloomOptions.highlight)
-                            ? 0.0f : 1.0f / inoutBloomOptions.highlight);
-                    mi->commit(driver);
-                }
-
-                const PipelineState pipeline(material.getPipelineState(mEngine));
+                mi9->commit(driver);
+                mi13->commit(driver);
 
-                { // first iteration
-                    auto hwDstRT = resources.getRenderPassInfo(data.outRT[0]);
-                    hwDstRT.params.flags.discardStart = TargetBufferFlags::COLOR;
-                    hwDstRT.params.flags.discardEnd = TargetBufferFlags::NONE;
-                    mis[2]->use(driver);
-                    render(hwDstRT, pipeline, driver);
-                }
+                // PipelineState for both materials should be the same
+                const PipelineState pipeline(material9.getPipelineState(mEngine));
 
                 for (size_t i = 1; i < inoutBloomOptions.levels; i++) {
-                    const size_t parity = 1u - (i & 0x1u);
-                    auto hwDstRT = resources.getRenderPassInfo(parity ? data.outRT[i] : data.stageRT[i]);
+                    auto hwDstRT = resources.getRenderPassInfo(data.outRT[i]);
                     hwDstRT.params.flags.discardStart = TargetBufferFlags::COLOR;
                     hwDstRT.params.flags.discardEnd = TargetBufferFlags::NONE;
-                    mis[parity]->setParameter("level", float(i - 1));
-                    mis[parity]->commit(driver);
-                    mis[parity]->use(driver);
-                    render(hwDstRT, pipeline, driver);
-                }
 
-                for (auto& mi : mis) {
-                    mEngine.destroy(mi);
+                    // if downsampling is a multiple of 2 in each dimension we can use the
+                    // 9 samples filter.
+                    auto vp = resources.getRenderPassInfo(data.outRT[i-1]).params.viewport;
+                    auto* const mi = (vp.width & 1 || vp.height & 1) ? mi13 : mi9;
+                    mi->use(driver);
+                    driver.setMinMaxLevels(hwOut, i - 1, i - 1); // this offsets baseLevel to i-1
+                    render(hwDstRT, pipeline, driver);
                 }
+                driver.setMinMaxLevels(hwOut, 0, inoutBloomOptions.levels - 1);
             });
 
-    FrameGraphId<FrameGraphTexture> output = bloomDownsamplePass->out;
-    FrameGraphId<FrameGraphTexture> stage = bloomDownsamplePass->stage;
+    // output of bloom downsample pass becomes input of next (flare) pass
+    input = bloomDownsamplePass->out;
 
     // flare pass
-    auto flare = flarePass(fg, bloomDownsamplePass->out, width, height, outFormat, inoutBloomOptions);
+    auto flare = flarePass(fg, input, width, height, outFormat, inoutBloomOptions);
 
-    // upsample phase
     auto& bloomUpsamplePass = fg.addPass<BloomPassData>("Bloom Upsample",
             [&](FrameGraph::Builder& builder, auto& data) {
-                data.out = builder.sample(output);
-                data.stage = builder.sample(stage);
+                data.out = builder.sample(input);
                 for (size_t i = 0; i < inoutBloomOptions.levels; i++) {
                     auto out = builder.createSubresource(data.out, "Bloom Out Texture mip",
                             { .level = uint8_t(i) });
-                    auto staging = builder.createSubresource(data.stage,
-                            "Bloom Stage Texture mip", { .level = uint8_t(i) });
                     builder.declareRenderPass(out, &data.outRT[i]);
-                    builder.declareRenderPass(staging, &data.stageRT[i]);
                 }
             },
             [=](FrameGraphResources const& resources, auto const& data, DriverApi& driver) {
-
                 auto hwOut = resources.getTexture(data.out);
-                auto hwStage = resources.getTexture(data.stage);
                 auto const& outDesc = resources.getDescriptor(data.out);
 
                 auto const& material = getPostProcessMaterial("bloomUpsample");
-                auto const* ma = material.getMaterial(mEngine);
-
-                FMaterialInstance* mis[] = {
-                        ma->createInstance("bloomUpsample-ping"),
-                        ma->createInstance("bloomUpsample-pong"),
-                };
-
-                mis[0]->setParameter("source", hwOut, {
+                auto* mi = material.getMaterialInstance(mEngine);
+                mi->setParameter("source", hwOut, {
                         .filterMag = SamplerMagFilter::LINEAR,
-                        .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST
-                });
-
-                mis[1]->setParameter("source", hwStage, {
-                        .filterMag = SamplerMagFilter::LINEAR,
-                        .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST
-                });
+                        .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST});
+                mi->use(driver);
 
                 PipelineState pipeline(material.getPipelineState(mEngine));
                 pipeline.rasterState.blendFunctionSrcRGB = BlendFunction::ONE;
                 pipeline.rasterState.blendFunctionDstRGB = BlendFunction::ONE;
 
                 for (size_t j = inoutBloomOptions.levels, i = j - 1; i >= 1; i--, j++) {
-                    const size_t parity = 1u - (j % 2u);
-
-                    auto hwDstRT = resources.getRenderPassInfo(
-                            parity ? data.outRT[i - 1] : data.stageRT[i - 1]);
+                    auto hwDstRT = resources.getRenderPassInfo(data.outRT[i - 1]);
                     hwDstRT.params.flags.discardStart = TargetBufferFlags::NONE; // b/c we'll blend
                     hwDstRT.params.flags.discardEnd = TargetBufferFlags::NONE;
-
                     auto w = FTexture::valueForLevel(i - 1, outDesc.width);
                     auto h = FTexture::valueForLevel(i - 1, outDesc.height);
-                    mis[parity]->setParameter("resolution", float4{ w, h, 1.0f / w, 1.0f / h });
-                    mis[parity]->setParameter("level", float(i));
-                    mis[parity]->commit(driver);
-                    mis[parity]->use(driver);
+                    mi->setParameter("resolution", float4{ w, h, 1.0f / w, 1.0f / h });
+                    mi->commit(driver);
+                    driver.setMinMaxLevels(hwOut, i, i); // this offsets baseLevel to i
                     render(hwDstRT, pipeline, driver);
                 }
-
-                for (auto& mi : mis) {
-                    mEngine.destroy(mi);
-                }
-
-                // Every other level is missing from the out texture, so we need to do
-                // blits to complete the chain.
-                const SamplerMagFilter filter = SamplerMagFilter::NEAREST;
-                for (size_t i = 1; i < inoutBloomOptions.levels; i += 2) {
-                    auto in = resources.getRenderPassInfo(data.stageRT[i]);
-                    auto out = resources.getRenderPassInfo(data.outRT[i]);
-                    driver.blit(TargetBufferFlags::COLOR, out.target, out.params.viewport,
-                            in.target, in.params.viewport, filter);
-                }
+                driver.setMinMaxLevels(hwOut, 0, inoutBloomOptions.levels - 1);
             });
 
     return { bloomUpsamplePass->out, flare };
@@ -3036,7 +3051,6 @@ FrameGraphId<FrameGraphTexture> PostProcessManager::vsmMipmapPass(FrameGraph& fg
                         .filterMag = SamplerMagFilter::LINEAR,
                         .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST
                 });
-                mi->setParameter("level", uint32_t(level));
                 mi->setParameter("layer", uint32_t(layer));
                 mi->setParameter("uvscale", 1.0f / float(dim));
                 mi->commit(driver);
diff --git a/filament/src/PostProcessManager.h b/filament/src/PostProcessManager.h
index a379da283fe..ca65c70c74e 100644
--- a/filament/src/PostProcessManager.h
+++ b/filament/src/PostProcessManager.h
@@ -292,6 +292,11 @@ class PostProcessManager {
             FrameGraphId<FrameGraphTexture> input, backend::TextureFormat outFormat,
             BloomOptions& inoutBloomOptions, math::float2 scale) noexcept;
 
+    FrameGraphId<FrameGraphTexture> downscalePass(FrameGraph& fg,
+            FrameGraphId<FrameGraphTexture> input,
+            FrameGraphTexture::Descriptor const& outDesc,
+            bool threshold, float highlight, bool fireflies) noexcept;
+
     void commitAndRender(FrameGraphResources::RenderPassInfo const& out,
             PostProcessMaterial const& material, uint8_t variant,
             backend::DriverApi& driver) const noexcept;
diff --git a/filament/src/ShadowMap.cpp b/filament/src/ShadowMap.cpp
index f3c250e2d9c..a709d6c44e5 100644
--- a/filament/src/ShadowMap.cpp
+++ b/filament/src/ShadowMap.cpp
@@ -343,8 +343,9 @@ ShadowMap::ShaderParameters ShadowMap::updateDirectional(FEngine& engine,
         // For directional lights, we further constraint the light frustum to the
         // intersection of the shadow casters & shadow receivers in light-space.
         // ** This relies on the 1-texel shadow map border **
+
         if (engine.debug.shadowmap.focus_shadowcasters) {
-            intersectWithShadowCasters(lsLightFrustumBounds, WLMpMv, wsShadowCastersVolume);
+            intersectWithShadowCasters(&lsLightFrustumBounds, WLMpMv, wsShadowCastersVolume);
         }
         if (UTILS_UNLIKELY((lsLightFrustumBounds.min.x >= lsLightFrustumBounds.max.x) ||
                            (lsLightFrustumBounds.min.y >= lsLightFrustumBounds.max.y))) {
@@ -356,8 +357,8 @@ ShadowMap::ShaderParameters ShadowMap::updateDirectional(FEngine& engine,
         assert_invariant(lsLightFrustumBounds.min.x < lsLightFrustumBounds.max.x);
         assert_invariant(lsLightFrustumBounds.min.y < lsLightFrustumBounds.max.y);
 
-        s = 2.0f / float2(bounds.max.xy - bounds.min.xy);
-        o = float2(bounds.max.xy + bounds.min.xy) * 0.5f;
+        s = 2.0f / float2(lsLightFrustumBounds.max.xy - lsLightFrustumBounds.min.xy);
+        o = float2(lsLightFrustumBounds.max.xy + lsLightFrustumBounds.min.xy) * 0.5f;
 
         // TODO: we could quantize `s` here to give some stability when lispsm is disabled,
         //       however, the quantization paramater should probably be user settable.
@@ -767,20 +768,12 @@ Aabb ShadowMap::compute2DBounds(const mat4f& lightView,
     return bounds;
 }
 
-Aabb ShadowMap::compute2DBounds(const mat4f& lightView, float4 const& sphere) noexcept {
-    // this assumes a rigid body transform
-    float4 s;
-    s.xyz = (lightView * float4{sphere.xyz, 1.0f}).xyz;
-    s.w = sphere.w;
-    return Aabb{s.xyz - s.w, s.xyz + s.w};
-}
-
-void ShadowMap::intersectWithShadowCasters(Aabb& UTILS_RESTRICT lightFrustum,
+void ShadowMap::intersectWithShadowCasters(Aabb* UTILS_RESTRICT lightFrustum,
         mat4f const& lightView, Aabb const& wsShadowCastersVolume) noexcept {
 
     // construct the Focus transform (scale + offset)
-    const float2 s = 2.0f / float2(lightFrustum.max.xy - lightFrustum.min.xy);
-    const float2 o =   -s * float2(lightFrustum.max.xy + lightFrustum.min.xy) * 0.5f;
+    const float2 s = 2.0f / float2(lightFrustum->max.xy - lightFrustum->min.xy);
+    const float2 o =   -s * float2(lightFrustum->max.xy + lightFrustum->min.xy) * 0.5f;
     const mat4f F(mat4f::row_major_init {
             s.x,  0.0f, 0.0f,    o.x,
             0.0f,  s.y, 0.0f,    o.y,
@@ -806,8 +799,8 @@ void ShadowMap::intersectWithShadowCasters(Aabb& UTILS_RESTRICT lightFrustum,
     Aabb const box = compute2DBounds(lightView, wsClippedShadowCasterVolumeVertices.data(), vertexCount);
 
     // intersect shadow-caster and current light frustum bounds
-    lightFrustum.min.xy = max(box.min.xy, lightFrustum.min.xy);
-    lightFrustum.max.xy = min(box.max.xy, lightFrustum.max.xy);
+    lightFrustum->min.xy = max(box.min.xy, lightFrustum->min.xy);
+    lightFrustum->max.xy = min(box.max.xy, lightFrustum->max.xy);
 }
 
 void ShadowMap::computeFrustumCorners(float3* UTILS_RESTRICT out,
diff --git a/filament/src/ShadowMap.h b/filament/src/ShadowMap.h
index f77ff6bfe83..5aee0163c6f 100644
--- a/filament/src/ShadowMap.h
+++ b/filament/src/ShadowMap.h
@@ -240,10 +240,8 @@ class ShadowMap {
     static inline Aabb compute2DBounds(const math::mat4f& lightView,
             math::float3 const* wsVertices, size_t count) noexcept;
 
-    static inline Aabb compute2DBounds(const math::mat4f& lightView,
-            math::float4 const& sphere) noexcept;
-
-    static inline void intersectWithShadowCasters(Aabb& lightFrustum, const math::mat4f& lightView,
+    static inline void intersectWithShadowCasters(Aabb* lightFrustum,
+            math::mat4f const& lightView,
             Aabb const& wsShadowCastersVolume) noexcept;
 
     static inline math::float2 computeNearFarOfWarpSpace(math::mat4f const& lightView,
diff --git a/filament/src/details/View.cpp b/filament/src/details/View.cpp
index fc3c42e5681..6e1d6252aaa 100644
--- a/filament/src/details/View.cpp
+++ b/filament/src/details/View.cpp
@@ -1080,8 +1080,9 @@ void FView::setSoftShadowOptions(SoftShadowOptions options) noexcept {
 
 void FView::setBloomOptions(BloomOptions options) noexcept {
     options.dirtStrength = math::saturate(options.dirtStrength);
-    options.levels = math::clamp(options.levels, uint8_t(1), uint8_t(11));
-    options.resolution = math::clamp(options.resolution, 1u << options.levels, 2048u);
+    options.resolution = math::clamp(options.resolution, 2u, 2048u);
+    options.levels = math::clamp(options.levels, uint8_t(1),
+            FTexture::maxLevelCount(options.resolution));
     options.anamorphism = math::clamp(options.anamorphism, 1.0f/32.0f, 32.0f);
     options.highlight = std::max(10.0f, options.highlight);
     mBloomOptions = options;
diff --git a/filament/src/materials/bloom/bloomDownsample.mat b/filament/src/materials/bloom/bloomDownsample.mat
index a05a0ca37f8..6553f767a85 100644
--- a/filament/src/materials/bloom/bloomDownsample.mat
+++ b/filament/src/materials/bloom/bloomDownsample.mat
@@ -5,18 +5,6 @@ material {
             type : sampler2d,
             name : source,
             precision: medium
-        },
-        {
-            type : float,
-            name : level
-        },
-        {
-            type : float,
-            name : threshold
-        },
-        {
-            type : float,
-            name : invHighlight
         }
     ],
     variables : [
@@ -37,85 +25,43 @@ fragment {
 
     void dummy(){}
 
-    void threshold(inout vec3 c) {
-        // threshold everything below 1.0
-        c = max(vec3(0.0), c - 1.0);
-        // crush everything above 1
-        highp float f = max3(c);
-        c *= 1.0 / (1.0 + f * materialParams.invHighlight);
-    }
-
     vec3 box4x4(vec3 s0, vec3 s1, vec3 s2, vec3 s3) {
         return (s0 + s1 + s2 + s3) * 0.25;
     }
 
-    vec3 box4x4Reinhard(vec3 s0, vec3 s1, vec3 s2, vec3 s3) {
-        float w0 = 1.0 / (1.0 + max3(s0));
-        float w1 = 1.0 / (1.0 + max3(s1));
-        float w2 = 1.0 / (1.0 + max3(s2));
-        float w3 = 1.0 / (1.0 + max3(s3));
-        return (s0 * w0 + s1 * w1 + s2 * w2 + s3 * w3) * (1.0 / (w0 + w1 + w2 + w3));
-    }
-
     void postProcess(inout PostProcessInputs postProcess) {
-        float lod = materialParams.level;
         highp vec2 uv = variable_vertex.xy;
 
         // see SIGGRAPH 2014: Advances in Real-Time Rendering
         //     "Next Generation Post-Processing in Call of Duty Advanced Warfare"
         //      Jorge Jimenez
-        vec3 c = textureLod(materialParams_source, uv, lod).rgb;
+        vec3 c = textureLod(materialParams_source, uv, 0.0).rgb;
 
         // The offsets below are in "source" texture space
-        vec3 lt  = textureLodOffset(materialParams_source, uv, lod, ivec2(-1, -1)).rgb;
-        vec3 rt  = textureLodOffset(materialParams_source, uv, lod, ivec2( 1, -1)).rgb;
-        vec3 rb  = textureLodOffset(materialParams_source, uv, lod, ivec2( 1,  1)).rgb;
-        vec3 lb  = textureLodOffset(materialParams_source, uv, lod, ivec2(-1,  1)).rgb;
+        vec3 lt  = textureLodOffset(materialParams_source, uv, 0.0, ivec2(-1, -1)).rgb;
+        vec3 rt  = textureLodOffset(materialParams_source, uv, 0.0, ivec2( 1, -1)).rgb;
+        vec3 rb  = textureLodOffset(materialParams_source, uv, 0.0, ivec2( 1,  1)).rgb;
+        vec3 lb  = textureLodOffset(materialParams_source, uv, 0.0, ivec2(-1,  1)).rgb;
 
-        vec3 lt2 = textureLodOffset(materialParams_source, uv, lod, ivec2(-2, -2)).rgb;
-        vec3 rt2 = textureLodOffset(materialParams_source, uv, lod, ivec2( 2, -2)).rgb;
-        vec3 rb2 = textureLodOffset(materialParams_source, uv, lod, ivec2( 2,  2)).rgb;
-        vec3 lb2 = textureLodOffset(materialParams_source, uv, lod, ivec2(-2,  2)).rgb;
+        vec3 lt2 = textureLodOffset(materialParams_source, uv, 0.0, ivec2(-2, -2)).rgb;
+        vec3 rt2 = textureLodOffset(materialParams_source, uv, 0.0, ivec2( 2, -2)).rgb;
+        vec3 rb2 = textureLodOffset(materialParams_source, uv, 0.0, ivec2( 2,  2)).rgb;
+        vec3 lb2 = textureLodOffset(materialParams_source, uv, 0.0, ivec2(-2,  2)).rgb;
 
-        vec3 l   = textureLodOffset(materialParams_source, uv, lod, ivec2(-2,  0)).rgb;
-        vec3 t   = textureLodOffset(materialParams_source, uv, lod, ivec2( 0, -2)).rgb;
-        vec3 r   = textureLodOffset(materialParams_source, uv, lod, ivec2( 2,  0)).rgb;
-        vec3 b   = textureLodOffset(materialParams_source, uv, lod, ivec2( 0,  2)).rgb;
+        vec3 l   = textureLodOffset(materialParams_source, uv, 0.0, ivec2(-2,  0)).rgb;
+        vec3 t   = textureLodOffset(materialParams_source, uv, 0.0, ivec2( 0, -2)).rgb;
+        vec3 r   = textureLodOffset(materialParams_source, uv, 0.0, ivec2( 2,  0)).rgb;
+        vec3 b   = textureLodOffset(materialParams_source, uv, 0.0, ivec2( 0,  2)).rgb;
 
         // five h4x4 boxes
         vec3 c0, c1;
 
-        if (materialParams.level <= 0.5) {
-            if (materialParams.threshold > 0.0) {
-                // Threshold the first level blur
-                threshold(c);
-                threshold(lt);
-                threshold(rt);
-                threshold(rb);
-                threshold(lb);
-                threshold(lt2);
-                threshold(rt2);
-                threshold(rb2);
-                threshold(lb2);
-                threshold(l);
-                threshold(t);
-                threshold(r);
-                threshold(b);
-            }
-            // Also apply fireflies (flickering) filtering
-            c0  = box4x4Reinhard(lt, rt, rb, lb);
-            c1  = box4x4Reinhard(c, l, t, lt2);
-            c1 += box4x4Reinhard(c, r, t, rt2);
-            c1 += box4x4Reinhard(c, r, b, rb2);
-            c1 += box4x4Reinhard(c, l, b, lb2);
-        } else {
-            // common case
-            c0  = box4x4(lt, rt, rb, lb);
-            c1  = box4x4(c, l, t, lt2);
-            c1 += box4x4(c, r, t, rt2);
-            c1 += box4x4(c, r, b, rb2);
-            c1 += box4x4(c, l, b, lb2);
-        }
+        // unweighted box filters (no threshold or fireflies handling in this material)
+        c0  = box4x4(lt, rt, rb, lb);
+        c1  = box4x4(c, l, t, lt2);
+        c1 += box4x4(c, r, t, rt2);
+        c1 += box4x4(c, r, b, rb2);
+        c1 += box4x4(c, l, b, lb2);
 
         // weighted average of the five boxes
         postProcess.color.rgb = c0 * 0.5 + c1 * 0.125;
diff --git a/filament/src/materials/bloom/bloomDownsample2x.mat b/filament/src/materials/bloom/bloomDownsample2x.mat
new file mode 100644
index 00000000000..3af1b28ba4f
--- /dev/null
+++ b/filament/src/materials/bloom/bloomDownsample2x.mat
@@ -0,0 +1,98 @@
+material {
+    name : bloomDownsample2x,
+    parameters : [
+        {
+            type : sampler2d,
+            name : source,
+            precision: medium
+        },
+        {
+            type : float,
+            name : level
+        },
+        {
+            type : float,
+            name : threshold
+        },
+        {
+            type : float,
+            name : fireflies
+        },
+        {
+            type : float,
+            name : invHighlight
+        }
+    ],
+    variables : [
+        vertex
+    ],
+    domain : postprocess,
+    depthWrite : false,
+    depthCulling : false
+}
+
+vertex {
+    void postProcessVertex(inout PostProcessVertexInputs postProcess) {
+        postProcess.vertex.xy = uvToRenderTargetUV(postProcess.normalizedUV);
+    }
+}
+
+fragment {
+
+    void dummy(){}
+
+    void threshold(inout vec3 c) {
+        // threshold everything below 1.0
+        c = max(vec3(0.0), c - 1.0);
+        // crush everything above 1
+        highp float f = max3(c);
+        c *= 1.0 / (1.0 + f * materialParams.invHighlight);
+    }
+
+    void postProcess(inout PostProcessInputs postProcess) {
+        float lod = materialParams.level;
+
+        highp vec2 size = vec2(textureSize(materialParams_source, int(lod)));
+        highp vec2 texelSize = vec2(1.0) / size;
+
+        // Castaño, 2013, "Shadow Mapping Summary Part 1"
+        // 3x3 gaussian filter with 4 linear samples
+        vec2 offset = vec2(0.5);
+        highp vec2 uv = (variable_vertex.xy * size) + offset;
+        highp vec2 base = (floor(uv) - offset) * texelSize;
+        highp vec2 st = fract(uv);
+        vec2 uw = vec2(3.0 - 2.0 * st.x, 1.0 + 2.0 * st.x);
+        vec2 vw = vec2(3.0 - 2.0 * st.y, 1.0 + 2.0 * st.y);
+        highp vec2 u = vec2((2.0 - st.x) / uw.x - 1.0, st.x / uw.y + 1.0) * texelSize.x;
+        highp vec2 v = vec2((2.0 - st.y) / vw.x - 1.0, st.y / vw.y + 1.0) * texelSize.y;
+        vec3 c0 = textureLod(materialParams_source, base + vec2(u.x, v.x), lod).rgb;
+        vec3 c1 = textureLod(materialParams_source, base + vec2(u.y, v.x), lod).rgb;
+        vec3 c2 = textureLod(materialParams_source, base + vec2(u.x, v.y), lod).rgb;
+        vec3 c3 = textureLod(materialParams_source, base + vec2(u.y, v.y), lod).rgb;
+
+        float w0 = uw.x * vw.x * (1.0 / 16.0);
+        float w1 = uw.y * vw.x * (1.0 / 16.0);
+        float w2 = uw.x * vw.y * (1.0 / 16.0);
+        float w3 = uw.y * vw.y * (1.0 / 16.0);
+
+        if (materialParams.fireflies > 0.0) {
+            w0 /= (1.0 + max3(c0));
+            w1 /= (1.0 + max3(c1));
+            w2 /= (1.0 + max3(c2));
+            w3 /= (1.0 + max3(c3));
+            float w = 1.0 / (w0 + w1 + w2 + w3);
+            w0 *= w;
+            w1 *= w;
+            w2 *= w;
+            w3 *= w;
+        }
+
+        vec3 c = c0 * w0 + c1 * w1 + c2 * w2 + c3 * w3;
+
+        if (materialParams.threshold > 0.0) {
+            threshold(c);
+        }
+
+        postProcess.color.rgb = c;
+    }
+}
diff --git a/filament/src/materials/bloom/bloomDownsample9.mat b/filament/src/materials/bloom/bloomDownsample9.mat
new file mode 100644
index 00000000000..7a5fa47b3ec
--- /dev/null
+++ b/filament/src/materials/bloom/bloomDownsample9.mat
@@ -0,0 +1,65 @@
+material {
+    name : bloomDownsample9,
+    parameters : [
+        {
+            type : sampler2d,
+            name : source,
+            precision: medium
+        }
+    ],
+    variables : [
+        vertex
+    ],
+    domain : postprocess,
+    depthWrite : false,
+    depthCulling : false
+}
+
+vertex {
+    void postProcessVertex(inout PostProcessVertexInputs postProcess) {
+        postProcess.vertex.xy = uvToRenderTargetUV(postProcess.normalizedUV);
+    }
+}
+
+fragment {
+
+    void dummy(){}
+
+    // see https://www.shadertoy.com/view/cslczj
+    // 6x6 downsampling kernel implemented via 9 bilinear samples
+
+    void postProcess(inout PostProcessInputs postProcess) {
+        highp vec2 uv = variable_vertex.xy;
+        highp vec2 size = vec2(1.0) / vec2(textureSize(materialParams_source, 0));
+
+        float o  = 1.5 + 0.261629;
+        float wa = 7.46602 / 32.0;
+        float wb = 1.0 - wa * 2.0;
+        float wab = wa * wb;
+        float waa = wa * wa;
+        float wbb = wb * wb;
+
+        size *= o;
+
+        vec3 c  = textureLod(materialParams_source, uv + vec2(0.0)            , 0.0).rgb;
+        vec3 l  = textureLod(materialParams_source, uv + vec2(-size.x,    0.0), 0.0).rgb;
+        vec3 r  = textureLod(materialParams_source, uv + vec2( size.x,    0.0), 0.0).rgb;
+        vec3 b  = textureLod(materialParams_source, uv + vec2(    0.0,-size.y), 0.0).rgb;
+        vec3 t  = textureLod(materialParams_source, uv + vec2(    0.0, size.y), 0.0).rgb;
+        vec3 lb = textureLod(materialParams_source, uv + vec2(-size.x,-size.y), 0.0).rgb;
+        vec3 rb = textureLod(materialParams_source, uv + vec2( size.x,-size.y), 0.0).rgb;
+        vec3 lt = textureLod(materialParams_source, uv + vec2(-size.x, size.y), 0.0).rgb;
+        vec3 rt = textureLod(materialParams_source, uv + vec2( size.x, size.y), 0.0).rgb;
+
+        postProcess.color.rgb =
+                (c * wbb +
+                (l * wab +
+                (r * wab +
+                (b * wab +
+                (t * wab +
+                (lb * waa +
+                (rb * waa +
+                (lt * waa +
+                (rt * waa)))))))));
+    }
+}
diff --git a/filament/src/materials/bloom/bloomUpsample.mat b/filament/src/materials/bloom/bloomUpsample.mat
index b67dc160c8e..9b11a60524a 100644
--- a/filament/src/materials/bloom/bloomUpsample.mat
+++ b/filament/src/materials/bloom/bloomUpsample.mat
@@ -10,10 +10,6 @@ material {
             type : float4,
             name : resolution,
             precision: high
-        },
-        {
-            type : float,
-            name : level
         }
     ],
     variables : [
@@ -34,16 +30,15 @@ fragment {
     void dummy(){}
 
     void postProcess(inout PostProcessInputs postProcess) {
-        float lod = materialParams.level;
         highp vec2 uv = variable_vertex.xy;
 
 #if FILAMENT_QUALITY < FILAMENT_QUALITY_HIGH
         highp vec4 d = vec4(materialParams.resolution.zw, -materialParams.resolution.zw) * 0.5;
         vec3 c;
-        c  = textureLod(materialParams_source, uv + d.zw, lod).rgb;
-        c += textureLod(materialParams_source, uv + d.xw, lod).rgb;
-        c += textureLod(materialParams_source, uv + d.xy, lod).rgb;
-        c += textureLod(materialParams_source, uv + d.zy, lod).rgb;
+        c  = textureLod(materialParams_source, uv + d.zw, 0.0).rgb;
+        c += textureLod(materialParams_source, uv + d.xw, 0.0).rgb;
+        c += textureLod(materialParams_source, uv + d.xy, 0.0).rgb;
+        c += textureLod(materialParams_source, uv + d.zy, 0.0).rgb;
         postProcess.color.rgb = c * 0.25;
 #else
         // see SIGGRAPH 2014: Advances in Real-Time Rendering
@@ -52,15 +47,15 @@ fragment {
         const float radius = 1.0;
         highp vec4 d = vec4(materialParams.resolution.zw, -materialParams.resolution.zw) * radius;
         vec3 c0, c1;
-        c0  = textureLod(materialParams_source, uv + d.zw, lod).rgb;
-        c0 += textureLod(materialParams_source, uv + d.xw, lod).rgb;
-        c0 += textureLod(materialParams_source, uv + d.xy, lod).rgb;
-        c0 += textureLod(materialParams_source, uv + d.zy, lod).rgb;
-        c0 += 4.0 * textureLod(materialParams_source, uv, lod).rgb;
-        c1  = textureLod(materialParams_source, uv + vec2(d.z,  0.0), lod).rgb;
-        c1 += textureLod(materialParams_source, uv + vec2(0.0,  d.w), lod).rgb;
-        c1 += textureLod(materialParams_source, uv + vec2(d.x,  0.0), lod).rgb;
-        c1 += textureLod(materialParams_source, uv + vec2( 0.0, d.y), lod).rgb;
+        c0  = textureLod(materialParams_source, uv + d.zw, 0.0).rgb;
+        c0 += textureLod(materialParams_source, uv + d.xw, 0.0).rgb;
+        c0 += textureLod(materialParams_source, uv + d.xy, 0.0).rgb;
+        c0 += textureLod(materialParams_source, uv + d.zy, 0.0).rgb;
+        c0 += 4.0 * textureLod(materialParams_source, uv, 0.0).rgb;
+        c1  = textureLod(materialParams_source, uv + vec2(d.z,  0.0), 0.0).rgb;
+        c1 += textureLod(materialParams_source, uv + vec2(0.0,  d.w), 0.0).rgb;
+        c1 += textureLod(materialParams_source, uv + vec2(d.x,  0.0), 0.0).rgb;
+        c1 += textureLod(materialParams_source, uv + vec2( 0.0, d.y), 0.0).rgb;
         postProcess.color.rgb = (c0 + 2.0 * c1) * (1.0 / 16.0);
 #endif
     }
diff --git a/filament/src/materials/dof/dofMipmap.mat b/filament/src/materials/dof/dofMipmap.mat
index b22492f463b..9c285309b66 100644
--- a/filament/src/materials/dof/dofMipmap.mat
+++ b/filament/src/materials/dof/dofMipmap.mat
@@ -11,10 +11,6 @@ material {
             name : coc,
             precision: medium
         },
-        {
-            type : int,
-            name : mip
-        },
         {
             type : float,
             name : weightScale
@@ -65,19 +61,18 @@ void postProcess(inout PostProcessInputs postProcess) {
 
     // the bilateral weights need to be scaled by to match the lower resolution
     float weightScale = materialParams.weightScale;
-    float mip = float(materialParams.mip);
 
-    vec4 s01 = textureLodOffset(materialParams_color, uv, mip, ivec2(0, 1));
-    vec4 s11 = textureLodOffset(materialParams_color, uv, mip, ivec2(1, 1));
-    vec4 s10 = textureLodOffset(materialParams_color, uv, mip, ivec2(1, 0));
-    vec4 s00 = textureLodOffset(materialParams_color, uv, mip, ivec2(0, 0));
+    vec4 s01 = textureLodOffset(materialParams_color, uv, 0.0, ivec2(0, 1));
+    vec4 s11 = textureLodOffset(materialParams_color, uv, 0.0, ivec2(1, 1));
+    vec4 s10 = textureLodOffset(materialParams_color, uv, 0.0, ivec2(1, 0));
+    vec4 s00 = textureLodOffset(materialParams_color, uv, 0.0, ivec2(0, 0));
 
     // fetch the 4 corresponding CoC (textureGather with LOD doesn't exist)
     vec4 c;
-    c[0] = textureLodOffset(materialParams_coc, uv, mip, ivec2(0, 1)).r;
-    c[1] = textureLodOffset(materialParams_coc, uv, mip, ivec2(1, 1)).r;
-    c[2] = textureLodOffset(materialParams_coc, uv, mip, ivec2(1, 0)).r;
-    c[3] = textureLodOffset(materialParams_coc, uv, mip, ivec2(0, 0)).r;
+    c[0] = textureLodOffset(materialParams_coc, uv, 0.0, ivec2(0, 1)).r;
+    c[1] = textureLodOffset(materialParams_coc, uv, 0.0, ivec2(1, 1)).r;
+    c[2] = textureLodOffset(materialParams_coc, uv, 0.0, ivec2(1, 0)).r;
+    c[3] = textureLodOffset(materialParams_coc, uv, 0.0, ivec2(0, 0)).r;
 
     float outCoc = downsampleCoC(c);
     vec4 w = downsampleCocWeights(c, outCoc, weightScale);
diff --git a/filament/src/materials/ssao/mipmapDepth.mat b/filament/src/materials/ssao/mipmapDepth.mat
index 74b3835e290..1fb36ab5fe3 100644
--- a/filament/src/materials/ssao/mipmapDepth.mat
+++ b/filament/src/materials/ssao/mipmapDepth.mat
@@ -5,10 +5,6 @@ material {
             type : sampler2d,
             name : depth,
             precision: high
-        },
-        {
-            type : int,
-            name : level
         }
     ],
     variables : [
@@ -30,9 +26,8 @@ fragment {
     // We use a rotated grid sub-sample as it's cheap and gives good results
     // See Scalable Ambient Obscurance by McGuire and al.
     void postProcess(inout PostProcessInputs postProcess) {
-        int level = materialParams.level;
         ivec2 icoord = ivec2(gl_FragCoord.xy);
         postProcess.depth = texelFetch(materialParams_depth,
-                2 * icoord + ivec2(icoord.y & 1, icoord.x & 1), level).r;
+                2 * icoord + ivec2(icoord.y & 1, icoord.x & 1), 0).r;
     }
 }
diff --git a/filament/src/materials/vsmMipmap.mat b/filament/src/materials/vsmMipmap.mat
index 70e65b8f344..5047447bf28 100644
--- a/filament/src/materials/vsmMipmap.mat
+++ b/filament/src/materials/vsmMipmap.mat
@@ -10,10 +10,6 @@ material {
             type : int,
             name : layer
         },
-        {
-            type : int,
-            name : level
-        },
         {
             type : float,
             name : uvscale
@@ -36,7 +32,6 @@ fragment {
     void postProcess(inout PostProcessInputs postProcess) {
         highp vec2 uv = gl_FragCoord.xy * materialParams.uvscale;
         postProcess.color = textureLod(materialParams_color,
-                vec3(uv, materialParams.layer),
-                float(materialParams.level));
+                vec3(uv, materialParams.layer), 0.0);
     }
 }
diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec
index 728c6bc5aa7..eaac850ba15 100644
--- a/ios/CocoaPods/Filament.podspec
+++ b/ios/CocoaPods/Filament.podspec
@@ -1,12 +1,12 @@
 Pod::Spec.new do |spec|
   spec.name = "Filament"
-  spec.version = "1.43.0"
+  spec.version = "1.43.1"
   spec.license = { :type => "Apache 2.0", :file => "LICENSE" }
   spec.homepage = "https://google.github.io/filament"
   spec.authors = "Google LLC."
   spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL."
   spec.platform = :ios, "11.0"
-  spec.source = { :http => "https://github.com/google/filament/releases/download/v1.43.0/filament-v1.43.0-ios.tgz" }
+  spec.source = { :http => "https://github.com/google/filament/releases/download/v1.43.1/filament-v1.43.1-ios.tgz" }
 
   # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon.
   spec.pod_target_xcconfig = {
diff --git a/libs/viewer/src/Settings_generated.cpp b/libs/viewer/src/Settings_generated.cpp
index f9cbbdff6fc..1017b7aa618 100644
--- a/libs/viewer/src/Settings_generated.cpp
+++ b/libs/viewer/src/Settings_generated.cpp
@@ -249,6 +249,8 @@ int parse(jsmntok_t const* tokens, int i, const char* jsonChunk, BloomOptions* o
             i = parse(tokens, i + 1, jsonChunk, &out->enabled);
         } else if (compare(tok, jsonChunk, "highlight") == 0) {
             i = parse(tokens, i + 1, jsonChunk, &out->highlight);
+        } else if (compare(tok, jsonChunk, "quality") == 0) {
+            i = parse(tokens, i + 1, jsonChunk, &out->quality);
         } else if (compare(tok, jsonChunk, "lensFlare") == 0) {
             i = parse(tokens, i + 1, jsonChunk, &out->lensFlare);
         } else if (compare(tok, jsonChunk, "starburst") == 0) {
@@ -291,6 +293,7 @@ std::ostream& operator<<(std::ostream& out, const BloomOptions& in) {
         << "\"threshold\": " << to_string(in.threshold) << ",\n"
         << "\"enabled\": " << to_string(in.enabled) << ",\n"
         << "\"highlight\": " << (in.highlight) << ",\n"
+        << "\"quality\": " << (in.quality) << ",\n"
         << "\"lensFlare\": " << to_string(in.lensFlare) << ",\n"
         << "\"starburst\": " << to_string(in.starburst) << ",\n"
         << "\"chromaticAberration\": " << (in.chromaticAberration) << ",\n"
diff --git a/libs/viewer/src/ViewerGui.cpp b/libs/viewer/src/ViewerGui.cpp
index 6befd6e264f..70fc2a4ac48 100644
--- a/libs/viewer/src/ViewerGui.cpp
+++ b/libs/viewer/src/ViewerGui.cpp
@@ -777,6 +777,10 @@ void ViewerGui::updateUserInterface() {
         ImGui::SliderInt("Levels", &levels, 3, 11);
         mSettings.view.bloom.levels = levels;
 
+        int quality = (int) mSettings.view.bloom.quality;
+        ImGui::SliderInt("Bloom Quality", &quality, 0, 3);
+        mSettings.view.bloom.quality = (View::QualityLevel) quality;
+
         ImGui::Checkbox("Lens Flare", &mSettings.view.bloom.lensFlare);
     }
 
diff --git a/web/filament-js/extensions_generated.js b/web/filament-js/extensions_generated.js
index ae77408a84b..fb26545133e 100644
--- a/web/filament-js/extensions_generated.js
+++ b/web/filament-js/extensions_generated.js
@@ -26,6 +26,7 @@ Filament.loadGeneratedExtensions = function() {
             threshold: true,
             enabled: false,
             highlight: 1000.0,
+            quality: Filament.View$QualityLevel.LOW,
             lensFlare: false,
             starburst: true,
             chromaticAberration: 0.005,
diff --git a/web/filament-js/filament.d.ts b/web/filament-js/filament.d.ts
index 0cd992a629d..f952c0dcc0a 100644
--- a/web/filament-js/filament.d.ts
+++ b/web/filament-js/filament.d.ts
@@ -1253,6 +1253,16 @@ export interface View$BloomOptions {
      * limit highlights to this value before bloom [10, +inf]
      */
     highlight?: number;
+    /**
+     * Bloom quality level.
+     * LOW (default): use a more optimized down-sampling filter; however, there can be artifacts
+     *      with dynamic resolution. This can be alleviated by using the homogeneous mode.
+     * MEDIUM: Good balance between quality and performance.
+     * HIGH: In this mode the bloom resolution is automatically increased to avoid artifacts.
+     *      This mode can be significantly slower on mobile, especially at high resolution.
+     *      This mode greatly improves the anamorphic bloom.
+     */
+    quality?: View$QualityLevel;
     /**
      * enable screen-space lens flare
      */
diff --git a/web/filament-js/jsbindings_generated.cpp b/web/filament-js/jsbindings_generated.cpp
index f5598830e21..4ef1dd6327a 100644
--- a/web/filament-js/jsbindings_generated.cpp
+++ b/web/filament-js/jsbindings_generated.cpp
@@ -30,6 +30,7 @@ value_object<View::BloomOptions>("View$BloomOptions")
     .field("threshold", &View::BloomOptions::threshold)
     .field("enabled", &View::BloomOptions::enabled)
     .field("highlight", &View::BloomOptions::highlight)
+    .field("quality", &View::BloomOptions::quality)
     .field("lensFlare", &View::BloomOptions::lensFlare)
     .field("starburst", &View::BloomOptions::starburst)
     .field("chromaticAberration", &View::BloomOptions::chromaticAberration)
diff --git a/web/filament-js/package.json b/web/filament-js/package.json
index 6d5c6bb5287..dd9b35c10e4 100644
--- a/web/filament-js/package.json
+++ b/web/filament-js/package.json
@@ -1,6 +1,6 @@
 {
   "name": "filament",
-  "version": "1.43.0",
+  "version": "1.43.1",
   "description": "Real-time physically based rendering engine",
   "main": "filament.js",
   "module": "filament.js",