From 38d54b4b62fd0b6f9c81cca5e688779a8b5fda9e Mon Sep 17 00:00:00 2001 From: gaoxiaojie Date: Fri, 21 Jul 2023 11:28:12 +0800 Subject: [PATCH 1/2] fix(esp32s3): patch Cache_WriteBack_Addr api Need to ensure that the cacheline being written back will not be accessed during the write back process. --- components/esp_rom/CMakeLists.txt | 6 +- .../esp_rom/esp32s3/Kconfig.soc_caps.in | 4 + components/esp_rom/esp32s3/esp_rom_caps.h | 1 + components/esp_rom/esp32s3/ld/esp32s3.rom.ld | 2 +- components/esp_rom/linker.lf | 2 + .../patches/esp_rom_cache_esp32s2_esp32s3.c | 78 ++++++++++- .../patches/esp_rom_cache_writeback_esp32s3.S | 132 ++++++++++++++++++ 7 files changed, 221 insertions(+), 4 deletions(-) create mode 100644 components/esp_rom/patches/esp_rom_cache_writeback_esp32s3.S diff --git a/components/esp_rom/CMakeLists.txt b/components/esp_rom/CMakeLists.txt index 6634f5ffc71..34a3ad078e3 100644 --- a/components/esp_rom/CMakeLists.txt +++ b/components/esp_rom/CMakeLists.txt @@ -46,10 +46,14 @@ if(CONFIG_HAL_WDT_USE_ROM_IMPL) list(APPEND sources "patches/esp_rom_wdt.c") endif() -if(CONFIG_ESP_ROM_HAS_FLASH_COUNT_PAGES_BUG) +if(CONFIG_ESP_ROM_HAS_FLASH_COUNT_PAGES_BUG OR CONFIG_ESP_ROM_HAS_CACHE_WRITEBACK_BUG) list(APPEND sources "patches/esp_rom_cache_esp32s2_esp32s3.c") endif() +if(CONFIG_ESP_ROM_HAS_CACHE_WRITEBACK_BUG) + list(APPEND sources "patches/esp_rom_cache_writeback_esp32s3.S") +endif() + idf_component_register(SRCS ${sources} INCLUDE_DIRS ${include_dirs} PRIV_REQUIRES ${private_required_comp} diff --git a/components/esp_rom/esp32s3/Kconfig.soc_caps.in b/components/esp_rom/esp32s3/Kconfig.soc_caps.in index 3d192660705..30b1f52d169 100644 --- a/components/esp_rom/esp32s3/Kconfig.soc_caps.in +++ b/components/esp_rom/esp32s3/Kconfig.soc_caps.in @@ -78,3 +78,7 @@ config ESP_ROM_HAS_FLASH_COUNT_PAGES_BUG config ESP_ROM_HAS_CACHE_SUSPEND_WAITI_BUG bool default y + +config ESP_ROM_HAS_CACHE_WRITEBACK_BUG + bool + default y diff --git a/components/esp_rom/esp32s3/esp_rom_caps.h b/components/esp_rom/esp32s3/esp_rom_caps.h index f960c350bbc..ee3740b53ab 100644 --- a/components/esp_rom/esp32s3/esp_rom_caps.h +++ b/components/esp_rom/esp32s3/esp_rom_caps.h @@ -25,3 +25,4 @@ #define ESP_ROM_RAM_APP_NEEDS_MMU_INIT (1) // ROM doesn't init cache MMU when it's a RAM APP, needs MMU hal to init #define ESP_ROM_HAS_FLASH_COUNT_PAGES_BUG (1) // ROM api Cache_Count_Flash_Pages will return unexpected value #define ESP_ROM_HAS_CACHE_SUSPEND_WAITI_BUG (1) // ROM api Cache_Suspend_I/DCache and Cache_Freeze_I/DCache_Enable does not waiti +#define ESP_ROM_HAS_CACHE_WRITEBACK_BUG (1) // ROM api Cache_WriteBack_Addr access cacheline being writen back may cause cache hit with wrong value. diff --git a/components/esp_rom/esp32s3/ld/esp32s3.rom.ld b/components/esp_rom/esp32s3/ld/esp32s3.rom.ld index 312343b38f9..3d9234092be 100644 --- a/components/esp_rom/esp32s3/ld/esp32s3.rom.ld +++ b/components/esp_rom/esp32s3/ld/esp32s3.rom.ld @@ -381,7 +381,7 @@ PROVIDE( Cache_WriteBack_Items = 0x40001698 ); PROVIDE( Cache_Op_Addr = 0x400016a4 ); PROVIDE( Cache_Invalidate_Addr = 0x400016b0 ); PROVIDE( Cache_Clean_Addr = 0x400016bc ); -PROVIDE( Cache_WriteBack_Addr = 0x400016c8 ); +PROVIDE( rom_Cache_WriteBack_Addr = 0x400016c8 ); PROVIDE( Cache_Invalidate_ICache_All = 0x400016d4 ); PROVIDE( Cache_Invalidate_DCache_All = 0x400016e0 ); PROVIDE( Cache_Clean_All = 0x400016ec ); diff --git a/components/esp_rom/linker.lf b/components/esp_rom/linker.lf index e9cb68a742e..99ddf6bdd42 100644 --- a/components/esp_rom/linker.lf +++ b/components/esp_rom/linker.lf @@ -4,6 +4,8 @@ entries: esp_rom_spiflash (noflash) if ESP_ROM_HAS_FLASH_COUNT_PAGES_BUG = y: esp_rom_cache_esp32s2_esp32s3 (noflash) + if ESP_ROM_HAS_CACHE_WRITEBACK_BUG = y: + esp_rom_cache_writeback_esp32s3 (noflash) if HEAP_TLSF_USE_ROM_IMPL = y && ESP_ROM_TLSF_CHECK_PATCH = y: esp_rom_tlsf (noflash) if SOC_SYSTIMER_SUPPORTED = y: diff --git a/components/esp_rom/patches/esp_rom_cache_esp32s2_esp32s3.c b/components/esp_rom/patches/esp_rom_cache_esp32s2_esp32s3.c index 062432caad6..9866d652a82 100644 --- a/components/esp_rom/patches/esp_rom_cache_esp32s2_esp32s3.c +++ b/components/esp_rom/patches/esp_rom_cache_esp32s2_esp32s3.c @@ -7,11 +7,16 @@ #include "sdkconfig.h" #include #include "soc/soc_caps.h" +#include "esp_rom_caps.h" #include "soc/extmem_reg.h" +#include "xtensa/xtruntime.h" #if CONFIG_IDF_TARGET_ESP32S3 #include "esp32s3/rom/cache.h" #endif +#define ALIGN_UP(addr, align) (((addr) + (align)-1) & ~((align)-1)) +#define ALIGN_DOWN(addr, align) ((addr) & ~((align) - 1)) + // this api is renamed for patch extern uint32_t rom_Cache_Count_Flash_Pages(uint32_t bus, uint32_t * page0_mapped); uint32_t Cache_Count_Flash_Pages(uint32_t bus, uint32_t * page0_mapped) @@ -30,7 +35,7 @@ uint32_t Cache_Count_Flash_Pages(uint32_t bus, uint32_t * page0_mapped) } extern uint32_t Cache_Count_Flash_Pages(uint32_t bus, uint32_t * page0_mapped); -#if CONFIG_ESP_ROM_HAS_CACHE_SUSPEND_WAITI_BUG +#if ESP_ROM_HAS_CACHE_SUSPEND_WAITI_BUG static inline void Cache_Wait_Idle(int icache) { if (icache) { @@ -63,6 +68,7 @@ uint32_t Cache_Suspend_DCache(void) } extern uint32_t Cache_Suspend_DCache(void); +#if SOC_CACHE_FREEZE_SUPPORTED // renamed for patch extern void rom_Cache_Freeze_ICache_Enable(cache_freeze_mode_t mode); void Cache_Freeze_ICache_Enable(cache_freeze_mode_t mode) @@ -80,4 +86,72 @@ void Cache_Freeze_DCache_Enable(cache_freeze_mode_t mode) Cache_Wait_Idle(0); } extern void Cache_Freeze_DCache_Enable(cache_freeze_mode_t mode); -#endif +#endif //#if SOC_CACHE_FREEZE_SUPPORTED +#endif //#if ESP_ROM_HAS_CACHE_SUSPEND_WAITI_BUG + +#if ESP_ROM_HAS_CACHE_WRITEBACK_BUG +/* Defined in esp_rom_cache_writeback_esp32s3.S */ +extern void cache_writeback_items_freeze(uint32_t addr, uint32_t items); +// renamed for patch +extern int rom_Cache_WriteBack_Addr(uint32_t addr, uint32_t size); +int Cache_WriteBack_Addr(uint32_t addr, uint32_t size) +{ + /* Do special processing for unaligned memory at the start and end of the cache writeback memory. + * 1. Disable the interrupt to prevent the current CPU accessing the same cacheline. + * 2. Enable dcache freeze to prevent the another CPU accessing the same cacheline. + */ + uint32_t irq_status; + uint32_t start_len, end_len; + uint32_t start, end; + uint32_t dcache_line_size; + uint32_t autoload; + int ret = 0; + start = addr; + end = addr + size; + dcache_line_size = Cache_Get_DCache_Line_Size(); + + if (size == 0) { + return 0; + } + + /*the start address is unaligned*/ + if (start & (dcache_line_size -1)) { + addr = ALIGN_UP(start, dcache_line_size); + start_len = addr - start; + size = (size < start_len) ? 0 : (size - start_len); + + /*writeback start unaligned mem, one cacheline*/ + irq_status = XTOS_SET_INTLEVEL(XCHAL_NMILEVEL);//mask all interrupts + cache_writeback_items_freeze(start, 1); + XTOS_RESTORE_INTLEVEL(irq_status); + + if (size == 0) { + return 0; + } + } + + /*the end address is unaligned*/ + if (end & (dcache_line_size -1)) { + end = ALIGN_DOWN(end, dcache_line_size); + end_len = addr + size - end; + size = (size - end_len); + + /*writeback end unaligned mem, one cacheline*/ + irq_status = XTOS_SET_INTLEVEL(XCHAL_NMILEVEL);//mask all interrupts + cache_writeback_items_freeze(end, 1); + XTOS_RESTORE_INTLEVEL(irq_status); + + if (size == 0) { + return 0; + } + } + + /*suspend autoload, avoid load cachelines being written back*/ + autoload = Cache_Suspend_DCache_Autoload(); + ret = rom_Cache_WriteBack_Addr(addr, size); + Cache_Resume_DCache_Autoload(autoload); + + return ret; +} +extern int Cache_WriteBack_Addr(uint32_t addr, uint32_t size); +#endif //#if ESP_ROM_HAS_CACHE_WRITEBACK_BUG diff --git a/components/esp_rom/patches/esp_rom_cache_writeback_esp32s3.S b/components/esp_rom/patches/esp_rom_cache_writeback_esp32s3.S new file mode 100644 index 00000000000..1c87329b0e8 --- /dev/null +++ b/components/esp_rom/patches/esp_rom_cache_writeback_esp32s3.S @@ -0,0 +1,132 @@ +/* + * SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "sdkconfig.h" +#include "esp_bit_defs.h" +#include "soc/extmem_reg.h" + +/** + * @brief Write back the cache items of DCache, enable cache freeze during writeback. + * Operation will be done CACHE_LINE_SIZE aligned. + * If the region is not in DCache addr room, nothing will be done. + * Please do not call this function in your SDK application. + * @param uint32_t addr: start address to write back + * @param uint32_t items: cache lines to invalidate, items * cache_line_size should + * not exceed the bus address size(4MB) + * + * void cache_writeback_items_freeze(uint32_t addr, uint32_t items) +*/ + +/******************************************************************************* + +This function is a cache write-back function that works around the following +hardware errata on the ESP32-S3: + +- Core X manually triggers (via the EXTMEM_DCACHE_SYNC_CTRL_REG register) the +write-back of one or more cache lines. +- While the write-back is in progress, there are two scenarios that may cause +cache hit error. + - Core X enters the interrupt handler and access the same cache line + being written back. + - Core Y access the same cache line being written back. + +To workaround this errata, the following steps must be taken when manually +triggering a cache write-back: + +- Core X must disable interrupts so that it cannot be preempted +- Core X must freeze the cache (via the EXTMEM_DCACHE_FREEZE_REG register) to +prevent Core Y from accessing the same cache lines that are about to be written +back. +- Core X now triggers the cache write-back. During the write-back... + - If Core Y attempts the access any address in the cache region, Core Y will + busy wait until the cache is unfrozen. + - Core X must ensure that it does not access any address in the cache region, + otherwise Core X will busy wait thus causing a deadlock. +- After the write-back is complete, Core X unfreezes the cache, and reenables +interrupts. + +Notes: + +- Please do not modify this function, it must strictly follow the current execution +sequence, otherwise it may cause unexpected errors. +- This function is written in assmebly to ensure that the function itself never +accesses any cache address while the cache is frozen. Unexpected cache access +could occur if... + - the function triggers an window overflow onto a stack placed in PSRAM. + Thus, we only use two window panes (a0 to a8) in this function and trigger + all window overflows before freezing the cache. + - the function accesses literals/read-only variables placed in Flash. + +*******************************************************************************/ + + .align 4 + /* + Create dedicated literal pool for this function. Mostly used to store out + of range movi transformations. + */ + .literal_position + .global cache_writeback_items_freeze + .type cache_writeback_items_freeze, @function +cache_writeback_items_freeze: + entry sp, 32 + + /* REG_WRITE(EXTMEM_DCACHE_SYNC_ADDR_REG, addr); */ + movi a4, EXTMEM_DCACHE_SYNC_ADDR_REG + s32i a2, a4, 0 + /* REG_WRITE(EXTMEM_DCACHE_SYNC_SIZE_REG, items); */ + movi a4, EXTMEM_DCACHE_SYNC_SIZE_REG + s32i a3, a4, 0 + memw /* About to freeze the cache. Ensure all previous memory R/W are completed */ + + movi a2, EXTMEM_DCACHE_FREEZE_REG + movi a3, EXTMEM_DCACHE_SYNC_CTRL_REG + + /* + REG_CLR_BIT(EXTMEM_DCACHE_FREEZE_REG, EXTMEM_DCACHE_FREEZE_MODE); + REG_SET_BIT(EXTMEM_DCACHE_FREEZE_REG, EXTMEM_DCACHE_FREEZE_ENA); + */ + l32i a4, a2, 0 /* a4 = *(EXTMEM_DCACHE_FREEZE_REG) */ + movi a5, ~(EXTMEM_DCACHE_FREEZE_MODE_M) + and a4, a4, a5 + movi a5, EXTMEM_DCACHE_FREEZE_ENA_M + or a4, a4, a5 + s32i a4, a2, 0 /* *(EXTMEM_DCACHE_FREEZE_REG) = a4 */ + + /* while (!REG_GET_BIT(EXTMEM_DCACHE_FREEZE_REG, EXTMEM_DCACHE_FREEZE_DONE)); */ + movi a5, EXTMEM_DCACHE_FREEZE_DONE_M +_wait_freeze_done: + l32i a4, a2, 0 /* a4 = *(EXTMEM_DCACHE_FREEZE_REG) */ + memw + bnone a4, a5, _wait_freeze_done + + /* REG_SET_BIT(EXTMEM_DCACHE_SYNC_CTRL_REG, EXTMEM_DCACHE_WRITEBACK_ENA); */ + l32i a4, a3, 0 /* a4 = *(EXTMEM_DCACHE_SYNC_CTRL_REG) */ + movi a5, EXTMEM_DCACHE_WRITEBACK_ENA_M + or a4, a4, a5 + s32i a4, a3, 0 /* *(EXTMEM_DCACHE_SYNC_CTRL_REG) = a4 */ + + /* while(!REG_GET_BIT(EXTMEM_DCACHE_SYNC_CTRL_REG, EXTMEM_DCACHE_SYNC_DONE)); */ + movi a5, EXTMEM_DCACHE_SYNC_DONE_M +_wait_writeback_done: + l32i a4, a3, 0 /* a4 = *(EXTMEM_DCACHE_SYNC_CTRL_REG) */ + memw + bnone a4, a5, _wait_writeback_done + + /* REG_CLR_BIT(EXTMEM_DCACHE_FREEZE_REG, EXTMEM_DCACHE_FREEZE_ENA); */ + l32i a4, a2, 0 /* a4 = *(EXTMEM_DCACHE_FREEZE_REG) */ + movi a5, ~(EXTMEM_DCACHE_FREEZE_ENA_M) + and a4, a4, a5 + s32i a4, a2, 0 /* *(EXTMEM_DCACHE_FREEZE_REG) = a4 */ + + /* while (REG_GET_BIT(EXTMEM_DCACHE_FREEZE_REG, EXTMEM_DCACHE_FREEZE_DONE)); */ + movi a5, EXTMEM_DCACHE_FREEZE_DONE_M +_wait_unfreeze_done: + l32i a4, a2, 0 /* a4 = *(EXTMEM_DCACHE_FREEZE_REG) */ + memw + bany a4, a5, _wait_unfreeze_done + + retw + .size cache_writeback_items_freeze, . - cache_writeback_items_freeze From aff298be189f6d3a1b390bda57116a6e1fc43578 Mon Sep 17 00:00:00 2001 From: gaoxiaojie Date: Fri, 21 Jul 2023 17:19:11 +0800 Subject: [PATCH 2/2] fix(cache): no longer use freeze in esp_cache_msync Writeback and invalidation don't need cache to be frozen first --- components/esp_mm/esp_cache.c | 36 ++++------------------------------- 1 file changed, 4 insertions(+), 32 deletions(-) diff --git a/components/esp_mm/esp_cache.c b/components/esp_mm/esp_cache.c index d66e235a2f8..ae625801748 100644 --- a/components/esp_mm/esp_cache.c +++ b/components/esp_mm/esp_cache.c @@ -15,35 +15,10 @@ #include "esp_cache.h" #include "esp_private/critical_section.h" - static const char *TAG = "cache"; #if SOC_CACHE_WRITEBACK_SUPPORTED DEFINE_CRIT_SECTION_LOCK_STATIC(s_spinlock); - -void s_cache_freeze(void) -{ -#if SOC_CACHE_FREEZE_SUPPORTED - cache_hal_freeze(CACHE_TYPE_DATA | CACHE_TYPE_INSTRUCTION); -#endif - - /** - * For writeback supported, but the freeze not supported chip (Now only S2), - * as it's single core, the critical section is enough to prevent preemption from an non-IRAM ISR - */ -} - -void s_cache_unfreeze(void) -{ -#if SOC_CACHE_FREEZE_SUPPORTED - cache_hal_unfreeze(CACHE_TYPE_DATA | CACHE_TYPE_INSTRUCTION); -#endif - - /** - * Similarly, for writeback supported, but the freeze not supported chip (Now only S2), - * we don't need to do more - */ -} #endif //#if SOC_CACHE_WRITEBACK_SUPPORTED @@ -54,9 +29,7 @@ esp_err_t esp_cache_msync(void *addr, size_t size, int flags) #if SOC_CACHE_WRITEBACK_SUPPORTED if ((flags & ESP_CACHE_MSYNC_FLAG_UNALIGNED) == 0) { - esp_os_enter_critical_safe(&s_spinlock); uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_TYPE_DATA); - esp_os_exit_critical_safe(&s_spinlock); ESP_RETURN_ON_FALSE_ISR(((uint32_t)addr % data_cache_line_size) == 0, ESP_ERR_INVALID_ARG, TAG, "start address isn't aligned with the data cache line size (%d)B", data_cache_line_size); ESP_RETURN_ON_FALSE_ISR((size % data_cache_line_size) == 0, ESP_ERR_INVALID_ARG, TAG, "size isn't aligned with the data cache line size (%d)B", data_cache_line_size); @@ -66,15 +39,14 @@ esp_err_t esp_cache_msync(void *addr, size_t size, int flags) uint32_t vaddr = (uint32_t)addr; esp_os_enter_critical_safe(&s_spinlock); - s_cache_freeze(); - cache_hal_writeback_addr(vaddr, size); + esp_os_exit_critical_safe(&s_spinlock); + if (flags & ESP_CACHE_MSYNC_FLAG_INVALIDATE) { + esp_os_enter_critical_safe(&s_spinlock); cache_hal_invalidate_addr(vaddr, size); + esp_os_exit_critical_safe(&s_spinlock); } - - s_cache_unfreeze(); - esp_os_exit_critical_safe(&s_spinlock); #endif return ESP_OK;