From 30fa27a0455f49ceb8b5f768c313dd4e4bfaa0ae Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 9 Dec 2024 17:33:14 -0800 Subject: [PATCH 1/4] Fixes for Xilinx Zynq UltraScale+ MPSoC: * Fixes to support wolfBoot native make and gcc-arm cross compiler. ZD 18159 * Adjust wolfBoot linker script to not use 0 base, instead use end of DDR - 1MB. * Fixed QSPI bare-metal driver for multi-sector and read return code. * Fixed issue with Xilinx XMSS IMAGE_HEADER_SIZE in documentation. It should be 5000 bytes. * Performance optimizations for QSPI: - Allow configuration of SPI clock. - Improve GQSPI FIFO TX/RX fill. * Added support for FAST_MEMCPY, which supports aligned 32-bit copies. * Added Flattened uImage Tree (FIT) image (FDT format). * Added Aarch64 support for FDT fixups. * Added Aarch64 startup to support EL2 with cache/MMU. * Added documentation about exception levels. * Moved zynqmp registers to header. * Fix printf uart_writenum "buf" len. * Updated fdt-parser to support saving off larger data images. 
--- IDE/XilinxSDK/.cproject | 4 +- IDE/XilinxSDK/README.md | 67 +- arch.mk | 55 +- config/examples/sim-tpm-seal.config | 3 + config/examples/zynqmp.config | 13 +- hal/nxp_ls1028a.h | 36 +- hal/nxp_ls1028a.ld | 25 +- hal/zynq.c | 562 ++++-------- hal/zynq.h | 363 ++++++++ hal/zynq.ld | 10 +- include/fdt.h | 9 + include/printf.h | 2 +- src/boot_aarch64.c | 43 +- src/boot_aarch64_start.S | 1306 ++++++++++++++++++++------- src/fdt.c | 152 +++- src/string.c | 15 +- src/update_ram.c | 94 +- stage1/loader_stage1.c | 15 +- tools/fdt-parser/README.md | 35 +- tools/fdt-parser/fdt-parser.c | 138 ++- 20 files changed, 2085 insertions(+), 862 deletions(-) create mode 100644 hal/zynq.h diff --git a/IDE/XilinxSDK/.cproject b/IDE/XilinxSDK/.cproject index 170006ff4..ad35ffc6d 100644 --- a/IDE/XilinxSDK/.cproject +++ b/IDE/XilinxSDK/.cproject @@ -131,7 +131,7 @@ - + @@ -267,7 +267,7 @@ - + diff --git a/IDE/XilinxSDK/README.md b/IDE/XilinxSDK/README.md index 09b3dd970..42b92f9ee 100644 --- a/IDE/XilinxSDK/README.md +++ b/IDE/XilinxSDK/README.md @@ -45,8 +45,51 @@ These template settings are also in this `.cproject` as preprocessor macros. The #define WOLFBOOT_LOAD_DTS_ADDRESS 0x11800000 ``` +The default .cproject build symbols are: + +``` +ARCH_AARCH64 +ARCH_FLASH_OFFSET=0x0 +CORTEX_A53 +DEBUG_ZYNQ=1 +EXT_FLASH=1 +FILL_BYTE=0xFF +IMAGE_HEADER_SIZE=1024 +MMU +NO_QNX +NO_XIP +PART_BOOT_EXT=1 +PART_SWAP_EXT=1 +PART_UPDATE_EXT=1 +TARGET_zynq +WC_HASH_DATA_ALIGNMENT=8 +WOLFBOOT_ARCH_AARCH64 +WOLFBOOT_DUALBOOT +WOLFBOOT_ELF +WOLFBOOT_HASH_SHA3_384 +WOLFBOOT_ORIGIN=0x0 +WOLFBOOT_SHA_BLOCK_SIZE=4096 +WOLFBOOT_SIGN_RSA4096 +WOLFBOOT_UBOOT_LEGACY +``` + Note: If not using Position Independent Code (PIC) the linker script `ldscript.ld` must have the start address offset to match the `WOLFBOOT_LOAD_ADDRESS`. 
+ +## Zynq UltraScale+ ARMv8 Crypto Extensions + +To enable ARM assembly speedups for SHA: + +1) Add these build symbols: + +``` +WOLFSSL_ARMASM +WOLFSSL_ARMASM_INLINE +``` + +2) Add these compiler misc flags: `-mcpu=generic+crypto -mstrict-align -DWOLFSSL_AARCH64_NO_SQRMLSH` + + ## Generate signing key The keygen tool creates an RSA 4096-bit private key (`wolfboot_signing_private_key.der`) and exports the public key to `src/keystore.c` for wolfBoot to use at compile-time as the default root-of-trust. @@ -91,9 +134,13 @@ Xilinx uses a `bootgen` tool for generating a boot binary image that has Xilinx * Use "offset=" option to place the application into a specific location in flash. * Use "load=" option to have FSBL load into specific location in RAM. -Generating a boot.bin (from boot.bif). -Run the Xilinx -> Vitis Shell and cd into the workspace root. +Default install locations for bootgen tools: +* Linux: `/tools/Xilinx/Vitis/2022.1/bin` +* Windows: `C:\Xilinx\Vitis\2022.1\bin` +Open the Vitis Shell from the IDE by using file menu "Xilinx" -> "Vitis Shell". + +Generating a boot.bin (from boot.bif). Example boot.bif in workspace root: ``` @@ -102,11 +149,15 @@ Example boot.bif in workspace root: the_ROM_image: { [bootloader, destination_cpu=a53-0] zcu102\zynqmp_fsbl\fsbl_a53.elf - [destination_cpu=a53-0, exception_level=el-1] wolfboot\Debug\wolfboot.elf + [destination_cpu=a53-0, exception_level=el-2] wolfboot\Debug\wolfboot.elf [destination_cpu=a53-0, partition_owner=uboot, offset=0x800000] hello_world\Debug\hello_world_v1_signed.bin } ``` +You can also use exception level 3 or 1 depending on your needs. + +From the workspace root: + ```sh bootgen -image boot.bif -arch zynqmp -w -o BOOT.bin @@ -184,10 +235,10 @@ Successfully ran Hello World application ``` 6. 
Build “boot.bin” image: - * `bootgen.exe -image boot.bif -arch zynqmp -o i BOOT.BIN -w` + * `bootgen -image boot.bif -arch zynqmp -o i BOOT.BIN -w` -Note: To generate a report of a boot.bin use the `bootgen_utility`: -`bootgen_utility -arch zynqmp -bin boot.bin -out boot.bin.txt` +Note: To generate a report of a boot.bin use the `bootgen_utility` or after 2022.1 use `bootgen -read`: +`bootgen -arch zynqmp -read BOOT.BIN` ## Post Quantum @@ -207,7 +258,8 @@ WOLFSSL_XMSS_VERIFY_ONLY WOLFSSL_XMSS_MAX_HEIGHT=32 WOLFBOOT_SHA_BLOCK_SIZE=4096 IMAGE_SIGNATURE_SIZE=2500 -IMAGE_HEADER_SIZE=4096 +XMSS_IMAGE_SIGNATURE_SIZE=2500 +IMAGE_HEADER_SIZE=5000 ``` 2) Create and sign image: @@ -300,3 +352,4 @@ Output image(s) successfully created. ### References: * [ZAPP1319](https://www.xilinx.com/support/documentation/application_notes/xapp1319-zynq-usp-prog-nvm.pdf): Programming BBRAM and eFUSEs * [UG1283](https://www.xilinx.com/support/documentation/sw_manuals/xilinx2018_2/ug1283-bootgen-user-guide.pdf): Bootgen User Guide +* [Using Cryptography in Zynq UltraScale MPSoC](https://xilinx-wiki.atlassian.net/wiki/spaces/A/pages/18842541/Using+Cryptography+in+Zynq+UltraScale+MPSoC) diff --git a/arch.mk b/arch.mk index b517062f3..424c9aa1b 100644 --- a/arch.mk +++ b/arch.mk @@ -37,7 +37,7 @@ WOLFCRYPT_OBJS+=./lib/wolfssl/wolfcrypt/src/sha256.o \ ifeq ($(ARCH),x86_64) - CFLAGS+=-DARCH_x86_64 + CFLAGS+=-DARCH_x86_64 -DFAST_MEMCPY ifeq ($(FORCE_32BIT),1) NO_ASM=1 CFLAGS+=-DFORCE_32BIT @@ -65,24 +65,33 @@ endif ## ARM Cortex-A ifeq ($(ARCH),AARCH64) CROSS_COMPILE?=aarch64-none-elf- - CFLAGS+=-DARCH_AARCH64 + CFLAGS+=-DARCH_AARCH64 -DFAST_MEMCPY OBJS+=src/boot_aarch64.o src/boot_aarch64_start.o - ifeq ($(TARGET),nxp_ls1028a) - ARCH_FLAGS=-mcpu=cortex-a72+crypto -march=armv8-a+crypto -mtune=cortex-a72 - CFLAGS+=$(ARCH_FLAGS) -DCORTEX_A72 + ifeq ($(TARGET),zynq) + ARCH_FLAGS=-march=armv8-a+crypto + CFLAGS+=$(ARCH_FLAGS) -DCORTEX_A53 + CFLAGS+=-DNO_QNX + # Support detection and skip of U-Boot 
legacy header */ + CFLAGS+=-DWOLFBOOT_UBOOT_LEGACY + CFLAGS+=-DWOLFBOOT_DUALBOOT + else + ifeq ($(TARGET),nxp_ls1028a) + ARCH_FLAGS=-mcpu=cortex-a72+crypto -march=armv8-a+crypto -mtune=cortex-a72 + CFLAGS+=$(ARCH_FLAGS) -DCORTEX_A72 - CFLAGS +=-ffunction-sections -fdata-sections - LDFLAGS+=-Wl,--gc-sections + CFLAGS +=-ffunction-sections -fdata-sections + LDFLAGS+=-Wl,--gc-sections - ifeq ($(DEBUG_UART),0) - CFLAGS+=-fno-builtin-printf - endif + ifeq ($(DEBUG_UART),0) + CFLAGS+=-fno-builtin-printf + endif - SPI_TARGET=nxp - else - # By default disable ARM ASM for other targets - NO_ARM_ASM?=1 + SPI_TARGET=nxp + else + # By default disable ARM ASM for other targets + NO_ARM_ASM?=1 + endif endif ifeq ($(SPMATH),1) @@ -523,7 +532,7 @@ endif ifeq ($(ARCH),PPC) CROSS_COMPILE?=powerpc-linux-gnu- LDFLAGS+=-Wl,--build-id=none - CFLAGS+=-DARCH_PPC + CFLAGS+=-DARCH_PPC -DFAST_MEMCPY ifeq ($(DEBUG_UART),0) CFLAGS+=-fno-builtin-printf @@ -789,12 +798,6 @@ ifeq ($(TARGET),nxp_p1021) SPI_TARGET=nxp endif -ifeq ($(TARGET),zynq) - # Support detection and skip of U-Boot legecy header */ - CFLAGS+=-DWOLFBOOT_UBOOT_LEGACY - CFLAGS+=-DWOLFBOOT_DUALBOOT -endif - ifeq ($(TARGET),ti_hercules) # HALCoGen Source and Include? 
CORTEX_R5=1 @@ -1082,12 +1085,12 @@ ifeq ($(ARCH),AARCH64) CFLAGS+=-DMMU -DWOLFBOOT_DUALBOOT OBJS+=src/fdt.o UPDATE_OBJS:=src/update_ram.o +else + ifeq ($(DUALBANK_SWAP),1) + CFLAGS+=-DWOLFBOOT_DUALBOOT + UPDATE_OBJS:=src/update_flash_hwswap.o + endif endif -ifeq ($(DUALBANK_SWAP),1) - CFLAGS+=-DWOLFBOOT_DUALBOOT - UPDATE_OBJS:=src/update_flash_hwswap.o -endif - # Set default update object (if not library) ifneq ($(TARGET),library) ifeq ($(UPDATE_OBJS),) diff --git a/config/examples/sim-tpm-seal.config b/config/examples/sim-tpm-seal.config index 096ca1d0b..508828bcb 100644 --- a/config/examples/sim-tpm-seal.config +++ b/config/examples/sim-tpm-seal.config @@ -31,6 +31,9 @@ WOLFBOOT_TPM_SEAL?=1 WOLFBOOT_TPM_SEAL_NV_BASE=0x01400300 #WOLFBOOT_TPM_SEAL_AUTH?=SealAuth +# Default image header size is larger to support room for policy +IMAGE_HEADER_SIZE?=512 + # TPM Logging #CFLAGS_EXTRA+=-DDEBUG_WOLFTPM #CFLAGS_EXTRA+=-DWOLFTPM_DEBUG_VERBOSE diff --git a/config/examples/zynqmp.config b/config/examples/zynqmp.config index a67ab205d..c8249b092 100644 --- a/config/examples/zynqmp.config +++ b/config/examples/zynqmp.config @@ -1,5 +1,8 @@ ARCH?=AARCH64 TARGET?=zynq + +WOLFBOOT_VERSION?=0 + # Default to ZCU102 as hardware platform (QSPI sizes) CFLAGS_EXTRA+=-DZCU102 @@ -28,15 +31,17 @@ IMAGE_HEADER_SIZE?=1024 #IMAGE_HEADER_SIZE?=5288 DEBUG?=0 +DEBUG_SYMBOLS=1 DEBUG_UART=1 -#DEBUG_ZYNQ=1 +CFLAGS_EXTRA+=-DDEBUG_ZYNQ=1 +#OPTIMIZATION_LEVEL=2 VTOR?=1 CORTEX_M0?=0 NO_ASM?=0 +NO_ARM_ASM?=0 ALLOW_DOWNGRADE?=0 NVM_FLASH_WRITEONCE?=0 -WOLFBOOT_VERSION?=0 V?=0 SPMATH?=1 RAM_CODE?=0 @@ -48,6 +53,7 @@ SPI_FLASH?=0 NO_XIP=1 USE_GCC=1 ELF?=1 +#DEBUG_ELF?=1 # Flash Sector Size WOLFBOOT_SECTOR_SIZE=0x20000 @@ -73,3 +79,6 @@ CROSS_COMPILE=aarch64-none-elf- # Speed up reads from flash by using larger blocks CFLAGS_EXTRA+=-DWOLFBOOT_SHA_BLOCK_SIZE=4096 + +# QSPI Clock at 0=150MHz, 1=75MHz, 2=37.5MHz (default) +#CFLAGS_EXTRA+=-DGQSPI_CLK_DIV=0 diff --git a/hal/nxp_ls1028a.h b/hal/nxp_ls1028a.h 
index b0592df77..2192e7b96 100644 --- a/hal/nxp_ls1028a.h +++ b/hal/nxp_ls1028a.h @@ -22,6 +22,27 @@ #ifndef NXP_LS1028A_H #define NXP_LS1028A_H +/* By default expect EL3 at startup */ +#ifndef EL3_SECURE +#define EL3_SECURE 1 +#endif +#ifndef EL2_HYPERVISOR +#define EL2_HYPERVISOR 0 +#endif +#ifndef EL1_NONSECURE +#define EL1_NONSECURE 0 +#endif + +#ifndef HYP_GUEST +/* ZEN Hypervisor guest format support */ +#define HYP_GUEST 0 +#endif + +/* Floating Point Trap Enable */ +#ifndef FPU_TRAP +#define FPU_TRAP 0 +#endif + /* Expose AA64 defines */ #define AA64_TARGET_EL 2 /* Boot to EL2 hypervisor */ @@ -32,7 +53,20 @@ #define AA64GICV3_GITS_BASE GITS_BASE #define AA64GICV3_GITST_BASE GITST_BASE - +/* ID_AA64PFR0_EL1 ARMv8 Processor Feature Register 0*/ +#define ID_AA64PFRO_EL3_MASK (0xF<<12) /* EL3 is implemented: 0x0000 no */ + /* 0x1000 AA64, 0x2000 AA64+AA32 */ +#define ID_AA64PFRO_EL2_MASK (0xF<<8) /* EL2 is implemented: 0x000 no */ + /* 0x100 AA64, 0x200 AA64+AA32 */ +#define ID_AA64PFRO_EL1_MASK (0xF<<4) /* EL1 is implemented: */ + /* 0x10 AA64, 0x20 AA64+AA32 */ +#define ID_AA64PFRO_EL0_MASK (0xF<<0) /* EL0 is implemented: */ + /* 0x1 AA64, 0x2 AA64+AA32 */ +#define ID_AA64PFRO_FGT_MASK (0xFull<<56) /* Fine Grained Traps: */ + /* 0x0 no, !0x0: yes */ +#define TZPCDECPROT0_SET_BASE 0x02200804 +#define TZPCDECPROT1_SET_BASE 0x02200810 +#define OCRAM_TZPC_ADDR 0x02200000 /* LS1028A Reference Manual Rev 0 12/2019 */ diff --git a/hal/nxp_ls1028a.ld b/hal/nxp_ls1028a.ld index ac76670d3..87ca474db 100644 --- a/hal/nxp_ls1028a.ld +++ b/hal/nxp_ls1028a.ld @@ -23,6 +23,9 @@ SECTIONS PROVIDE (_MEMORY_SIZE = LENGTH(OCRAM)); PROVIDE (_FLASH_SIZE = LENGTH(FLASH)); PROVIDE (_STACK_SIZE = 64K); + PROVIDE (_EL0_STACK_SIZE = DEFINED(_EL0_STACK_SIZE) ? _EL0_STACK_SIZE : 1024); + PROVIDE (_EL1_STACK_SIZE = DEFINED(_EL1_STACK_SIZE) ? _EL1_STACK_SIZE : 2048); + PROVIDE (_EL2_STACK_SIZE = DEFINED(_EL2_STACK_SIZE) ? 
_EL2_STACK_SIZE : 1024); .boot : { @@ -100,10 +103,26 @@ SECTIONS } > OCRAM . = ALIGN(16); - .stack : - { + + .stack (NOLOAD) : { + . = ALIGN(64); _start_stack = .; - . = . + _STACK_SIZE; + _el3_stack_end = .; + . += _STACK_SIZE; + __el3_stack = .; + _el2_stack_end = .; + . += _EL2_STACK_SIZE; + . = ALIGN(64); + __el2_stack = .; + _el1_stack_end = .; + . += _EL1_STACK_SIZE; + . = ALIGN(64); + __el1_stack = .; + _el0_stack_end = .; + . += _EL0_STACK_SIZE; + . = ALIGN(64); + __el0_stack = .; + _end_stack = .; } > OCRAM diff --git a/hal/zynq.c b/hal/zynq.c index fd1b31ac9..9df15e893 100644 --- a/hal/zynq.c +++ b/hal/zynq.c @@ -1,6 +1,6 @@ /* zynq.c * - * Copyright (C) 2021 wolfSSL Inc. + * Copyright (C) 2024 wolfSSL Inc. * * This file is part of wolfBoot. * @@ -19,8 +19,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ -#include -#include +#ifdef TARGET_zynq + +#include "hal/zynq.h" + +#ifndef ARCH_AARCH64 +# error "wolfBoot zynq HAL: wrong architecture selected. Please compile with ARCH=AARCH64." +#endif #if defined(__QNXNTO__) && !defined(NO_QNX) #define USE_QNX @@ -31,15 +36,19 @@ #include #include "image.h" #include "printf.h" -#ifndef ARCH_AARCH64 -# error "wolfBoot zynq HAL: wrong architecture selected. Please compile with ARCH=AARCH64." 
-#endif + +#include +#include #ifdef USE_XQSPIPSU /* Xilinx BSP Driver */ #include "xqspipsu.h" + #ifndef QSPI_DEVICE_ID #define QSPI_DEVICE_ID XPAR_XQSPIPSU_0_DEVICE_ID + #endif + #ifndef QSPI_CLK_PRESACALE #define QSPI_CLK_PRESACALE XQSPIPSU_CLK_PRESCALE_8 + #endif #elif defined(USE_QNX) /* QNX QSPI driver */ #include @@ -48,214 +57,6 @@ /* QSPI bare-metal */ #endif - -/* QSPI bare-metal driver */ -#define CORTEXA53_0_CPU_CLK_FREQ_HZ 1099989014 -#define CORTEXA53_0_TIMESTAMP_CLK_FREQ 99998999 - -/* Generic Quad-SPI */ -#define QSPI_BASE 0xFF0F0000UL -#define LQSPI_EN (*((volatile uint32_t*)(QSPI_BASE + 0x14))) /* SPI enable: 0: disable the SPI, 1: enable the SPI */ -#define GQSPI_CFG (*((volatile uint32_t*)(QSPI_BASE + 0x100))) /* configuration register. */ -#define GQSPI_ISR (*((volatile uint32_t*)(QSPI_BASE + 0x104))) /* interrupt status register. */ -#define GQSPI_IER (*((volatile uint32_t*)(QSPI_BASE + 0x108))) /* interrupt enable register. */ -#define GQSPI_IDR (*((volatile uint32_t*)(QSPI_BASE + 0x10C))) /* interrupt disable register. */ -#define GQSPI_IMR (*((volatile uint32_t*)(QSPI_BASE + 0x110))) /* interrupt unmask register. */ -#define GQSPI_EN (*((volatile uint32_t*)(QSPI_BASE + 0x114))) /* enable register. */ -#define GQSPI_TXD (*((volatile uint32_t*)(QSPI_BASE + 0x11C))) /* TX data register. Keyhole addresses for the transmit data FIFO. */ -#define GQSPI_RXD (*((volatile uint32_t*)(QSPI_BASE + 0x120))) /* RX data register. 
*/ -#define GQSPI_TX_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x128))) /* TXFIFO Threshold Level register: (bits 5:0) Defines the level at which the TX_FIFO_NOT_FULL interrupt is generated */ -#define GQSPI_RX_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x12C))) /* RXFIFO threshold level register: (bits 5:0) Defines the level at which the RX_FIFO_NOT_EMPTY interrupt is generated */ -#define GQSPI_GPIO (*((volatile uint32_t*)(QSPI_BASE + 0x130))) -#define GQSPI_LPBK_DLY_ADJ (*((volatile uint32_t*)(QSPI_BASE + 0x138))) /* adjusting the internal loopback clock delay for read data capturing */ -#define GQSPI_GEN_FIFO (*((volatile uint32_t*)(QSPI_BASE + 0x140))) /* generic FIFO data register. Keyhole addresses for the generic FIFO. */ -#define GQSPI_SEL (*((volatile uint32_t*)(QSPI_BASE + 0x144))) /* select register. */ -#define GQSPI_FIFO_CTRL (*((volatile uint32_t*)(QSPI_BASE + 0x14C))) /* FIFO control register. */ -#define GQSPI_GF_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x150))) /* generic FIFO threshold level register: (bits 4:0) Defines the level at which the GEN_FIFO_NOT_FULL interrupt is generated */ -#define GQSPI_POLL_CFG (*((volatile uint32_t*)(QSPI_BASE + 0x154))) /* poll configuration register */ -#define GQSPI_P_TIMEOUT (*((volatile uint32_t*)(QSPI_BASE + 0x158))) /* poll timeout register. */ -#define GQSPI_XFER_STS (*((volatile uint32_t*)(QSPI_BASE + 0x15C))) /* transfer status register. 
*/ -#define QSPI_DATA_DLY_ADJ (*((volatile uint32_t*)(QSPI_BASE + 0x1F8))) /* adjusting the internal receive data delay for read data capturing */ -#define GQSPI_MOD_ID (*((volatile uint32_t*)(QSPI_BASE + 0x1FC))) -#define QSPIDMA_DST_STS (*((volatile uint32_t*)(QSPI_BASE + 0x808))) -#define QSPIDMA_DST_CTRL (*((volatile uint32_t*)(QSPI_BASE + 0x80C))) -#define QSPIDMA_DST_I_STS (*((volatile uint32_t*)(QSPI_BASE + 0x814))) -#define QSPIDMA_DST_CTRL2 (*((volatile uint32_t*)(QSPI_BASE + 0x824))) - -/* GQSPI Registers */ -/* GQSPI_CFG: Configuration registers */ -#define GQSPI_CFG_CLK_POL (1UL << 1) /* Clock polarity outside QSPI word: 0: QSPI clock is quiescent low, 1: QSPI clock is quiescent high */ -#define GQSPI_CFG_CLK_PH (1UL << 2) /* Clock phase: 1: the QSPI clock is inactive outside the word, 0: the QSPI clock is active outside the word */ -/* 000: divide by 2, 001: divide by 4, 010: divide by 8, - 011: divide by 16, 100: divide by 32, 101: divide by 64, - 110: divide by 128, 111: divide by 256 */ -#define GQSPI_CFG_BAUD_RATE_DIV_MASK (7UL << 3) -#define GQSPI_CFG_BAUD_RATE_DIV(d) ((d << 3) & GQSPI_CFG_BAUD_RATE_DIV_MASK) -#define GQSPI_CFG_WP_HOLD (1UL << 19) /* If set, Holdb and WPn pins are actively driven by the qspi controller in 1-bit and 2-bit modes. 
*/ -#define GQSPI_CFG_EN_POLL_TIMEOUT (1UL << 20) /* Poll Timeout Enable: 0: disable, 1: enable */ -#define GQSPI_CFG_ENDIAN (1UL << 26) /* Endian format transmit data register: 0: little endian, 1: big endian */ -#define GQSPI_CFG_START_GEN_FIFO (1UL << 28) /* Trigger Generic FIFO Command Execution: 0:disable executing requests, 1: enable executing requests */ -#define GQSPI_CFG_GEN_FIFO_START_MODE (1UL << 29) /* Start mode of Generic FIFO: 0: Auto Start Mode, 1: Manual Start Mode */ -#define GQSPI_CFG_MODE_EN_MASK (3UL << 30) /* Flash memory interface mode control: 00: IO mode, 10: DMA mode */ -#define GQSPI_CFG_MODE_EN(m) ((m << 30) & GQSPI_CFG_MODE_EN_MASK) -#define GQSPI_CFG_MODE_EN_IO GQSPI_CFG_MODE_EN(0) -#define GQSPI_CFG_MODE_EN_DMA GQSPI_CFG_MODE_EN(2) - -/* GQSPI_ISR / GQSPI_IER / GQSPI_IDR / GQSPI_IMR: Interrupt registers */ -#define GQSPI_IXR_RX_FIFO_EMPTY (1UL << 11) -#define GQSPI_IXR_GEN_FIFO_FULL (1UL << 10) -#define GQSPI_IXR_GEN_FIFO_NOT_FULL (1UL << 9) -#define GQSPI_IXR_TX_FIFO_EMPTY (1UL << 8) -#define GQSPI_IXR_GEN_FIFO_EMPTY (1UL << 7) -#define GQSPI_IXR_RX_FIFO_FULL (1UL << 5) -#define GQSPI_IXR_RX_FIFO_NOT_EMPTY (1UL << 4) -#define GQSPI_IXR_TX_FIFO_FULL (1UL << 3) -#define GQSPI_IXR_TX_FIFO_NOT_FULL (1UL << 2) -#define GQSPI_IXR_POLL_TIME_EXPIRE (1UL << 1) - -#define GQSPI_IXR_ALL_MASK (GQSPI_IXR_POLL_TIME_EXPIRE | GQSPI_IXR_TX_FIFO_NOT_FULL | \ - GQSPI_IXR_TX_FIFO_FULL | GQSPI_IXR_RX_FIFO_NOT_EMPTY | GQSPI_IXR_RX_FIFO_FULL | \ - GQSPI_IXR_GEN_FIFO_EMPTY | GQSPI_IXR_TX_FIFO_EMPTY | GQSPI_IXR_GEN_FIFO_NOT_FULL | \ - GQSPI_IXR_GEN_FIFO_FULL | GQSPI_IXR_RX_FIFO_EMPTY) -#define GQSPI_ISR_WR_TO_CLR_MASK 0x00000002U - -/* GQSPI_GEN_FIFO: FIFO data register */ -/* bits 0-7: Length in bytes (except when GQSPI_GEN_FIFO_EXP_MASK is set length as 255 chunks) */ -#define GQSPI_GEN_FIFO_IMM_MASK (0xFFUL) /* Immediate Data Field */ -#define GQSPI_GEN_FIFO_IMM(imm) (imm & GQSPI_GEN_FIFO_IMM_MASK) -#define GQSPI_GEN_FIFO_DATA_XFER (1UL << 8) /* Indicates 
IMM is size, otherwise byte is sent directly in IMM reg */ -#define GQSPI_GEN_FIFO_EXP_MASK (1UL << 9) /* Length is Exponent (length / 255) */ -#define GQSPI_GEN_FIFO_MODE_MASK (3UL << 10) -#define GQSPI_GEN_FIFO_MODE(m) ((m << 10) & GQSPI_GEN_FIFO_MODE_MASK) -#define GQSPI_GEN_FIFO_MODE_SPI GQSPI_GEN_FIFO_MODE(1) -#define GQSPI_GEN_FIFO_MODE_DSPI GQSPI_GEN_FIFO_MODE(2) -#define GQSPI_GEN_FIFO_MODE_QSPI GQSPI_GEN_FIFO_MODE(3) -#define GQSPI_GEN_FIFO_CS_MASK (3UL << 12) -#define GQSPI_GEN_FIFO_CS(c) ((c << 12) & GQSPI_GEN_FIFO_CS_MASK) -#define GQSPI_GEN_FIFO_CS_LOWER GQSPI_GEN_FIFO_CS(1) -#define GQSPI_GEN_FIFO_CS_UPPER GQSPI_GEN_FIFO_CS(2) -#define GQSPI_GEN_FIFO_CS_BOTH GQSPI_GEN_FIFO_CS(3) -#define GQSPI_GEN_FIFO_BUS_MASK (3UL << 14) -#define GQSPI_GEN_FIFO_BUS(b) ((b << 14) & GQSPI_GEN_FIFO_BUS_MASK) -#define GQSPI_GEN_FIFO_BUS_LOW GQSPI_GEN_FIFO_BUS(1) -#define GQSPI_GEN_FIFO_BUS_UP GQSPI_GEN_FIFO_BUS(2) -#define GQSPI_GEN_FIFO_BUS_BOTH GQSPI_GEN_FIFO_BUS(3) -#define GQSPI_GEN_FIFO_TX (1UL << 16) -#define GQSPI_GEN_FIFO_RX (1UL << 17) -#define GQSPI_GEN_FIFO_STRIPE (1UL << 18) /* Stripe data across the lower and upper data buses. */ -#define GQSPI_GEN_FIFO_POLL (1UL << 19) - -/* GQSPI_FIFO_CTRL */ -#define GQSPI_FIFO_CTRL_RST_GEN_FIFO (1UL << 0) -#define GQSPI_FIFO_CTRL_RST_TX_FIFO (1UL << 1) -#define GQSPI_FIFO_CTRL_RST_RX_FIFO (1UL << 2) - -/* QSPIDMA_DST_CTRL */ -#define QSPIDMA_DST_CTRL_DEF 0x403FFA00UL -#define QSPIDMA_DST_CTRL2_DEF 0x081BFFF8UL - -/* QSPIDMA_DST_STS */ -#define QSPIDMA_DST_STS_WTC 0xE000U - -/* QSPIDMA_DST_I_STS */ -#define QSPIDMA_DST_I_STS_ALL_MASK 0xFEU - -/* IOP System-level Control */ -#define IOU_SLCR_BASSE 0xFF180000 -#define IOU_TAPDLY_BYPASS (*((volatile uint32_t*)(IOU_SLCR_BASSE + 0x390))) -#define IOU_TAPDLY_BYPASS_LQSPI_RX (1UL << 2) /* LQSPI Tap Delay Enable on Rx Clock signal. 0: enable. 1: disable (bypass tap delay). 
*/ - - -/* Configuration used for bare-metal only */ -#define GQSPI_CLK_FREQ_HZ 124987511 -#define GQSPI_CLK_DIV 2 /* (CLK / (2 << val) = BUS) - DIV 2 = 37.5 MHz */ -#define GQSPI_CS_ASSERT_CLOCKS 5 /* CS Setup Time (tCSS) - num of clock cycles foes in IMM */ -#define GQSPI_FIFO_WORD_SZ 4 -#define GQSPI_TIMEOUT_TRIES 100000 -#define QSPI_FLASH_READY_TRIES 1000 - - -/* QSPI Configuration */ -#ifndef GQSPI_QSPI_MODE -#define GQSPI_QSPI_MODE GQSPI_GEN_FIFO_MODE_QSPI -#endif -#ifndef GQPI_USE_DUAL_PARALLEL -#define GQPI_USE_DUAL_PARALLEL 1 /* 0=no stripe, 1=stripe */ -#endif -#ifndef GQPI_USE_4BYTE_ADDR -#define GQPI_USE_4BYTE_ADDR 1 -#endif -#ifndef GQSPI_DUMMY_READ -#define GQSPI_DUMMY_READ (8*8) /* Number of dummy clock cycles for reads */ -#endif - - - -/* Flash Parameters: - * Micron Serial NOR Flash Memory 64KB Sector Erase MT25QU512ABB - * Stacked device (two 512Mb (64MB)) - * Dual Parallel so total addressable size is double - */ -#ifndef FLASH_DEVICE_SIZE - #ifdef ZCU102 - /* 64*2 (dual parallel) = 128MB */ - #define FLASH_DEVICE_SIZE (2 * 64 * 1024 * 1024) /* MT25QU512ABB */ - #else - /* 128*2 (dual parallel) = 256MB */ - #define FLASH_DEVICE_SIZE (2 * 128 * 1024 * 1024) /* MT25QU01GBBB */ - #endif -#endif -#ifndef FLASH_PAGE_SIZE - #ifdef ZCU102 - #define FLASH_PAGE_SIZE 256 /* MT25QU512ABB */ - #else - #define FLASH_PAGE_SIZE 512 /* MT25QU01GBBB */ - #endif -#endif -#define FLASH_NUM_SECTORS (FLASH_DEVICE_SIZE/WOLFBOOT_SECTOR_SIZE) - - -/* Flash Commands */ -#define WRITE_ENABLE_CMD 0x06U -#define READ_SR_CMD 0x05U -#define WRITE_DISABLE_CMD 0x04U -#define READ_ID_CMD 0x9FU -#define MULTI_IO_READ_ID_CMD 0xAFU -#define READ_FSR_CMD 0x70U -#define ENTER_QSPI_MODE_CMD 0x35U -#define EXIT_QSPI_MODE_CMD 0xF5U -#define ENTER_4B_ADDR_MODE_CMD 0xB7U -#define EXIT_4B_ADDR_MODE_CMD 0xE9U - -#define FAST_READ_CMD 0x0BU -#define DUAL_READ_CMD 0x3BU -#define QUAD_READ_CMD 0x6BU -#define FAST_READ_4B_CMD 0x0CU -#define DUAL_READ_4B_CMD 0x3CU -#define QUAD_READ_4B_CMD 
0x6CU - -#define PAGE_PROG_CMD 0x02U -#define DUAL_PROG_CMD 0xA2U -#define QUAD_PROG_CMD 0x22U -#define PAGE_PROG_4B_CMD 0x12U -#define DUAL_PROG_4B_CMD 0x12U -#define QUAD_PROG_4B_CMD 0x34U - -#define SEC_ERASE_CMD 0xD8U -#define SEC_4K_ERASE_CMD 0x20U -#define RESET_ENABLE_CMD 0x66U -#define RESET_MEMORY_CMD 0x99U - -#define WRITE_EN_MASK 0x02 /* 0=Write Enabled, 1=Disabled Write */ -#define FLASH_READY_MASK 0x80 /* 0=Busy, 1=Ready */ - - -/* Return Codes */ -#define GQSPI_CODE_SUCCESS 0 -#define GQSPI_CODE_FAILED -100 -#define GQSPI_CODE_TIMEOUT -101 - - /* QSPI Slave Device Information */ typedef struct QspiDev { uint32_t mode; /* GQSPI_GEN_FIFO_MODE_SPI, GQSPI_GEN_FIFO_MODE_DSPI or GQSPI_GEN_FIFO_MODE_QSPI */ @@ -279,84 +80,35 @@ static int qspi_wait_we(QspiDev_t* dev); static int test_ext_flash(QspiDev_t* dev); #endif -/* eFUSE support */ -#define ZYNQMP_EFUSE_BASE 0xFFCC0000 -#define ZYNQMP_EFUSE_STATUS (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x0008))) -#define ZYNQMP_EFUSE_SEC_CTRL (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x1058))) -#define ZYNQMP_EFUSE_PPK0_0 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A0))) -#define ZYNQMP_EFUSE_PPK0_1 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A4))) -#define ZYNQMP_EFUSE_PPK0_2 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A8))) -#define ZYNQMP_EFUSE_PPK0_3 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10AC))) -#define ZYNQMP_EFUSE_PPK0_4 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B0))) -#define ZYNQMP_EFUSE_PPK0_5 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B4))) -#define ZYNQMP_EFUSE_PPK0_6 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B8))) -#define ZYNQMP_EFUSE_PPK0_7 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10BC))) -#define ZYNQMP_EFUSE_PPK0_8 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C0))) -#define ZYNQMP_EFUSE_PPK0_9 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C4))) -#define ZYNQMP_EFUSE_PPK0_10 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C8))) -#define 
ZYNQMP_EFUSE_PPK0_11 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10CC))) - -/* eFUSE STATUS Registers */ -#define ZYNQMP_EFUSE_STATUS_CACHE_DONE (1UL << 5) -#define ZYNQMP_EFUSE_STATUS_CACHE_LOAD (1UL << 4) - -/* eFUSE SEC_CTRL Registers */ -#define ZYNQMP_EFUSE_SEC_CTRL_PPK1_INVLD (3UL << 30) /* Revokes PPK1 */ -#define ZYNQMP_EFUSE_SEC_CTRL_PPK1_WRLK (1UL << 29) /* Locks writing to PPK1 eFuses */ -#define ZYNQMP_EFUSE_SEC_CTRL_PPK0_INVLD (3UL << 27) /* Revokes PPK0 */ -#define ZYNQMP_EFUSE_SEC_CTRL_PPK0_WRLK (1UL << 26) /* Locks writing to PPK0 eFuses */ -#define ZYNQMP_EFUSE_SEC_CTRL_RSA_EN (15UL << 11) /* Enables RSA Authentication during boot. All boots must be authenticated */ -#define ZYNQMP_EFUSE_SEC_CTRL_SEC_LOCK (1UL << 10) /* Disables the reboot into JTAG mode when doing a secure lockdown. */ -#define ZYNQMP_EFUSE_SEC_CTRL_JTAG_DIS (1UL << 5) /* Disables the JTAG controller. The only instructions available are BYPASS and IDCODE. */ -#define ZYNQMP_EFUSE_SEC_CTRL_ENC_ONLY (1UL << 2) /* Requires all boots to be encrypted using the eFuse key. 
*/ -#define ZYNQMP_EFUSE_SEC_CTRL_AES_WRLK (1UL << 1) /* Locks writing to the AES key section of eFuse */ -#define ZYNQMP_EFUSE_SEC_CTRL_AES_RDLK (1UL << 0) /* Locks the AES key CRC check function */ - - #ifdef DEBUG_UART -/* UART Support for Debugging */ -#define ZYNQMP_UART0_BASE 0xFF000000 -#define ZYNQMP_UART1_BASE 0xFF010000 - -#define ZYNQMP_UART_CR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x0))) -#define ZYNQMP_UART_MR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x4))) -#define ZYNQMP_UART_SR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x2C))) -#define ZYNQMP_UART_FIFO (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x30))) -#define ZYNQMP_UART_BR_GEN (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x18))) /* 2 - 65535: baud_sample */ -#define ZYNQMP_UART_BR_DIV (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x34))) /* 4 - 255: Baud rate */ - -/* UART Control Registers */ -#define ZYNQMP_UART_CR_TX_EN 0x00000010 /* TX enabled */ -#define ZYNQMP_UART_CR_RX_EN 0x00000004 /* RX enabled */ -#define ZYNQMP_UART_CR_TXRST 0x00000002 /* TX logic reset */ -#define ZYNQMP_UART_CR_RXRST 0x00000001 /* RX logic reset */ - -/* UART Mode Registers */ -#define ZYNQMP_UART_MR_PARITY_NONE 0x00000020 /* No parity */ - -/* UART Channel Status Register (read only) */ -#define ZYNQMP_UART_SR_TXFULL 0x00000010U /* TX FIFO full */ -#define ZYNQMP_UART_SR_TXEMPTY 0x00000008U /* TX FIFO empty */ -#define ZYNQMP_UART_SR_RXFULL 0x00000004U /* RX FIFO full */ -#define ZYNQMP_UART_SR_RXEMPTY 0x00000002U /* RX FIFO empty */ - -/* UART Configuration */ -#define UART_MASTER_CLOCK 100000000 -#define DEBUG_UART_BASE ZYNQMP_UART1_BASE -#define DEBUG_UART_BAUD 115200 -#define DEBUG_UART_DIV 4 - void uart_init(void) { - /* Enable TX/RX and Reset */ - ZYNQMP_UART_CR = (ZYNQMP_UART_CR_TX_EN | ZYNQMP_UART_CR_RX_EN | - ZYNQMP_UART_CR_TXRST | ZYNQMP_UART_CR_RXRST); + /* Disable Interrupts */ + ZYNQMP_UART_IDR = ZYNQMP_UART_ISR_MASK; + /* Disable TX/RX */ + ZYNQMP_UART_CR = (ZYNQMP_UART_CR_TX_DIS | 
ZYNQMP_UART_CR_RX_DIS); + /* Clear ISR */ + ZYNQMP_UART_ISR = ZYNQMP_UART_ISR_MASK; + /* 8-bits, no parity */ ZYNQMP_UART_MR = ZYNQMP_UART_MR_PARITY_NONE; + /* FIFO Trigger Level */ + ZYNQMP_UART_RXWM = 32; /* half of 64 byte FIFO */ + ZYNQMP_UART_TXWM = 32; /* half of 64 byte FIFO */ + + /* RX Timeout - disable */ + ZYNQMP_UART_RXTOUT = 0; + /* baud (115200) = master clk / (BR_GEN * (BR_DIV + 1)) */ + ZYNQMP_UART_BR_GEN = UART_MASTER_CLOCK / (DEBUG_UART_BAUD * (DEBUG_UART_DIV+1)); ZYNQMP_UART_BR_DIV = DEBUG_UART_DIV; - ZYNQMP_UART_BR_GEN = UART_MASTER_CLOCK / DEBUG_UART_BAUD / (DEBUG_UART_DIV+1); + + /* Reset TX/RX */ + ZYNQMP_UART_CR = (ZYNQMP_UART_CR_TXRST | ZYNQMP_UART_CR_RXRST); + + /* Enable TX/RX */ + ZYNQMP_UART_CR = (ZYNQMP_UART_CR_TX_EN | ZYNQMP_UART_CR_RX_EN); } void uart_write(const char* buf, uint32_t sz) @@ -366,10 +118,10 @@ void uart_write(const char* buf, uint32_t sz) char c = buf[pos++]; if (c == '\n') { /* handle CRLF */ while (ZYNQMP_UART_SR & ZYNQMP_UART_SR_TXFULL); - ZYNQMP_UART_SR = '\r'; + ZYNQMP_UART_FIFO = '\r'; } while (ZYNQMP_UART_SR & ZYNQMP_UART_SR_TXFULL); - ZYNQMP_UART_SR = c; + ZYNQMP_UART_FIFO = c; } /* Wait till TX Fifo is empty */ while (!(ZYNQMP_UART_SR & ZYNQMP_UART_SR_TXEMPTY)); @@ -558,9 +310,7 @@ static int qspi_gen_fifo_write(uint32_t reg_genfifo) static int gspi_fifo_tx(const uint8_t* data, uint32_t sz) { - uint32_t tmp32, txSz; - uint8_t* txData = (uint8_t*)&tmp32; - + uint32_t tmp32; while (sz > 0) { /* Wait for TX FIFO not full */ if (qspi_isr_wait(GQSPI_IXR_TX_FIFO_FULL, GQSPI_IXR_TX_FIFO_FULL)) { @@ -568,57 +318,44 @@ static int gspi_fifo_tx(const uint8_t* data, uint32_t sz) } /* Write data */ - txSz = sz; - if (txSz > GQSPI_FIFO_WORD_SZ) - txSz = GQSPI_FIFO_WORD_SZ; - tmp32 = 0; - memcpy(txData, data, txSz); - sz -= txSz; - data += txSz; - - #if defined(DEBUG_ZYNQ) && DEBUG_ZYNQ >= 3 - wolfBoot_printf("TXD=%08x\n", tmp32); - #endif - GQSPI_TXD = tmp32; + if (sz >= 4) { + GQSPI_TXD = *(uint32_t*)data; + data += 4; + 
sz -= 4; + } + else { + tmp32 = 0; + memcpy(&tmp32, data, sz); + GQSPI_TXD = tmp32; + sz = 0; + } } return GQSPI_CODE_SUCCESS; } static int gspi_fifo_rx(uint8_t* data, uint32_t sz, uint32_t discardSz) { - uint32_t tmp32, rxSz; - uint8_t* rxData = (uint8_t*)&tmp32; - + uint32_t tmp32; while (sz > 0) { /* Wait for RX FIFO not empty */ if (qspi_isr_wait(GQSPI_IXR_RX_FIFO_NOT_EMPTY, 0)) { return GQSPI_CODE_TIMEOUT; } - - /* Read data */ - tmp32 = GQSPI_RXD; - #if defined(DEBUG_ZYNQ) && DEBUG_ZYNQ >= 3 - wolfBoot_printf("RXD=%08x\n", tmp32); - if (discardSz > 0) - wolfBoot_printf("Discard %d\n", discardSz); - #endif if (discardSz >= GQSPI_FIFO_WORD_SZ) { + tmp32 = GQSPI_RXD; /* discard */ discardSz -= GQSPI_FIFO_WORD_SZ; continue; } - - rxSz = sz; - if (rxSz > GQSPI_FIFO_WORD_SZ) - rxSz = GQSPI_FIFO_WORD_SZ; - if (rxSz > discardSz) { - rxSz -= discardSz; - sz -= discardSz; + if (sz >= 4) { + *(uint32_t*)data = GQSPI_RXD; + data += 4; + sz -= 4; + } + else { + tmp32 = GQSPI_RXD; + memcpy(data, &tmp32, sz); + sz = 0; } - memcpy(data, rxData + discardSz, rxSz); - discardSz = 0; - - sz -= rxSz; - data += rxSz; } return GQSPI_CODE_SUCCESS; } @@ -778,14 +515,13 @@ static int qspi_transfer(QspiDev_t* pDev, return ret; } - -#endif +#endif /* QSPI Implementation */ static int qspi_flash_read_id(QspiDev_t* dev, uint8_t* id, uint32_t idSz) { int ret; uint8_t cmd[20]; /* size multiple of uint32_t */ - uint8_t status; + uint8_t status = 0; memset(cmd, 0, sizeof(cmd)); cmd[0] = MULTI_IO_READ_ID_CMD; @@ -815,7 +551,7 @@ static int qspi_write_enable(QspiDev_t* dev) { int ret; uint8_t cmd[4]; /* size multiple of uint32_t */ - uint8_t status; + uint8_t status = 0; memset(cmd, 0, sizeof(cmd)); cmd[0] = WRITE_ENABLE_CMD; @@ -990,7 +726,7 @@ static int qspi_exit_4byte_addr(QspiDev_t* dev) void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) { int ret; - uint32_t reg_cfg; + uint32_t reg_cfg, reg_isr; uint8_t id_low[4]; #if GQPI_USE_DUAL_PARALLEL == 1 uint8_t id_hi[4]; @@ -1037,9 +773,10 
@@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) GQSPI_SEL = 1; /* Clear and disable interrupts */ - reg_cfg = GQSPI_ISR; + reg_isr = GQSPI_ISR; GQSPI_ISR |= GQSPI_ISR_WR_TO_CLR_MASK; /* Clear poll timeout counter interrupt */ - QSPIDMA_DST_I_STS = QSPIDMA_DST_I_STS; /* clear all active interrupts */ + reg_cfg = QSPIDMA_DST_I_STS; + QSPIDMA_DST_I_STS = reg_cfg; /* clear all active interrupts */ QSPIDMA_DST_STS |= QSPIDMA_DST_STS_WTC; /* mark outstanding DMA's done */ GQSPI_IDR = GQSPI_IXR_ALL_MASK; /* disable interrupts */ QSPIDMA_DST_I_STS = QSPIDMA_DST_I_STS_ALL_MASK; /* disable interrupts */ @@ -1047,7 +784,7 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) if (GQSPI_ISR & GQSPI_IXR_RX_FIFO_EMPTY) { GQSPI_FIFO_CTRL |= (GQSPI_FIFO_CTRL_RST_TX_FIFO | GQSPI_FIFO_CTRL_RST_RX_FIFO); } - if (reg_cfg & GQSPI_IXR_RX_FIFO_EMPTY) { + if (reg_isr & GQSPI_IXR_RX_FIFO_EMPTY) { GQSPI_FIFO_CTRL |= GQSPI_FIFO_CTRL_RST_RX_FIFO; } @@ -1061,10 +798,26 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) reg_cfg &= ~(GQSPI_CFG_CLK_POL | GQSPI_CFG_CLK_PH); /* Use POL=0,PH=0 */ GQSPI_CFG = reg_cfg; - /* use tap delay bypass < 40MHz SPI clock */ +#if GQSPI_CLK_DIV >= 2 /* 300/8=37.5MHz */ + /* At 40 MHz, the Quad-SPI controller should be in non-loopback mode with + * the clock and data tap delays bypassed. */ IOU_TAPDLY_BYPASS |= IOU_TAPDLY_BYPASS_LQSPI_RX; GQSPI_LPBK_DLY_ADJ = 0; QSPI_DATA_DLY_ADJ = 0; +#elif GQSPI_CLK_DIV >= 1 /* 300/4=75MHz */ + /* At 100 MHz, the Quad-SPI controller should be in clock loopback mode + * with the clock tap delay bypassed, but the data tap delay enabled. */ + IOU_TAPDLY_BYPASS |= IOU_TAPDLY_BYPASS_LQSPI_RX; + GQSPI_LPBK_DLY_ADJ = GQSPI_LPBK_DLY_ADJ_USE_LPBK; + QSPI_DATA_DLY_ADJ = QSPI_DATA_DLY_ADJ_USE_DATA_DLY | QSPI_DATA_DLY_ADJ_DATA_DLY_ADJ(2); +#else + /* At 150 MHz, only the generic controller can be used. 
+ * The generic controller should be in clock loopback mode and the clock + * tap delay enabled, but the data tap delay disabled. */ + IOU_TAPDLY_BYPASS = 0; + GQSPI_LPBK_DLY_ADJ = GQSPI_LPBK_DLY_ADJ_USE_LPBK; + QSPI_DATA_DLY_ADJ = 0; +#endif /* Initialize hardware parameters for Threshold and Interrupts */ GQSPI_TX_THRESH = 1; @@ -1082,6 +835,7 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) GQSPI_EN = 1; /* Enable Device */ #endif /* USE_QNX */ (void)reg_cfg; + (void)reg_isr; /* ------ Flash Read ID (retry) ------ */ timeout = 0; @@ -1138,34 +892,20 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) #endif } - -void zynq_init(uint32_t cpu_clock) +#if 0 +uint64_t hal_timer_ms(void) { - qspi_init(cpu_clock, 0); + uint64_t val; + unsigned long cntfrq; + unsigned long cntpct; + asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq)); + asm volatile("mrs %0, cntpct_el0" : "=r" (cntpct)); + val = cntpct * 1000; + val /= cntfrq; + return val; } - -void zynq_exit(void) -{ - int ret; - -#if GQPI_USE_4BYTE_ADDR == 1 - /* Exit 4-byte address mode */ - ret = qspi_exit_4byte_addr(&mDev); - if (ret != GQSPI_CODE_SUCCESS) - return; #endif -#ifdef USE_QNX - if (mDev.qnx) { - xzynq_qspi_close(mDev.qnx); - mDev.qnx = NULL; - } -#endif - - (void)ret; -} - - /* public HAL functions */ void hal_init(void) { @@ -1177,17 +917,29 @@ void hal_init(void) #endif wolfBoot_printf(bootMsg); -#ifdef USE_BUILTIN_STARTUP /* Vitis is EL-3 */ +#if 0 /* This is only allowed for EL-3 */ asm volatile("msr cntfrq_el0, %0" : : "r" (cpu_freq) : "memory"); #endif - zynq_init(cpu_freq); + qspi_init(cpu_freq, 0); } void hal_prepare_boot(void) { - zynq_exit(); +#if GQPI_USE_4BYTE_ADDR == 1 + /* Exit 4-byte address mode */ + int ret = qspi_exit_4byte_addr(&mDev); + if (ret != GQSPI_CODE_SUCCESS) + return; +#endif + +#ifdef USE_QNX + if (mDev.qnx) { + xzynq_qspi_close(mDev.qnx); + mDev.qnx = NULL; + } +#endif } /* Flash functions must be relocated to RAM for execution */ @@ -1216,46 +968,54 @@ 
int RAMFUNCTION ext_flash_write(uintptr_t address, const uint8_t *data, int len) { int ret = 0; uint8_t cmd[8]; /* size multiple of uint32_t */ - uint32_t xferSz, page, pages, idx = 0; + uint32_t xferSz, page, pages, idx; uintptr_t addr; +#if defined(DEBUG_ZYNQ) && DEBUG_ZYNQ >= 2 + wolfBoot_printf("Flash Write: Addr 0x%x, Ptr %p, Len %d\n", + address, data, len); +#endif + /* write by page */ pages = ((len + (FLASH_PAGE_SIZE-1)) / FLASH_PAGE_SIZE); for (page = 0; page < pages; page++) { ret = qspi_write_enable(&mDev); - if (ret == GQSPI_CODE_SUCCESS) { - xferSz = len; - if (xferSz > FLASH_PAGE_SIZE) - xferSz = FLASH_PAGE_SIZE; - - addr = address + (page * FLASH_PAGE_SIZE); - if (mDev.stripe) { - /* For dual parallel the address divide by 2 */ - addr /= 2; - } + if (ret != GQSPI_CODE_SUCCESS) { + break; + } + xferSz = len; + if (xferSz > FLASH_PAGE_SIZE) + xferSz = FLASH_PAGE_SIZE; + + addr = address + (page * FLASH_PAGE_SIZE); + if (mDev.stripe) { + /* For dual parallel the address divide by 2 */ + addr /= 2; + } - /* ------ Write Flash (page at a time) ------ */ - memset(cmd, 0, sizeof(cmd)); - cmd[idx++] = PAGE_PROG_CMD; - #if GQPI_USE_4BYTE_ADDR == 1 - cmd[idx++] = ((addr >> 24) & 0xFF); - #endif - cmd[idx++] = ((addr >> 16) & 0xFF); - cmd[idx++] = ((addr >> 8) & 0xFF); - cmd[idx++] = ((addr >> 0) & 0xFF); - ret = qspi_transfer(&mDev, cmd, idx, - (const uint8_t*)(data + (page * FLASH_PAGE_SIZE)), - xferSz, NULL, 0, 0, GQSPI_GEN_FIFO_MODE_SPI); - wolfBoot_printf("Flash Page %d Write: Ret %d\n", page, ret); - if (ret != GQSPI_CODE_SUCCESS) - break; + /* ------ Write Flash (page at a time) ------ */ + memset(cmd, 0, sizeof(cmd)); + idx = 0; + cmd[idx++] = PAGE_PROG_CMD; + #if GQPI_USE_4BYTE_ADDR == 1 + cmd[idx++] = ((addr >> 24) & 0xFF); + #endif + cmd[idx++] = ((addr >> 16) & 0xFF); + cmd[idx++] = ((addr >> 8) & 0xFF); + cmd[idx++] = ((addr >> 0) & 0xFF); + ret = qspi_transfer(&mDev, cmd, idx, + (const uint8_t*)(data + (page * FLASH_PAGE_SIZE)), + xferSz, NULL, 
0, 0, GQSPI_GEN_FIFO_MODE_SPI); + wolfBoot_printf("Flash Page %d Write: Ret %d\n", page, ret); + if (ret != GQSPI_CODE_SUCCESS) + break; - ret = qspi_wait_ready(&mDev); /* Wait for not busy */ - if (ret != GQSPI_CODE_SUCCESS) { - break; - } - qspi_write_disable(&mDev); + ret = qspi_wait_ready(&mDev); /* Wait for not busy */ + if (ret != GQSPI_CODE_SUCCESS) { + break; } + qspi_write_disable(&mDev); + len -= xferSz; } return ret; @@ -1281,6 +1041,11 @@ int RAMFUNCTION ext_flash_read(uintptr_t address, uint8_t *data, int len) uint8_t cmd[8]; /* size multiple of uint32_t */ uint32_t idx = 0; +#if defined(DEBUG_ZYNQ) && DEBUG_ZYNQ >= 2 + wolfBoot_printf("Flash Read: Addr 0x%x, Ptr %p, Len %d\n", + address, data, len); +#endif + if (mDev.stripe) { /* For dual parallel the address divide by 2 */ address /= 2; @@ -1312,6 +1077,10 @@ int RAMFUNCTION ext_flash_erase(uintptr_t address, int len) uint32_t idx = 0; uintptr_t qspiaddr; +#if defined(DEBUG_ZYNQ) && DEBUG_ZYNQ >= 2 + wolfBoot_printf("Flash Erase: Addr 0x%x, Len %d\n", address, len); +#endif + while (len > 0) { /* For dual parallel the address divide by 2 */ qspiaddr = (mDev.stripe) ? address / 2 : address; @@ -1358,6 +1127,13 @@ void* hal_get_dts_address(void) { return (void*)WOLFBOOT_DTS_BOOT_ADDRESS; } + +int hal_dts_fixup(void* dts_addr) +{ + /* place FDT fixup specific to ZynqMP here */ + //fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt)); + return 0; +} #endif @@ -1369,7 +1145,7 @@ static int test_ext_flash(QspiDev_t* dev) { int ret; uint32_t i; - uint8_t pageData[FLASH_PAGE_SIZE]; + uint8_t pageData[FLASH_PAGE_SIZE*4]; #ifndef TEST_FLASH_READONLY /* Erase sector */ @@ -1403,3 +1179,5 @@ static int test_ext_flash(QspiDev_t* dev) return ret; } #endif /* TEST_EXT_FLASH */ + +#endif /* TARGET_zynq */ diff --git a/hal/zynq.h b/hal/zynq.h new file mode 100644 index 000000000..22b728560 --- /dev/null +++ b/hal/zynq.h @@ -0,0 +1,363 @@ +/* zynq.h + * + * Copyright (C) 2024 wolfSSL Inc. 
+ * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifndef _ZYNQMP_H_ +#define _ZYNQMP_H_ + +/* By default expect EL2 at startup */ +#ifndef EL3_SECURE +#define EL3_SECURE 0 +#endif +#ifndef EL2_HYPERVISOR +#define EL2_HYPERVISOR 1 +#endif +#ifndef EL1_NONSECURE +#define EL1_NONSECURE 0 +#endif + +#ifndef HYP_GUEST +/* ZEN Hypervisor guest format support */ +#define HYP_GUEST 0 +#endif + +/* Floating Point Trap Enable */ +#ifndef FPU_TRAP +#define FPU_TRAP 0 +#endif + +/* Errata: 855873: An eviction might overtake a cache clean operation */ +#define CONFIG_ARM_ERRATA_855873 1 + +#define XPAR_PSU_DDR_0_S_AXI_BASEADDR 0x00000000 +#define XPAR_PSU_DDR_0_S_AXI_HIGHADDR 0x7FFFFFFF +#define XPAR_PSU_DDR_1_S_AXI_BASEADDR 0x800000000 +#define XPAR_PSU_DDR_1_S_AXI_HIGHADDR 0x87FFFFFFF + +/* Clocking */ +#define CORTEXA53_0_CPU_CLK_FREQ_HZ 1199880127 +#define CORTEXA53_0_TIMESTAMP_CLK_FREQ 99990005 +#define UART_MASTER_CLOCK 99990005 +#define GQSPI_CLK_FREQ_HZ 124987511 + +/* IOP System-level Control */ +#define IOU_SLCR_BASSE 0xFF180000 +#define IOU_TAPDLY_BYPASS (*((volatile uint32_t*)(IOU_SLCR_BASSE + 0x390))) +#define IOU_TAPDLY_BYPASS_LQSPI_RX (1UL << 2) /* LQSPI Tap Delay Enable on Rx Clock signal. 0: enable. 1: disable (bypass tap delay). 
*/ + +/* QSPI bare-metal driver */ +/* Generic Quad-SPI */ +#define QSPI_BASE 0xFF0F0000UL +#define LQSPI_EN (*((volatile uint32_t*)(QSPI_BASE + 0x14))) /* SPI enable: 0: disable the SPI, 1: enable the SPI */ +#define GQSPI_CFG (*((volatile uint32_t*)(QSPI_BASE + 0x100))) /* configuration register. */ +#define GQSPI_ISR (*((volatile uint32_t*)(QSPI_BASE + 0x104))) /* interrupt status register. */ +#define GQSPI_IER (*((volatile uint32_t*)(QSPI_BASE + 0x108))) /* interrupt enable register. */ +#define GQSPI_IDR (*((volatile uint32_t*)(QSPI_BASE + 0x10C))) /* interrupt disable register. */ +#define GQSPI_IMR (*((volatile uint32_t*)(QSPI_BASE + 0x110))) /* interrupt unmask register. */ +#define GQSPI_EN (*((volatile uint32_t*)(QSPI_BASE + 0x114))) /* enable register. */ +#define GQSPI_TXD (*((volatile uint32_t*)(QSPI_BASE + 0x11C))) /* TX data register. Keyhole addresses for the transmit data FIFO. */ +#define GQSPI_RXD (*((volatile uint32_t*)(QSPI_BASE + 0x120))) /* RX data register. */ +#define GQSPI_TX_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x128))) /* TXFIFO Threshold Level register: (bits 5:0) Defines the level at which the TX_FIFO_NOT_FULL interrupt is generated */ +#define GQSPI_RX_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x12C))) /* RXFIFO threshold level register: (bits 5:0) Defines the level at which the RX_FIFO_NOT_EMPTY interrupt is generated */ +#define GQSPI_GPIO (*((volatile uint32_t*)(QSPI_BASE + 0x130))) +#define GQSPI_LPBK_DLY_ADJ (*((volatile uint32_t*)(QSPI_BASE + 0x138))) /* adjusting the internal loopback clock delay for read data capturing */ +#define GQSPI_GEN_FIFO (*((volatile uint32_t*)(QSPI_BASE + 0x140))) /* generic FIFO data register. Keyhole addresses for the generic FIFO. */ +#define GQSPI_SEL (*((volatile uint32_t*)(QSPI_BASE + 0x144))) /* select register. */ +#define GQSPI_FIFO_CTRL (*((volatile uint32_t*)(QSPI_BASE + 0x14C))) /* FIFO control register. 
*/ +#define GQSPI_GF_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x150))) /* generic FIFO threshold level register: (bits 4:0) Defines the level at which the GEN_FIFO_NOT_FULL interrupt is generated */ +#define GQSPI_POLL_CFG (*((volatile uint32_t*)(QSPI_BASE + 0x154))) /* poll configuration register */ +#define GQSPI_P_TIMEOUT (*((volatile uint32_t*)(QSPI_BASE + 0x158))) /* poll timeout register. */ +#define GQSPI_XFER_STS (*((volatile uint32_t*)(QSPI_BASE + 0x15C))) /* transfer status register. */ +#define QSPI_DATA_DLY_ADJ (*((volatile uint32_t*)(QSPI_BASE + 0x1F8))) /* adjusting the internal receive data delay for read data capturing */ +#define GQSPI_MOD_ID (*((volatile uint32_t*)(QSPI_BASE + 0x1FC))) +#define QSPIDMA_DST_STS (*((volatile uint32_t*)(QSPI_BASE + 0x808))) +#define QSPIDMA_DST_CTRL (*((volatile uint32_t*)(QSPI_BASE + 0x80C))) +#define QSPIDMA_DST_I_STS (*((volatile uint32_t*)(QSPI_BASE + 0x814))) +#define QSPIDMA_DST_CTRL2 (*((volatile uint32_t*)(QSPI_BASE + 0x824))) + +#define GQSPI_LPBK_DLY_ADJ_USE_LPBK (1UL << 5) +#define GQSPI_LPBK_DLY_ADJ_DIV0(x) (((x) & 0x7) << 0) +#define GQSPI_LPBK_DLY_ADJ_DLY1(x) (((x) & 0x3) << 3) +#define QSPI_DATA_DLY_ADJ_USE_DATA_DLY (1UL << 31) +#define QSPI_DATA_DLY_ADJ_DATA_DLY_ADJ(x) (((x) & 0x7) << 28) + +/* GQSPI Registers */ +/* GQSPI_CFG: Configuration registers */ +#define GQSPI_CFG_CLK_POL (1UL << 1) /* Clock polarity outside QSPI word: 0: QSPI clock is quiescent low, 1: QSPI clock is quiescent high */ +#define GQSPI_CFG_CLK_PH (1UL << 2) /* Clock phase: 1: the QSPI clock is inactive outside the word, 0: the QSPI clock is active outside the word */ +/* 000: divide by 2, 001: divide by 4, 010: divide by 8, + 011: divide by 16, 100: divide by 32, 101: divide by 64, + 110: divide by 128, 111: divide by 256 */ +#define GQSPI_CFG_BAUD_RATE_DIV_MASK (7UL << 3) +#define GQSPI_CFG_BAUD_RATE_DIV(d) ((d << 3) & GQSPI_CFG_BAUD_RATE_DIV_MASK) +#define GQSPI_CFG_WP_HOLD (1UL << 19) /* If set, Holdb and WPn pins are 
actively driven by the qspi controller in 1-bit and 2-bit modes. */ +#define GQSPI_CFG_EN_POLL_TIMEOUT (1UL << 20) /* Poll Timeout Enable: 0: disable, 1: enable */ +#define GQSPI_CFG_ENDIAN (1UL << 26) /* Endian format transmit data register: 0: little endian, 1: big endian */ +#define GQSPI_CFG_START_GEN_FIFO (1UL << 28) /* Trigger Generic FIFO Command Execution: 0:disable executing requests, 1: enable executing requests */ +#define GQSPI_CFG_GEN_FIFO_START_MODE (1UL << 29) /* Start mode of Generic FIFO: 0: Auto Start Mode, 1: Manual Start Mode */ +#define GQSPI_CFG_MODE_EN_MASK (3UL << 30) /* Flash memory interface mode control: 00: IO mode, 10: DMA mode */ +#define GQSPI_CFG_MODE_EN(m) ((m << 30) & GQSPI_CFG_MODE_EN_MASK) +#define GQSPI_CFG_MODE_EN_IO GQSPI_CFG_MODE_EN(0) +#define GQSPI_CFG_MODE_EN_DMA GQSPI_CFG_MODE_EN(2) + +/* GQSPI_ISR / GQSPI_IER / GQSPI_IDR / GQSPI_IMR: Interrupt registers */ +#define GQSPI_IXR_RX_FIFO_EMPTY (1UL << 11) +#define GQSPI_IXR_GEN_FIFO_FULL (1UL << 10) +#define GQSPI_IXR_GEN_FIFO_NOT_FULL (1UL << 9) +#define GQSPI_IXR_TX_FIFO_EMPTY (1UL << 8) +#define GQSPI_IXR_GEN_FIFO_EMPTY (1UL << 7) +#define GQSPI_IXR_RX_FIFO_FULL (1UL << 5) +#define GQSPI_IXR_RX_FIFO_NOT_EMPTY (1UL << 4) +#define GQSPI_IXR_TX_FIFO_FULL (1UL << 3) +#define GQSPI_IXR_TX_FIFO_NOT_FULL (1UL << 2) +#define GQSPI_IXR_POLL_TIME_EXPIRE (1UL << 1) + +#define GQSPI_IXR_ALL_MASK (GQSPI_IXR_POLL_TIME_EXPIRE | GQSPI_IXR_TX_FIFO_NOT_FULL | \ + GQSPI_IXR_TX_FIFO_FULL | GQSPI_IXR_RX_FIFO_NOT_EMPTY | GQSPI_IXR_RX_FIFO_FULL | \ + GQSPI_IXR_GEN_FIFO_EMPTY | GQSPI_IXR_TX_FIFO_EMPTY | GQSPI_IXR_GEN_FIFO_NOT_FULL | \ + GQSPI_IXR_GEN_FIFO_FULL | GQSPI_IXR_RX_FIFO_EMPTY) +#define GQSPI_ISR_WR_TO_CLR_MASK 0x00000002U + +/* GQSPI_GEN_FIFO: FIFO data register */ +/* bits 0-7: Length in bytes (except when GQSPI_GEN_FIFO_EXP_MASK is set length as 255 chunks) */ +#define GQSPI_GEN_FIFO_IMM_MASK (0xFFUL) /* Immediate Data Field */ +#define GQSPI_GEN_FIFO_IMM(imm) (imm & 
GQSPI_GEN_FIFO_IMM_MASK) +#define GQSPI_GEN_FIFO_DATA_XFER (1UL << 8) /* Indicates IMM is size, otherwise byte is sent directly in IMM reg */ +#define GQSPI_GEN_FIFO_EXP_MASK (1UL << 9) /* Length is Exponent (length / 255) */ +#define GQSPI_GEN_FIFO_MODE_MASK (3UL << 10) +#define GQSPI_GEN_FIFO_MODE(m) ((m << 10) & GQSPI_GEN_FIFO_MODE_MASK) +#define GQSPI_GEN_FIFO_MODE_SPI GQSPI_GEN_FIFO_MODE(1) +#define GQSPI_GEN_FIFO_MODE_DSPI GQSPI_GEN_FIFO_MODE(2) +#define GQSPI_GEN_FIFO_MODE_QSPI GQSPI_GEN_FIFO_MODE(3) +#define GQSPI_GEN_FIFO_CS_MASK (3UL << 12) +#define GQSPI_GEN_FIFO_CS(c) ((c << 12) & GQSPI_GEN_FIFO_CS_MASK) +#define GQSPI_GEN_FIFO_CS_LOWER GQSPI_GEN_FIFO_CS(1) +#define GQSPI_GEN_FIFO_CS_UPPER GQSPI_GEN_FIFO_CS(2) +#define GQSPI_GEN_FIFO_CS_BOTH GQSPI_GEN_FIFO_CS(3) +#define GQSPI_GEN_FIFO_BUS_MASK (3UL << 14) +#define GQSPI_GEN_FIFO_BUS(b) ((b << 14) & GQSPI_GEN_FIFO_BUS_MASK) +#define GQSPI_GEN_FIFO_BUS_LOW GQSPI_GEN_FIFO_BUS(1) +#define GQSPI_GEN_FIFO_BUS_UP GQSPI_GEN_FIFO_BUS(2) +#define GQSPI_GEN_FIFO_BUS_BOTH GQSPI_GEN_FIFO_BUS(3) +#define GQSPI_GEN_FIFO_TX (1UL << 16) +#define GQSPI_GEN_FIFO_RX (1UL << 17) +#define GQSPI_GEN_FIFO_STRIPE (1UL << 18) /* Stripe data across the lower and upper data buses. 
*/ +#define GQSPI_GEN_FIFO_POLL (1UL << 19) + +/* GQSPI_FIFO_CTRL */ +#define GQSPI_FIFO_CTRL_RST_GEN_FIFO (1UL << 0) +#define GQSPI_FIFO_CTRL_RST_TX_FIFO (1UL << 1) +#define GQSPI_FIFO_CTRL_RST_RX_FIFO (1UL << 2) + +/* QSPIDMA_DST_CTRL */ +#define QSPIDMA_DST_CTRL_DEF 0x403FFA00UL +#define QSPIDMA_DST_CTRL2_DEF 0x081BFFF8UL + +/* QSPIDMA_DST_STS */ +#define QSPIDMA_DST_STS_WTC 0xE000U + +/* QSPIDMA_DST_I_STS */ +#define QSPIDMA_DST_I_STS_ALL_MASK 0xFEU + +/* QSPI Configuration (bare-metal only) */ +#ifndef GQSPI_CLK_DIV +#define GQSPI_CLK_DIV 2 /* (CLK (300MHz) / (2 << DIV) = BUS): 0=DIV2, 1=DIV4, 2=DIV8 */ +#endif +#define GQSPI_CS_ASSERT_CLOCKS 5 /* CS Setup Time (tCSS) - num of clock cycles goes in IMM */ +#define GQSPI_FIFO_WORD_SZ 4 +#define GQSPI_TIMEOUT_TRIES 100000 +#define QSPI_FLASH_READY_TRIES 1000 + +/* QSPI Configuration */ +#ifndef GQSPI_QSPI_MODE +#define GQSPI_QSPI_MODE GQSPI_GEN_FIFO_MODE_QSPI +#endif +#ifndef GQPI_USE_DUAL_PARALLEL +#define GQPI_USE_DUAL_PARALLEL 1 /* 0=no stripe, 1=stripe */ +#endif +#ifndef GQPI_USE_4BYTE_ADDR +#define GQPI_USE_4BYTE_ADDR 1 +#endif +#ifndef GQSPI_DUMMY_READ +#define GQSPI_DUMMY_READ (8*8) /* Number of dummy clock cycles for reads */ +#endif + + +/* Flash Parameters: + * Micron Serial NOR Flash Memory 64KB Sector Erase MT25QU512ABB + * Stacked device (two 512Mb (64MB)) + * Dual Parallel so total addressable size is double + */ +#ifndef FLASH_DEVICE_SIZE + #ifdef ZCU102 + /* 64*2 (dual parallel) = 128MB */ + #define FLASH_DEVICE_SIZE (2 * 64 * 1024 * 1024) /* MT25QU512ABB */ + #else + /* 128*2 (dual parallel) = 256MB */ + #define FLASH_DEVICE_SIZE (2 * 128 * 1024 * 1024) /* MT25QU01GBBB */ + #endif +#endif +#ifndef FLASH_PAGE_SIZE + #ifdef ZCU102 + /* MT25QU512ABB - Read FlashID: 20 BB 20 */ + #define FLASH_PAGE_SIZE 256 + #else + /* MT25QU01GBBB - Read FlashID: 20 BB 21 */ + #define FLASH_PAGE_SIZE 512 + #endif +#endif +#define FLASH_NUM_SECTORS (FLASH_DEVICE_SIZE/WOLFBOOT_SECTOR_SIZE) + + +/* Flash Commands */ 
+#define WRITE_ENABLE_CMD 0x06U +#define READ_SR_CMD 0x05U +#define WRITE_DISABLE_CMD 0x04U +#define READ_ID_CMD 0x9FU +#define MULTI_IO_READ_ID_CMD 0xAFU +#define READ_FSR_CMD 0x70U +#define ENTER_QSPI_MODE_CMD 0x35U +#define EXIT_QSPI_MODE_CMD 0xF5U +#define ENTER_4B_ADDR_MODE_CMD 0xB7U +#define EXIT_4B_ADDR_MODE_CMD 0xE9U + +#define FAST_READ_CMD 0x0BU +#define DUAL_READ_CMD 0x3BU +#define QUAD_READ_CMD 0x6BU +#define FAST_READ_4B_CMD 0x0CU +#define DUAL_READ_4B_CMD 0x3CU +#define QUAD_READ_4B_CMD 0x6CU + +#define PAGE_PROG_CMD 0x02U +#define DUAL_PROG_CMD 0xA2U +#define QUAD_PROG_CMD 0x22U +#define PAGE_PROG_4B_CMD 0x12U +#define DUAL_PROG_4B_CMD 0x12U +#define QUAD_PROG_4B_CMD 0x34U + +#define SEC_ERASE_CMD 0xD8U +#define SEC_4K_ERASE_CMD 0x20U +#define RESET_ENABLE_CMD 0x66U +#define RESET_MEMORY_CMD 0x99U + +#define WRITE_EN_MASK 0x02 /* Write Enable Latch: 0=Write Disabled, 1=Write Enabled */ +#define FLASH_READY_MASK 0x80 /* 0=Busy, 1=Ready */ + + +/* Return Codes */ +#define GQSPI_CODE_SUCCESS 0 +#define GQSPI_CODE_FAILED -100 +#define GQSPI_CODE_TIMEOUT -101 + + + +/* eFUSE support */ +#define ZYNQMP_EFUSE_BASE 0xFFCC0000 +#define ZYNQMP_EFUSE_STATUS (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x0008))) +#define ZYNQMP_EFUSE_SEC_CTRL (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x1058))) +#define ZYNQMP_EFUSE_PPK0_0 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A0))) +#define ZYNQMP_EFUSE_PPK0_1 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A4))) +#define ZYNQMP_EFUSE_PPK0_2 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A8))) +#define ZYNQMP_EFUSE_PPK0_3 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10AC))) +#define ZYNQMP_EFUSE_PPK0_4 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B0))) +#define ZYNQMP_EFUSE_PPK0_5 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B4))) +#define ZYNQMP_EFUSE_PPK0_6 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B8))) +#define ZYNQMP_EFUSE_PPK0_7 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10BC))) +#define ZYNQMP_EFUSE_PPK0_8 
(*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C0))) +#define ZYNQMP_EFUSE_PPK0_9 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C4))) +#define ZYNQMP_EFUSE_PPK0_10 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C8))) +#define ZYNQMP_EFUSE_PPK0_11 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10CC))) + +/* eFUSE STATUS Registers */ +#define ZYNQMP_EFUSE_STATUS_CACHE_DONE (1UL << 5) +#define ZYNQMP_EFUSE_STATUS_CACHE_LOAD (1UL << 4) + +/* eFUSE SEC_CTRL Registers */ +#define ZYNQMP_EFUSE_SEC_CTRL_PPK1_INVLD (3UL << 30) /* Revokes PPK1 */ +#define ZYNQMP_EFUSE_SEC_CTRL_PPK1_WRLK (1UL << 29) /* Locks writing to PPK1 eFuses */ +#define ZYNQMP_EFUSE_SEC_CTRL_PPK0_INVLD (3UL << 27) /* Revokes PPK0 */ +#define ZYNQMP_EFUSE_SEC_CTRL_PPK0_WRLK (1UL << 26) /* Locks writing to PPK0 eFuses */ +#define ZYNQMP_EFUSE_SEC_CTRL_RSA_EN (15UL << 11) /* Enables RSA Authentication during boot. All boots must be authenticated */ +#define ZYNQMP_EFUSE_SEC_CTRL_SEC_LOCK (1UL << 10) /* Disables the reboot into JTAG mode when doing a secure lockdown. */ +#define ZYNQMP_EFUSE_SEC_CTRL_JTAG_DIS (1UL << 5) /* Disables the JTAG controller. The only instructions available are BYPASS and IDCODE. */ +#define ZYNQMP_EFUSE_SEC_CTRL_ENC_ONLY (1UL << 2) /* Requires all boots to be encrypted using the eFuse key. 
*/ +#define ZYNQMP_EFUSE_SEC_CTRL_AES_WRLK (1UL << 1) /* Locks writing to the AES key section of eFuse */ +#define ZYNQMP_EFUSE_SEC_CTRL_AES_RDLK (1UL << 0) /* Locks the AES key CRC check function */ + + +/* UART Support */ +#define ZYNQMP_UART0_BASE 0xFF000000 +#define ZYNQMP_UART1_BASE 0xFF010000 + +#define ZYNQMP_UART_CR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x00))) +#define ZYNQMP_UART_MR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x04))) +#define ZYNQMP_UART_IDR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x0C))) /* Interrupt Disable Register */ +#define ZYNQMP_UART_ISR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x14))) /* Interrupt Status Register */ +#define ZYNQMP_UART_RXTOUT (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x1C))) +#define ZYNQMP_UART_RXWM (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x20))) +#define ZYNQMP_UART_TXWM (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x44))) +#define ZYNQMP_UART_SR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x2C))) +#define ZYNQMP_UART_FIFO (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x30))) +#define ZYNQMP_UART_BR_GEN (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x18))) /* 2 - 65535: baud_sample */ +#define ZYNQMP_UART_BR_DIV (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x34))) /* 4 - 255: Baud rate */ + + +/* UART Control Registers */ +#define ZYNQMP_UART_CR_TX_DIS 0x00000020 /* TX disable */ +#define ZYNQMP_UART_CR_TX_EN 0x00000010 /* TX enabled */ +#define ZYNQMP_UART_CR_RX_DIS 0x00000008 /* RX disable */ +#define ZYNQMP_UART_CR_RX_EN 0x00000004 /* RX enabled */ +#define ZYNQMP_UART_CR_TXRST 0x00000002 /* TX logic reset */ +#define ZYNQMP_UART_CR_RXRST 0x00000001 /* RX logic reset */ + +/* UART ISR Mask 0-13 bits */ +#define ZYNQMP_UART_ISR_MASK 0x3FFF + +/* UART Mode Registers */ +#define ZYNQMP_UART_MR_PARITY_NONE 0x00000020 /* No parity */ + +/* UART Channel Status Register (read only) */ +#define ZYNQMP_UART_SR_TXFULL 0x00000010U /* TX FIFO full */ +#define ZYNQMP_UART_SR_TXEMPTY 0x00000008U /* TX FIFO empty */ +#define 
ZYNQMP_UART_SR_RXFULL 0x00000004U /* RX FIFO full */ +#define ZYNQMP_UART_SR_RXEMPTY 0x00000002U /* RX FIFO empty */ + +/* UART Configuration */ +#if defined(DEBUG_UART_NUM) && DEBUG_UART_NUM == 0 + #define DEBUG_UART_BASE ZYNQMP_UART0_BASE +#elif defined(DEBUG_UART_NUM) && DEBUG_UART_NUM == 1 + #define DEBUG_UART_BASE ZYNQMP_UART1_BASE +#endif +#ifndef DEBUG_UART_BASE + /* default to UART0 */ + #define DEBUG_UART_BASE ZYNQMP_UART0_BASE +#endif + +#ifndef DEBUG_UART_BAUD + #define DEBUG_UART_BAUD 115200 + #define DEBUG_UART_DIV 6 +#endif + + +#define GICD_BASE 0xF9010000 +#define GICC_BASE 0xF9020000 + + +#endif /* _ZYNQMP_H_ */ diff --git a/hal/zynq.ld b/hal/zynq.ld index 5ad5ee56d..9aeeaaadf 100644 --- a/hal/zynq.ld +++ b/hal/zynq.ld @@ -11,12 +11,12 @@ _EL2_STACK_SIZE = DEFINED(_EL2_STACK_SIZE) ? _EL2_STACK_SIZE : 1024; /* Define Memories in the system */ MEMORY { - /* psu_ddr_0_MEM_0 : ORIGIN = 0x0, LENGTH = 0x7FF00000 */ - psu_ddr_0_MEM_0 : ORIGIN = 0x40000000, LENGTH = 0x100000 + /* psu_ddr_0_MEM_0 : ORIGIN = 0x0, LENGTH = 0x80000000 */ + /* Use the end of DDR0 for wolfBoot (reserve 1MB) */ + psu_ddr_0_MEM_0 : ORIGIN = 0x7FF00000, LENGTH = 0x100000 psu_ddr_1_MEM_0 : ORIGIN = 0x800000000, LENGTH = 0x80000000 psu_ocm_ram_0_MEM_0 : ORIGIN = 0xFFFC0000, LENGTH = 0x40000 psu_qspi_linear_0_MEM_0 : ORIGIN = 0xC0000000, LENGTH = 0x20000000 - } /* Specify the default entry point to the program */ @@ -128,6 +128,10 @@ SECTIONS *(.got2) } > psu_ddr_0_MEM_0 +.note.gnu.build-id : { + KEEP (*(.note.gnu.build-id)) +} > psu_ddr_0_MEM_0 + .ctors : { . 
= ALIGN(64); __CTOR_LIST__ = .; diff --git a/include/fdt.h b/include/fdt.h index 402b7ed4f..e963b9658 100644 --- a/include/fdt.h +++ b/include/fdt.h @@ -139,6 +139,11 @@ const char* fdt_get_string(const void *fdt, int stroffset, int *lenp); const void *fdt_getprop(const void *fdt, int nodeoffset, const char *name, int *lenp); int fdt_setprop(void *fdt, int nodeoffset, const char *name, const void *val, int len); +void* fdt_getprop_address(const void *fdt, int nodeoffset, const char *name); + +int fdt_find_node_offset(void* fdt, int startoff, const char* nodename); +int fdt_find_prop_offset(void* fdt, int startoff, const char* propname, const char* propval); + int fdt_find_devtype(void* fdt, int startoff, const char* node); int fdt_node_check_compatible(const void *fdt, int nodeoffset, const char *compatible); int fdt_node_offset_by_compatible(const void *fdt, int startoffset, const char *compatible); @@ -152,6 +157,10 @@ int fdt_fixup_val64(void* fdt, int off, const char* node, const char* name, uint int fdt_shrink(void* fdt); +/* FIT */ +const char* fit_find_images(void* fdt, const char** pkernel, const char** pflat_dt); +void* fit_load_image(void* fdt, const char* image, int* lenp); + #ifdef __cplusplus } #endif diff --git a/include/printf.h b/include/printf.h index 2b4337bf2..8b5c6e9b9 100644 --- a/include/printf.h +++ b/include/printf.h @@ -49,7 +49,7 @@ /* support for wolfBoot_printf logging */ #if defined(PRINTF_ENABLED) && !defined(WOLFBOOT_NO_PRINTF) # include -# if defined(DEBUG_ZYNQ) && !defined(USE_QNX) +# if defined(DEBUG_ZYNQ) && !defined(USE_QNX) && !defined(DEBUG_UART) # include "xil_printf.h" # define wolfBoot_printf(_f_, ...) xil_printf(_f_, ##__VA_ARGS__) # elif defined(WOLFBOOT_DEBUG_EFI) diff --git a/src/boot_aarch64.c b/src/boot_aarch64.c index 00628e6dd..e323dbfce 100644 --- a/src/boot_aarch64.c +++ b/src/boot_aarch64.c @@ -1,6 +1,6 @@ /* boot_aarch64.c * - * Copyright (C) 2021 wolfSSL Inc. + * Copyright (C) 2024 wolfSSL Inc. 
* * This file is part of wolfBoot. * @@ -67,6 +67,16 @@ void boot_entry_C(void) main(); } + +#ifdef MMU +int __attribute((weak)) hal_dts_fixup(void* dts_addr) +{ + (void)dts_addr; + return 0; +} +#endif + + /* This is the main loop for the bootloader. * * It performs the following actions: @@ -80,14 +90,18 @@ void RAMFUNCTION do_boot(const uint32_t *app_offset, const uint32_t* dts_offset) void RAMFUNCTION do_boot(const uint32_t *app_offset) #endif { - /* Set application address via x4 */ - asm volatile("mov x4, %0" : : "r"(app_offset)); +#ifdef MMU + hal_dts_fixup((uint32_t*)dts_offset); +#endif + + /* Set application address via x4 */ + asm volatile("mov x4, %0" : : "r"(app_offset)); #ifdef MMU - /* Move the dts pointer to x5 (as first argument) */ - asm volatile("mov x5, %0" : : "r"(dts_offset)); + /* Move the dts pointer to x5 (as first argument) */ + asm volatile("mov x5, %0" : : "r"(dts_offset)); #else - asm volatile("mov x5, xzr"); + asm volatile("mov x5, xzr"); #endif #ifndef NO_QNX @@ -122,3 +136,20 @@ void RAMFUNCTION arch_reboot(void) } #endif + +void SynchronousInterrupt(void) +{ + +} +void IRQInterrupt(void) +{ + +} +void FIQInterrupt(void) +{ + +} +void SErrorInterrupt(void) +{ + +} \ No newline at end of file diff --git a/src/boot_aarch64_start.S b/src/boot_aarch64_start.S index 56017a8a1..f761331b9 100644 --- a/src/boot_aarch64_start.S +++ b/src/boot_aarch64_start.S @@ -19,51 +19,17 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ +/* Code is adapted from the default AMD/Xilinx boot.S, translation_table.S and + * asm_vectors.S*/ -/* Include target-specific defines here to override any AA64 defaults */ -#ifdef TARGET_nxp_ls1028a -#include "../hal/nxp_ls1028a.h" -#endif - -/* AARCH64 default configurations */ -#if !defined(AA64_TARGET_EL) -#define AA64_TARGET_EL 2 +#ifdef TARGET_zynq +#include "hal/zynq.h" #endif -#if !defined(AA64GIC_VERSION) -#define AA64GIC_VERSION 2 -#endif - -#if (AA64GIC_VERSION==2) - #if 
!defined(AA64GICV2_GICD_BASE) - #define AA64GICV2_GICD_BASE 0xF9010000 - #endif - #if !defined(AA64_GICC_BASE) - #define AA64GICV2_GICC_BASE 0xF9020000 - #endif +#ifdef TARGET_nxp_ls1028a +#include "hal/nxp_ls1028a.h" #endif - -/* CURRENT_EL ARMv8 Current Exception Level Register */ -#define CURRENT_EL_MASK (0x3 << 2) /* Current EL */ -#define CURRENT_EL_EL0 0x0 -#define CURRENT_EL_EL1 0x4 -#define CURRENT_EL_EL2 0x8 -#define CURRENT_EL_EL3 0xC - -/* ID_AA64PFR0_EL1 ARMv8 Processor Feature Register 0*/ -#define ID_AA64PFRO_EL3_MASK (0xF<<12) /* EL3 is implemented: 0x0000 no */ - /* 0x1000 AA64, 0x2000 AA64+AA32 */ -#define ID_AA64PFRO_EL2_MASK (0xF<<8) /* EL2 is implemented: 0x000 no */ - /* 0x100 AA64, 0x200 AA64+AA32 */ -#define ID_AA64PFRO_EL1_MASK (0xF<<4) /* EL1 is implemented: */ - /* 0x10 AA64, 0x20 AA64+AA32 */ -#define ID_AA64PFRO_EL0_MASK (0xF<<0) /* EL0 is implemented: */ - /* 0x1 AA64, 0x2 AA64+AA32 */ -#define ID_AA64PFRO_FGT_MASK (0xFull<<56) /* Fine Grained Traps: */ - /* 0x0 no, !0x0: yes */ - - /* GICv2 Register Offsets */ #ifndef GICD_BASE #define GICD_BASE 0xF9010000 @@ -78,335 +44,1015 @@ #endif #define GICC_PMR 0x0004 -.equ TZPCDECPROT0_SET_BASE, 0x02200804 -.equ TZPCDECPROT1_SET_BASE, 0x02200810 -.equ OCRAM_TZPC_ADDR , 0x02200000 #ifndef USE_BUILTIN_STARTUP -.section ".boot", "ax" + +.globl MMUTableL0 +.globl MMUTableL1 +.globl MMUTableL2 +.global _prestart +.global _boot + +.global __el3_stack +.global __el2_stack +.global __el1_stack +.global __el0_stack .global _vector_table -_vector_table: - mov x21, x0 // read ATAG/FDT address - -4: ldr x1, =_vector_table // get start of .text in x1 - // Read current EL - mrs x0, CurrentEL - and x0, x0, #0x0C - - // EL == 3? - cmp x0, #12 - bne 2f -3: mrs x2, scr_el3 - orr x2, x2, 0x0F // scr_el3 |= NS|IRQ|FIQ|EA - msr scr_el3, x2 - - msr cptr_el3, xzr // enable FP/SIMD - - // EL == 1? -2: cmp x0, #4 - beq 1f - - // EL == 2? 
- mov x2, #3 << 20 - msr cptr_el2, x2 /* Enable FP/SIMD */ - b 0f - -1: mov x0, #3 << 20 - msr cpacr_el1, x0 // Enable FP/SIMD for EL1 - msr sp_el1, x1 - - /* Suspend slave CPUs */ -0: mrs x3, mpidr_el1 // read MPIDR_EL1 - and x3, x3, #3 // CPUID = MPIDR_EL1 & 0x03 - cbz x3, 8f // if 0, branch forward -7: wfi // infinite sleep - b 7b - -8: mov sp, x1 // set stack pointer - -#ifdef CORTEX_A72 - bl init_A72 + +.globl FIQInterrupt +.globl IRQInterrupt +.globl SErrorInterrupt +.globl SynchronousInterrupt +.globl FPUStatus + +.set EL3_stack, __el3_stack +.set EL2_stack, __el2_stack +.set EL1_stack, __el1_stack +.set EL0_stack, __el0_stack + +.set L0Table, MMUTableL0 +.set L1Table, MMUTableL1 +.set L2Table, MMUTableL2 +.set vector_base, _vector_table +.set rvbar_base, 0xFD5C0040 + +# Cortex-A53 timestamp clock frequency +.set counterfreq, 99990005 + +.set MODE_EL1, 0x5 +.set DAIF_BIT, 0x1C0 + + +.section .boot,"ax" +_boot: + mov x0, #0 + mov x1, #0 + mov x2, #0 + mov x3, #0 + mov x4, #0 + mov x5, #0 + mov x6, #0 + mov x7, #0 + mov x8, #0 + mov x9, #0 + mov x10, #0 + mov x11, #0 + mov x12, #0 + mov x13, #0 + mov x14, #0 + mov x15, #0 + mov x16, #0 + mov x17, #0 + mov x18, #0 + mov x19, #0 + mov x20, #0 + mov x21, #0 + mov x22, #0 + mov x23, #0 + mov x24, #0 + mov x25, #0 + mov x26, #0 + mov x27, #0 + mov x28, #0 + mov x29, #0 + mov x30, #0 + + /* Init Exception Level */ + mrs x0, currentEL + cmp x0, #0xC + beq InitEL3 + + cmp x0, #0x8 + beq InitEL2 + + cmp x0, #0x4 + beq InitEL1 + + /* go to error if current exception level is not EL1-3 */ + b error + +InitEL3: +#if defined(EL3_SECURE) && EL3_SECURE == 1 + /* Set vector table base address */ + ldr x1, =vector_base + msr VBAR_EL3,x1 + + /* Set reset vector address */ + /* Get the cpu ID */ + mrs x0, MPIDR_EL1 + and x0, x0, #0xFF + mov w0, w0 + ldr w2, =rvbar_base + /* calculate the RVBAR base address for particular CPU core */ + mov w3, #0x8 + mul w0, w0, w3 + add w2, w2, w0 + /* store vector base address to RVBAR */ + str 
x1, [x2] + + /* Define stack pointer for current exception level */ + ldr x2,=EL3_stack + mov sp,x2 + + /* Enable Trapping of SIMD/FPU register for standalone BSP */ + mov x0, #0 +#if defined(FPU_TRAP) && FPU_TRAP == 1 + orr x0, x0, #(0x1 << 10) #endif - bl boot_entry_C // boot_entry_C never returns - b 7b // go to sleep anyhow in case. -#endif /* USE_BUILTIN_STARTUP */ + msr CPTR_EL3, x0 + isb + /* Clear FPUStatus variable to make sure that it contains current + * status of FPU i.e. disabled. In case of a warm restart execution + * when bss sections are not cleared, it may contain previously updated + * value which does not hold true now. + */ +#if defined(FPU_TRAP) && FPU_TRAP == 1 + ldr x0,=FPUStatus + str xzr, [x0] +#endif + /* Configure SCR_EL3 */ + mov w1, #0 /* Initial value of register is unknown */ + orr w1, w1, #(1 << 11) /* Set ST bit (Secure EL1 can access CNTPS_TVAL_EL1, CNTPS_CTL_EL1 & CNTPS_CVAL_EL1) */ + orr w1, w1, #(1 << 10) /* Set RW bit (EL1 is AArch64, as this is the Secure world) */ + orr w1, w1, #(1 << 3) /* Set EA bit (SError routed to EL3) */ + orr w1, w1, #(1 << 2) /* Set FIQ bit (FIQs routed to EL3) */ + orr w1, w1, #(1 << 1) /* Set IRQ bit (IRQs routed to EL3) */ + msr SCR_EL3, x1 + + /* Configure cpu auxiliary control register EL1 */ + ldr x0,=0x80CA000 /* L1 Data prefetch control - 5, Enable device split throttle, 2 independent data prefetch streams */ +#if defined(CONFIG_ARM_ERRATA_855873) && CONFIG_ARM_ERRATA_855873 + /* Set ENDCCASCI bit in CPUACTLR_EL1 register, to execute data + * cache clean operations as data cache clean and invalidate + */ + orr x0, x0, #(1 << 44) /* Set ENDCCASCI bit */ +#endif + msr S3_1_C15_C2_0, x0 /* CPUACTLR_EL1 */ -/* Initialize GIC 400 (GICv2) */ -.global gicv2_init_secure -gicv2_init_secure: - ldr x0, =GICD_BASE - mov w9, #0x3 /* EnableGrp0 | EnableGrp1 */ - str w9, [x0, GICD_CTLR] /* Secure GICD_CTLR */ - ldr w9, [x0, GICD_TYPER] - and w10, w9, #0x1f /* ITLinesNumber */ - cbz w10, 1f /* No SPIs */ - 
add x11, x0, GICD_IGROUPRn - mov w9, #~0 /* Config SPIs as Grp1 */ - str w9, [x11], #0x4 -0: str w9, [x11], #0x4 - sub w10, w10, #0x1 - cbnz w10, 0b - - ldr x1, =GICC_BASE /* GICC_CTLR */ - mov w0, #3 /* EnableGrp0 | EnableGrp1 */ - str w0, [x1] - - mov w0, #1 << 7 /* Allow NS access to GICC_PMR */ - str w0, [x1, #4] /* GICC_PMR */ -1: - ret - - -#ifdef CORTEX_A72 -.global invalidate_ivac -invalidate_ivac: - ldr x0, =_OCRAM_ADDRESS - ldr x1, =_MEMORY_SIZE - add x1, x1, x0 - mrs x2, ctr_el0 - ubfx x4, x2, #16, #4 - mov x3, #4 - lsl x3, x3, x4 - sub x4, x3, #1 - bic x4, x0, x4 - inval_loop: - dc ivac, x4 - add x4, x4, x3 - cmp x4, x1 - blt inval_loop - dsb sy - ret + /* Program the counter frequency */ + ldr x0,=counterfreq + msr CNTFRQ_EL0, x0 -.global disable_mmu -disable_mmu: - mrs x0, sctlr_el3 - bic x0, x0, x1 - msr sctlr_el3, x0 + /* Enable hardware coherency between cores */ + mrs x0, S3_1_c15_c2_1 /* Read EL1 CPU Extended Control Register */ + orr x0, x0, #(1 << 6) /* Set the SMPEN bit */ + msr S3_1_c15_c2_1, x0 /* Write EL1 CPU Extended Control Register */ isb - dsb sy - ret -.global switch_el3_to_el2 -switch_el3_to_el2: - mov x0, #0x531 - msr scr_el3, x0 - msr cptr_el3, xzr /* Disable el3 traps */ - mov x0, #0x33ff - msr cptr_el2, x0 /* Disable el2 traps */ - mrs x0, sctlr_el2 - mov x1, #(1 << 0) | (1 << 2) | (1 << 12) - bic x0, x0, x1 - msr sctlr_el2, x0 - mrs x0, sctlr_el3 - bic x0, x0, x1 - msr sctlr_el3, x0 - bl invalidate_ivac - ldp x29, x30, [sp] - mrs x0, vbar_el3 - msr vbar_el2, x0 - mov x0, #0x3c9 - msr spsr_el3, x0 - msr elr_el3, x30 - ret + tlbi ALLE3 + ic IALLU /* Invalidate ICache to PoU */ + bl invalidate_dcaches + dsb sy + isb -.global cortex_a72_erratta -cortex_a72_erratta: - -/* Initalization code for NXP LS1028a (A72) */ -.global init_A72 -init_A72: - ldr x1, =_vector_table_el3 /* Initalize vec table */ - msr vbar_el3, x1 - -el3_state: - mrs x0, scr_el3 /* scr_el3 config */ - bic x0, x0, #(1 << 13) /* Trap WFE instruciton to EL3 off */ - 
bic x0, x0, #(1 << 12) /* Traps TWI ins to EL3 off */ - bic x0, x0, #(1 << 11) /* Traps EL1 access to physical secure timer to EL3 on */ - orr x0, x0, #(1 << 10) /* Next lower level is AArch64 */ - bic x0, x0, #(1 << 9) /* Secure state instuction fetches from non-secure memory are permitted */ - bic x0, x0, #(1 << 8) /* Hypervisor Call instruction disabled */ - bic x0, x0, #(1 << 7) /* Secure Monitor Call enabled */ - orr x0, x0, #0xf /* IRQ|FIQ|EA to EL3 */ - msr scr_el3, x0 - - mrs x0, sctlr_el3 /* sctlr_el3 config */ - bic x0, x0, #(1 << 19) /* Disable EL3 translation XN */ - bic x0, x0, #(1 << 12) /* Disable I cache */ - bic x0, x0, #(1 << 3) /* Disable SP Alignment check */ - bic x0, x0, #(1 << 2) /* Disable D cache */ - bic x0, x0, #(1 << 1) /* Disable Alignment check */ - bic x0, x0, #(1 << 0) /* Disable MMU */ - msr sctlr_el3, x0 + ldr x1, =L0Table /* Get address of level 0 for TTBR0_EL3 */ + msr TTBR0_EL3, x1 /* Set TTBR0_EL3 */ + + /********************************************** + * Set up memory attributes + * This equates to: + * 0 = b01000100 = Normal, Inner/Outer Non-Cacheable + * 1 = b11111111 = Normal, Inner/Outer WB/WA/RA + * 2 = b00000000 = Device-nGnRnE + * 3 = b00000100 = Device-nGnRE + * 4 = b10111011 = Normal, Inner/Outer WT/WA/RA + **********************************************/ + ldr x1, =0x000000BB0400FF44 + msr MAIR_EL3, x1 + + /********************************************** + * Set up TCR_EL3 + * Physical Address Size PS = 010 -> 40bits 1TB + * Granual Size TG0 = 00 -> 4KB + * size offset of the memory region T0SZ = 24 -> (region size 2^(64-24) = 2^40) + ***************************************************/ + ldr x1,=0x80823518 + + msr TCR_EL3, x1 isb -invalidate_cache: - msr csselr_el1, x0 - mrs x4, ccsidr_el1 /* read cache size */ - and x1, x4, #0x7 - and x1, x1, #0x4 /* cache line size */ - ldr x3, =0x7ff - and x2, x3, x4, lsr #13 /* number of cache sets */ - ldr x3, =0x3ff - and x3, x3, x4, lsr #3 /* cache associativity number */ - clz 
w4, w3 - mov x5, #0 -way_loop: - mov x6, #0 -set_loop: - lsl x7, x5, x4 - orr x7, x0, x7 - lsl x8, x6, x1 - orr x7, x7, x8 - dc cisw, x7 /* invalidate cache */ - add x6, x6, #1 - cmp x6, x2 - ble set_loop /* loop until all sets are invalidated */ - add x5, x5, #1 - cmp x5, x3 - ble way_loop /* loop until all ways are invalidated */ - msr cptr_el3, xzr - -init_stack: - ldr x0, =_stack_base /* Set and align stack */ - sub x0, x0, #16 - and x0, x0, #-16 - mov sp, x0 - ldr x1, =_STACK_SIZE - msr sp_el2, x0 - msr sp_el1, x0 - msr sp_el0, x0 - mov x29, 0 /* Setup an initial dummy frame with saved fp=0 and saved lr=0 */ - stp x29, x29, [sp, #-16]! - mov x29, sp - - bl invalidate_ivac - b boot_entry_C - -.global mmu_enable -mmu_enable: - tlbi alle3 /* Invalidate table entries */ - dsb sy + /* Enable SError Exception for asynchronous abort */ + mrs x1,DAIF + bic x1,x1,#(0x1<<8) + msr DAIF,x1 + + /* Configure SCTLR_EL3 */ + mov x1, #0 /* Most of the SCTLR_EL3 bits are unknown at reset */ + orr x1, x1, #(1 << 12) /* Enable I cache */ + orr x1, x1, #(1 << 3) /* Enable SP alignment check */ + orr x1, x1, #(1 << 2) /* Enable caches */ + orr x1, x1, #(1 << 0) /* Enable MMU */ + msr SCTLR_EL3, x1 + dsb sy isb - /* Set tcr reg */ - ldr x0, =0x0 - orr x0, x0, #24 /* Size of the memory region */ - orr x0, x0, #(1 << 17) /* PS 40 bit */ - orr x0, x0, #(1 << 16) /* TG0 4KB */ - orr x0, x0, #(2 << 12) /* SH0 Outer Shareable */ - orr x0, x0, #(1 << 10) /* normal outer WBWA cacheable */ - orr x0, x0, #(1 << 8) /* normal inner WBWA cacheable */ - msr tcr_el3, x0 - - ldr x1, =0x44E048E000098AA4 //0xFF440C0400 - msr mair_el3, x1 - - ldr x0, =ttb0_base - msr ttbr0_el3, x0 - - mrs x0, S3_1_c15_c2_1 - orr x0, x0, #(1 << 6) /* Must set SPMEN */ - msr S3_1_c15_c2_1, x0 + bl boot_entry_C /* jump to start */ +#else + /* present exception level and selected exception level mismatch */ + b error +#endif + +InitEL2: +#if defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + /* Set vector table base address 
*/ + ldr x1, =vector_base + msr VBAR_EL2, x1 + + /* Define stack pointer for current exception level */ + ldr x2,=EL2_stack + mov sp,x2 + + mov x0, #0x33ff + msr CPTR_EL2, x0 /* Enable FP/SIMD */ + + /* Invalidate TLB */ + tlbi alle2 + /* Invalidate ICache */ + ic ialluis + isb sy + /* Invalidate DCache */ + bl invalidate_dcaches + dsb sy isb - /* Set sctlr reg */ - mrs x0, sctlr_el3 - orr x1, x0, #(1 << 12) /* I - instruction cache enable */ - orr x1, x0, #(1 << 2) /* C - data & unified cache enable */ - orr x1, x0, #(1 << 0) /* M - MMU enable */ - msr sctlr_el3, x1 + ldr x1, =L0Table /* Get address of level 0 for TTBR0_EL2 */ + msr TTBR0_EL2, x1 /* Set TTBR0_EL2 */ + + /********************************************** + * Set up memory attributes + * This equates to: + * 0 = b01000100 = Normal, Inner/Outer Non-Cacheable + * 1 = b11111111 = Normal, Inner/Outer WB/WA/RA + * 2 = b00000000 = Device-nGnRnE + * 3 = b00000100 = Device-nGnRE + * 4 = b10111011 = Normal, Inner/Outer WT/WA/RA + **********************************************/ + ldr x1, =0x000000BB0400FF44 + msr MAIR_EL2, x1 + + /********************************************** + * Set up TCR_EL2 + * Physical Address Size PS = 010 -> 40bits 1TB + * Granual Size TG0 = 00 -> 4KB + * size offset of the memory region T0SZ = 24 -> (region size 2^(64-24) = 2^40) + ***************************************************/ + ldr x1,=0x80823518 + + msr TCR_EL2, x1 + isb + + /* Enable ICache */ + mrs x1, SCTLR_EL2 + orr x1, x1, #(1 << 12) /* Enable ICache */ + orr x1, x1, #(1 << 3) /* Enable SP alignment check */ + orr x1, x1, #(1 << 2) /* Enable DCaches */ + orr x1, x1, #(1 << 0) /* Enable MMU */ + msr SCTLR_EL2, x1 + dsb sy + isb + + bl boot_entry_C /* jump to start */ +#else + /* present exception level and selected exception level mismatch */ + b error +#endif + +InitEL1: +#if defined(EL1_NONSECURE) && EL1_NONSECURE == 1 + /* Set vector table base address */ + ldr x1, =vector_base + msr VBAR_EL1,x1 + + /* Trap floating 
point access only in case of standalone BSP */ +#if defined(FPU_TRAP) && FPU_TRAP == 0 + mrs x0, CPACR_EL1 + orr x0, x0, #(0x3 << 20) + msr CPACR_EL1, x0 +#else + mrs x0, CPACR_EL1 + bic x0, x0, #(0x3 << 20) + msr CPACR_EL1, x0 +#endif + isb + + /* Clear FPUStatus variable to make sure that it contains current + * status of FPU i.e. disabled. In case of a warm restart execution + * when bss sections are not cleared, it may contain previously updated + * value which does not hold true now. + */ +#if defined(FPU_TRAP) && FPU_TRAP == 1 + ldr x0,=FPUStatus + str xzr, [x0] +#endif + /* Define stack pointer for current exception level */ + ldr x2,=EL1_stack + mov sp,x2 + + /* Disable MMU first */ + mov x1,#0x0 + msr SCTLR_EL1, x1 + isb + + tlbi VMALLE1 + ic IALLU /* Invalidate I cache to PoU */ + bl invalidate_dcaches + dsb sy + isb + + ldr x1, =L0Table /* Get address of level 0 for TTBR0_EL1 */ + msr TTBR0_EL1, x1 /* Set TTBR0_EL1 */ + + /********************************************** + * Set up memory attributes + * This equates to: + * 0 = b01000100 = Normal, Inner/Outer Non-Cacheable + * 1 = b11111111 = Normal, Inner/Outer WB/WA/RA + * 2 = b00000000 = Device-nGnRnE + * 3 = b00000100 = Device-nGnRE + * 4 = b10111011 = Normal, Inner/Outer WT/WA/RA + **********************************************/ + ldr x1, =0x000000BB0400FF44 + msr MAIR_EL1, x1 + + /********************************************** + * Set up TCR_EL1 + * Physical Address Size PS = 010 -> 44bits 16TB + * Granual Size TG0 = 00 -> 4KB + * size offset of the memory region T0SZ = 24 -> (region size 2^(64-24) = 2^40) + ***************************************************/ + ldr x1,=0x285800518 + + msr TCR_EL1, x1 + isb - dsb sy + /* Enable SError Exception for asynchronous abort */ + mrs x1,DAIF + bic x1,x1,#(0x1<<8) + msr DAIF,x1 + + /* Enable MMU */ + mov x1,#0x0 + orr x1, x1, #(1 << 18) /* Set WFE non trapping */ + orr x1, x1, #(1 << 17) /* Set WFI non trapping */ + orr x1, x1, #(1 << 5) /* Set CP15 barrier 
enabled */ + orr x1, x1, #(1 << 12) /* Set I bit */ + orr x1, x1, #(1 << 2) /* Set C bit */ + orr x1, x1, #(1 << 0) /* Set M bit */ + msr SCTLR_EL1, x1 isb + + bl boot_entry_C /* jump to start */ +#else + /* present exception level and selected exception level mismatch */ + b error +#endif + +/* Assembly startup error handler */ +error: + b error + + +invalidate_dcaches: + dmb ISH + mrs x0, CLIDR_EL1 /* x0 = CLIDR */ + ubfx w2, w0, #24, #3 /* w2 = CLIDR.LoC */ + cmp w2, #0 /* LoC is 0? */ + b.eq invalidatecaches_end /* No cleaning required and enable MMU */ + mov w1, #0 /* w1 = level iterator */ + +invalidatecaches_flush_level: + add w3, w1, w1, lsl #1 /* w3 = w1 * 3 (right-shift for cache type) */ + lsr w3, w0, w3 /* w3 = w0 >> w3 */ + ubfx w3, w3, #0, #3 /* w3 = cache type of this level */ + cmp w3, #2 /* No cache at this level? */ + b.lt invalidatecaches_next_level + + lsl w4, w1, #1 + msr CSSELR_EL1, x4 /* Select current cache level in CSSELR */ + isb /* ISB required to reflect new CSIDR */ + mrs x4, CCSIDR_EL1 /* w4 = CSIDR */ + + ubfx w3, w4, #0, #3 + add w3, w3, #2 /* w3 = log2(line size) */ + ubfx w5, w4, #13, #15 + ubfx w4, w4, #3, #10 /* w4 = Way number */ + clz w6, w4 /* w6 = 32 - log2(number of ways) */ + +invalidatecaches_flush_set: + mov w8, w4 /* w8 = Way number */ +invalidatecaches_flush_way: + lsl w7, w1, #1 /* Fill level field */ + lsl w9, w5, w3 + orr w7, w7, w9 /* Fill index field */ + lsl w9, w8, w6 + orr w7, w7, w9 /* Fill way field */ + dc CISW, x7 /* Invalidate by set/way to point of coherency */ + subs w8, w8, #1 /* Decrement way */ + b.ge invalidatecaches_flush_way + subs w5, w5, #1 /* Descrement set */ + b.ge invalidatecaches_flush_set + +invalidatecaches_next_level: + add w1, w1, #1 /* Next level */ + cmp w2, w1 + b.gt invalidatecaches_flush_level + +invalidatecaches_end: ret -/* Exception Vector Table EL3 */ -.balign 0x800 -.global _vector_table_el3 -_vector_table_el3: -el3_sp0_sync: - eret +/* +* Below is the static translation page 
table required by MMU for Cortex-A53. +* The translation table is flat mapped (input address = output address) with +* default memory attributes defined for Zynq Ultrascale+ architecture. +* It utilizes translation granule size of 4KB with 2MB section size for +* initial 4GB memory and 1GB section size for memory after 4GB. +* The overview of translation table memory attributes is described below. +* +*| | Memory Range | Definition in Translation Table | +*|-----------------------|-----------------------------|-----------------------------------| +*| DDR | 0x0000000000 - 0x007FFFFFFF | Normal write-back Cacheable | +*| PL | 0x0080000000 - 0x00BFFFFFFF | Strongly Ordered | +*| QSPI, lower PCIe | 0x00C0000000 - 0x00EFFFFFFF | Strongly Ordered | +*| Reserved | 0x00F0000000 - 0x00F7FFFFFF | Unassigned | +*| STM Coresight | 0x00F8000000 - 0x00F8FFFFFF | Strongly Ordered | +*| GIC | 0x00F9000000 - 0x00F91FFFFF | Strongly Ordered | +*| Reserved | 0x00F9200000 - 0x00FCFFFFFF | Unassigned | +*| FPS, LPS slaves | 0x00FD000000 - 0x00FFBFFFFF | Strongly Ordered | +*| CSU, PMU | 0x00FFC00000 - 0x00FFDFFFFF | Strongly Ordered | +*| TCM, OCM | 0x00FFE00000 - 0x00FFFFFFFF | Normal inner write-back cacheable | +*| Reserved | 0x0100000000 - 0x03FFFFFFFF | Unassigned | +*| PL, PCIe | 0x0400000000 - 0x07FFFFFFFF | Strongly Ordered | +*| DDR | 0x0800000000 - 0x0FFFFFFFFF | Normal inner write-back cacheable | +*| PL, PCIe | 0x1000000000 - 0xBFFFFFFFFF | Strongly Ordered | +*| Reserved | 0xC000000000 - 0xFFFFFFFFFF | Unassigned | +* +* For DDR region 0x0000000000 - 0x007FFFFFFF, a system where DDR is less than +* 2GB, region after DDR and before PL is marked as undefined/reserved in +* translation table. Region 0xF9100000 - 0xF91FFFFF is reserved memory in +* 0x00F9000000 - 0x00F91FFFFF range, but it is marked as strongly ordered +* because minimum section size in translation table section is 2MB.
Region +* 0x00FFC00000 - 0x00FFDFFFFF contains CSU and PMU memory which are marked as +* Device since it is less than 1MB and falls in a region with device memory. +*/ + +.set reserved, 0x0 /* Fault */ +#if defined(EL1_NONSECURE) && EL1_NONSECURE == 1 +.set Memory, 0x405 | (2 << 8) | (0x0) /* normal writeback write allocate outer shared read write */ +#else +.set Memory, 0x405 | (3 << 8) | (0x0) /* normal writeback write allocate inner shared read write */ +#endif +.set Device, 0x409 | (1 << 53) | (1 << 54) | (0x0) /* strongly ordered read write non executable*/ +.section .mmu_tbl0,"a" -.balign 0x80 -el3_sp0_irq: - eret +MMUTableL0: -.balign 0x80 -el3_spi_fiq: - eret +.set SECT, MMUTableL1 /* 0x0000_0000 - 0x7F_FFFF_FFFF */ +.8byte SECT + 0x3 +.set SECT, MMUTableL1+0x1000 /* 0x80_0000_0000 - 0xFF_FFFF_FFFF */ +.8byte SECT + 0x3 -.balign 0x80 -el3_sp0_serror: - eret +.section .mmu_tbl1,"a" -.balign 0x80 -el3_spx_sync: - eret +MMUTableL1: -.balign 0x80 -el3_spx_irq: - eret +.set SECT, MMUTableL2 /* 0x0000_0000 - 0x3FFF_FFFF */ +.8byte SECT + 0x3 /* 1GB DDR */ -.balign 0x80 -el3_spx_fiq: - eret +.rept 0x3 /* 0x4000_0000 - 0xFFFF_FFFF */ +.set SECT, SECT + 0x1000 /*1GB DDR, 1GB PL, 2GB other devices n memory */ +.8byte SECT + 0x3 +.endr -.balign 0x80 -el3_spx_serror: - eret +.set SECT,0x100000000 +.rept 0xC /* 0x0001_0000_0000 - 0x0003_FFFF_FFFF */ +.8byte SECT + reserved /* 12GB Reserved */ +.set SECT, SECT + 0x40000000 +.endr -.balign 0x80 -lower_el3_aarch64_sync: - eret +.rept 0x10 /* 0x0004_0000_0000 - 0x0007_FFFF_FFFF */ +.8byte SECT + Device /* 8GB PL, 8GB PCIe */ +.set SECT, SECT + 0x40000000 +.endr -.balign 0x80 -lower_el3_aarch64_irq: - eret -.balign 0x80 -lower_el3_aarch64_fiq: - eret +#ifdef XPAR_PSU_DDR_1_S_AXI_BASEADDR +.set DDR_1_START, XPAR_PSU_DDR_1_S_AXI_BASEADDR +.set DDR_1_END, XPAR_PSU_DDR_1_S_AXI_HIGHADDR +.set DDR_1_SIZE, (DDR_1_END - DDR_1_START)+1 +#if defined(DDR_1_SIZE) && DDR_1_SIZE > 0x800000000 +/* If DDR size is larger than 32GB, truncate 
to 32GB */ +.set DDR_1_REG, 0x20 +#else +.set DDR_1_REG, DDR_1_SIZE/0x40000000 +#endif +#else +.set DDR_1_REG, 0 +#endif -.balign 0x80 -lower_el3_aarch64_serror: - eret +.set UNDEF_1_REG, 0x20 - DDR_1_REG +.rept DDR_1_REG /* DDR based on size in hdf*/ +.8byte SECT + Memory +.set SECT, SECT+0x40000000 +.endr + +.rept UNDEF_1_REG /* reserved for region where ddr is absent */ +.8byte SECT + reserved +.set SECT, SECT+0x40000000 +.endr + +.rept 0x1C0 /* 0x0010_0000_0000 - 0x007F_FFFF_FFFF */ +.8byte SECT + Device /* 448 GB PL */ +.set SECT, SECT + 0x40000000 +.endr + + +.rept 0x100 /* 0x0080_0000_0000 - 0x00BF_FFFF_FFFF */ +.8byte SECT + Device /* 256GB PCIe */ +.set SECT, SECT + 0x40000000 +.endr + + +.rept 0x100 /* 0x00C0_0000_0000 - 0x00FF_FFFF_FFFF */ +.8byte SECT + reserved /* 256GB reserved */ +.set SECT, SECT + 0x40000000 +.endr + + +.section .mmu_tbl2,"a" + +MMUTableL2: + +.set SECT, 0 + +#ifdef XPAR_PSU_DDR_0_S_AXI_BASEADDR +.set DDR_0_START, XPAR_PSU_DDR_0_S_AXI_BASEADDR +.set DDR_0_END, XPAR_PSU_DDR_0_S_AXI_HIGHADDR +.set DDR_0_SIZE, (DDR_0_END - DDR_0_START)+1 +#if defined(DDR_0_SIZE) && DDR_0_SIZE > 0x80000000 +/* If DDR size is larger than 2GB, truncate to 2GB */ +.set DDR_0_REG, 0x400 +#else +.set DDR_0_REG, DDR_0_SIZE/0x200000 +#endif +#else +.set DDR_0_REG, 0 +#endif + +.set UNDEF_0_REG, 0x400 - DDR_0_REG + +.rept DDR_0_REG /* DDR based on size in hdf*/ +.8byte SECT + Memory +.set SECT, SECT+0x200000 +.endr -/* Memory Table Macros */ -.macro PUT_64BIT_WORD high, low - .word \low - .word \high +.rept UNDEF_0_REG /* reserved for region where ddr is absent */ +.8byte SECT + reserved +.set SECT, SECT+0x200000 +.endr + +.rept 0x0200 /* 0x8000_0000 - 0xBFFF_FFFF */ +.8byte SECT + Device /* 1GB lower PL */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x0100 /* 0xC000_0000 - 0xDFFF_FFFF */ +.8byte SECT + Device /* 512MB QSPI */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x080 /* 0xE000_0000 - 0xEFFF_FFFF */ +.8byte SECT + Device /* 256MB lower PCIe */ +.set SECT, 
SECT+0x200000 +.endr + +.rept 0x040 /* 0xF000_0000 - 0xF7FF_FFFF */ +.8byte SECT + reserved /* 128MB Reserved */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x8 /* 0xF800_0000 - 0xF8FF_FFFF */ +.8byte SECT + Device /* 16MB coresight */ +.set SECT, SECT+0x200000 +.endr + +/* 1MB RPU LLP is marked for 2MB region as the minimum block size in +translation table is 2MB and adjacent 63MB reserved region is +converted to 62MB */ + +.rept 0x1 /* 0xF900_0000 - 0xF91F_FFFF */ +.8byte SECT + Device /* 2MB RPU low latency port */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x1F /* 0xF920_0000 - 0xFCFF_FFFF */ +.8byte SECT + reserved /* 62MB Reserved */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x8 /* 0xFD00_0000 - 0xFDFF_FFFF */ +.8byte SECT + Device /* 16MB FPS */ +.set SECT, SECT+0x200000 +.endr + +.rept 0xE /* 0xFE00_0000 - 0xFFBF_FFFF */ +.8byte SECT + Device /* 28MB LPS */ +.set SECT, SECT+0x200000 +.endr + +/* 0xFFC0_0000 - 0xFFDF_FFFF */ +.8byte SECT + Device /*2MB PMU/CSU */ + +.set SECT, SECT+0x200000 /* 0xFFE0_0000 - 0xFFFF_FFFF*/ +.8byte SECT + Memory /* 2MB OCM/TCM */ + + +/* + * FPUContextSize is the size of the array where floating point registers are + * stored when required. The default size corresponds to the case when there is no + * nested interrupt. If there are nested interrupts in application which are using + * floating point operation, the size of FPUContextSize need to be increased as per + * requirement + */ + +.set FPUContextSize, 528 + +.macro saveregister + stp X0,X1, [sp,#-0x10]! + stp X2,X3, [sp,#-0x10]! + stp X4,X5, [sp,#-0x10]! + stp X6,X7, [sp,#-0x10]! + stp X8,X9, [sp,#-0x10]! + stp X10,X11, [sp,#-0x10]! + stp X12,X13, [sp,#-0x10]! + stp X14,X15, [sp,#-0x10]! + stp X16,X17, [sp,#-0x10]! + stp X18,X19, [sp,#-0x10]! + stp X29,X30, [sp,#-0x10]! 
.endm -.macro TABLE_ENTRY PA, attributes -PUT_64BIT_WORD \attributes, \PA + 0x3 +.macro restoreregister + ldp X29,X30, [sp], #0x10 + ldp X18,X19, [sp], #0x10 + ldp X16,X17, [sp], #0x10 + ldp X14,X15, [sp], #0x10 + ldp X12,X13, [sp], #0x10 + ldp X10,X11, [sp], #0x10 + ldp X8,X9, [sp], #0x10 + ldp X6,X7, [sp], #0x10 + ldp X4,X5, [sp], #0x10 + ldp X2,X3, [sp], #0x10 + ldp X0,X1, [sp], #0x10 .endm -.macro BLOCK_1GB PA, attr_hi, attr_lo -PUT_64BIT_WORD \attr_hi, ((\PA) & 0xc0000000) | \attr_lo | 0x1 +.macro savefloatregister + +/* Load the floating point context array address from FPUContextBase */ + ldr x1,=FPUContextBase + ldr x0, [x1] + +/* Save all the floating point register to the array */ + stp q0,q1, [x0], #0x20 + stp q2,q3, [x0], #0x20 + stp q4,q5, [x0], #0x20 + stp q6,q7, [x0], #0x20 + stp q8,q9, [x0], #0x20 + stp q10,q11, [x0], #0x20 + stp q12,q13, [x0], #0x20 + stp q14,q15, [x0], #0x20 + stp q16,q17, [x0], #0x20 + stp q18,q19, [x0], #0x20 + stp q20,q21, [x0], #0x20 + stp q22,q23, [x0], #0x20 + stp q24,q25, [x0], #0x20 + stp q26,q27, [x0], #0x20 + stp q28,q29, [x0], #0x20 + stp q30,q31, [x0], #0x20 + mrs x2, FPCR + mrs x3, FPSR + stp x2, x3, [x0], #0x10 + +/* Save current address of floating point context array to FPUContextBase */ + str x0, [x1] .endm -.macro BLOCK_2MB PA, attr_hi, attr_lo -PUT_64BIT_WORD \attr_hi, ((\PA) & 0xffe00000) | \attr_lo | 0x1 +.macro restorefloatregister + +/* Restore the address of floating point context array from FPUContextBase */ + ldr x1,=FPUContextBase + ldr x0, [x1] + +/* Restore all the floating point register from the array */ + ldp x2, x3, [x0,#-0x10]! + msr FPCR, x2 + msr FPSR, x3 + ldp q30,q31, [x0,#-0x20]! + ldp q28,q29, [x0,#-0x20]! + ldp q26,q27, [x0,#-0x20]! + ldp q24,q25, [x0,#-0x20]! + ldp q22,q23, [x0,#-0x20]! + ldp q20,q21, [x0,#-0x20]! + ldp q18,q19, [x0,#-0x20]! + ldp q16,q17, [x0,#-0x20]! + ldp q14,q15, [x0,#-0x20]! + ldp q12,q13, [x0,#-0x20]! + ldp q10,q11, [x0,#-0x20]! + ldp q8,q9, [x0,#-0x20]! 
+ ldp q6,q7, [x0,#-0x20]! + ldp q4,q5, [x0,#-0x20]! + ldp q2,q3, [x0,#-0x20]! + ldp q0,q1, [x0,#-0x20]! + +/* Save current address of floating point context array to FPUContextBase */ + str x0, [x1] .endm -/* Note: In EL3/2 has direct physical to virutal mapping */ -.align 12 -.global ttb0_base -ttb0_base: -TABLE_ENTRY level1_pagetable, 0 -BLOCK_1GB 0x80000000, 0, 0x740 -BLOCK_1GB 0xC0000000, 0, 0x740 - -.align 12 -.global level1_pagetable -level1_pagetable: -.set ADDR, 0x0 -.rept 0x200 -BLOCK_2MB (ADDR << 20), 0, 0x74c -.set ADDR, ADDR + 2 -.endr +.macro exception_return + eret +#ifdef TARGET_versal + dsb nsh + isb +#endif +.endm + + +.section .vectors, "a" + +_vector_table: +.set VBAR, _vector_table +.org VBAR + +/* + * if application is built for XEN GUEST as EL1 Non-secure following image + * header is required by XEN. + */ +#if defined(HYP_GUEST) && HYP_GUEST == 1 + /* Valid Image header */ + /* HW reset vector */ + ldr x16, =_boot + br x16 +#ifdef TARGET_versal + dsb nsh + isb +#endif + /* text offset. */ + .dword 0 + /* image size. */ + .dword 0 + /* flags. */ + .dword 8 + /* RES0 */ + .dword 0 + .dword 0 + .dword 0 + + /* magic */ + .dword 0x644d5241 + /* RES0 */ + .dword 0 + /* End of Image header. */ +#endif + b _boot + +.org (VBAR + 0x200) + b SynchronousInterruptHandler + +.org (VBAR + 0x280) + b IRQInterruptHandler + +.org (VBAR + 0x300) + b FIQInterruptHandler + +.org (VBAR + 0x380) + b SErrorInterruptHandler + + +SynchronousInterruptHandler: + saveregister + +/* Check if the Synchronous abort is occurred due to floating point access. */ +#if defined(EL3_SECURE) && EL3_SECURE == 1 + mrs x0, ESR_EL3 +#else + mrs x0, ESR_EL1 +#endif + and x0, x0, #(0x3F << 26) + mov x1, #(0x7 << 26) + cmp x0, x1 +/* If exception is not due to floating point access go to synchronous handler */ + bne synchronoushandler + +/* + * If excpetion occurred due to floating point access, Enable the floating point + * access i.e. 
do not trap floating point instruction + */ + #if defined(EL3_SECURE) && EL3_SECURE == 1 + mrs x1,CPTR_EL3 + bic x1, x1, #(0x1<<10) + msr CPTR_EL3, x1 +#else + mrs x1,CPACR_EL1 + orr x1, x1, #(0x1<<20) + msr CPACR_EL1, x1 +#endif + isb + +/* If the floating point access was previously enabled, store FPU context + * registers(storefloat). + */ + ldr x0, =FPUStatus + ldrb w1,[x0] + cbnz w1, storefloat +/* + * If the floating point access was not enabled previously, save the status of + * floating point accessibility i.e. enabled and store floating point context + * array address(FPUContext) to FPUContextBase. + */ + mov w1, #0x1 + strb w1, [x0] + ldr x0, =FPUContext + ldr x1, =FPUContextBase + str x0,[x1] + b restorecontext +storefloat: + savefloatregister + b restorecontext +synchronoushandler: + bl SynchronousInterrupt +restorecontext: + restoreregister + exception_return + +IRQInterruptHandler: + + saveregister +/* Save the status of SPSR, ELR and CPTR to stack */ +#if defined(EL3_SECURE) && EL3_SECURE == 1 + mrs x0, CPTR_EL3 + mrs x1, ELR_EL3 + mrs x2, SPSR_EL3 +#else + mrs x0, CPACR_EL1 + mrs x1, ELR_EL1 + mrs x2, SPSR_EL1 +#endif + stp x0, x1, [sp,#-0x10]! + str x2, [sp,#-0x10]! + +/* Trap floating point access */ +#if defined(EL3_SECURE) && EL3_SECURE == 1 + mrs x1,CPTR_EL3 + orr x1, x1, #(0x1<<10) + msr CPTR_EL3, x1 +#else + mrs x1,CPACR_EL1 + bic x1, x1, #(0x1<<20) + msr CPACR_EL1, x1 +#endif + isb + + bl IRQInterrupt +/* + * If floating point access is enabled during interrupt handling, + * restore floating point registers. 
+ */ + +#if defined(EL3_SECURE) && EL3_SECURE == 1 + mrs x0, CPTR_EL3 + ands x0, x0, #(0x1<<10) + bne RestorePrevState +#else + mrs x0,CPACR_EL1 + ands x0, x0, #(0x1<<20) + beq RestorePrevState +#endif + + restorefloatregister + +/* Restore the status of SPSR, ELR and CPTR from stack */ +RestorePrevState: + ldr x2,[sp],0x10 + ldp x0, x1, [sp],0x10 +#if defined(EL3_SECURE) && EL3_SECURE == 1 + msr CPTR_EL3, x0 + msr ELR_EL3, x1 + msr SPSR_EL3, x2 +#else + msr CPACR_EL1, x0 + msr ELR_EL1, x1 + msr SPSR_EL1, x2 +#endif + restoreregister + exception_return + +FIQInterruptHandler: + + saveregister +/* Save the status of SPSR, ELR and CPTR to stack */ +#if defined(EL3_SECURE) && EL3_SECURE == 1 + mrs x0, CPTR_EL3 + mrs x1, ELR_EL3 + mrs x2, SPSR_EL3 +#else + mrs x0, CPACR_EL1 + mrs x1, ELR_EL1 + mrs x2, SPSR_EL1 +#endif + stp x0, x1, [sp,#-0x10]! + str x2, [sp,#-0x10]! + +/* Trap floating point access */ +#if defined(EL3_SECURE) && EL3_SECURE == 1 + mrs x1,CPTR_EL3 + orr x1, x1, #(0x1<<10) + msr CPTR_EL3, x1 +#else + mrs x1,CPACR_EL1 + bic x1, x1, #(0x1<<20) + msr CPACR_EL1, x1 +#endif + isb + bl FIQInterrupt +/* + * If floating point access is enabled during interrupt handling, + * restore floating point registers. 
+ */ + +#if defined(EL3_SECURE) && EL3_SECURE == 1 + mrs x0, CPTR_EL3 + ands x0, x0, #(0x1<<10) + bne RestorePrevStatefiq +#else + mrs x0,CPACR_EL1 + ands x0, x0, #(0x1<<20) + beq RestorePrevStatefiq +#endif + + restorefloatregister + + /* Restore the status of SPSR, ELR and CPTR from stack */ +RestorePrevStatefiq: + ldr x2,[sp],0x10 + ldp x0, x1, [sp],0x10 + #ifdef EL3_SECURE + msr CPTR_EL3, x0 + msr ELR_EL3, x1 + msr SPSR_EL3, x2 +#else + msr CPACR_EL1, x0 + msr ELR_EL1, x1 + msr SPSR_EL1, x2 +#endif + restoreregister + exception_return + +SErrorInterruptHandler: + + saveregister + bl SErrorInterrupt + restoreregister + exception_return + + +.align 8 +/* Array to store floating point registers */ +FPUContext: + .skip FPUContextSize +/* Stores address for floating point context array */ +FPUContextBase: + .skip 8 +FPUStatus: + .skip 1 + +.align 8 + +#endif /* !USE_BUILTIN_STARTUP */ + + +/* Initialize GIC 400 (GICv2) */ +.global gicv2_init_secure +gicv2_init_secure: + ldr x0, =GICD_BASE + mov w9, #0x3 /* EnableGrp0 | EnableGrp1 */ + str w9, [x0, GICD_CTLR] /* Secure GICD_CTLR */ + ldr w9, [x0, GICD_TYPER] + and w10, w9, #0x1f /* ITLinesNumber */ + cbz w10, 1f /* No SPIs */ + add x11, x0, GICD_IGROUPRn + mov w9, #~0 /* Config SPIs as Grp1 */ + str w9, [x11], #0x4 +0: str w9, [x11], #0x4 + sub w10, w10, #0x1 + cbnz w10, 0b + + ldr x1, =GICC_BASE /* GICC_CTLR */ + mov w0, #3 /* EnableGrp0 | EnableGrp1 */ + str w0, [x1] + + mov w0, #1 << 7 /* Allow NS access to GICC_PMR */ + str w0, [x1, #4] /* GICC_PMR */ +1: + ret -#endif /* CORTEX_A72 */ +.end diff --git a/src/fdt.c b/src/fdt.c index b7c506636..97c199fdf 100644 --- a/src/fdt.c +++ b/src/fdt.c @@ -510,7 +510,7 @@ const char* fdt_get_name(const void *fdt, int nodeoffset, int *len) err = fdt_check_node_offset_(fdt, nodeoffset); if (err >= 0) { name = nh->name; - namelen = strlen(nh->name); + namelen = (int)strlen(nh->name); } } if (err < 0) @@ -524,7 +524,7 @@ const char* fdt_get_string(const void *fdt, int stroffset, 
int *lenp) { const char *s = (const char*)fdt + fdt_off_dt_strings(fdt) + stroffset; if (lenp) { - *lenp = strlen(s); + *lenp = (int)strlen(s); } return s; } @@ -554,13 +554,13 @@ int fdt_setprop(void *fdt, int nodeoffset, const char *name, const void *val, } } if (err != 0) { - wolfBoot_printf("FDT: Set prop failed! %d (name %d, off %d)\n", + wolfBoot_printf("FDT: Set prop failed! %d (name %s, off %d)\n", err, name, nodeoffset); } return err; } -const void *fdt_getprop(const void *fdt, int nodeoffset, const char *name, +const void* fdt_getprop(const void *fdt, int nodeoffset, const char *name, int *lenp) { int poffset; @@ -577,23 +577,70 @@ const void *fdt_getprop(const void *fdt, int nodeoffset, const char *name, return NULL; } -int fdt_find_devtype(void* fdt, int startoff, const char* node) +void* fdt_getprop_address(const void *fdt, int nodeoffset, const char *name) +{ + void* ret = NULL; + int len = 0; + void* val = (void*)fdt_getprop(fdt, nodeoffset, name, &len); + if (val != NULL && len > 0) { + if (len == 8) { + uint64_t* val64 = (uint64_t*)val; + ret = (void*)((uintptr_t)fdt64_to_cpu(*val64)); + } + else if (len == 4) { + uint32_t* val32 = (uint32_t*)val; + ret = (void*)((uintptr_t)fdt32_to_cpu(*val32)); + } + } + return ret; +} + +int fdt_find_node_offset(void* fdt, int startoff, const char* nodename) { - int len, off; + int off, nlen, fnlen; + const char* nstr = NULL; + + if (nodename == NULL) + return -1; + + fnlen = (int)strlen(nodename); + for (off = fdt_next_node(fdt, startoff, NULL); + off >= 0; + off = fdt_next_node(fdt, off, NULL)) + { + nstr = fdt_get_name(fdt, off, &nlen); + if ((nlen == fnlen) && (memcmp(nstr, nodename, fnlen) == 0)) { + break; + } + } + return off; +} + +int fdt_find_prop_offset(void* fdt, int startoff, const char* propname, + const char* propval) +{ + int len, off, pvallen; const void* val; - const char* propname = "device_type"; - int nodelen = strlen(node)+1; + if (propname == NULL || propval == NULL) + return -1; + + 
pvallen = (int)strlen(propval)+1; for (off = fdt_next_node(fdt, startoff, NULL); off >= 0; off = fdt_next_node(fdt, off, NULL)) { val = fdt_getprop(fdt, off, propname, &len); - if (val && (len == nodelen) && (memcmp(val, node, len) == 0)) { - return off; + if (val && (len == pvallen) && (memcmp(val, propval, len) == 0)) { + break; } } - return off; /* return error from fdt_next_node() */ + return off; +} + +int fdt_find_devtype(void* fdt, int startoff, const char* node) +{ + return fdt_find_prop_offset(fdt, startoff, "device_type", node); } int fdt_node_offset_by_compatible(const void *fdt, int startoffset, @@ -721,4 +768,87 @@ int fdt_fixup_val64(void* fdt, int off, const char* node, const char* name, return fdt_setprop(fdt, off, name, &val, sizeof(val)); } + +/* FIT Specific */ +const char* fit_find_images(void* fdt, const char** pkernel, const char** pflat_dt) +{ + const void* val; + const char *conf = NULL, *kernel = NULL, *flat_dt = NULL; + int off, len = 0; + + /* Find the default configuration (optional) */ + off = fdt_find_node_offset(fdt, -1, "configurations"); + if (off > 0) { + val = fdt_getprop(fdt, off, "default", &len); + if (val != NULL && len > 0) { + conf = (const char*)val; + } + } + if (conf != NULL) { + off = fdt_find_node_offset(fdt, -1, conf); + if (off > 0) { + kernel = fdt_getprop(fdt, off, "kernel", &len); + flat_dt = fdt_getprop(fdt, off, "fdt", &len); + } + } + if (kernel == NULL) { + /* find node with "type" == kernel */ + off = fdt_find_prop_offset(fdt, -1, "type", "kernel"); + if (off > 0) { + val = fdt_get_name(fdt, off, &len); + if (val != NULL && len > 0) { + kernel = (const char*)val; + } + } + } + if (flat_dt == NULL) { + /* find node with "type" == flat_dt */ + off = fdt_find_prop_offset(fdt, -1, "type", "flat_dt"); + if (off > 0) { + val = fdt_get_name(fdt, off, &len); + if (val != NULL && len > 0) { + flat_dt = (const char*)val; + } + } + } + + if (pkernel) + *pkernel = kernel; + if (pflat_dt) + *pflat_dt = flat_dt; + + return 
conf; +} + +void* fit_load_image(void* fdt, const char* image, int* lenp) +{ + void *load, *entry, *data = NULL; + int off, len = 0; + + off = fdt_find_node_offset(fdt, -1, image); + if (off > 0) { + /* get load and entry */ + data = (void*)fdt_getprop(fdt, off, "data", &len); + load = fdt_getprop_address(fdt, off, "load"); + entry = fdt_getprop_address(fdt, off, "entry"); + if (data != NULL && load != NULL && data != load) { + wolfBoot_printf("Loading Image %s: %p -> %p (%d bytes)\n", + image, data, load, len); + memcpy(load, data, len); + + /* load should always have entry, but if not use load address */ + data = (entry != NULL) ? entry : load; + } + wolfBoot_printf("Image %s: %p (%d bytes)\n", image, data, len); + } + else { + wolfBoot_printf("Image %s: Not found!\n", image); + } + if (lenp != NULL) { + *lenp = len; + } + return data; + +} + #endif /* MMU && !BUILD_LOADER_STAGE1 */ diff --git a/src/string.c b/src/string.c index d8de8178c..7963b68bd 100644 --- a/src/string.c +++ b/src/string.c @@ -264,6 +264,19 @@ void RAMFUNCTION *memcpy(void *dst, const void *src, size_t n) const char *s = (const char *)src; char *d = (char *)dst; +#ifdef FAST_MEMCPY + /* are both pointers aligned to sizeof(unsigned long)? */ + if (((size_t)dst & (sizeof(unsigned long)-1)) == 0 && + ((size_t)src & (sizeof(unsigned long)-1)) == 0) + { + while (n >= sizeof(unsigned long)) { + *(unsigned long*)d = *(unsigned long*)s; + d += sizeof(unsigned long); + s += sizeof(unsigned long); + n -= sizeof(unsigned long); + } + } +#endif for (i = 0; i < n; i++) { d[i] = s[i]; } @@ -297,7 +310,7 @@ void *memmove(void *dst, const void *src, size_t n) void uart_writenum(int num, int base, int zeropad, int maxdigits) { int i = 0; - char buf[sizeof(int)*2+1]; + char buf[sizeof(unsigned long)*2+1]; const char* kDigitLut = "0123456789ABCDEF"; unsigned int val = (unsigned int)num; int sz = 0; diff --git a/src/update_ram.c b/src/update_ram.c index 75f7aa59b..d7a6a4d96 100644 --- a/src/update_ram.c +++ b/src/update_ram.c @@ -49,19
+49,6 @@ extern uint32_t dts_load_addr; #ifdef WOLFBOOT_USE_RAMBOOT -#if !(defined(EXT_FLASH) && defined(NO_XIP)) -/* requires/assumes inputs and size to be 4-byte aligned */ -static void memcpy32(void *dst, const void *src, size_t n) -{ - size_t i; - const uint32_t *s = (const uint32_t*)src; - uint32_t *d = (uint32_t*)dst; - for (i = 0; i < n/4; i++) { - d[i] = s[i]; - } -} -#endif - /* Function to load image from flash to ram */ int wolfBoot_ramboot(struct wolfBoot_image *img, uint8_t *src, uint8_t *dst) { @@ -78,7 +65,7 @@ int wolfBoot_ramboot(struct wolfBoot_image *img, uint8_t *src, uint8_t *dst) return -1; } #else - memcpy32(dst, src, IMAGE_HEADER_SIZE); + memcpy(dst, src, IMAGE_HEADER_SIZE); #endif /* check for valid header and version */ @@ -102,7 +89,7 @@ int wolfBoot_ramboot(struct wolfBoot_image *img, uint8_t *src, uint8_t *dst) return -1; } #else - memcpy32(dst + IMAGE_HEADER_SIZE, src + IMAGE_HEADER_SIZE, img_size); + memcpy(dst + IMAGE_HEADER_SIZE, src + IMAGE_HEADER_SIZE, img_size); #endif /* mark image as no longer external */ @@ -290,36 +277,61 @@ void RAMFUNCTION wolfBoot_start(void) #endif #ifdef MMU + /* Is this a Flattened uImage Tree (FIT) image (FDT format) */ + if (wolfBoot_get_dts_size(load_address) > 0) { + void* fit = (void*)load_address; + const char *kernel = NULL, *flat_dt = NULL; + + wolfBoot_printf("Flattened uImage Tree: Version %d, Size %d\n", + fdt_version(fit), fdt_totalsize(fit)); + + (void)fit_find_images(fit, &kernel, &flat_dt); + if (kernel != NULL) { + load_address = fit_load_image(fit, kernel, NULL); + } + if (flat_dt != NULL) { + uint8_t *dts_ptr = fit_load_image(fit, flat_dt, (int*)&dts_size); + if (dts_ptr != NULL && wolfBoot_get_dts_size(dts_ptr) >= 0) { + /* relocate to load DTS address */ + dts_addr = (uint8_t*)WOLFBOOT_LOAD_DTS_ADDRESS; + wolfBoot_printf("Loading DTS: %p -> %p (%d bytes)\n", + dts_ptr, dts_addr, dts_size); + memcpy(dts_addr, dts_ptr, dts_size); + } + } + } + else { /* Load DTS to RAM */ #ifdef 
EXT_FLASH - if (PART_IS_EXT(&os_image) && - wolfBoot_open_image(&os_image, PART_DTS_BOOT) >= 0) { - dts_addr = (uint8_t*)WOLFBOOT_LOAD_DTS_ADDRESS; - dts_size = (uint32_t)os_image.fw_size; - - wolfBoot_printf("Loading DTS (size %lu) to RAM at %08lx\n", - dts_size, dts_addr); - ext_flash_check_read((uintptr_t)os_image.fw_base, - (uint8_t*)dts_addr, dts_size); - } - else + if (PART_IS_EXT(&os_image) && + wolfBoot_open_image(&os_image, PART_DTS_BOOT) >= 0) { + dts_addr = (uint8_t*)WOLFBOOT_LOAD_DTS_ADDRESS; + dts_size = (uint32_t)os_image.fw_size; + + wolfBoot_printf("Loading DTS (size %lu) to RAM at %08lx\n", + dts_size, dts_addr); + ext_flash_check_read((uintptr_t)os_image.fw_base, + (uint8_t*)dts_addr, dts_size); + } + else #endif /* EXT_FLASH */ - { - dts_addr = hal_get_dts_address(); - if (dts_addr) { - ret = wolfBoot_get_dts_size(dts_addr); - if (ret < 0) { - wolfBoot_printf("Failed parsing DTB to load\n"); - /* Allow failure, continue booting */ - } - else { - /* relocate DTS to RAM */ - uint8_t* dts_dst = (uint8_t*)WOLFBOOT_LOAD_DTS_ADDRESS; - dts_size = (uint32_t)ret; - wolfBoot_printf("Loading DTB (size %d) from %p to RAM at %p\n", + { + dts_addr = hal_get_dts_address(); + if (dts_addr) { + ret = wolfBoot_get_dts_size(dts_addr); + if (ret < 0) { + wolfBoot_printf("Failed parsing DTB to load\n"); + /* Allow failure, continue booting */ + } + else { + /* relocate DTS to RAM */ + uint8_t* dts_dst = (uint8_t*)WOLFBOOT_LOAD_DTS_ADDRESS; + dts_size = (uint32_t)ret; + wolfBoot_printf("Loading DTB (size %d) from %p to RAM at %p\n", dts_size, dts_addr, WOLFBOOT_LOAD_DTS_ADDRESS); - memcpy(dts_dst, dts_addr, dts_size); - dts_addr = dts_dst; + memcpy(dts_dst, dts_addr, dts_size); + dts_addr = dts_dst; + } } } } diff --git a/stage1/loader_stage1.c b/stage1/loader_stage1.c index 4ecc6a2d8..176c4f0d6 100644 --- a/stage1/loader_stage1.c +++ b/stage1/loader_stage1.c @@ -55,17 +55,6 @@ #endif #endif -/* requires/assumes inputs and size to be 4-byte aligned */ -static void 
memcpy32(void *dst, const void *src, size_t n) -{ - size_t i; - const uint32_t *s = (const uint32_t*)src; - uint32_t *d = (uint32_t*)dst; - for (i = 0; i < n/4; i++) { - d[i] = s[i]; - } -} - int main(void) { int ret = -1; @@ -84,7 +73,7 @@ int main(void) #endif /* relocate 4KB code to DST and jump */ - memcpy32((void*)wolfboot_start, (void*)BOOT_ROM_ADDR, BOOT_ROM_SIZE); + memcpy((void*)wolfboot_start, (void*)BOOT_ROM_ADDR, BOOT_ROM_SIZE); #ifdef WOLFBOOT_ARCH_PPC /* TODO: Fix hack and consider moving to hal_prepare_boot */ @@ -113,7 +102,7 @@ int main(void) ); #else /* copy from flash to ram */ - memcpy32( + memcpy( (uint8_t*)WOLFBOOT_STAGE1_LOAD_ADDR,/* ram destination */ (uint8_t*)WOLFBOOT_ORIGIN, /* flash offset */ BOOTLOADER_PARTITION_SIZE /* boot-loader partition (entire) */ diff --git a/tools/fdt-parser/README.md b/tools/fdt-parser/README.md index 4b5045fc1..9011b8dc6 100644 --- a/tools/fdt-parser/README.md +++ b/tools/fdt-parser/README.md @@ -1,6 +1,10 @@ # Flattened Device Tree (FDT) Parser -This tool uses our internal FDT (fdt.c) parsing code to dump the device tree. There is also a `-t` option that tests making several updates to the device tree. +This tool uses our internal FDT (fdt.c) parsing code to dump the device tree. + +Use `-i` to parse a Flattened uImage Tree (FIT) image. + +There is also a `-t` option that tests making several updates to the device tree (useful with the nxp_t1024.dtb). ## Building fdt-parser @@ -8,7 +12,7 @@ From root: `make fdt-parser` OR From `tools/fdt-parser` use `make clean && make` -## Example Output +## Example FDT Output ```sh % ./tools/fdt-parser/fdt-parser ./tools/fdt-parser/nxp_t1024.dtb @@ -28,3 +32,30 @@ root (node offset 0, depth 1, len 0): power-isa-cs (prop offset 180, len 0): NULL ... 
``` + +## Example FIT Output + +```sh +% ./tools/fdt-parser/fdt-parser -i ./tools/fdt-parser/lynx-test-arm.srp +FDT Parser (./tools/fdt-parser/lynx-test-arm.srp): +FDT Version 17, Size 164232633 +FIT: Found 'conf@1' configuration + description (len 46): LynxSecure 2024.06.0-96ce6f31a0 SRP (aarch64) +Kernel Image: kernel@1 + description (len 46): LynxSecure 2024.06.0-96ce6f31a0 SRP (aarch64) + type (len 7): kernel + os (len 6): linux + arch (len 6): arm64 + compression (len 5): none + load (len 4): + entry (len 4): + data (len 164186944): not rendering +FDT Image: fdt@1 + description (len 77): Flattened Device Tree blob for LynxSecure 2024.06.0-96ce6f31a0 SRP (aarch64) + type (len 8): flat_dt + arch (len 6): arm64 + compression (len 5): none + padding (len 8): + data (len 44770): not rendering +Return 0 +``` diff --git a/tools/fdt-parser/fdt-parser.c b/tools/fdt-parser/fdt-parser.c index d074cab34..bee846867 100644 --- a/tools/fdt-parser/fdt-parser.c +++ b/tools/fdt-parser/fdt-parser.c @@ -31,6 +31,7 @@ #include static int gEnableUnitTest = 0; +static int gParseFit = 0; #define UNIT_TEST_GROW_SIZE 1024 /* Test case for "nxp_t1024.dtb" */ @@ -113,7 +114,7 @@ static int fdt_test(void* fdt) p += sizeof(uint64_t); ret = fdt_setprop(fdt, off, "reg", ranges, (int)(p - ranges)); if (ret != 0) goto exit; - wolfBoot_printf("FDT: Set memory, start=0x%x, size=0x%x\n", + printf("FDT: Set memory, start=0x%x, size=0x%x\n", DDR_ADDRESS, (uint32_t)DDR_SIZE); } @@ -198,7 +199,7 @@ static int fdt_test(void* fdt) liodns[0] = qp_info[i].dliodn; liodns[1] = qp_info[i].fliodn; - wolfBoot_printf("FDT: Set %s@%d (%d), %s=%d,%d\n", + printf("FDT: Set %s@%d (%d), %s=%d,%d\n", "qman-portal", i, off, "fsl,liodn", liodns[0], liodns[1]); ret = fdt_setprop(fdt, off, "fsl,liodn", liodns, sizeof(liodns)); if (ret != 0) goto exit; @@ -319,10 +320,70 @@ static int load_file(const char* filename, uint8_t** buf, size_t* bufLen) return ret; } -int dts_parse(void* dts_addr) +static void* 
dts_fit_image_addr(void* fit, uint32_t off, const char* prop) +{ + void* val = fdt_getprop_address(fit, off, prop); + printf("\t%s: %p\n", prop, val); + return val; +} + +static const void* dts_fit_image_item(void* fit, uint32_t off, const char* prop) +{ + int len = 0; + const void* val = fdt_getprop(fit, off, prop, &len); + if (val != NULL && len > 0) { + if (len < 256) + printf("\t%s (len %d): %s\n", prop, len, (const char*)val); + else + printf("\t%s (len %d): not rendering\n", prop, len); + } + return val; +} + +void dts_parse_fit_image(void* fit, const char* image, const char* desc) +{ + int off; + + if (fit != NULL) { + printf("%s Image: %s\n", desc, image); + } + + off = fdt_find_node_offset(fit, -1, image); + if (off > 0) { + dts_fit_image_item(fit, off, "description"); + dts_fit_image_item(fit, off, "type"); + dts_fit_image_item(fit, off, "os"); + dts_fit_image_item(fit, off, "arch"); + dts_fit_image_item(fit, off, "compression"); + dts_fit_image_addr(fit, off, "load"); + dts_fit_image_addr(fit, off, "entry"); + dts_fit_image_item(fit, off, "padding"); + dts_fit_image_item(fit, off, "data"); + } +} + +int dts_parse_fit(void* image) +{ + const char *conf = NULL, *kernel = NULL, *flat_dt = NULL; + + conf = fit_find_images(image, &kernel, &flat_dt); + if (conf != NULL) { + printf("FIT: Found '%s' configuration\n", conf); + dts_fit_image_item(image, fdt_find_node_offset(image, -1, conf), + "description"); + } + + /* dump image information */ + dts_parse_fit_image(image, kernel, "Kernel"); + dts_parse_fit_image(image, flat_dt, "FDT"); + + return 0; +} + +int dts_parse(void* image) { int ret = 0; - struct fdt_header *fdt = (struct fdt_header *)dts_addr; + struct fdt_header *fdt = (struct fdt_header *)image; const struct fdt_property* prop; int nlen, plen, slen; int noff, poff, soff; @@ -331,17 +392,6 @@ int dts_parse(void* dts_addr) #define MAX_DEPTH 24 char tabs[MAX_DEPTH+1] = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; - /* check header */ - ret = 
fdt_check_header(fdt); - if (ret != 0) { - printf("FDT check failed %d!\n", ret); - return ret; - } - - /* display information */ - printf("FDT Version %d, Size %d\n", - fdt_version(fdt), fdt_totalsize(fdt)); - /* walk tree */ for (noff = fdt_next_node(fdt, -1, &depth); noff >= 0; @@ -369,8 +419,16 @@ int dts_parse(void* dts_addr) &tabs[MAX_DEPTH-depth], pstr, poff, plen); if (plen > 32) printf("\n%s", &tabs[MAX_DEPTH-depth-1]); - print_bin((const uint8_t*)prop->data, plen); - printf("\n"); + if (plen > 256) { + char file[260+1]; + snprintf(file, sizeof(file), "%s.%s.bin", nstr, pstr); + printf("Saving to file %s\n", file); + write_bin(file, (const uint8_t*)prop->data, plen); + } + else { + print_bin((const uint8_t*)prop->data, plen); + printf("\n"); + } } } } @@ -378,6 +436,14 @@ int dts_parse(void* dts_addr) return ret; } +static void Usage(void) +{ + printf("Expected usage:\n"); + printf("./tools/fdt-parser/fdt-parser [-t] [-i] filename\n"); + printf("\t* -i: Parse Flattened uImage Tree (FIT) image\n"); + printf("\t* -t: Test several updates (used with nxp_t1024.dtb)\n"); +} + int main(int argc, char *argv[]) { int ret = 0; @@ -385,13 +451,26 @@ int main(int argc, char *argv[]) size_t imageSz = 0; const char* filename = NULL; - if (argc >= 2) { - filename = argv[1]; + if (argc == 1 || (argc >= 2 && + (strcmp(argv[1], "-?") == 0 || + strcmp(argv[1], "-h") == 0 || + strcmp(argv[1], "--help") == 0))) { + Usage(); + return 0; } - while (argc > 2) { + while (argc > 1) { if (strcmp(argv[argc-1], "-t") == 0) { gEnableUnitTest = 1; } + else if (strcmp(argv[argc-1], "-i") == 0) { + gParseFit = 1; + } + else if (*argv[argc-1] != '-') { + filename = argv[argc-1]; + } + else { + printf("Warning: Unrecognized option: %s\n", argv[argc-1]); + } argc--; } @@ -402,6 +481,18 @@ int main(int argc, char *argv[]) } ret = load_file(filename, &image, &imageSz); + if (ret == 0) { + /* check header */ + ret = fdt_check_header(image); + if (ret != 0) { + printf("FDT check failed %d!\n", 
ret); + return ret; + } + + /* display information */ + printf("FDT Version %d, Size %d\n", + fdt_version(image), fdt_totalsize(image)); + } if (ret == 0 && gEnableUnitTest) { ret = fdt_test(image); if (ret == 0) { @@ -414,7 +505,12 @@ int main(int argc, char *argv[]) } } if (ret == 0) { - ret = dts_parse(image); + if (gParseFit) { + ret = dts_parse_fit(image); + } + else { + ret = dts_parse(image); + } } free(image); From 5f68cb3c79ceaa68fdb39d7a8a700d04ddeeb65f Mon Sep 17 00:00:00 2001 From: David Garske Date: Tue, 10 Dec 2024 17:15:34 -0800 Subject: [PATCH 2/4] Added QSPI DMA support. --- hal/zynq.c | 207 +++++++++++++++++++++++++++------------ hal/zynq.h | 48 ++++++--- include/image.h | 16 +++ src/boot_aarch64.c | 2 +- src/boot_aarch64_start.S | 28 ++++++ src/boot_ppc.c | 4 +- test-app/app_stm32h5.c | 3 +- 7 files changed, 225 insertions(+), 83 deletions(-) diff --git a/hal/zynq.c b/hal/zynq.c index 9df15e893..b23610f9d 100644 --- a/hal/zynq.c +++ b/hal/zynq.c @@ -80,6 +80,9 @@ static int qspi_wait_we(QspiDev_t* dev); static int test_ext_flash(QspiDev_t* dev); #endif +/* asm function */ +extern void flush_dcache_range(unsigned long start, unsigned long stop); + #ifdef DEBUG_UART void uart_init(void) { @@ -293,9 +296,23 @@ static inline int qspi_isr_wait(uint32_t wait_mask, uint32_t wait_val) } return 0; } +#ifdef GQSPI_DMA +static inline int qspi_dmaisr_wait(uint32_t wait_mask, uint32_t wait_val) +{ + uint32_t timeout = 0; + while ((GQSPIDMA_ISR & wait_mask) == wait_val && + ++timeout < GQSPI_TIMEOUT_TRIES); + if (timeout == GQSPI_TIMEOUT_TRIES) { + return -1; + } + return 0; +} +#endif static int qspi_gen_fifo_write(uint32_t reg_genfifo) { + uint32_t reg_cfg; + /* wait until the gen FIFO is not full to write */ if (qspi_isr_wait(GQSPI_IXR_GEN_FIFO_NOT_FULL, 0)) { return GQSPI_CODE_TIMEOUT; @@ -317,6 +334,17 @@ static int gspi_fifo_tx(const uint8_t* data, uint32_t sz) return GQSPI_CODE_TIMEOUT; } + #if defined(DEBUG_ZYNQ) && DEBUG_ZYNQ >= 3 + uint32_t txSz = 
sz; + if (txSz > GQSPI_FIFO_WORD_SZ) + txSz = GQSPI_FIFO_WORD_SZ; + memcpy(&tmp32, data, txSz); + GQSPI_TXD = tmp32; + wolfBoot_printf("TXD=%08x\n", tmp32); + + sz -= txSz; + data += txSz; + #else /* Write data */ if (sz >= 4) { GQSPI_TXD = *(uint32_t*)data; @@ -329,23 +357,32 @@ static int gspi_fifo_tx(const uint8_t* data, uint32_t sz) GQSPI_TXD = tmp32; sz = 0; } + #endif } return GQSPI_CODE_SUCCESS; } -static int gspi_fifo_rx(uint8_t* data, uint32_t sz, uint32_t discardSz) +#ifndef GQSPI_DMA +static int gspi_fifo_rx(uint8_t* data, uint32_t sz) { uint32_t tmp32; + while (sz > 0) { /* Wait for RX FIFO not empty */ if (qspi_isr_wait(GQSPI_IXR_RX_FIFO_NOT_EMPTY, 0)) { return GQSPI_CODE_TIMEOUT; } - if (discardSz >= GQSPI_FIFO_WORD_SZ) { - tmp32 = GQSPI_RXD; /* discard */ - discardSz -= GQSPI_FIFO_WORD_SZ; - continue; - } + + #if defined(DEBUG_ZYNQ) && DEBUG_ZYNQ >= 3 + uint32_t rxSz = sz; + if (rxSz > GQSPI_FIFO_WORD_SZ) + rxSz = GQSPI_FIFO_WORD_SZ; + tmp32 = GQSPI_RXD; + memcpy(data, &tmp32, rxSz); + wolfBoot_printf("RXD=%08x\n", tmp32); + sz -= rxSz; + data += rxSz; + #else if (sz >= 4) { *(uint32_t*)data = GQSPI_RXD; data += 4; @@ -356,9 +393,11 @@ static int gspi_fifo_rx(uint8_t* data, uint32_t sz, uint32_t discardSz) memcpy(data, &tmp32, sz); sz = 0; } + #endif } return GQSPI_CODE_SUCCESS; } +#endif static int qspi_cs(QspiDev_t* pDev, int csAssert) { @@ -374,6 +413,33 @@ static int qspi_cs(QspiDev_t* pDev, int csAssert) return qspi_gen_fifo_write(reg_genfifo); } +static uint32_t qspi_calc_exp(uint32_t xferSz, uint32_t* reg_genfifo) +{ + uint32_t expval = 8; + *reg_genfifo &= ~(GQSPI_GEN_FIFO_IMM_MASK | GQSPI_GEN_FIFO_EXP_MASK); + if (xferSz > GQSPI_GEN_FIFO_IMM_MASK) { + /* Use exponent mode */ + while (1) { + if (xferSz & (1 << expval)) { + *reg_genfifo |= GQSPI_GEN_FIFO_EXP_MASK; + *reg_genfifo |= GQSPI_GEN_FIFO_IMM(expval); /* IMM is exponent */ + xferSz = (1 << expval); + break; + } + expval++; + } + } + else { + /* Use length mode */ + *reg_genfifo |= 
GQSPI_GEN_FIFO_IMM(xferSz); /* IMM is length */ + } + return xferSz; +} + +#ifdef GQSPI_DMA +static uint8_t XALIGNED(QQSPI_DMA_ALIGN) dmatmp[GQSPI_DMA_TMPSZ]; +#endif + static int qspi_transfer(QspiDev_t* pDev, const uint8_t* cmdData, uint32_t cmdSz, const uint8_t* txData, uint32_t txSz, @@ -382,7 +448,9 @@ static int qspi_transfer(QspiDev_t* pDev, { int ret = GQSPI_CODE_SUCCESS; uint32_t reg_genfifo, xferSz; - +#ifdef GQSPI_DMA + uint8_t* dmarxptr = NULL; +#endif GQSPI_EN = 1; /* Enable device */ qspi_cs(pDev, 1); /* Select slave */ @@ -395,14 +463,14 @@ static int qspi_transfer(QspiDev_t* pDev, xferSz = cmdSz; while (ret == GQSPI_CODE_SUCCESS && cmdData && xferSz > 0) { /* Enable TX and send command inline */ - reg_genfifo |= GQSPI_GEN_FIFO_TX; reg_genfifo &= ~(GQSPI_GEN_FIFO_RX | GQSPI_GEN_FIFO_IMM_MASK); + reg_genfifo |= GQSPI_GEN_FIFO_TX; reg_genfifo |= GQSPI_GEN_FIFO_IMM(*cmdData); /* IMM is data */ /* Submit general FIFO operation */ ret = qspi_gen_fifo_write(reg_genfifo); if (ret != GQSPI_CODE_SUCCESS) { - wolfBoot_printf("on line %d: error %d\n", __LINE__, ret); + wolfBoot_printf("zynq.c:%d (error %d)\n", __LINE__, ret); break; } @@ -411,39 +479,28 @@ static int qspi_transfer(QspiDev_t* pDev, cmdData++; } - /* Set desired data mode and stripe */ + /* Set desired data mode */ reg_genfifo |= (mode & GQSPI_GEN_FIFO_MODE_MASK); - reg_genfifo |= (pDev->stripe & GQSPI_GEN_FIFO_STRIPE); /* TX Data */ while (ret == GQSPI_CODE_SUCCESS && txData && txSz > 0) { - xferSz = txSz; - /* Enable TX */ reg_genfifo &= ~(GQSPI_GEN_FIFO_RX | GQSPI_GEN_FIFO_IMM_MASK | GQSPI_GEN_FIFO_EXP_MASK); reg_genfifo |= (GQSPI_GEN_FIFO_TX | GQSPI_GEN_FIFO_DATA_XFER); - - if (xferSz > GQSPI_GEN_FIFO_IMM_MASK) { - /* Use exponent mode */ - xferSz = 256; /* 2 ^ 8 = 256 */ - reg_genfifo |= GQSPI_GEN_FIFO_EXP_MASK; - reg_genfifo |= GQSPI_GEN_FIFO_IMM(8); /* IMM is exponent */ - } - else { - reg_genfifo |= GQSPI_GEN_FIFO_IMM(xferSz); /* IMM is length */ - } + reg_genfifo |= (pDev->stripe & 
GQSPI_GEN_FIFO_STRIPE); + xferSz = qspi_calc_exp(txSz, ®_genfifo); /* Submit general FIFO operation */ ret = qspi_gen_fifo_write(reg_genfifo); if (ret != GQSPI_CODE_SUCCESS) { - wolfBoot_printf("on line %d: error %d\n", __LINE__, ret); + wolfBoot_printf("zynq.c:%d (error %d)\n", __LINE__, ret); } /* Fill FIFO */ ret = gspi_fifo_tx(txData, xferSz); if (ret != GQSPI_CODE_SUCCESS) { - wolfBoot_printf("on line %d: error %d\n", __LINE__, ret); + wolfBoot_printf("zynq.c:%d (error %d)\n", __LINE__, ret); break; } @@ -454,60 +511,77 @@ static int qspi_transfer(QspiDev_t* pDev, /* Dummy operations */ if (ret == GQSPI_CODE_SUCCESS && dummySz) { - /* Send dummy clocks (Disable TX & RX) */ + /* Send dummy clocks (Disable TX & RX), do not set stripe */ reg_genfifo &= ~(GQSPI_GEN_FIFO_TX | GQSPI_GEN_FIFO_RX | - GQSPI_GEN_FIFO_IMM_MASK | GQSPI_GEN_FIFO_EXP_MASK); + GQSPI_GEN_FIFO_IMM_MASK | GQSPI_GEN_FIFO_EXP_MASK | + GQSPI_GEN_FIFO_STRIPE); + reg_genfifo |= GQSPI_GEN_FIFO_DATA_XFER; /* IMM is number of dummy clock cycles */ reg_genfifo |= GQSPI_GEN_FIFO_IMM(dummySz); ret = qspi_gen_fifo_write(reg_genfifo); /* Submit FIFO Dummy Op */ - - if (rxSz > 0) { - /* Convert dummy bits to bytes */ - dummySz = (dummySz + 7) / 8; - /* Adjust rxSz for dummy bytes */ - rxSz += dummySz; - /* round up by FIFO Word Size */ - rxSz = (((rxSz + GQSPI_FIFO_WORD_SZ - 1) / GQSPI_FIFO_WORD_SZ) * - GQSPI_FIFO_WORD_SZ); - } } /* RX Data */ while (ret == GQSPI_CODE_SUCCESS && rxData && rxSz > 0) { - xferSz = rxSz; - /* Enable RX */ reg_genfifo &= ~(GQSPI_GEN_FIFO_TX | GQSPI_GEN_FIFO_IMM_MASK | GQSPI_GEN_FIFO_EXP_MASK); reg_genfifo |= (GQSPI_GEN_FIFO_RX | GQSPI_GEN_FIFO_DATA_XFER); + reg_genfifo |= (pDev->stripe & GQSPI_GEN_FIFO_STRIPE); - if (xferSz > GQSPI_GEN_FIFO_IMM_MASK) { - /* Use exponent mode */ - xferSz = 256; /* 2 ^ 8 = 256 */ - reg_genfifo |= GQSPI_GEN_FIFO_EXP_MASK; - reg_genfifo |= GQSPI_GEN_FIFO_IMM(8); /* IMM is exponent */ - } - else { - reg_genfifo |= GQSPI_GEN_FIFO_IMM(xferSz); /* IMM is 
length */ + xferSz = rxSz; + #ifdef GQSPI_DMA + /* if xferSz or rxData is not QQSPI_DMA_ALIGN aligned use tmp */ + dmarxptr = rxData; + if ((rxSz & (QQSPI_DMA_ALIGN-1)) || + (((size_t)rxData) & (QQSPI_DMA_ALIGN-1))) { + dmarxptr = (uint8_t*)dmatmp; + /* round up */ + xferSz = ((xferSz + (QQSPI_DMA_ALIGN-1)) & ~(QQSPI_DMA_ALIGN-1)); + if (xferSz > (uint32_t)sizeof(dmatmp)) { + xferSz = (uint32_t)sizeof(dmatmp); + } } + GQSPIDMA_DST = (unsigned long)dmarxptr; + GQSPIDMA_SIZE = xferSz; + GQSPIDMA_IER = GQSPIDMA_ISR_ALL_MASK; + flush_dcache_range((unsigned long)dmarxptr, + (unsigned long)dmarxptr + xferSz); + #endif + xferSz = qspi_calc_exp(xferSz, ®_genfifo); + /* Submit general FIFO operation */ ret = qspi_gen_fifo_write(reg_genfifo); if (ret != GQSPI_CODE_SUCCESS) { - wolfBoot_printf("on line %d: error %d\n", __LINE__, ret); + wolfBoot_printf("zynq.c:%d (error %d)\n", __LINE__, ret); break; } + #ifndef GQSPI_DMA /* Read FIFO */ - ret = gspi_fifo_rx(rxData, xferSz-dummySz, dummySz); + ret = gspi_fifo_rx(rxData, xferSz); if (ret != GQSPI_CODE_SUCCESS) { - wolfBoot_printf("on line %d: error %d\n", __LINE__, ret); + wolfBoot_printf("zynq.c:%d (error %d)\n", __LINE__, ret); } + #else + /* Wait for DMA done */ + if (qspi_dmaisr_wait(GQSPIDMA_ISR_DONE, 0)) { + return GQSPI_CODE_TIMEOUT; + } + GQSPIDMA_ISR = GQSPIDMA_ISR_DONE; + /* adjust xfer sz */ + if (xferSz > rxSz) + xferSz = rxSz; + /* copy result if not aligned */ + if (dmarxptr != rxData) { + memcpy(rxData, dmarxptr, xferSz); + } + #endif /* offset size and buffer */ rxSz -= xferSz; - rxData += (xferSz - dummySz); - dummySz = 0; /* only first RX */ + rxData += xferSz; } qspi_cs(pDev, 0); /* Deselect Slave */ @@ -524,7 +598,7 @@ static int qspi_flash_read_id(QspiDev_t* dev, uint8_t* id, uint32_t idSz) uint8_t status = 0; memset(cmd, 0, sizeof(cmd)); - cmd[0] = MULTI_IO_READ_ID_CMD; + cmd[0] = READ_ID_CMD; ret = qspi_transfer(&mDev, cmd, 1, NULL, 0, cmd, sizeof(cmd), 0, GQSPI_GEN_FIFO_MODE_SPI); @@ -775,11 +849,11 @@ 
void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) /* Clear and disable interrupts */ reg_isr = GQSPI_ISR; GQSPI_ISR |= GQSPI_ISR_WR_TO_CLR_MASK; /* Clear poll timeout counter interrupt */ - reg_cfg = QSPIDMA_DST_I_STS; - QSPIDMA_DST_I_STS = reg_cfg; /* clear all active interrupts */ - QSPIDMA_DST_STS |= QSPIDMA_DST_STS_WTC; /* mark outstanding DMA's done */ + reg_cfg = GQSPIDMA_ISR; + GQSPIDMA_ISR = reg_cfg; /* clear all active interrupts */ + GQSPIDMA_STS |= GQSPIDMA_STS_WTC; /* mark outstanding DMA's done */ GQSPI_IDR = GQSPI_IXR_ALL_MASK; /* disable interrupts */ - QSPIDMA_DST_I_STS = QSPIDMA_DST_I_STS_ALL_MASK; /* disable interrupts */ + GQSPIDMA_ISR = GQSPIDMA_ISR_ALL_MASK; /* disable interrupts */ /* Reset FIFOs */ if (GQSPI_ISR & GQSPI_IXR_RX_FIFO_EMPTY) { GQSPI_FIFO_CTRL |= (GQSPI_FIFO_CTRL_RST_TX_FIFO | GQSPI_FIFO_CTRL_RST_RX_FIFO); @@ -791,10 +865,14 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) GQSPI_EN = 0; /* Disable device */ /* Initialize clock divisor, write protect hold and start mode */ +#ifdef GQSPI_DMA + reg_cfg = GQSPI_CFG_MODE_EN_DMA; /* Use DMA Transfer Mode */ +#else reg_cfg = GQSPI_CFG_MODE_EN_IO; /* Use I/O Transfer Mode */ + reg_cfg |= GQSPI_CFG_START_GEN_FIFO; /* Auto start GFIFO cmd execution */ +#endif reg_cfg |= GQSPI_CFG_BAUD_RATE_DIV(GQSPI_CLK_DIV); /* Clock Divider */ reg_cfg |= GQSPI_CFG_WP_HOLD; /* Use WP Hold */ - reg_cfg |= GQSPI_CFG_START_GEN_FIFO; /* Start GFIFO command execution */ reg_cfg &= ~(GQSPI_CFG_CLK_POL | GQSPI_CFG_CLK_PH); /* Use POL=0,PH=0 */ GQSPI_CFG = reg_cfg; @@ -803,30 +881,31 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) * the clock and data tap delays bypassed. */ IOU_TAPDLY_BYPASS |= IOU_TAPDLY_BYPASS_LQSPI_RX; GQSPI_LPBK_DLY_ADJ = 0; - QSPI_DATA_DLY_ADJ = 0; + GQSPI_DATA_DLY_ADJ = 0; #elif GQSPI_CLK_DIV >= 1 /* 300/4=75MHz */ /* At 100 MHz, the Quad-SPI controller should be in clock loopback mode * with the clock tap delay bypassed, but the data tap delay enabled. 
*/ IOU_TAPDLY_BYPASS |= IOU_TAPDLY_BYPASS_LQSPI_RX; GQSPI_LPBK_DLY_ADJ = GQSPI_LPBK_DLY_ADJ_USE_LPBK; - QSPI_DATA_DLY_ADJ = QSPI_DATA_DLY_ADJ_USE_DATA_DLY | QSPI_DATA_DLY_ADJ_DATA_DLY_ADJ(2); + GQSPI_DATA_DLY_ADJ = (GQSPI_DATA_DLY_ADJ_USE_DATA_DLY | + GQSPI_DATA_DLY_ADJ_DATA_DLY_ADJ(2)); #else /* At 150 MHz, only the generic controller can be used. * The generic controller should be in clock loopback mode and the clock * tap delay enabled, but the data tap delay disabled. */ IOU_TAPDLY_BYPASS = 0; GQSPI_LPBK_DLY_ADJ = GQSPI_LPBK_DLY_ADJ_USE_LPBK; - QSPI_DATA_DLY_ADJ = 0; + GQSPI_DATA_DLY_ADJ = 0; #endif /* Initialize hardware parameters for Threshold and Interrupts */ GQSPI_TX_THRESH = 1; GQSPI_RX_THRESH = 1; - GQSPI_GF_THRESH = 16; + GQSPI_GF_THRESH = 31; /* Reset DMA */ - QSPIDMA_DST_CTRL = QSPIDMA_DST_CTRL_DEF; - QSPIDMA_DST_CTRL2 = QSPIDMA_DST_CTRL2_DEF; + GQSPIDMA_CTRL = GQSPIDMA_CTRL_DEF; + GQSPIDMA_CTRL2 = GQSPIDMA_CTRL2_DEF; /* Interrupts unmask and enable */ GQSPI_IMR = GQSPI_IXR_ALL_MASK; diff --git a/hal/zynq.h b/hal/zynq.h index 22b728560..725fcbf6c 100644 --- a/hal/zynq.h +++ b/hal/zynq.h @@ -85,18 +85,25 @@ #define GQSPI_POLL_CFG (*((volatile uint32_t*)(QSPI_BASE + 0x154))) /* poll configuration register */ #define GQSPI_P_TIMEOUT (*((volatile uint32_t*)(QSPI_BASE + 0x158))) /* poll timeout register. */ #define GQSPI_XFER_STS (*((volatile uint32_t*)(QSPI_BASE + 0x15C))) /* transfer status register. 
*/ -#define QSPI_DATA_DLY_ADJ (*((volatile uint32_t*)(QSPI_BASE + 0x1F8))) /* adjusting the internal receive data delay for read data capturing */ +#define GQSPI_DATA_DLY_ADJ (*((volatile uint32_t*)(QSPI_BASE + 0x1F8))) /* adjusting the internal receive data delay for read data capturing */ #define GQSPI_MOD_ID (*((volatile uint32_t*)(QSPI_BASE + 0x1FC))) -#define QSPIDMA_DST_STS (*((volatile uint32_t*)(QSPI_BASE + 0x808))) -#define QSPIDMA_DST_CTRL (*((volatile uint32_t*)(QSPI_BASE + 0x80C))) -#define QSPIDMA_DST_I_STS (*((volatile uint32_t*)(QSPI_BASE + 0x814))) -#define QSPIDMA_DST_CTRL2 (*((volatile uint32_t*)(QSPI_BASE + 0x824))) +/* DMA Registers */ +#define GQSPIDMA_DST (*((volatile uint32_t*)(QSPI_BASE + 0x800))) /* Destination memory address for DMA stream -> memory data transfer */ +#define GQSPIDMA_DST_MSB (*((volatile uint32_t*)(QSPI_BASE + 0x828))) /* Destination memory address (MSBs) for DMA stream -> memory data transfer */ +#define GQSPIDMA_SIZE (*((volatile uint32_t*)(QSPI_BASE + 0x804))) /* DMA transfer payload for DMA stream -> memory data transfer */ +#define GQSPIDMA_STS (*((volatile uint32_t*)(QSPI_BASE + 0x808))) /* General DST DMA status */ +#define GQSPIDMA_CTRL (*((volatile uint32_t*)(QSPI_BASE + 0x80C))) /* General DST DMA control */ +#define GQSPIDMA_ISR (*((volatile uint32_t*)(QSPI_BASE + 0x814))) /* DST DMA interrupt status register */ +#define GQSPIDMA_IER (*((volatile uint32_t*)(QSPI_BASE + 0x818))) /* DST DMA interrupt enable */ +#define GQSPIDMA_IDR (*((volatile uint32_t*)(QSPI_BASE + 0x81C))) /* DST DMA interrupt disable */ +#define GQSPIDMA_IMR (*((volatile uint32_t*)(QSPI_BASE + 0x820))) /* DST DMA interrupt mask */ +#define GQSPIDMA_CTRL2 (*((volatile uint32_t*)(QSPI_BASE + 0x824))) /* General DST DMA control register 2 */ #define GQSPI_LPBK_DLY_ADJ_USE_LPBK (1UL << 5) #define GQSPI_LPBK_DLY_ADJ_DIV0(x) (((x) & 0x7) << 0) #define GQSPI_LPBK_DLY_ADJ_DLY1(x) (((x) & 0x3) << 3) -#define QSPI_DATA_DLY_ADJ_USE_DATA_DLY (1UL << 31) 
-#define QSPI_DATA_DLY_ADJ_DATA_DLY_ADJ(x) (((x) & 0x7) << 28) +#define GQSPI_DATA_DLY_ADJ_USE_DATA_DLY (1UL << 31) +#define GQSPI_DATA_DLY_ADJ_DATA_DLY_ADJ(x) (((x) & 0x7) << 28) /* GQSPI Registers */ /* GQSPI_CFG: Configuration registers */ @@ -166,15 +173,16 @@ #define GQSPI_FIFO_CTRL_RST_TX_FIFO (1UL << 1) #define GQSPI_FIFO_CTRL_RST_RX_FIFO (1UL << 2) -/* QSPIDMA_DST_CTRL */ -#define QSPIDMA_DST_CTRL_DEF 0x403FFA00UL -#define QSPIDMA_DST_CTRL2_DEF 0x081BFFF8UL +/* GQSPIDMA_CTRL */ +#define GQSPIDMA_CTRL_DEF 0x403FFA00UL +#define GQSPIDMA_CTRL2_DEF 0x081BFFF8UL -/* QSPIDMA_DST_STS */ -#define QSPIDMA_DST_STS_WTC 0xE000U +/* GQSPIDMA_STS */ +#define GQSPIDMA_STS_WTC 0xE000U -/* QSPIDMA_DST_I_STS */ -#define QSPIDMA_DST_I_STS_ALL_MASK 0xFEU +/* GQSPIDMA_ISR */ +#define GQSPIDMA_ISR_DONE 0x02 +#define GQSPIDMA_ISR_ALL_MASK 0xFEU /* QSPI Configuration (bare-metal only) */ #ifndef GQSPI_CLK_DIV @@ -182,6 +190,16 @@ #endif #define GQSPI_CS_ASSERT_CLOCKS 5 /* CS Setup Time (tCSS) - num of clock cycles foes in IMM */ #define GQSPI_FIFO_WORD_SZ 4 +#define QQSPI_DMA_ALIGN 64 /* L1 cache size */ +#ifndef GQSPI_DMA_TMPSZ + /* Use larger of WOLFBOOT_SHA_BLOCK_SIZE or IMAGE_HEADER_SIZE */ + #if defined(WOLFBOOT_SHA_BLOCK_SIZE) && \ + WOLFBOOT_SHA_BLOCK_SIZE > IMAGE_HEADER_SIZE + #define GQSPI_DMA_TMPSZ WOLFBOOT_SHA_BLOCK_SIZE + #else + #define GQSPI_DMA_TMPSZ IMAGE_HEADER_SIZE + #endif +#endif #define GQSPI_TIMEOUT_TRIES 100000 #define QSPI_FLASH_READY_TRIES 1000 @@ -196,7 +214,7 @@ #define GQPI_USE_4BYTE_ADDR 1 #endif #ifndef GQSPI_DUMMY_READ -#define GQSPI_DUMMY_READ (8*8) /* Number of dummy clock cycles for reads */ +#define GQSPI_DUMMY_READ (8) /* Number of dummy clock cycles for reads */ #endif diff --git a/include/image.h b/include/image.h index c084aa7fa..8b00a3ac0 100644 --- a/include/image.h +++ b/include/image.h @@ -74,6 +74,22 @@ int wolfBot_get_dts_size(void *dts_addr); # endif #endif +/* Helpers for memory alignment */ +#ifndef XALIGNED + #if defined(__GNUC__) 
|| defined(__llvm__) || \ + defined(__IAR_SYSTEMS_ICC__) + #define XALIGNED(x) __attribute__ ( (aligned (x))) + #elif defined(__KEIL__) + #define XALIGNED(x) __align(x) + #elif defined(_MSC_VER) + /* disable align warning, we want alignment ! */ + #pragma warning(disable: 4324) + #define XALIGNED(x) __declspec (align (x)) + #else + #define XALIGNED(x) /* null expansion */ + #endif +#endif + #ifndef WOLFBOOT_FLAGS_INVERT #define SECT_FLAG_NEW 0x0F diff --git a/src/boot_aarch64.c b/src/boot_aarch64.c index e323dbfce..dec78f6d9 100644 --- a/src/boot_aarch64.c +++ b/src/boot_aarch64.c @@ -69,7 +69,7 @@ void boot_entry_C(void) #ifdef MMU -int __attribute((weak)) hal_dts_fixup(void* dts_addr) +int WEAKFUNCTION hal_dts_fixup(void* dts_addr) { (void)dts_addr; return 0; diff --git a/src/boot_aarch64_start.S b/src/boot_aarch64_start.S index f761331b9..4d4b57847 100644 --- a/src/boot_aarch64_start.S +++ b/src/boot_aarch64_start.S @@ -461,6 +461,34 @@ invalidatecaches_next_level: invalidatecaches_end: ret + +/* + * void flush_dcache_range(start, end) + * + * clean & invalidate data cache in the range + * + * x0: start address + * x1: end address + */ +.global flush_dcache_range +flush_dcache_range: + mrs x3, ctr_el0 + lsr x3, x3, #16 + and x3, x3, #0xf + mov x2, #4 + lsl x2, x2, x3 /* cache line size */ + + /* x2 <- minimal cache line size in cache system */ + sub x3, x2, #1 + bic x0, x0, x3 +1: dc civac, x0 /* clean & invalidate data or unified cache */ + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret + + /* * Below is the static translation page table required by MMU for Cortex-A53. 
* The translation table is flat mapped (input address = output address) with diff --git a/src/boot_ppc.c b/src/boot_ppc.c index b59d90893..912ae630a 100644 --- a/src/boot_ppc.c +++ b/src/boot_ppc.c @@ -101,12 +101,12 @@ void set_law(uint8_t idx, uint32_t addr_h, uint32_t addr_l, uint32_t trgt_id, (void)get32(LAWAR(idx)); } -void __attribute((weak)) hal_early_init(void) +void WEAKFUNCTION hal_early_init(void) { } #ifdef MMU -int __attribute((weak)) hal_dts_fixup(void* dts_addr) +int WEAKFUNCTION hal_dts_fixup(void* dts_addr) { (void)dts_addr; return 0; diff --git a/test-app/app_stm32h5.c b/test-app/app_stm32h5.c index b0250c771..08bd8b12f 100644 --- a/test-app/app_stm32h5.c +++ b/test-app/app_stm32h5.c @@ -33,6 +33,7 @@ #include "wolfboot/wolfboot.h" #include "keystore.h" #include "target.h" +#include "image.h" #ifdef SECURE_PKCS11 #include "wcs/user_settings.h" @@ -797,7 +798,7 @@ void _exit (int status) while (1) {} /* Make sure we hang here */ } -__attribute__((weak)) int _read(int file, char *ptr, int len) +int WEAKFUNCTION _read(int file, char *ptr, int len) { (void)file; int DataIdx; From 14183c9478bb7ccce30fb1867d52940668818394 Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 19 Dec 2024 12:02:55 -0800 Subject: [PATCH 3/4] Fixes for QSPI DMA mode. For example reduces QSPI->DDR load of 154MB from 18,228ms to 2,607ms. Changed QSPI to use DMA by default (can force IO mode using `GQSPI_MODE_IO`). 
--- config/examples/zynqmp.config | 3 ++ hal/zynq.c | 90 +++++++++++++++++------------------ hal/zynq.h | 6 ++- 3 files changed, 50 insertions(+), 49 deletions(-) diff --git a/config/examples/zynqmp.config b/config/examples/zynqmp.config index c8249b092..2cabceb5a 100644 --- a/config/examples/zynqmp.config +++ b/config/examples/zynqmp.config @@ -82,3 +82,6 @@ CFLAGS_EXTRA+=-DWOLFBOOT_SHA_BLOCK_SIZE=4096 # QSPI Clock at 0=150MHz, 1=75MHz, 2=37.5MHz (default) #CFLAGS_EXTRA+=-DGQSPI_CLK_DIV=0 + +# QSPI force IO mode (default is faster DMA mode) +#CFLAGS_EXTRA+=-DGQSPI_MODE_IO diff --git a/hal/zynq.c b/hal/zynq.c index b23610f9d..4d0849364 100644 --- a/hal/zynq.c +++ b/hal/zynq.c @@ -296,13 +296,13 @@ static inline int qspi_isr_wait(uint32_t wait_mask, uint32_t wait_val) } return 0; } -#ifdef GQSPI_DMA +#ifndef GQSPI_MODE_IO static inline int qspi_dmaisr_wait(uint32_t wait_mask, uint32_t wait_val) { uint32_t timeout = 0; while ((GQSPIDMA_ISR & wait_mask) == wait_val && - ++timeout < GQSPI_TIMEOUT_TRIES); - if (timeout == GQSPI_TIMEOUT_TRIES) { + ++timeout < GQSPIDMA_TIMEOUT_TRIES); + if (timeout == GQSPIDMA_TIMEOUT_TRIES) { return -1; } return 0; @@ -362,7 +362,7 @@ static int gspi_fifo_tx(const uint8_t* data, uint32_t sz) return GQSPI_CODE_SUCCESS; } -#ifndef GQSPI_DMA +#ifdef GQSPI_MODE_IO static int gspi_fifo_rx(uint8_t* data, uint32_t sz) { uint32_t tmp32; @@ -415,28 +415,28 @@ static int qspi_cs(QspiDev_t* pDev, int csAssert) static uint32_t qspi_calc_exp(uint32_t xferSz, uint32_t* reg_genfifo) { - uint32_t expval = 8; + uint32_t expval; *reg_genfifo &= ~(GQSPI_GEN_FIFO_IMM_MASK | GQSPI_GEN_FIFO_EXP_MASK); if (xferSz > GQSPI_GEN_FIFO_IMM_MASK) { - /* Use exponent mode */ - while (1) { + /* Use exponent mode (DMA max is 2^28) */ + for (expval=28; expval>=8; expval--) { + /* find highest bit set */ if (xferSz & (1 << expval)) { *reg_genfifo |= GQSPI_GEN_FIFO_EXP_MASK; - *reg_genfifo |= GQSPI_GEN_FIFO_IMM(expval); /* IMM is exponent */ + *reg_genfifo |= 
GQSPI_GEN_FIFO_IMM(expval); /* IMM=exponent */ xferSz = (1 << expval); break; } - expval++; } } else { /* Use length mode */ - *reg_genfifo |= GQSPI_GEN_FIFO_IMM(xferSz); /* IMM is length */ + *reg_genfifo |= GQSPI_GEN_FIFO_IMM(xferSz); /* IMM=actual length */ } return xferSz; } -#ifdef GQSPI_DMA +#ifndef GQSPI_MODE_IO static uint8_t XALIGNED(QQSPI_DMA_ALIGN) dmatmp[GQSPI_DMA_TMPSZ]; #endif @@ -448,7 +448,7 @@ static int qspi_transfer(QspiDev_t* pDev, { int ret = GQSPI_CODE_SUCCESS; uint32_t reg_genfifo, xferSz; -#ifdef GQSPI_DMA +#ifndef GQSPI_MODE_IO uint8_t* dmarxptr = NULL; #endif GQSPI_EN = 1; /* Enable device */ @@ -529,27 +529,26 @@ static int qspi_transfer(QspiDev_t* pDev, reg_genfifo |= (GQSPI_GEN_FIFO_RX | GQSPI_GEN_FIFO_DATA_XFER); reg_genfifo |= (pDev->stripe & GQSPI_GEN_FIFO_STRIPE); - xferSz = rxSz; - #ifdef GQSPI_DMA - /* if xferSz or rxData is not QQSPI_DMA_ALIGN aligned use tmp */ + xferSz = qspi_calc_exp(rxSz, &reg_genfifo); + #ifndef GQSPI_MODE_IO + /* check if pointer is aligned or odd remainder */ dmarxptr = rxData; - if ((rxSz & (QQSPI_DMA_ALIGN-1)) || - (((size_t)rxData) & (QQSPI_DMA_ALIGN-1))) { + if (((size_t)rxData & (QQSPI_DMA_ALIGN-1)) || (xferSz & 3)) { dmarxptr = (uint8_t*)dmatmp; - /* round up */ xferSz = ((xferSz + (QQSPI_DMA_ALIGN-1)) & ~(QQSPI_DMA_ALIGN-1)); if (xferSz > (uint32_t)sizeof(dmatmp)) { xferSz = (uint32_t)sizeof(dmatmp); } + /* re-adjust transfer */ + xferSz = qspi_calc_exp(xferSz, &reg_genfifo); } GQSPIDMA_DST = (unsigned long)dmarxptr; GQSPIDMA_SIZE = xferSz; - GQSPIDMA_IER = GQSPIDMA_ISR_ALL_MASK; + GQSPIDMA_IER = GQSPIDMA_ISR_DONE; /* enable DMA done interrupt */ flush_dcache_range((unsigned long)dmarxptr, (unsigned long)dmarxptr + xferSz); #endif - xferSz = qspi_calc_exp(xferSz, &reg_genfifo); /* Submit general FIFO operation */ ret = qspi_gen_fifo_write(reg_genfifo); @@ -558,7 +557,7 @@ static int qspi_transfer(QspiDev_t* pDev, break; } - #ifndef GQSPI_DMA + #ifdef GQSPI_MODE_IO /* Read FIFO */ ret = gspi_fifo_rx(rxData, 
xferSz); if (ret != GQSPI_CODE_SUCCESS) { @@ -569,7 +568,7 @@ static int qspi_transfer(QspiDev_t* pDev, if (qspi_dmaisr_wait(GQSPIDMA_ISR_DONE, 0)) { return GQSPI_CODE_TIMEOUT; } - GQSPIDMA_ISR = GQSPIDMA_ISR_DONE; + GQSPIDMA_ISR = GQSPIDMA_ISR_DONE; /* clear DMA interrupt */ /* adjust xfer sz */ if (xferSz > rxSz) xferSz = rxSz; @@ -577,6 +576,13 @@ static int qspi_transfer(QspiDev_t* pDev, if (dmarxptr != rxData) { memcpy(rxData, dmarxptr, xferSz); } + #if defined(DEBUG_ZYNQ) && DEBUG_ZYNQ >= 3 + if (xferSz <= 1024) { + for (uint32_t i=0; i= 2 /* 300/8=37.5MHz */ - /* At 40 MHz, the Quad-SPI controller should be in non-loopback mode with +#if GQSPI_CLK_DIV >= 1 /* 125/4=31.25MHz */ + /* At <40 MHz, the Quad-SPI controller should be in non-loopback mode with * the clock and data tap delays bypassed. */ IOU_TAPDLY_BYPASS |= IOU_TAPDLY_BYPASS_LQSPI_RX; GQSPI_LPBK_DLY_ADJ = 0; GQSPI_DATA_DLY_ADJ = 0; -#elif GQSPI_CLK_DIV >= 1 /* 300/4=75MHz */ - /* At 100 MHz, the Quad-SPI controller should be in clock loopback mode +#elif GQSPI_CLK_DIV >= 0 /* 125/2 = 62.5MHz */ + /* At <100 MHz, the Quad-SPI controller should be in clock loopback mode * with the clock tap delay bypassed, but the data tap delay enabled. */ IOU_TAPDLY_BYPASS |= IOU_TAPDLY_BYPASS_LQSPI_RX; GQSPI_LPBK_DLY_ADJ = GQSPI_LPBK_DLY_ADJ_USE_LPBK; GQSPI_DATA_DLY_ADJ = (GQSPI_DATA_DLY_ADJ_USE_DATA_DLY | GQSPI_DATA_DLY_ADJ_DATA_DLY_ADJ(2)); -#else - /* At 150 MHz, only the generic controller can be used. +#endif +#if 0 + /* At <150 MHz, only the generic controller can be used. * The generic controller should be in clock loopback mode and the clock * tap delay enabled, but the data tap delay disabled. 
*/ + /* For EL2 or lower must use IOCTL_SET_TAPDELAY_BYPASS ARG1=2, ARG2=0 */ IOU_TAPDLY_BYPASS = 0; GQSPI_LPBK_DLY_ADJ = GQSPI_LPBK_DLY_ADJ_USE_LPBK; GQSPI_DATA_DLY_ADJ = 0; @@ -907,10 +907,6 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) GQSPIDMA_CTRL = GQSPIDMA_CTRL_DEF; GQSPIDMA_CTRL2 = GQSPIDMA_CTRL2_DEF; - /* Interrupts unmask and enable */ - GQSPI_IMR = GQSPI_IXR_ALL_MASK; - GQSPI_IER = GQSPI_IXR_ALL_MASK; - GQSPI_EN = 1; /* Enable Device */ #endif /* USE_QNX */ (void)reg_cfg; diff --git a/hal/zynq.h b/hal/zynq.h index 725fcbf6c..0ba18cd77 100644 --- a/hal/zynq.h +++ b/hal/zynq.h @@ -174,11 +174,12 @@ #define GQSPI_FIFO_CTRL_RST_RX_FIFO (1UL << 2) /* GQSPIDMA_CTRL */ -#define GQSPIDMA_CTRL_DEF 0x403FFA00UL +#define GQSPIDMA_CTRL_DEF 0x803FFA00UL #define GQSPIDMA_CTRL2_DEF 0x081BFFF8UL /* GQSPIDMA_STS */ -#define GQSPIDMA_STS_WTC 0xE000U +#define GQSPIDMA_STS_WTC (7UL << 13) +#define GQSPIDMA_STS_BUSY (1UL << 0) /* GQSPIDMA_ISR */ #define GQSPIDMA_ISR_DONE 0x02 @@ -201,6 +202,7 @@ #endif #endif #define GQSPI_TIMEOUT_TRIES 100000 +#define GQSPIDMA_TIMEOUT_TRIES 100000000 #define QSPI_FLASH_READY_TRIES 1000 /* QSPI Configuration */ From 0bda48755d010132cf0be024454e859d8b5fb2b8 Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 20 Dec 2024 15:04:37 -0800 Subject: [PATCH 4/4] Make `flush_dcache_range` available even with `USE_BUILTIN_STARTUP`. Remove unused variable. 
--- hal/zynq.c | 2 -- src/boot_aarch64_start.S | 53 ++++++++++++++++++++-------------------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/hal/zynq.c b/hal/zynq.c index 4d0849364..84e9368e6 100644 --- a/hal/zynq.c +++ b/hal/zynq.c @@ -311,8 +311,6 @@ static inline int qspi_dmaisr_wait(uint32_t wait_mask, uint32_t wait_val) static int qspi_gen_fifo_write(uint32_t reg_genfifo) { - uint32_t reg_cfg; - /* wait until the gen FIFO is not full to write */ if (qspi_isr_wait(GQSPI_IXR_GEN_FIFO_NOT_FULL, 0)) { return GQSPI_CODE_TIMEOUT; diff --git a/src/boot_aarch64_start.S b/src/boot_aarch64_start.S index 4d4b57847..79cc3f67d 100644 --- a/src/boot_aarch64_start.S +++ b/src/boot_aarch64_start.S @@ -462,32 +462,6 @@ invalidatecaches_end: ret -/* - * void flush_dcache_range(start, end) - * - * clean & invalidate data cache in the range - * - * x0: start address - * x1: end address - */ -.global flush_dcache_range -flush_dcache_range: - mrs x3, ctr_el0 - lsr x3, x3, #16 - and x3, x3, #0xf - mov x2, #4 - lsl x2, x2, x3 /* cache line size */ - - /* x2 <- minimal cache line size in cache system */ - sub x3, x2, #1 - bic x0, x0, x3 -1: dc civac, x0 /* clean & invalidate data or unified cache */ - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy - ret - /* * Below is the static translation page table required by MMU for Cortex-A53. 
@@ -1058,6 +1032,33 @@ FPUStatus: #endif /* !USE_BUILTIN_STARTUP */ +/* + * void flush_dcache_range(start, end) + * + * clean & invalidate data cache in the range + * + * x0: start address + * x1: end address + */ +.global flush_dcache_range +flush_dcache_range: + mrs x3, ctr_el0 + lsr x3, x3, #16 + and x3, x3, #0xf + mov x2, #4 + lsl x2, x2, x3 /* cache line size */ + + /* x2 <- minimal cache line size in cache system */ + sub x3, x2, #1 + bic x0, x0, x3 +1: dc civac, x0 /* clean & invalidate data or unified cache */ + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret + + /* Initialize GIC 400 (GICv2) */ .global gicv2_init_secure gicv2_init_secure: