Add the NO_ARM_ASM build option, size/stack reporting targets and a
board benchmark script.

What this patch carries:
  * arch.mk, tools/config.mk: new NO_ARM_ASM knob (default 0). Unless it is
    set to 1, CORTEXM_ARM_EXTRA_OBJS / CORTEXM_ARM_EXTRA_CFLAGS are populated
    with the wolfSSL ARM assembly objects and defines, and the Cortex-M
    branches that select sp_cortexm.o append them. Both variables are
    cleared again when WOLFCRYPT_TZ=1.
  * Makefile: new phony targets stack-usage and image-header-size, which
    write $(STACK_USAGE) and $(IMAGE_HEADER_SIZE) to .stack_usage and
    .image_header_size; clean removes both files.
  * options.mk: XMSS STACK_USAGE goes from 2720 to 9352.
  * include/user_settings.h: WOLFSSL_SHA512 and XTOLOWER fallbacks.
  * test-app/app_stm32h7.c: 100 KB filler array in the test application.
  * tools/scripts/benchmark.sh: new script that builds a list of
    configurations and prints a Markdown table of size and boot time.
  * CI workflow and tools/test.mk: pass NO_ARM_ASM=1 to the listed builds.
  * docs/compile.md: documents NO_ASM vs. NO_ARM_ASM.

NOTE(review): this copy was rebuilt from a whitespace-collapsed version of
the patch. Leading indentation of context lines (tabs vs. spaces, nesting
depth) and the two whitespace-only -/+ pairs in arch.mk are reconstructed,
not original; confirm against the tree before applying. The index lines are
kept from the original patch and do not reflect the edited added lines.

diff --git a/.github/workflows/test-configs.yml b/.github/workflows/test-configs.yml
index b34ba299d..4bd44470b 100644
--- a/.github/workflows/test-configs.yml
+++ b/.github/workflows/test-configs.yml
@@ -70,28 +70,28 @@ jobs:
     with:
       arch: arm
       config-file: ./config/examples/imx-rt1040.config
-      make-args: PKA=1
+      make-args: PKA=1 NO_ARM_ASM=1
 
   imx_rt1050_test_pka:
     uses: ./.github/workflows/test-build-mcux-sdk.yml
     with:
       arch: arm
       config-file: ./config/examples/imx-rt1050.config
-      make-args: PKA=1
+      make-args: PKA=1 NO_ARM_ASM=1
 
   imx_rt1060_test_pka:
     uses: ./.github/workflows/test-build-mcux-sdk.yml
     with:
       arch: arm
       config-file: ./config/examples/imx-rt1060.config
-      make-args: PKA=1
+      make-args: PKA=1 NO_ARM_ASM=1
 
   imx_rt1064_test_pka:
     uses: ./.github/workflows/test-build-mcux-sdk.yml
     with:
       arch: arm
       config-file: ./config/examples/imx-rt1064.config
-      make-args: PKA=1
+      make-args: PKA=1 NO_ARM_ASM=1
 
   kinetis_k64f_test:
     uses: ./.github/workflows/test-build-mcux-sdk.yml
diff --git a/Makefile b/Makefile
index 4c9f618c6..1addb7017 100644
--- a/Makefile
+++ b/Makefile
@@ -318,6 +318,7 @@ clean:
 	$(Q)rm -f $(MACHINE_OBJ) $(MAIN_TARGET) $(LSCRIPT)
 	$(Q)rm -f $(OBJS)
 	$(Q)rm -f tools/keytools/otp/otp-keystore-gen
+	$(Q)rm -f .stack_usage .image_header_size
 	$(Q)$(MAKE) -C test-app -s clean
 	$(Q)$(MAKE) -C tools/check_config -s clean
 	$(Q)$(MAKE) -C stage1 -s clean
@@ -385,6 +386,13 @@ line-count-nrf52:
 line-count-x86:
 	cloc --force-lang-def cloc_lang_def.txt src/boot_x86_fsp.c src/boot_x86_fsp_payload.c src/boot_x86_fsp_start.S src/image.c src/keystore.c src/libwolfboot.c src/loader.c src/string.c src/update_disk.c src/x86/ahci.c src/x86/ata.c src/x86/common.c src/x86/gpt.c src/x86/hob.c src/pci.c src/x86/tgl_fsp.c hal/x86_fsp_tgl.c hal/x86_uart.c
 
+.PHONY: stack-usage image-header-size
+stack-usage: wolfboot.bin
+	$(Q)echo $(STACK_USAGE) > .stack_usage
+
+image-header-size: wolfboot.bin
+	$(Q)echo $(IMAGE_HEADER_SIZE) > .image_header_size
+
 cppcheck:
 	cppcheck -f --enable=warning --enable=portability \
 		--suppress="ctunullpointer" --suppress="nullPointer" \
diff --git a/arch.mk b/arch.mk
index 92d45bfcd..979fb7b9a 100644
--- a/arch.mk
+++ b/arch.mk
@@ -188,7 +188,8 @@ ifeq ($(ARCH),ARM)
 ifeq ($(CORTEX_A5),1)
   FPU=-mfpu=vfp4-d16
   CFLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -static -z noexecstack
-  LDLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -mtune=cortex-a5 -static -z noexecstack -Ttext 0x300000
+  LDLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -mtune=cortex-a5 -static \
+          -z noexecstack -Ttext 0x300000
   # Cortex-A uses boot_arm32.o
   OBJS+=src/boot_arm32.o src/boot_arm32_start.o
   ifeq ($(NO_ASM),1)
@@ -198,11 +199,37 @@ ifeq ($(CORTEX_A5),1)
     OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.o
     OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm.o
     OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.o
-    CFLAGS+=-DWOLFSSL_SP_ARM32_ASM -DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO -DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
+    CFLAGS+=-DWOLFSSL_SP_ARM32_ASM -DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO \
+            -DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
   endif
 else
   # All others use boot_arm.o
   OBJS+=src/boot_arm.o
+  ifneq ($(NO_ARM_ASM),1)
+    CORTEXM_ARM_EXTRA_OBJS= \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-aes.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-chacha.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha512.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-aes-asm.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha3-asm.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-chacha-asm.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.o
+
+
+    CORTEXM_ARM_THUMB_EXTRA_OBJS= \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/thumb2-sha256-asm.o \
+        ./lib/wolfssl/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.o
+
+    CORTEXM_ARM_EXTRA_CFLAGS+=-DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO \
+        -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
+  endif
   ifeq ($(CORTEX_M33),1)
     CFLAGS+=-mcpu=cortex-m33 -DCORTEX_M33
     LDFLAGS+=-mcpu=cortex-m33
@@ -212,28 +239,25 @@ else
       endif
       CFLAGS+=-mcmse
       ifeq ($(WOLFCRYPT_TZ),1)
+        CORTEXM_ARM_EXTRA_OBJS=
+        CORTEXM_ARM_EXTRA_CFLAGS=
         SECURE_OBJS+=./src/wc_callable.o
         SECURE_OBJS+=./lib/wolfssl/wolfcrypt/src/random.o
         CFLAGS+=-DWOLFCRYPT_SECURE_MODE
         SECURE_LDFLAGS+=-Wl,--cmse-implib -Wl,--out-implib=./src/wc_secure_calls.o
       endif
     endif # TZEN=1
-    ifeq ($(NO_ASM),1)
       ifeq ($(SPMATH),1)
         ifeq ($(NO_ASM),1)
           MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o
         else
           CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
           MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
+          CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=8
+          OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
         endif
       endif
     else
-      ifeq ($(SPMATH),1)
-        CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
-        MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
-      endif
-    endif
-  else
   ifeq ($(CORTEX_M7),1)
     CFLAGS+=-mcpu=cortex-m7
     LDFLAGS+=-mcpu=cortex-m7
@@ -243,10 +267,12 @@ else
       else
         CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
         MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
+        CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
+        OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
       endif
-  endif
+    endif
   else
-    ifeq ($(CORTEX_M0),1)
+  ifeq ($(CORTEX_M0),1)
     CFLAGS+=-mcpu=cortex-m0
     LDFLAGS+=-mcpu=cortex-m0
     ifeq ($(SPMATH),1)
@@ -255,6 +281,9 @@ else
       else
         CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_THUMB_ASM
         MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_armthumb.o
+        # TODO: integrate thumb2-asm
+        #CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=6
+        #OBJS+=$(CORTEXM_ARM_THUMB_EXTRA_OBJS)
       endif
     endif
   else
@@ -269,6 +298,8 @@ else
     ifeq ($(SPMATH),1)
       CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM -DWOLFSSL_SP_NO_UMAAL
       MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
+      CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
+      OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
     endif
   endif
 else
@@ -284,6 +315,8 @@ else
     ifeq ($(SPMATH),1)
       CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
       MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
+      CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
+      OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
     endif
   endif
 endif
diff --git a/docs/compile.md b/docs/compile.md
index 76fdca285..bf45964c4 100644
--- a/docs/compile.md
+++ b/docs/compile.md
@@ -42,6 +42,28 @@ By default, wolfBoot is compiled for ARM Cortex-M3/4/7. To compile for Cortex-M0
 
 `CORTEX_M0=1`
 
+### Speed vs. size
+
+On a number of targets, algorithms may be optimized automatically to use assembly
+optimizations. To disable assembly optimizations, use `NO_ASM=1`. This option will
+produce smaller code, but will also increase the boot time.
+
+ARM-specific assembly optimizations affecting hash and symmetric key ciphers can be
+disabled with the option `NO_ARM_ASM=1`. This is useful, for example, when you want
+to use SP math optimizations for key verification, but exclude SHA2/AES optimizations
+to save some space.
+
+#### Example: ECC256 + SHA256 on STM32H7
+
+Benchmark: footprint vs. boot time (SHA of a 100KB image + signature verification)
+
+| Description | Selected options | wolfBoot size (B) | Boot time (s) |
+|-------------|------------------|-------------------|---------------|
+| Full ECC256 assembly optimizations. Fastest. | `SIGN=ECC256` | 21836 | .583 |
+| Optimize ECC only (SP math assembly only) | `SIGN=ECC256 NO_ARM_ASM=1` | 18624 | .760 |
+| No assembly optimizations (smallest) | `SIGN=ECC256 NO_ASM=1` | 14416 | 3.356 |
+
+
 ### Flash partitions
 
 The file [include/target.h](../include/target.h) is generated according to the configured flash geometry,
diff --git a/include/user_settings.h b/include/user_settings.h
index 9cac3bb59..188c4f19c 100644
--- a/include/user_settings.h
+++ b/include/user_settings.h
@@ -75,8 +75,8 @@ extern int tolower(int c);
 #   define ED25519_SMALL
 #   define NO_ED25519_SIGN
 #   define NO_ED25519_EXPORT
-#   define WOLFSSL_SHA512
 #   define USE_SLOW_SHA512
+#   define WOLFSSL_SHA512
 #endif
 
 /* ED448 and SHA3/SHAKE256 */
@@ -267,6 +267,9 @@ extern int tolower(int c);
         !defined(WOLFCRYPT_SECURE_MODE)
 #   define NO_SHA256
 #   endif
+#ifndef WOLFSSL_SHA512
+#define WOLFSSL_SHA512
+#endif
 #endif
 
 /* If SP math is enabled determine word size */
@@ -499,4 +502,8 @@ extern int tolower(int c);
 
 #endif /* WOLFBOOT_PKCS11_APP */
 
+#ifndef XTOLOWER
+#define XTOLOWER(x) (x)
+#endif
+
 #endif /* !_WOLFBOOT_USER_SETTINGS_H_ */
diff --git a/options.mk b/options.mk
index d3e8e72f8..e7c126cae 100644
--- a/options.mk
+++ b/options.mk
@@ -448,7 +448,7 @@ ifeq ($(SIGN),XMSS)
   ifeq ($(WOLFBOOT_SMALL_STACK),1)
     $(error WOLFBOOT_SMALL_STACK with XMSS not supported)
   else
-    STACK_USAGE=2720
+    STACK_USAGE=9352
   endif
 endif
 
diff --git a/test-app/app_stm32h7.c b/test-app/app_stm32h7.c
index 3c345bda0..c0a029a77 100644
--- a/test-app/app_stm32h7.c
+++ b/test-app/app_stm32h7.c
@@ -362,6 +362,9 @@ void uart_print(const char *s)
     }
 }
 
+#define FILLER_SIZE (100 * 1024)
+static volatile uint8_t filler_data[FILLER_SIZE] = { 0x01, 0x02, 0x03 };
+
 void main(void)
 {
     uint8_t firmware_version = 0;
@@ -373,6 +376,7 @@ void main(void)
     if (FIRMWARE_A)
         ld3_write(LED_INIT);
 
+    filler_data[FILLER_SIZE - 1] = 0xAA;
     /* LED Indicator of successful UART initialization. SUCCESS = ON, FAIL = OFF */
     if (uart_setup(115200) < 0)
         ld2_write(LED_OFF);
diff --git a/tools/config.mk b/tools/config.mk
index 1b91346f5..7f70024cd 100644
--- a/tools/config.mk
+++ b/tools/config.mk
@@ -23,6 +23,7 @@ ifeq ($(ARCH),)
   CORTEX_M7?=0
   CORTEX_M3?=0
   NO_ASM?=0
+  NO_ARM_ASM?=0
   EXT_FLASH?=0
   SPI_FLASH?=0
   QSPI_FLASH?=0
@@ -104,5 +105,6 @@ CONFIG_VARS:= ARCH TARGET SIGN HASH MCUXSDK MCUXPRESSO MCUXPRESSO_CPU MCUXPRESSO
 	NXP_CUSTOM_DCD NXP_CUSTOM_DCD_OBJS \
 	FLASH_OTP_KEYSTORE \
 	KEYVAULT_OBJ_SIZE \
-	KEYVAULT_MAX_ITEMS
+	KEYVAULT_MAX_ITEMS \
+	NO_ARM_ASM
 
diff --git a/tools/scripts/benchmark.sh b/tools/scripts/benchmark.sh
new file mode 100755
index 000000000..8caa1efc0
--- /dev/null
+++ b/tools/scripts/benchmark.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+#
+function run_on_board() {
+    # GPIO2: RST
+    # GPIO4: BOOT (input)
+
+    if ! (st-flash reset &>/dev/null); then
+        echo -n " | No data."
+    else
+        sleep 1
+        st-flash --connect-under-reset write factory.bin 0x8000000 &>/dev/null
+        sleep .2
+        echo "2" > /sys/class/gpio/export 2>/dev/null
+        echo "out" > /sys/class/gpio/gpio2/direction
+        echo "1" > /sys/class/gpio/gpio2/value # Release reset
+        echo "0" > /sys/class/gpio/gpio2/value # Keep reset low
+        sleep 1
+        echo -n " | "
+        echo "1" > /sys/class/gpio/gpio2/value # Release reset
+        START=`date +%s.%N`
+        while (test `cat /sys/class/gpio/gpio4/value` -eq 0); do
+            sleep .01
+        done
+        while (test `cat /sys/class/gpio/gpio4/value` -eq 0); do
+            sleep .01
+        done
+        END=`date +%s.%N`
+        echo "scale=3; $END/1 - $START/1 "| bc
+        echo "in" > /sys/class/gpio/gpio2/direction
+        echo "2" >/sys/class/gpio/unexport 2>/dev/null
+    fi
+}
+
+function set_benchmark {
+    NAME=$1
+    shift
+    CONFIG=$@
+    # Name
+    echo -n "| "
+    echo -n $NAME
+    echo -n " | "
+    # Configuration
+    echo -n $CONFIG | tr -d '\n'
+    echo -n " | "
+    make clean &>/dev/null
+    make keysclean &>/dev/null
+    make $@ factory.bin &>/dev/null || make $@ factory.bin
+    make $@ stack-usage &>/dev/null
+    make $@ image-header-size &>/dev/null
+    # Bootloader size
+    echo -n `ls -l wolfboot.bin | cut -d " " -f 5 | tr -d '\n'`
+    echo -n " | "
+    # Stack size
+    cat .stack_usage | tr -d '\n'
+    echo -n " | "
+    # Image header size
+    cat .image_header_size | tr -d '\n'
+    # Boot time
+    run_on_board 2>&1 | tr -d '\n'
+    echo " |"
+}
+
+echo "4" > /sys/class/gpio/export 2>/dev/null
+echo "2" > /sys/class/gpio/unexport 2>/dev/null
+make keytools &>/dev/null
+cp config/examples/stm32h7.config .config
+echo "in" > /sys/class/gpio/gpio4/direction
+# Output benchmark results in a Markdown table
+echo "| Name | Configuration | Bootloader size | Stack size | Image header size | Boot time |"
+echo "|------|---------------|-----------------|------------|-------------------|-----------|"
+
+
+set_benchmark "SHA2 only" SIGN=NONE
+set_benchmark "SHA384 only" SIGN=NONE HASH=SHA384
+set_benchmark "SHA3 only" SIGN=NONE HASH=SHA3
+set_benchmark "SHA2 only,small" SIGN=NONE NO_ASM=1
+set_benchmark "rsa2048" SIGN=RSA2048
+set_benchmark "rsa3072" SIGN=RSA3072
+set_benchmark "rsa4096" SIGN=RSA4096
+set_benchmark "rsa4096 with sha384" SIGN=RSA4096 HASH=SHA384
+set_benchmark "ecdsa256" SIGN=ECC256
+set_benchmark "ecdsa384" SIGN=ECC384
+set_benchmark "ecdsa521" SIGN=ECC521
+set_benchmark "ecdsa384 with small stack" SIGN=ECC384 WOLFBOOT_SMALL_STACK=1
+set_benchmark "ecdsa384 with fast math" SIGN=ECC384 SPMATH=0
+set_benchmark "ecdsa256, no asm" SIGN=ECC256 NO_ASM=1
+set_benchmark "ecdsa384, no asm" SIGN=ECC384 NO_ASM=1
+set_benchmark "ecdsa521, no asm" SIGN=ECC521 NO_ASM=1
+set_benchmark "ecdsa384 with sha384" SIGN=ECC384 HASH=SHA384
+set_benchmark "ed25519 with sha384, small" SIGN=ED25519 HASH=SHA384 NO_ASM=1
+set_benchmark "ed25519 fast" SIGN=ED25519 NO_ASM=0
+set_benchmark "ed448" SIGN=ED448
+set_benchmark "ML_DSA-44" SIGN=ML_DSA ML_DSA_LEVEL=2 IMAGE_SIGNATURE_SIZE=2420 IMAGE_HEADER_SIZE=8192
+set_benchmark "ML_DSA-65" SIGN=ML_DSA ML_DSA_LEVEL=3 IMAGE_SIGNATURE_SIZE=3309 IMAGE_HEADER_SIZE=8192
+set_benchmark "ML_DSA-87" SIGN=ML_DSA ML_DSA_LEVEL=5 IMAGE_SIGNATURE_SIZE=4627 IMAGE_HEADER_SIZE=12288
+set_benchmark "LMS 1-10-8" SIGN=LMS LMS_LEVELS=1 LMS_HEIGHT=10 LMS_WINTERNITZ=8 IMAGE_HEADER_SIZE=4096 IMAGE_SIGNATURE_SIZE=1456
+set_benchmark "XMSS-SHA2_10_256" XMSS_PARAMS='XMSS-SHA2_10_256' SIGN=XMSS IMAGE_SIGNATURE_SIZE=2500 IMAGE_HEADER_SIZE=8192
+
diff --git a/tools/test.mk b/tools/test.mk
index 167b9accc..36cae4a04 100644
--- a/tools/test.mk
+++ b/tools/test.mk
@@ -986,37 +986,37 @@ test-all: clean
 
 
 test-size-all:
-	make test-size SIGN=NONE LIMIT=4816
+	make test-size SIGN=NONE LIMIT=4816 NO_ARM_ASM=1
 	make keysclean
-	make test-size SIGN=ED25519 LIMIT=11396
+	make test-size SIGN=ED25519 LIMIT=11396 NO_ARM_ASM=1
 	make keysclean
-	make test-size SIGN=ECC256 LIMIT=17936
+	make test-size SIGN=ECC256 LIMIT=17936 NO_ARM_ASM=1
 	make clean
-	make test-size SIGN=ECC256 NO_ASM=1 LIMIT=13480
+	make test-size SIGN=ECC256 NO_ASM=1 LIMIT=13480 NO_ARM_ASM=1
 	make keysclean
-	make test-size SIGN=RSA2048 LIMIT=11212
+	make test-size SIGN=RSA2048 LIMIT=11212 NO_ARM_ASM=1
 	make clean
-	make test-size SIGN=RSA2048 NO_ASM=1 LIMIT=11788
+	make test-size SIGN=RSA2048 NO_ASM=1 LIMIT=11788 NO_ARM_ASM=1
 	make keysclean
-	make test-size SIGN=RSA4096 LIMIT=11500
+	make test-size SIGN=RSA4096 LIMIT=11500 NO_ARM_ASM=1
 	make clean
-	make test-size SIGN=RSA4096 NO_ASM=1 LIMIT=12076
+	make test-size SIGN=RSA4096 NO_ASM=1 LIMIT=12076 NO_ARM_ASM=1
 	make keysclean
-	make test-size SIGN=ECC384 LIMIT=17504
+	make test-size SIGN=ECC384 LIMIT=17504 NO_ARM_ASM=1
 	make clean
-	make test-size SIGN=ECC384 NO_ASM=1 LIMIT=14872
+	make test-size SIGN=ECC384 NO_ASM=1 LIMIT=14872 NO_ARM_ASM=1
 	make keysclean
-	make test-size SIGN=ED448 LIMIT=13408
+	make test-size SIGN=ED448 LIMIT=13408 NO_ARM_ASM=1
 	make keysclean
-	make test-size SIGN=RSA3072 LIMIT=11352
+	make test-size SIGN=RSA3072 LIMIT=11352 NO_ARM_ASM=1
 	make clean
-	make test-size SIGN=RSA3072 NO_ASM=1 LIMIT=11892
+	make test-size SIGN=RSA3072 NO_ASM=1 LIMIT=11892 NO_ARM_ASM=1
 	make keysclean
 	make test-size SIGN=LMS LMS_LEVELS=2 LMS_HEIGHT=5 LMS_WINTERNITZ=8 \
 		WOLFBOOT_SMALL_STACK=0 IMAGE_SIGNATURE_SIZE=2644 \
-		IMAGE_HEADER_SIZE?=5288 LIMIT=7504
+		IMAGE_HEADER_SIZE?=5288 LIMIT=7504 NO_ARM_ASM=1
 	make keysclean
 	make test-size SIGN=XMSS XMSS_PARAMS='XMSS-SHA2_10_256' \
 		IMAGE_SIGNATURE_SIZE=2500 IMAGE_HEADER_SIZE?=4096 \
-		LIMIT=8220
+		LIMIT=8220 NO_ARM_ASM=1
 	make keysclean