Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Core] RP2040: use built-in integer hardware divider and optimized i64 multiplication #18464

Merged
merged 1 commit into from
Oct 4, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 31 additions & 220 deletions platforms/chibios/vendors/RP/RP2040.mk
Original file line number Diff line number Diff line change
Expand Up @@ -66,223 +66,34 @@ EXTRAINCDIRS += $(PLATFORM_RP2040_PATH)
# RP2040 optimized compiler intrinsics
##############################################################################

# Enables optimized compiler intrinsics which are located in the RP2040
# bootrom. This needs startup code and linker script support from ChibiOS,
# which is WIP. Therefore disabled by default for now.
#
# `?=` only assigns when RP2040_INTRINSICS_ENABLED has not been set already,
# so an earlier assignment, the environment or the make command line can opt
# in with `yes`. Everything up to the matching `endif` is skipped for any
# other value; $(strip ...) removes surrounding whitespace from the value
# before it is compared.
RP2040_INTRINSICS_ENABLED ?= no
ifeq ($(strip $(RP2040_INTRINSICS_ENABLED)), yes)
# Pico SDK sources that implement the intrinsics. Every file lives below
# $(PICOSDKROOT)/src/rp2_common, so only the sub-paths are spelled out and the
# common prefix is attached by $(addprefix). The variable stays recursively
# expanded, i.e. $(PICOSDKROOT) is resolved when the list is used.
PICOSDKINTRINSICSSRC = $(addprefix $(PICOSDKROOT)/src/rp2_common/,\
pico_float/float_aeabi.S \
pico_float/float_math.c \
pico_float/float_init_rom.c \
pico_float/float_v1_rom_shim.S \
pico_double/double_aeabi.S \
pico_double/double_math.c \
pico_double/double_init_rom.c \
pico_double/double_v1_rom_shim.S \
pico_divider/divider.S \
pico_int64_ops/pico_int64_ops_aeabi.S \
pico_mem_ops/mem_ops_aeabi.S \
pico_malloc/pico_malloc.c \
pico_bit_ops/bit_ops_aeabi.S)

# Include directories required to compile the Pico SDK intrinsics sources.
# NOTE: the SDK directory is spelled `pico_platform`; a misspelled entry is
# silently ignored by the compiler instead of producing an error.
PICOSDKINTRINSICSINC = $(PICOSDKROOT)/src/common/pico_base/include \
$(PICOSDKROOT)/src/rp2_common/pico_platform/include \
$(PICOSDKROOT)/src/rp2_common/pico_bootrom/include \
$(PICOSDKROOT)/src/rp2_common/hardware_divider/include \
$(PICOSDKROOT)/src/rp2_common/pico_float/include \
$(PICOSDKROOT)/src/rp2_common/pico_double/include \
$(PICOSDKROOT)/src/rp2_common/pico_malloc/include

# Set both *_SUPPORT_ROM_V1 switches to 0.
# NOTE(review): presumably this drops the shims for bootrom version 1 --
# confirm against the Pico SDK sources.
OPT_DEFS += -DPICO_FLOAT_SUPPORT_ROM_V1=0
OPT_DEFS += -DPICO_DOUBLE_SUPPORT_ROM_V1=0

# Linker options passed through the compiler driver: define __StackLimit as
# __heap_end__ and __unhandled_user_irq as _unhandled_exception, and do not
# emit a build-id.
CFLAGS += -Wl,--defsym=__StackLimit=__heap_end__ \
-Wl,--defsym=__unhandled_user_irq=_unhandled_exception \
-Wl,--build-id=none

# single precision floating point intrinsics
OPT_DEFS += -DPICO_FLOAT_IN_RAM=1
OPT_DEFS += -DPICO_FLOAT_PROPAGATE_NANS=0

# Each --wrap=<sym> makes the linker resolve references to <sym> against
# __wrap_<sym>. Every symbol is listed exactly once.
# NOTE(review): __aeabi_fadd is wrapped like its double counterpart
# __aeabi_dadd; this assumes float_aeabi.S provides __wrap___aeabi_fadd --
# confirm against the Pico SDK version in use.
CFLAGS += -Wl,--wrap=__aeabi_fadd
CFLAGS += -Wl,--wrap=__aeabi_fdiv
CFLAGS += -Wl,--wrap=__aeabi_fmul
CFLAGS += -Wl,--wrap=__aeabi_frsub
CFLAGS += -Wl,--wrap=__aeabi_fsub
CFLAGS += -Wl,--wrap=__aeabi_cfcmpeq
CFLAGS += -Wl,--wrap=__aeabi_cfrcmple
CFLAGS += -Wl,--wrap=__aeabi_cfcmple
CFLAGS += -Wl,--wrap=__aeabi_fcmpeq
CFLAGS += -Wl,--wrap=__aeabi_fcmplt
CFLAGS += -Wl,--wrap=__aeabi_fcmple
CFLAGS += -Wl,--wrap=__aeabi_fcmpge
CFLAGS += -Wl,--wrap=__aeabi_fcmpgt
CFLAGS += -Wl,--wrap=__aeabi_fcmpun
CFLAGS += -Wl,--wrap=__aeabi_i2f
CFLAGS += -Wl,--wrap=__aeabi_l2f
CFLAGS += -Wl,--wrap=__aeabi_ui2f
CFLAGS += -Wl,--wrap=__aeabi_ul2f
CFLAGS += -Wl,--wrap=__aeabi_f2iz
CFLAGS += -Wl,--wrap=__aeabi_f2lz
CFLAGS += -Wl,--wrap=__aeabi_f2uiz
CFLAGS += -Wl,--wrap=__aeabi_f2ulz
CFLAGS += -Wl,--wrap=__aeabi_f2d
CFLAGS += -Wl,--wrap=sqrtf
CFLAGS += -Wl,--wrap=cosf
CFLAGS += -Wl,--wrap=sinf
CFLAGS += -Wl,--wrap=tanf
CFLAGS += -Wl,--wrap=atan2f
CFLAGS += -Wl,--wrap=expf
CFLAGS += -Wl,--wrap=logf
CFLAGS += -Wl,--wrap=ldexpf
CFLAGS += -Wl,--wrap=copysignf
CFLAGS += -Wl,--wrap=truncf
CFLAGS += -Wl,--wrap=floorf
CFLAGS += -Wl,--wrap=ceilf
CFLAGS += -Wl,--wrap=roundf
CFLAGS += -Wl,--wrap=sincosf
CFLAGS += -Wl,--wrap=asinf
CFLAGS += -Wl,--wrap=acosf
CFLAGS += -Wl,--wrap=atanf
CFLAGS += -Wl,--wrap=sinhf
CFLAGS += -Wl,--wrap=coshf
CFLAGS += -Wl,--wrap=tanhf
CFLAGS += -Wl,--wrap=asinhf
CFLAGS += -Wl,--wrap=acoshf
CFLAGS += -Wl,--wrap=atanhf
CFLAGS += -Wl,--wrap=exp2f
CFLAGS += -Wl,--wrap=log2f
CFLAGS += -Wl,--wrap=exp10f
CFLAGS += -Wl,--wrap=log10f
CFLAGS += -Wl,--wrap=powf
CFLAGS += -Wl,--wrap=powintf
CFLAGS += -Wl,--wrap=hypotf
CFLAGS += -Wl,--wrap=cbrtf
CFLAGS += -Wl,--wrap=fmodf
CFLAGS += -Wl,--wrap=dremf
CFLAGS += -Wl,--wrap=remainderf
CFLAGS += -Wl,--wrap=remquof
CFLAGS += -Wl,--wrap=expm1f
CFLAGS += -Wl,--wrap=log1pf
CFLAGS += -Wl,--wrap=fmaf

# double precision floating point intrinsics
OPT_DEFS += -DPICO_DOUBLE_IN_RAM=1 -DPICO_DOUBLE_PROPAGATE_NANS=0

# One -Wl,--wrap=<symbol> flag is generated per listed name, in list order:
# first the __aeabi_* arithmetic/compare/conversion helpers, then the libm
# entry points. The commas inside the last $(foreach) argument are literal.
CFLAGS += $(foreach sym,\
__aeabi_dadd __aeabi_ddiv __aeabi_dmul __aeabi_drsub __aeabi_dsub \
__aeabi_cdcmpeq __aeabi_cdrcmple __aeabi_cdcmple \
__aeabi_dcmpeq __aeabi_dcmplt __aeabi_dcmple \
__aeabi_dcmpge __aeabi_dcmpgt __aeabi_dcmpun \
__aeabi_i2d __aeabi_l2d __aeabi_ui2d __aeabi_ul2d \
__aeabi_d2iz __aeabi_d2lz __aeabi_d2uiz __aeabi_d2ulz __aeabi_d2f \
sqrt cos sin tan atan2 exp log ldexp copysign \
trunc floor ceil round sincos \
asin acos atan sinh cosh tanh asinh acosh atanh \
exp2 log2 exp10 log10 pow powint hypot cbrt \
fmod drem remainder remquo expm1 log1p fma,-Wl,--wrap=$(sym))

# bit operation intrinsics
OPT_DEFS += -DPICO_BITS_IN_RAM=1

# Count-leading-zeros, count-trailing-zeros and popcount helpers; each symbol
# is wrapped exactly once.
CFLAGS += -Wl,--wrap=__clzsi2
CFLAGS += -Wl,--wrap=__clzdi2
CFLAGS += -Wl,--wrap=__ctzsi2
CFLAGS += -Wl,--wrap=__ctzdi2
CFLAGS += -Wl,--wrap=__popcountsi2
CFLAGS += -Wl,--wrap=__popcountdi2
CFLAGS += -Wl,--wrap=__clz
CFLAGS += -Wl,--wrap=__clzl
CFLAGS += -Wl,--wrap=__clzll

# integer division intrinsics
OPT_DEFS += -DPICO_DIVIDER_IN_RAM=1 -DPICO_DIVIDER_DISABLE_INTERRUPTS=1
# Generates one -Wl,--wrap=<symbol> flag per signed/unsigned 32 and 64 bit
# division helper, in list order.
CFLAGS += $(foreach sym,\
__aeabi_idiv __aeabi_idivmod __aeabi_ldivmod \
__aeabi_uidiv __aeabi_uidivmod __aeabi_uldivmod,-Wl,--wrap=$(sym))

# 64bit integer intrinsics
OPT_DEFS += -DPICO_INT64_OPS_IN_RAM=1
CFLAGS += -Wl,--wrap=__aeabi_lmul

# malloc and friends functions
OPT_DEFS += -DPICO_USE_MALLOC_MUTEX=0
OPT_DEFS += -DPICO_DEBUG_MALLOC=0
# Must be `+=`: a conditional `?=` assignment does nothing once OPT_DEFS holds
# the defines appended above, so -DPICO_MALLOC_PANIC=0 would never reach the
# compiler.
OPT_DEFS += -DPICO_MALLOC_PANIC=0

CFLAGS += -Wl,--wrap=malloc
CFLAGS += -Wl,--wrap=calloc
CFLAGS += -Wl,--wrap=free

# memory operation intrinsics
OPT_DEFS += -DPICO_MEM_IN_RAM=1

# One -Wl,--wrap=<symbol> flag per listed memcpy/memset variant, in list order.
CFLAGS += $(foreach sym,\
memcpy memset \
__aeabi_memcpy __aeabi_memset \
__aeabi_memcpy4 __aeabi_memset4 \
__aeabi_memcpy8 __aeabi_memset8,-Wl,--wrap=$(sym))

# Append the intrinsics sources and include directories defined earlier in
# this conditional to PLATFORM_SRC and EXTRAINCDIRS. How those two variables
# are consumed is outside this section.
PLATFORM_SRC += $(PICOSDKINTRINSICSSRC)
EXTRAINCDIRS += $(PICOSDKINTRINSICSINC)
# Closes the RP2040_INTRINSICS_ENABLED conditional.
endif
# The RP2040 SDK provides optimized compiler intrinsics which override the GCC
# built-ins. Some of these functions are located in the bootrom of the RP2040
# and are executed via a vtable that is populated on bootup. This mechanism
# needs startup code and linker script support from ChibiOS, which is
# currently not implemented, so the bootrom-based functions are disabled for
# now.
# Assembly sources for the divider and 64 bit integer intrinsics; both live
# below $(PICOSDKROOT)/src/rp2_common, so the prefix is attached by
# $(addprefix). The variable stays recursively expanded.
PICOSDKINTRINSICSSRC = $(addprefix $(PICOSDKROOT)/src/rp2_common/,\
pico_divider/divider.S pico_int64_ops/pico_int64_ops_aeabi.S)

# Include directories required to compile the intrinsics sources.
# NOTE: the SDK directory is spelled `pico_platform`; a misspelled entry is
# silently ignored by the compiler instead of producing an error.
PICOSDKINTRINSICSINC = $(PICOSDKROOT)/src/common/pico_base/include \
$(PICOSDKROOT)/src/rp2_common/pico_platform/include \
$(PICOSDKROOT)/src/rp2_common/hardware_divider/include

# integer division intrinsics utilizing the RP2040 hardware divider
OPT_DEFS += -DPICO_DIVIDER_IN_RAM=1 -DPICO_DIVIDER_DISABLE_INTERRUPTS=1
# Generates one -Wl,--wrap=<symbol> flag per signed/unsigned 32 and 64 bit
# division helper, in list order.
CFLAGS += $(foreach sym,\
__aeabi_idiv __aeabi_idivmod __aeabi_ldivmod \
__aeabi_uidiv __aeabi_uidivmod __aeabi_uldivmod,-Wl,--wrap=$(sym))

# 64bit integer intrinsics
OPT_DEFS += -DPICO_INT64_OPS_IN_RAM=1
CFLAGS += -Wl,--wrap=__aeabi_lmul

# Append the intrinsics sources and include directories defined above to
# PLATFORM_SRC and EXTRAINCDIRS. How those two variables are consumed is
# outside this section.
PLATFORM_SRC += $(PICOSDKINTRINSICSSRC)
EXTRAINCDIRS += $(PICOSDKINTRINSICSINC)