From 2d514a48d046281f76f69e62dbefed72d2964f91 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Tue, 4 Jun 2024 16:27:55 +0100 Subject: [PATCH] Call heuristics: defuse 'recent lr update' after a branch. This protects against misidentifying an internal branch within a function as a potential call on the strength of a sequence such as callee pops LR callee returns to LR caller tests return value caller conditionally branches in which the final conditional branch would previously have been a PendingCall candidate on the grounds that LR had been written <8 instructions previously. But I think that because another branch (the RET) happened _between_ the LR write and the conditional branch, that disqualifies the latter from being a call with that LR as its return value. The same thing can also happen to a conditional branch just after a call, because that too is a moment where LR has just been written. But that version of the problem only triggers if the call instruction and the callee are very close together, because of the 'within 64 bytes' clause in the heuristic analysis. However, that can happen when a function recursively calls itself - which caused 7 of our existing regression tests to fail, because the call tree analysis of tests/quicksort.tarmac had changed, because that error was happening in the recursive calls to quicksort! (An easy-to-observe reason why the new expected output for those tests is better: the tarmac-calltree output with symbols listed some calls to quicksort as having address 0x8038 and some as 0x8054, which is suspicious, because surely a function should have only one address. In fact 0x8038 is the real address of quicksort() in that test trace, and 0x8054 is an internal branch label. Now all the calls to quicksort list the former.) --- lib/index.cpp | 4 + samples/source/calltests/calltest32.S | 28 +++ samples/source/calltests/calltest64.S | 31 ++++ tests/calltests/calltest32.elf | Bin 1156 -> 1384 bytes tests/calltests/calltest32.ref | 102 ++++++----- tests/calltests/calltest32.tarmac | 230 ++++++++++++++----------- tests/calltests/calltest64.elf | Bin 1184 -> 1440 bytes tests/calltests/calltest64.ref | 66 ++++--- tests/calltests/calltest64.tarmac | 168 +++++++++++------- tests/calltree-quicksort-addr.ref | 56 +++--- tests/calltree-quicksort-symbols.ref | 56 +++--- tests/flamegraph-quicksort-addr.ref | 23 +-- tests/flamegraph-quicksort-symbols.ref | 12 +- tests/profile-quicksort-addr.ref | 3 +- tests/profile-quicksort-symbols.ref | 3 +- tests/vcd-quicksort-nodate.ref | 28 +-- 16 files changed, 478 insertions(+), 332 deletions(-) diff --git a/lib/index.cpp b/lib/index.cpp index 9bcc4ae..1b42a2f 100644 --- a/lib/index.cpp +++ b/lib/index.cpp @@ -262,6 +262,10 @@ void Index::update_pc(unsigned long long pc, unsigned long long next_pc, pending_calls.insert(PendingCall(sp, lr, prev_lineno)); } + + // After a transfer of control, reset insns_since_lr_update to + // pretend lr hasn't been updated recently. + insns_since_lr_update = BRANCH_LR_WRITE_THRESHOLD; } curr_pc = pc; diff --git a/samples/source/calltests/calltest32.S b/samples/source/calltests/calltest32.S index 4fdc5ae..25fad87 100644 --- a/samples/source/calltests/calltest32.S +++ b/samples/source/calltests/calltest32.S @@ -54,6 +54,14 @@ loopend: // of call, so these ones can be done the standard way. bl internal_bl bl internal_bx_lr + bl branch_after_call_wrapper + // After returning from this function, immediately take a + // conditional branch, which should not be identified as even a + // _candidate_ for being a call. + cmp r0, #122 + bne branch_target_after_return + mov r0, #121 +branch_target_after_return: // We're done. Exit. mov r0, #0x18 @@ -139,3 +147,23 @@ internal_bx_lr: b done done: mul r0, r0, r0 pop {r4, pc} + + // Wrapper function that calls branch_after_call, while being + // very near it, so that the internal branch might trip the + // call detector. +branch_after_call_wrapper: + push {r4, lr} + bl branch_after_call + pop {r4, lr} + bx lr + // A function that does a conditional branch immediately after + // being called. +branch_after_call: + push {r4, lr} + cmp r0, #123 + bne branch_after_call_internal_label + mov r0, #122 +branch_after_call_internal_label: + add r0, r0, #10 + pop {r4, lr} + bx lr diff --git a/samples/source/calltests/calltest64.S b/samples/source/calltests/calltest64.S index 2bcfb55..d229914 100644 --- a/samples/source/calltests/calltest64.S +++ b/samples/source/calltests/calltest64.S @@ -47,6 +47,17 @@ loopstart: mov x1, #0x100000000 b leaf_ret loopend: + // More calls to test functions. I've run out of unusual types + // of call, so these ones can be done the standard way. + bl branch_after_call_wrapper + // After returning from this function, immediately take a + // conditional branch, which should not be identified as even a + // _candidate_ for being a call. + cmp x0, #122 + bne branch_target_after_return + mov x0, #121 +branch_target_after_return: + // We're done. Exit. mov x0, #0x18 ldr x1, =0x20026 @@ -71,3 +82,23 @@ nonleaf_pop_lr: // Helper function for the indirect call above. call_via_x1: br x1 + + // Wrapper function that calls branch_after_call, while being + // very near it, so that the internal branch might trip the + // call detector. +branch_after_call_wrapper: + stp x19, lr, [sp, #-16]! + bl branch_after_call + ldp x19, lr, [sp], #16 + ret + // A function that does a conditional branch immediately after + // being called. +branch_after_call: + stp x19, lr, [sp, #-16]! + cmp x0, #123 + bne branch_after_call_internal_label + mov x0, #122 +branch_after_call_internal_label: + add x0, x0, #10 + ldp x19, lr, [sp], #16 + ret diff --git a/tests/calltests/calltest32.elf b/tests/calltests/calltest32.elf index eb2eef3b00076775fb3ab20b8c5663999614fb09..3fe31530856a8247e05fc1e3be1a9cc654889505 100755 GIT binary patch delta 579 zcmZXR%}WAd5XR@-U7^HC)4)tb>|#M!Q519$qUd7}b{A~b?E@CE#meenhoHJd@}49A zfFObnbtr;P-RjUiGr_WRmk#xaF1*mdu+MMinVE%mzrAKZ2}*gT7)shyMx+*x#uq(A z^`PslI{X_c`|<63EtOr+9_RpiM5PLAi&fwsP|9c3HloiS)I-P!a))9SBJyp+=S9AA z#xBO3(ZLT))Sc1WS^(P`yxVf_z-6bsH>)?nB$`Er`3+led7+xY$ORGS-BP5mBvdC+ zJrsBzd@pbs>=3vB4hwt?Zrp4Jqf>~+eT&Z_@&exx&Cs<_c!1$a;1}?#z~A66fq%eP zBvG0_O1q;;Q_G}Q6WVf4Hxnt%FcMp)md)xWwK_S?T+wqagQ@2>%nYBBr$_z`S_8tW zwG6gqG$UbXN!{QF@*Gq7v)pIHdGhf~+07k_v%?P>!u#0pXI;Es>9wgc-j}Z{ZXQ>h M-c68%z7>}I1J>++%m4rY delta 337 zcmaFC)xtSJfoTf!M5QEF0Y(M}fr*{wjsgq}ub&9ae`@$&|KS55&HrG*LqQ<>29PZU zlw)96@R*S!;4x6W)`K;lON`5-ePfRP~s$N-rSI>l7Ku>g8!j<0H(XlCJUNb0&-8Qk{*}x?>Ce>%R~%rD zzXsyVzkw)q04ddAmkSGK*7#Y@}sc(jg??Dq^0u?_25tjmULBvWR76MwJ0M!sOc_C9= zeNs_kUUEi!Vp>URQG9Y@PELGzQDQ+sY7qlerX;Z_J+%a?peVJZv?z}uFF!9QH8Cx| zAip3!2d)xf223wPBtA0_XhB|LPJB*cQfdx^N{XH#Lws?`WHII}#uJkpnavqrOy0?C oFUr9Laj*p=gk}X&Jd=4@w3!^3CtI=Dab^J{1_U-tc4UzU0DhTyg#Z8m delta 249 zcmZ3$y?}FqCL{Ait!11O7$9K6#GAoJd<+az7?~L&6d3HUfoNeMpMjy_k|M)_OY#hi zuag)ULgq0sM6fV0h%hiWTuNk`oX4og3o@aEQ4wrV%H)fT`HU%(9ht-#J0?dmxm&_y zAoegY*nk;~3=^O_VB(HIh7bcIg923CWwIr69A933UQTLaT6{r%L3~cpWL}nH#tD-f uS