From 9f6e2aac73ca1f863d236f86f446b0894c8ebcef Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Wed, 2 Aug 2017 14:26:16 +0200 Subject: [PATCH 1/6] Improve stack offset addressing Functions which require large amounts of stack for spilling and/or arguments for function calls lead to stackframe offsets that exceed the 12bit limit of immediate constants in ARM instructions. This patch fixes the stack-offsets in the function prolog/epilog. --- arm/Asm.v | 23 +++++++++++++--- arm/Asmexpand.ml | 62 ++++++++++++++++++++++++++++++++------------ arm/Asmgen.v | 2 +- arm/Asmgenproof.v | 4 +-- arm/TargetPrinter.ml | 17 ++++-------- 5 files changed, 73 insertions(+), 35 deletions(-) diff --git a/arm/Asm.v b/arm/Asm.v index 08234975c9..6ba09a8fb8 100644 --- a/arm/Asm.v +++ b/arm/Asm.v @@ -199,15 +199,17 @@ Inductive instruction : Type := | Pfsts: freg -> ireg -> int -> instruction (**r float32 store *) (* Pseudo-instructions *) - | Pallocframe: Z -> ptrofs -> instruction (**r allocate new stack frame *) - | Pfreeframe: Z -> ptrofs -> instruction (**r deallocate stack frame and restore previous frame *) - | Plabel: label -> instruction (**r define a code label *) + | Pallocframe: Z -> ptrofs -> instruction (**r allocate new stack frame *) + | Pfreeframe: Z -> ptrofs -> instruction (**r deallocate stack frame and restore previous frame *) + | Psavelr: ptrofs -> instruction (**r store link register in allocated stack frame *) + | Plabel: label -> instruction (**r define a code label *) | Ploadsymbol: ireg -> ident -> ptrofs -> instruction (**r load the address of a symbol *) | Pmovite: testcond -> ireg -> shift_op -> shift_op -> instruction (**r integer conditional move *) | Pbtbl: ireg -> list label -> instruction (**r N-way branch through a jump table *) | Pbuiltin: external_function -> list (builtin_arg preg) -> builtin_res preg -> instruction (**r built-in function (pseudo) *) | Padc: ireg -> ireg -> shift_op -> instruction (**r add with carry *) | Pcfi_adjust: int -> instruction (**r .cfi_adjust debug directive *) + | Pcfi_rel_offset: int -> instruction (**r .cfi_rel_offset debug directive *) | Pclz: ireg -> ireg -> instruction (**r count leading zeros. *) | Pfsqrt: freg -> freg -> instruction (**r floating-point square root. *) | Prev: ireg -> ireg -> instruction (**r reverse bytes and reverse bits. *) @@ -268,6 +270,18 @@ lbl: .word symbol >> Again, our memory model cannot comprehend that this operation frees (logically) the current stack frame. +- [Psavelr pos]: this pseudo-instruction stores the link register in the + stackframe at the specified position. It handles out-of-range offsets, + i.e. the generated code for in-range offsets is: +<< + str lr, [sp, #pos] +>> + and for out-of-range offsets: +<< + add sp, sp, #pos (* code to expand the immediate *) + str lr, [sp] + sub sp, sp, #pos (* code to expand the immediate *) +>> - [Pbtbl reg table]: this is a N-way branch, implemented via a jump table as follows: << @@ -736,6 +750,8 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | _ => Stuck end end + | Psavelr pos => + exec_store Mint32 (Val.offset_ptr rs#IR13 pos) IR14 rs m | Plabel lbl => Next (nextinstr rs) m | Ploadsymbol r1 lbl ofs => @@ -763,6 +779,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Ppush _ | Padc _ _ _ | Pcfi_adjust _ + | Pcfi_rel_offset _ | Pclz _ _ | Prev _ _ | Prev16 _ _ diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml index a32b0e8ba0..1a473e036e 100644 --- a/arm/Asmexpand.ml +++ b/arm/Asmexpand.ml @@ -46,18 +46,22 @@ let expand_movimm dst n = (fun n -> emit (Porr (dst,dst, SOimm n))) tl let expand_subimm dst src n = - match Asmgen.decompose_int n with - | [] -> assert false - | hd::tl -> - emit (Psub(dst,src,SOimm hd)); - List.iter (fun n -> emit (Psub (dst,dst,SOimm n))) tl + if dst <> src || n <> _0 then begin + match Asmgen.decompose_int n with + | [] -> assert false + | hd::tl -> + emit (Psub(dst,src,SOimm hd)); + List.iter (fun n -> emit (Psub (dst,dst,SOimm n))) tl + end let expand_addimm dst src n = - match Asmgen.decompose_int n with - | [] -> assert false - | hd::tl -> - emit (Padd (dst,src,SOimm hd)); - List.iter (fun n -> emit (Padd (dst,dst,SOimm n))) tl + if dst <> src || n <> _0 then begin + match Asmgen.decompose_int n with + | [] -> assert false + | hd::tl -> + emit (Padd (dst,src,SOimm hd)); + List.iter (fun n -> emit (Padd (dst,dst,SOimm n))) tl + end let expand_int64_arith conflict rl fn = if conflict then @@ -410,12 +414,22 @@ let expand_instruction instr = | Pallocframe (sz, ofs) -> emit (Pmov (IR12,SOreg IR13)); if (is_current_function_variadic ()) then begin - emit (Ppush [IR0;IR1;IR2;IR3]); - emit (Pcfi_adjust _16); - end; - expand_subimm IR13 IR13 sz; - emit (Pcfi_adjust sz); - emit (Pstr (IR12,IR13,SOimm ofs)); + emit (Ppush [IR0;IR1;IR2;IR3]); + emit (Pcfi_adjust _16); + end; + let sz' = camlint_of_coqint sz in + let ofs' = camlint_of_coqint ofs in + if ofs' >= 4096l && sz' >= ofs' then begin + expand_subimm IR13 IR13 (coqint_of_camlint (Int32.sub sz' (Int32.add ofs' 4l))); + emit (Ppush [IR13]); + expand_subimm IR13 IR13 ofs; + emit (Pcfi_adjust sz); + end else begin + assert (ofs' < 4096l); + expand_subimm IR13 IR13 sz; + emit (Pcfi_adjust sz); + emit (Pstr (IR12,IR13,SOimm ofs)); + end; PrintAsmaux.current_function_stacksize := camlint_of_coqint sz | Pfreeframe (sz, ofs) -> let sz = @@ -424,7 +438,21 @@ let expand_instruction instr = else sz in if Asmgen.is_immed_arith sz then emit (Padd (IR13,IR13,SOimm sz)) - else emit (Pldr (IR13,IR13,SOimm ofs)) + else begin + if camlint_of_coqint ofs >= 4096l then begin + expand_addimm IR13 IR13 ofs; + emit (Pldr (IR13,IR13,SOimm _0)) + end else + emit (Pldr (IR13,IR13,SOimm ofs)); + end + | Psavelr ofs -> + if camlint_of_coqint ofs >= 4096l then begin + expand_addimm IR13 IR13 ofs; + emit (Pstr (IR14,IR13,SOimm _0)); + expand_subimm IR13 IR13 ofs + end else + emit (Pstr (IR14,IR13,SOimm ofs)); + emit (Pcfi_rel_offset ofs) | Pbuiltin (ef,args,res) -> begin match ef with | EF_builtin (name,sg) -> diff --git a/arm/Asmgen.v b/arm/Asmgen.v index e7a3b4fad9..125e95ff6e 100644 --- a/arm/Asmgen.v +++ b/arm/Asmgen.v @@ -790,7 +790,7 @@ Definition transl_function (f: Mach.function) := do c <- transl_code f f.(Mach.fn_code) true; OK (mkfunction f.(Mach.fn_sig) (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) :: - Pstr IR14 IR13 (SOimm (Ptrofs.to_int f.(fn_retaddr_ofs))) :: c)). + Psavelr f.(fn_retaddr_ofs) :: c)). Definition transf_function (f: Mach.function) : res Asm.function := do tf <- transl_function f; diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v index 09c20d5c4e..f306f43a32 100644 --- a/arm/Asmgenproof.v +++ b/arm/Asmgenproof.v @@ -854,7 +854,7 @@ Opaque loadind. generalize EQ; intros EQ'. monadInv EQ'. destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x0))); inversion EQ1. clear EQ1. subst x0. monadInv EQ0. - set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) :: Pstr IR14 IR13 (SOimm (Ptrofs.to_int (fn_retaddr_ofs f))) :: x0) in *. + set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) :: Psavelr (fn_retaddr_ofs f) :: x0) in *. set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *. unfold store_stack in *. exploit Mem.alloc_extends. eauto. eauto. apply Zle_refl. apply Zle_refl. @@ -877,7 +877,7 @@ Opaque loadind. simpl. auto. simpl. unfold exec_store. change (rs2 IR14) with (rs0 IR14). rewrite Ptrofs.add_zero_l. simpl. unfold Tptr, chunk_of_type, Archi.ptr64 in P. simpl in P. - rewrite Ptrofs.add_zero_l in P. rewrite ATLR. rewrite Ptrofs.of_int_to_int by auto. + rewrite Ptrofs.add_zero_l in P. rewrite ATLR. rewrite P. auto. auto. auto. left; exists (State rs3 m3'); split. eapply exec_straight_steps_1; eauto. omega. constructor. diff --git a/arm/TargetPrinter.ml b/arm/TargetPrinter.ml index 60f52efd25..02c348a852 100644 --- a/arm/TargetPrinter.ml +++ b/arm/TargetPrinter.ml @@ -539,23 +539,13 @@ struct | Psbc (r1,r2,sa) -> fprintf oc " sbc %a, %a, %a\n" ireg r1 ireg r2 shift_op sa; 1 | Pstr(r1, r2, sa) | Pstr_a(r1, r2, sa) -> - fprintf oc " str %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; - begin match r1, r2, sa with - | IR14, IR13, SOimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n) - | _ -> () - end; - 1 + fprintf oc " str %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pstrb(r1, r2, sa) -> fprintf oc " strb %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pstrh(r1, r2, sa) -> fprintf oc " strh %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pstr_p(r1, r2, sa) -> - fprintf oc " str %a, [%a], %a\n" ireg r1 ireg r2 shift_op sa; - begin match r1, r2, sa with - | IR14, IR13, SOimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n) - | _ -> () - end; - 1 + fprintf oc " str %a, [%a], %a\n" ireg r1 ireg r2 shift_op sa; 1 | Pstrb_p(r1, r2, sa) -> fprintf oc " strb %a, [%a], %a\n" ireg r1 ireg r2 shift_op sa; 1 | Pstrh_p(r1, r2, sa) -> @@ -703,6 +693,8 @@ struct assert false | Pfreeframe(sz, ofs) -> assert false + | Psavelr(ofs) -> + assert false | Plabel lbl -> fprintf oc "%a:\n" print_label lbl; 0 | Ploadsymbol(r1, id, ofs) -> @@ -751,6 +743,7 @@ struct assert false end | Pcfi_adjust sz -> cfi_adjust oc (camlint_of_coqint sz); 0 + | Pcfi_rel_offset ofs -> cfi_rel_offset oc "lr" (camlint_of_coqint ofs); 0 let no_fallthrough = function | Pb _ -> true From e5d8d8d2d67daed762f6e0cc8f486b2c1b37bb20 Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Wed, 2 Aug 2017 16:01:41 +0200 Subject: [PATCH 2/6] Push correct register --- arm/Asmexpand.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml index 1a473e036e..fbe2e0bb22 100644 --- a/arm/Asmexpand.ml +++ b/arm/Asmexpand.ml @@ -421,7 +421,7 @@ let expand_instruction instr = let ofs' = camlint_of_coqint ofs in if ofs' >= 4096l && sz' >= ofs' then begin expand_subimm IR13 IR13 (coqint_of_camlint (Int32.sub sz' (Int32.add ofs' 4l))); - emit (Ppush [IR13]); + emit (Ppush [IR12]); expand_subimm IR13 IR13 ofs; emit (Pcfi_adjust sz); end else begin From c9d01df3577a23e20abbe934f6f36f17dbbb82cd Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Thu, 17 Aug 2017 09:45:44 +0200 Subject: [PATCH 3/6] ARM: Generate Pcfi_rel_offset directives directly from Asmgen This is what we do for PowerPC and is more resilient to changes in code generation. We need to give Pcfi_rel_offset a dynamic semantics, but that's just a no-op. --- arm/Asm.v | 3 ++- arm/Asmexpand.ml | 3 +-- arm/Asmgen.v | 3 ++- arm/Asmgenproof.v | 20 ++++++++++++-------- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/arm/Asm.v b/arm/Asm.v index 6ba09a8fb8..7a39313989 100644 --- a/arm/Asm.v +++ b/arm/Asm.v @@ -773,13 +773,14 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out end | _ => Stuck end + | Pcfi_rel_offset ofs => + Next (nextinstr rs) m | Pbuiltin ef args res => Stuck (**r treated specially below *) (** The following instructions and directives are not generated directly by Asmgen, so we do not model them. *) | Ppush _ | Padc _ _ _ | Pcfi_adjust _ - | Pcfi_rel_offset _ | Pclz _ _ | Prev _ _ | Prev16 _ _ diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml index fbe2e0bb22..c4e7e77dbc 100644 --- a/arm/Asmexpand.ml +++ b/arm/Asmexpand.ml @@ -451,8 +451,7 @@ let expand_instruction instr = emit (Pstr (IR14,IR13,SOimm _0)); expand_subimm IR13 IR13 ofs end else - emit (Pstr (IR14,IR13,SOimm ofs)); - emit (Pcfi_rel_offset ofs) + emit (Pstr (IR14,IR13,SOimm ofs)) | Pbuiltin (ef,args,res) -> begin match ef with | EF_builtin (name,sg) -> diff --git a/arm/Asmgen.v b/arm/Asmgen.v index 125e95ff6e..8c49d88132 100644 --- a/arm/Asmgen.v +++ b/arm/Asmgen.v @@ -790,7 +790,8 @@ Definition transl_function (f: Mach.function) := do c <- transl_code f f.(Mach.fn_code) true; OK (mkfunction f.(Mach.fn_sig) (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) :: - Psavelr f.(fn_retaddr_ofs) :: c)). + Psavelr f.(fn_retaddr_ofs) :: + Pcfi_rel_offset (Ptrofs.to_int f.(fn_retaddr_ofs)):: c)). Definition transf_function (f: Mach.function) : res Asm.function := do tf <- transl_function f; diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v index f306f43a32..7d963a471b 100644 --- a/arm/Asmgenproof.v +++ b/arm/Asmgenproof.v @@ -854,7 +854,9 @@ Opaque loadind. generalize EQ; intros EQ'. monadInv EQ'. destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x0))); inversion EQ1. clear EQ1. subst x0. monadInv EQ0. - set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) :: Psavelr (fn_retaddr_ofs f) :: x0) in *. + set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) :: + Psavelr (fn_retaddr_ofs f) :: + Pcfi_rel_offset (Ptrofs.to_int (fn_retaddr_ofs f)) :: x0) in *. set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *. unfold store_stack in *. exploit Mem.alloc_extends. eauto. eauto. apply Zle_refl. apply Zle_refl. @@ -866,28 +868,30 @@ Opaque loadind. (* Execution of function prologue *) set (rs2 := nextinstr (rs0#IR12 <- (parent_sp s) #IR13 <- (Vptr stk Ptrofs.zero))). set (rs3 := nextinstr rs2). + set (rs4 := nextinstr rs3). assert (EXEC_PROLOGUE: exec_straight tge tf (fn_code tf) rs0 m' - x0 rs3 m3'). + x0 rs4 m3'). change (fn_code tf) with tfbody; unfold tfbody. - apply exec_straight_two with rs2 m2'. + apply exec_straight_three with rs2 m2' rs3 m3'. unfold exec_instr. rewrite C. fold sp. rewrite <- (sp_val _ _ _ AG). unfold Tptr, chunk_of_type, Archi.ptr64 in F. rewrite F. auto. simpl. auto. simpl. unfold exec_store. change (rs2 IR14) with (rs0 IR14). rewrite Ptrofs.add_zero_l. simpl. unfold Tptr, chunk_of_type, Archi.ptr64 in P. simpl in P. rewrite Ptrofs.add_zero_l in P. rewrite ATLR. - rewrite P. auto. auto. auto. - left; exists (State rs3 m3'); split. + rewrite P. auto. auto. auto. auto. auto. + left; exists (State rs4 m3'); split. eapply exec_straight_steps_1; eauto. omega. constructor. econstructor; eauto. - change (rs3 PC) with (Val.offset_ptr (Val.offset_ptr (rs0 PC) Ptrofs.one) Ptrofs.one). + change (rs4 PC) with (Val.offset_ptr (Val.offset_ptr (Val.offset_ptr (rs0 PC) Ptrofs.one) Ptrofs.one) Ptrofs.one). rewrite ATPC. simpl. constructor; eauto. eapply code_tail_next_int. omega. + eapply code_tail_next_int. omega. eapply code_tail_next_int. omega. constructor. - unfold rs3, rs2. - apply agree_nextinstr. apply agree_nextinstr. + unfold rs4, rs2. + apply agree_nextinstr. apply agree_nextinstr. apply agree_nextinstr. eapply agree_change_sp. apply agree_undef_regs with rs0; eauto. intros. Simpl. congruence. From 82c4547961d567003a83d6c489e06f1271e80a7f Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Thu, 17 Aug 2017 11:18:19 +0200 Subject: [PATCH 4/6] Asmgenproof0: some more useful lemmas Next commit uses those lemmas in the ARM port. --- backend/Asmgenproof0.v | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/backend/Asmgenproof0.v b/backend/Asmgenproof0.v index 53ecf73ade..0250628b34 100644 --- a/backend/Asmgenproof0.v +++ b/backend/Asmgenproof0.v @@ -285,6 +285,23 @@ Proof. exploit preg_of_injective; eauto. congruence. Qed. +Lemma agree_undef_regs2: + forall ms sp rl rs rs', + agree (Mach.undef_regs rl ms) sp rs -> + (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') -> + agree (Mach.undef_regs rl ms) sp rs'. +Proof. + intros. destruct H. split; auto. + rewrite <- agree_sp0. apply H0; auto. + rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP. + intros. destruct (In_dec mreg_eq r rl). + rewrite Mach.undef_regs_same; auto. + rewrite H0; auto. + apply preg_of_data. + rewrite preg_notin_charact. intros; red; intros. elim n. + exploit preg_of_injective; eauto. congruence. +Qed. + Lemma agree_set_undef_mreg: forall ms sp rs r v rl rs', agree ms sp rs -> @@ -738,6 +755,18 @@ Ltac TailNoLabel := | _ => idtac end. +Remark tail_nolabel_find_label: + forall lbl k c, tail_nolabel k c -> find_label lbl c = find_label lbl k. +Proof. + intros. destruct H. auto. +Qed. + +Remark tail_nolabel_is_tail: + forall k c, tail_nolabel k c -> is_tail k c. +Proof. + intros. destruct H. auto. +Qed. + (** * Execution of straight-line code *) Section STRAIGHTLINE. From 6c34468898e3726b53c875023730fae7caaf88ee Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Thu, 17 Aug 2017 11:26:22 +0200 Subject: [PATCH 5/6] ARM port: replace Psavelr pseudo-instruction by actual instructions Saving the return address register (R14) in the function prologue can be done either with a single "str" instruction if the offset is small enough, or by constructing the address using the R12 register as a temporary then storing with "str" relative to R12. R12 can be used as a temporary because it is marked as destroyed at function entry. We just need to tell Asmgen.translcode whether R12 still contains the back link left there by Pallocframe, or R12 was trashed. In the latter case R12 will be reloaded from the stack at the next Mgetparam instruction. --- arm/Asm.v | 15 ------------ arm/Asmexpand.ml | 7 ------ arm/Asmgen.v | 23 +++++++++++++++--- arm/Asmgenproof.v | 57 ++++++++++++++++++++++++++++---------------- arm/Asmgenproof1.v | 31 ++++++++++++++++++++++++ arm/TargetPrinter.ml | 2 -- 6 files changed, 88 insertions(+), 47 deletions(-) diff --git a/arm/Asm.v b/arm/Asm.v index 7a39313989..85eb94c18e 100644 --- a/arm/Asm.v +++ b/arm/Asm.v @@ -201,7 +201,6 @@ Inductive instruction : Type := (* Pseudo-instructions *) | Pallocframe: Z -> ptrofs -> instruction (**r allocate new stack frame *) | Pfreeframe: Z -> ptrofs -> instruction (**r deallocate stack frame and restore previous frame *) - | Psavelr: ptrofs -> instruction (**r store link register in allocated stack frame *) | Plabel: label -> instruction (**r define a code label *) | Ploadsymbol: ireg -> ident -> ptrofs -> instruction (**r load the address of a symbol *) | Pmovite: testcond -> ireg -> shift_op -> shift_op -> instruction (**r integer conditional move *) @@ -270,18 +269,6 @@ lbl: .word symbol >> Again, our memory model cannot comprehend that this operation frees (logically) the current stack frame. -- [Psavelr pos]: this pseudo-instruction stores the link register in the - stackframe at the specified position. It handles out-of-range offsets, - i.e. the generated code for in-range offsets is: -<< - str lr, [sp, #pos] ->> - and for out-of-range offsets: -<< - add sp, sp, #pos (* code to expand the immediate *) - str lr, [sp] - sub sp, sp, #pos (* code to expand the immediate *) ->> - [Pbtbl reg table]: this is a N-way branch, implemented via a jump table as follows: << @@ -750,8 +737,6 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | _ => Stuck end end - | Psavelr pos => - exec_store Mint32 (Val.offset_ptr rs#IR13 pos) IR14 rs m | Plabel lbl => Next (nextinstr rs) m | Ploadsymbol r1 lbl ofs => diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml index c4e7e77dbc..04b4152d04 100644 --- a/arm/Asmexpand.ml +++ b/arm/Asmexpand.ml @@ -445,13 +445,6 @@ let expand_instruction instr = end else emit (Pldr (IR13,IR13,SOimm ofs)); end - | Psavelr ofs -> - if camlint_of_coqint ofs >= 4096l then begin - expand_addimm IR13 IR13 ofs; - emit (Pstr (IR14,IR13,SOimm _0)); - expand_subimm IR13 IR13 ofs - end else - emit (Pstr (IR14,IR13,SOimm ofs)) | Pbuiltin (ef,args,res) -> begin match ef with | EF_builtin (name,sg) -> diff --git a/arm/Asmgen.v b/arm/Asmgen.v index 8c49d88132..1b96416d94 100644 --- a/arm/Asmgen.v +++ b/arm/Asmgen.v @@ -602,6 +602,22 @@ Definition storeind (src: mreg) (base: ireg) (ofs: ptrofs) (ty: typ) (k: code) : Error (msg "Asmgen.storeind") end. +(** This is a variant of [storeind] that is used to save the return address + at the beginning of a function. It uses [R12] instead of [R14] as + temporary register. *) + +Definition save_lr (ofs: ptrofs) (k: code) := + let n := Ptrofs.to_int ofs in + let n1 := mk_immed_mem_word n in + if Int.eq n n1 + then Pstr IR14 IR13 (SOimm n) :: k + else addimm IR12 IR13 (Int.sub n n1) (Pstr IR14 IR12 (SOimm n1) :: k). + +Definition save_lr_preserves_R12 (ofs: ptrofs) : bool := + let n := Ptrofs.to_int ofs in + let n1 := mk_immed_mem_word n in + Int.eq n n1. + (** Translation of memory accesses *) Definition transl_memory_access @@ -787,11 +803,12 @@ Definition transl_code' (f: Mach.function) (il: list Mach.instruction) (it1p: bo around, leading to incorrect executions. *) Definition transl_function (f: Mach.function) := - do c <- transl_code f f.(Mach.fn_code) true; + do c <- transl_code f f.(Mach.fn_code) + (save_lr_preserves_R12 f.(fn_retaddr_ofs)); OK (mkfunction f.(Mach.fn_sig) (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) :: - Psavelr f.(fn_retaddr_ofs) :: - Pcfi_rel_offset (Ptrofs.to_int f.(fn_retaddr_ofs)):: c)). + save_lr f.(fn_retaddr_ofs) + (Pcfi_rel_offset (Ptrofs.to_int f.(fn_retaddr_ofs)):: c))). Definition transf_function (f: Mach.function) : res Asm.function := do tf <- transl_function f; diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v index 7d963a471b..71a0e04915 100644 --- a/arm/Asmgenproof.v +++ b/arm/Asmgenproof.v @@ -241,6 +241,15 @@ Proof. destruct ty, (preg_of src); inv H; TailNoLabel. Qed. +Remark save_lr_label: + forall ofs k, tail_nolabel k (save_lr ofs k). +Proof. + unfold save_lr; intros. + destruct (Int.eq (Ptrofs.to_int ofs) (mk_immed_mem_word (Ptrofs.to_int ofs))). + TailNoLabel. + eapply tail_nolabel_trans; TailNoLabel. +Qed. + Remark transl_cond_label: forall cond args k c, transl_cond cond args k = OK c -> tail_nolabel k c. Proof. @@ -338,7 +347,7 @@ Lemma transl_find_label: end. Proof. intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x))); inv EQ0. - monadInv EQ. simpl. + monadInv EQ. simpl. erewrite tail_nolabel_find_label by (apply save_lr_label). simpl. eapply transl_code_label; eauto. Qed. @@ -382,7 +391,8 @@ Proof. destruct i; try (intros [A B]; apply A). intros. subst c0. repeat constructor. - intros. monadInv H0. destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x))); inv EQ0. monadInv EQ. - exists x; exists true; split; auto. repeat constructor. + exists x; exists (save_lr_preserves_R12 (fn_retaddr_ofs f0)); split; auto. + constructor. eapply is_tail_trans. 2: apply tail_nolabel_is_tail; apply save_lr_label. repeat constructor. - exact transf_function_no_overflow. Qed. @@ -854,9 +864,10 @@ Opaque loadind. generalize EQ; intros EQ'. monadInv EQ'. destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x0))); inversion EQ1. clear EQ1. subst x0. monadInv EQ0. + set (ra_ofs := fn_retaddr_ofs f) in *. + set (ra_ofs' := Ptrofs.to_int ra_ofs) in *. set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) :: - Psavelr (fn_retaddr_ofs f) :: - Pcfi_rel_offset (Ptrofs.to_int (fn_retaddr_ofs f)) :: x0) in *. + save_lr ra_ofs (Pcfi_rel_offset ra_ofs' :: x0)) in *. set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *. unfold store_stack in *. exploit Mem.alloc_extends. eauto. eauto. apply Zle_refl. apply Zle_refl. @@ -867,34 +878,40 @@ Opaque loadind. intros [m3' [P Q]]. (* Execution of function prologue *) set (rs2 := nextinstr (rs0#IR12 <- (parent_sp s) #IR13 <- (Vptr stk Ptrofs.zero))). - set (rs3 := nextinstr rs2). + edestruct (save_lr_correct tge tf ra_ofs (Pcfi_rel_offset ra_ofs' :: x0) rs2) as (rs3 & X & Y & Z). + change (rs2 IR13) with sp. change (rs2 IR14) with (rs0 IR14). rewrite ATLR. eexact P. set (rs4 := nextinstr rs3). assert (EXEC_PROLOGUE: exec_straight tge tf (fn_code tf) rs0 m' x0 rs4 m3'). + { change (fn_code tf) with tfbody; unfold tfbody. - apply exec_straight_three with rs2 m2' rs3 m3'. + eapply exec_straight_trans with (rs2 := rs2) (m2 := m2'). + apply exec_straight_one. unfold exec_instr. rewrite C. fold sp. rewrite <- (sp_val _ _ _ AG). unfold Tptr, chunk_of_type, Archi.ptr64 in F. rewrite F. auto. - simpl. auto. - simpl. unfold exec_store. change (rs2 IR14) with (rs0 IR14). - rewrite Ptrofs.add_zero_l. simpl. unfold Tptr, chunk_of_type, Archi.ptr64 in P. simpl in P. - rewrite Ptrofs.add_zero_l in P. rewrite ATLR. - rewrite P. auto. auto. auto. auto. auto. + auto. + eapply exec_straight_trans with (rs2 := rs3) (m2 := m3'). + eexact X. + apply exec_straight_one. + simpl; reflexivity. reflexivity. + } + (* After the function prologue is the code for the function body *) + exploit exec_straight_steps_2; eauto using functions_transl. omega. constructor. + intros (ofsbody & U & V). + (* Conclusions *) left; exists (State rs4 m3'); split. eapply exec_straight_steps_1; eauto. omega. constructor. - econstructor; eauto. - change (rs4 PC) with (Val.offset_ptr (Val.offset_ptr (Val.offset_ptr (rs0 PC) Ptrofs.one) Ptrofs.one) Ptrofs.one). - rewrite ATPC. simpl. constructor; eauto. - eapply code_tail_next_int. omega. - eapply code_tail_next_int. omega. - eapply code_tail_next_int. omega. constructor. - unfold rs4, rs2. - apply agree_nextinstr. apply agree_nextinstr. apply agree_nextinstr. + econstructor; eauto. rewrite U. econstructor; eauto. + apply agree_nextinstr. + apply agree_undef_regs2 with rs2. + apply agree_nextinstr. eapply agree_change_sp. apply agree_undef_regs with rs0; eauto. - intros. Simpl. congruence. + intros; Simpl. + congruence. + intros; apply Y; eauto with asmgen. - (* external function *) exploit functions_translated; eauto. diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v index eec531dc50..7084336e71 100644 --- a/arm/Asmgenproof1.v +++ b/arm/Asmgenproof1.v @@ -616,6 +616,37 @@ Proof. intros; Simpl. Qed. +(** Saving the link register *) + +Lemma save_lr_correct: + forall ofs k (rs: regset) m m', + Mem.storev Mint32 m (Val.offset_ptr rs#IR13 ofs) (rs#IR14) = Some m' -> + exists rs', + exec_straight ge fn (save_lr ofs k) rs m k rs' m' + /\ (forall r, if_preg r = true -> r <> IR12 -> rs'#r = rs#r) + /\ (save_lr_preserves_R12 ofs = true -> rs'#IR12 = rs#IR12). +Proof. + intros; unfold save_lr, save_lr_preserves_R12. + set (n := Ptrofs.to_int ofs). set (n1 := mk_immed_mem_word n). + assert (EQ: Val.offset_ptr rs#IR13 ofs = Val.add rs#IR13 (Vint n)). + { destruct rs#IR13; try discriminate. simpl. f_equal; f_equal. unfold n; symmetry; auto with ptrofs. } + destruct (Int.eq n n1). +- econstructor; split. apply exec_straight_one. simpl; unfold exec_store. rewrite <- EQ, H; reflexivity. auto. + split. intros; Simpl. intros; Simpl. +- destruct (addimm_correct IR12 IR13 (Int.sub n n1) (Pstr IR14 IR12 (SOimm n1) :: k) rs m) + as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. simpl; unfold exec_store. + rewrite B. rewrite Val.add_assoc. simpl. + rewrite Int.sub_add_opp. rewrite Int.add_assoc. + rewrite (Int.add_commut (Int.neg n1)). + rewrite Int.add_neg_zero. rewrite Int.add_zero. + rewrite <- EQ. rewrite C by eauto with asmgen. rewrite H. reflexivity. + auto. + split. intros; Simpl. congruence. +Qed. + (** Translation of shift immediates *) Lemma transl_shift_correct: diff --git a/arm/TargetPrinter.ml b/arm/TargetPrinter.ml index 02c348a852..1dfe8af6d7 100644 --- a/arm/TargetPrinter.ml +++ b/arm/TargetPrinter.ml @@ -693,8 +693,6 @@ struct assert false | Pfreeframe(sz, ofs) -> assert false - | Psavelr(ofs) -> - assert false | Plabel lbl -> fprintf oc "%a:\n" print_label lbl; 0 | Ploadsymbol(r1, id, ofs) -> From fc1b2bfea598354a3e939de35d270376c880e1b0 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Thu, 17 Aug 2017 11:46:29 +0200 Subject: [PATCH 6/6] ARM: tweak stack layout so that back link and return address are lower This reduces the chances that back link and return address cannot be saved by a single str instruction. We generate correct code for the overflow case, but the code isn't very efficient, so let's make it uncommon. --- arm/Stacklayout.v | 71 +++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/arm/Stacklayout.v b/arm/Stacklayout.v index c867ba5966..043393bfd4 100644 --- a/arm/Stacklayout.v +++ b/arm/Stacklayout.v @@ -19,11 +19,10 @@ Require Import Bounds. (** The general shape of activation records is as follows, from bottom (lowest offsets) to top: - Space for outgoing arguments to function calls. -- Local stack slots. -- Saved values of integer callee-save registers used by the function. -- Saved values of float callee-save registers used by the function. -- Saved return address into caller. - Pointer to activation record of the caller. +- Saved return address into caller. +- Local stack slots. +- Saved values of callee-save registers used by the function. - Space for the stack-allocated data declared in Cminor. The [frame_env] compilation environment records the positions of @@ -36,11 +35,11 @@ Definition fe_ofs_arg := 0. function. *) Definition make_env (b: bounds) := - let ol := align (4 * b.(bound_outgoing)) 8 in (* locals *) + let olink := 4 * b.(bound_outgoing) in (* back link*) + let ora := olink + 4 in (* return address *) + let ol := align (ora + 4) 8 in (* locals *) let ocs := ol + 4 * b.(bound_local) in (* callee-saves *) - let ora := align (size_callee_save_area b ocs) 4 in (* retaddr *) - let olink := ora + 4 in (* back link *) - let ostkdata := align (olink + 4) 8 in (* stack data *) + let ostkdata := align (size_callee_save_area b ocs) 8 in (* retaddr *) let sz := align (ostkdata + b.(bound_stack_data)) 8 in {| fe_size := sz; fe_ofs_link := olink; @@ -67,33 +66,32 @@ Lemma frame_env_separated: Proof. Local Opaque Z.add Z.mul sepconj range. intros; simpl. - set (ol := align (4 * b.(bound_outgoing)) 8); + set (olink := 4 * b.(bound_outgoing)); + set (ora := olink + 4); + set (ol := align (ora + 4) 8); set (ocs := ol + 4 * b.(bound_local)); - set (ora := align (size_callee_save_area b ocs) 4); - set (olink := ora + 4); - set (ostkdata := align (olink + 4) 8). + set (ostkdata := align (size_callee_save_area b ocs) 8). generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros. - assert (4 * b.(bound_outgoing) <= ol) by (apply align_le; omega). + assert (0 <= olink) by (unfold olink; omega). + assert (olink <= ora) by (unfold ora; omega). + assert (ora + 4 <= ol) by (apply align_le; omega). assert (ol + 4 * b.(bound_local) <= ocs) by (unfold ocs; omega). assert (ocs <= size_callee_save_area b ocs) by apply size_callee_save_area_incr. - assert (size_callee_save_area b ocs <= ora) by (apply align_le; omega). - assert (ora <= olink) by (unfold olink; omega). - assert (olink + 4 <= ostkdata) by (apply align_le; omega). + assert (size_callee_save_area b ocs <= ostkdata) by (apply align_le; omega). (* Reorder as: outgoing - local - callee-save + back link retaddr - back link *) + local + callee-save *) rewrite sep_swap12. - rewrite sep_swap45. + rewrite sep_swap23. rewrite sep_swap34. - rewrite sep_swap45. (* Apply range_split and range_split2 repeatedly *) unfold fe_ofs_arg. - apply range_split_2. fold ol; omega. omega. apply range_split. omega. - apply range_split_2. fold ora; omega. omega. + apply range_split. omega. + apply range_split_2. fold ol; omega. omega. apply range_split. omega. apply range_drop_right with ostkdata. omega. eapply sep_drop2. eexact H. @@ -105,18 +103,18 @@ Lemma frame_env_range: 0 <= fe_stack_data fe /\ fe_stack_data fe + bound_stack_data b <= fe_size fe. Proof. intros; simpl. - set (ol := align (4 * b.(bound_outgoing)) 8); + set (olink := 4 * b.(bound_outgoing)); + set (ora := olink + 4); + set (ol := align (ora + 4) 8); set (ocs := ol + 4 * b.(bound_local)); - set (ora := align (size_callee_save_area b ocs) 4); - set (olink := ora + 4); - set (ostkdata := align (olink + 4) 8). + set (ostkdata := align (size_callee_save_area b ocs) 8). generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros. - assert (4 * b.(bound_outgoing) <= ol) by (apply align_le; omega). + assert (0 <= olink) by (unfold olink; omega). + assert (olink <= ora) by (unfold ora; omega). + assert (ora + 4 <= ol) by (apply align_le; omega). assert (ol + 4 * b.(bound_local) <= ocs) by (unfold ocs; omega). assert (ocs <= size_callee_save_area b ocs) by apply size_callee_save_area_incr. - assert (size_callee_save_area b ocs <= ora) by (apply align_le; omega). - assert (ora <= olink) by (unfold olink; omega). - assert (olink + 4 <= ostkdata) by (apply align_le; omega). + assert (size_callee_save_area b ocs <= ostkdata) by (apply align_le; omega). split. omega. apply align_le; omega. Qed. @@ -130,14 +128,13 @@ Lemma frame_env_aligned: /\ (4 | fe_ofs_retaddr fe). Proof. intros; simpl. - set (ol := align (4 * b.(bound_outgoing)) 8); + set (olink := 4 * b.(bound_outgoing)); + set (ora := olink + 4); + set (ol := align (ora + 4) 8); set (ocs := ol + 4 * b.(bound_local)); - set (ora := align (size_callee_save_area b ocs) 4); - set (olink := ora + 4); - set (ostkdata := align (olink + 4) 8). + set (ostkdata := align (size_callee_save_area b ocs) 8). split. apply Zdivide_0. split. apply align_divides; omega. split. apply align_divides; omega. - split. apply Z.divide_add_r. apply align_divides; omega. apply Z.divide_refl. - apply align_divides; omega. + unfold ora, olink; auto using Z.divide_mul_l, Z.divide_add_r, Z.divide_refl. Qed.