Skip to content

Commit

Permalink
Stack check as a linear/CFG instruction (ocaml-flambda#2372)
Browse files Browse the repository at this point in the history
Co-authored-by: Mark Shinwell <[email protected]>
  • Loading branch information
2 people authored and Forestryks committed Apr 17, 2024
1 parent c81ae16 commit b985dfa
Show file tree
Hide file tree
Showing 30 changed files with 229 additions and 79 deletions.
1 change: 1 addition & 0 deletions backend/.ocamlformat-enable
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ peephole/**/*.mli
regalloc/**/*.ml
regalloc/**/*.mli
amd64/simd*.ml
amd64/stack_check.ml
arm64/simd*.ml
generic_fns.ml
generic_fns.mli
127 changes: 62 additions & 65 deletions backend/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,6 @@ let emit_debug_info_linear i =

let fp = Config.with_frame_pointers

let stack_threshold_size = Config.stack_threshold * 8 (* bytes *)

(* Tradeoff between code size and code speed *)

let fastcode_flag = ref true
Expand All @@ -104,19 +102,11 @@ let prologue_required = ref false

let frame_required = ref false

let frame_size () = (* includes return address *)
if !frame_required then begin
if num_stack_slots.(2) > 0 then assert_simd_enabled ();
let sz =
(!stack_offset
+ 8
+ 8 * num_stack_slots.(0)
+ 8 * num_stack_slots.(1)
+ 16 * num_stack_slots.(2)
+ (if fp then 8 else 0))
in Misc.align sz 16
end else
!stack_offset + 8
let frame_size () =
Stack_check.frame_size
~stack_offset:!stack_offset
~frame_required:!frame_required
~num_stack_slots

let slot_offset loc cl =
match loc with
Expand Down Expand Up @@ -473,6 +463,36 @@ let emit_call_safety_errors () =
emit_call (Cmm.global_symbol "caml_ml_array_align_error")
end

(* Stack reallocation *)
type stack_realloc = {
sc_label : Label.t; (* Label of the reallocation code. *)
sc_return : Label.t; (* Label to return to after reallocation. *)
sc_max_frame_size_in_bytes : int; (* Size for reallocation. *)
}

let stack_realloc = ref (None : stack_realloc option)

let clear_stack_realloc () =
stack_realloc := None

let emit_stack_realloc () =
begin match !stack_realloc with
| None -> ()
| Some { sc_label; sc_return; sc_max_frame_size_in_bytes; } -> begin
def_label sc_label;
(* Pass the desired frame size on the stack, since all of the
argument-passing registers may be in use.
Also serves to align the stack properly before the call *)
I.push (int (Config.stack_threshold + sc_max_frame_size_in_bytes / 8));
cfi_adjust_cfa_offset 8;
(* measured in words *)
emit_call (Cmm.global_symbol "caml_call_realloc_stack");
I.pop r10; (* ignored *)
cfi_adjust_cfa_offset (-8);
I.jmp (label sc_return)
end
end

(* Record jump tables *)
type jump_table =
{ table_lbl: string;
Expand Down Expand Up @@ -1197,7 +1217,7 @@ let emit_simd_instr op i =
I.pcmpistri (X86_dsl.int n) (arg i 1) (arg i 0); I.set E (res8 i 0); I.movzx (res8 i 0) (res i 0)

(* Emit an instruction *)
let emit_instr fallthrough i =
let emit_instr ~first ~fallthrough i =
emit_debug_info_linear i;
match i.desc with
| Lend -> ()
Expand Down Expand Up @@ -1776,14 +1796,29 @@ let emit_instr fallthrough i =
I.pop (domain_field Domainstate.Domain_exn_handler);
I.pop r11;
I.jmp r11
end

let rec emit_all fallthrough i =
end
| Lstackcheck { max_frame_size_bytes; } ->
let save_registers = first in
let overflow = new_label () and ret = new_label () in
let threshold_offset = Domainstate.stack_ctx_words * 8 + Stack_check.stack_threshold_size in
if save_registers then I.push r10;
I.lea (mem64 NONE (-(max_frame_size_bytes + threshold_offset)) RSP) r10;
I.cmp (domain_field Domainstate.Domain_current_stack) r10;
if save_registers then I.pop r10;
I.jb (label overflow);
def_label ret;
stack_realloc := Some {
sc_label = overflow;
sc_return = ret;
sc_max_frame_size_in_bytes = max_frame_size_bytes;
}

let rec emit_all ~first ~fallthrough i =
match i.desc with
| Lend -> ()
| _ ->
emit_instr fallthrough i;
emit_all (Linear.has_fallthrough i.desc) i.next
emit_instr ~first ~fallthrough i;
emit_all ~first:false ~fallthrough:(Linear.has_fallthrough i.desc) i.next

let all_functions = ref []

Expand Down Expand Up @@ -1813,6 +1848,7 @@ let fundecl fundecl =
call_gc_sites := [];
local_realloc_sites := [];
clear_safety_checks ();
clear_stack_realloc ();
for i = 0 to Proc.num_stack_slot_classes - 1 do
num_stack_slots.(i) <- fundecl.fun_num_stack_slots.(i);
done;
Expand All @@ -1838,53 +1874,14 @@ let fundecl fundecl =
D.label (label_name (emit_symbol fundecl.fun_name));
emit_debug_info fundecl.fun_dbg;
cfi_startproc ();
let handle_overflow_and_max_frame_size =
(* CR mshinwell: this should be conditionalized on a specific
"stack checks enabled" config option, so we can backport to 4.x *)
if not Config.runtime5 then None
else (
if !Clflags.runtime_variant = "d" then
emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
let { max_frame_size; contains_nontail_calls} =
preproc_stack_check
~fun_body:fundecl.fun_body ~frame_size:(frame_size ()) ~trap_size:16
in
let handle_overflow =
if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin
let overflow = new_label () and ret = new_label () in
let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in
I.lea (mem64 NONE (-(max_frame_size + threshold_offset)) RSP) r10;
I.cmp (domain_field Domainstate.Domain_current_stack) r10;
I.jb (label overflow);
def_label ret;
Some (overflow, ret)
end else None
in
match handle_overflow with
| None -> None
| Some handle_overflow -> Some (handle_overflow, max_frame_size)
)
in
emit_all true fundecl.fun_body;
if Config.runtime5 && !Clflags.runtime_variant = "d" then begin
emit_call (Cmm.global_symbol "caml_assert_stack_invariants");
end;
emit_all ~first:true ~fallthrough:true fundecl.fun_body;
List.iter emit_call_gc !call_gc_sites;
List.iter emit_local_realloc !local_realloc_sites;
emit_call_safety_errors ();
begin match handle_overflow_and_max_frame_size with
| None -> ()
| Some ((overflow,ret), max_frame_size) -> begin
def_label overflow;
(* Pass the desired frame size on the stack, since all of the
argument-passing registers may be in use.
Also serves to align the stack properly before the call *)
I.push (int (Config.stack_threshold + max_frame_size / 8));
cfi_adjust_cfa_offset 8;
(* measured in words *)
emit_call (Cmm.global_symbol "caml_call_realloc_stack");
I.pop r10; (* ignored *)
cfi_adjust_cfa_offset (-8);
I.jmp (label ret)
end
end;
emit_stack_realloc ();
if !frame_required then begin
let n = frame_size() - 8 - (if fp then 8 else 0) in
if n <> 0
Expand Down
2 changes: 2 additions & 0 deletions backend/amd64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,8 @@ let destroyed_at_basic (basic : Cfg_intf.S.basic) =
| Name_for_debugger _ | Dls_get)
| Poptrap | Prologue ->
if fp then [| rbp |] else [||]
| Stack_check _ ->
assert false (* the instruction is added after register allocation *)

(* note: keep this function in sync with `destroyed_at_oper` above,
and `is_destruction_point` below. *)
Expand Down
2 changes: 1 addition & 1 deletion backend/amd64/regalloc_stack_operands.ml
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ let basic (map : spilled_map) (instr : Cfg.basic Cfg.instruction) =
| Prologue ->
(* no rewrite *)
May_still_have_spilled_registers
| Op (Intop_imm ((Ipopcnt | Iclz _ | Ictz _ ), _)) ->
| Op (Intop_imm ((Ipopcnt | Iclz _ | Ictz _ ), _)) | Stack_check _ ->
(* should not happen *)
fatal "unexpected instruction"
end
Expand Down
66 changes: 66 additions & 0 deletions backend/amd64/stack_check.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* Copyright 2024 Jane Street Group LLC *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)

[@@@ocaml.warning "+a-30-40-41-42"]

let stack_threshold_size = Config.stack_threshold * 8 (* bytes *)

let fp = Config.with_frame_pointers

(* includes return address *)
let frame_size :
stack_offset:int -> frame_required:bool -> num_stack_slots:int array -> int
=
fun ~stack_offset ~frame_required ~num_stack_slots ->
if frame_required
then (
if num_stack_slots.(2) > 0 then Arch.assert_simd_enabled ();
let sz =
stack_offset + 8
+ (8 * num_stack_slots.(0))
+ (8 * num_stack_slots.(1))
+ (16 * num_stack_slots.(2))
+ if fp then 8 else 0
in
Misc.align sz 16)
else stack_offset + 8

let linear : Linear.fundecl -> Linear.fundecl =
fun fundecl ->
match Config.runtime5 with
| false -> fundecl
| true ->
let frame_size =
frame_size ~stack_offset:0 ~frame_required:fundecl.fun_frame_required
~num_stack_slots:fundecl.fun_num_stack_slots
in
let { Emitaux.max_frame_size; contains_nontail_calls } =
Emitaux.preproc_stack_check ~fun_body:fundecl.fun_body ~frame_size
~trap_size:16
in
let insert_stack_check =
contains_nontail_calls || max_frame_size >= stack_threshold_size
in
if insert_stack_check
then
let fun_body =
Linear.instr_cons
(Lstackcheck { max_frame_size_bytes = max_frame_size })
[||] [||] ~available_before:fundecl.fun_body.available_before
~available_across:fundecl.fun_body.available_across fundecl.fun_body
in
{ fundecl with fun_body }
else fundecl
4 changes: 3 additions & 1 deletion backend/arm64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ module BR = Branch_relaxation.Make (struct
| Lambda.Raise_reraise -> 1
| Lambda.Raise_notrace -> 4
end
| Lstackcheck _ -> assert false (* not supported *)

let relax_poll ~return_label =
Lop (Ispecific (Ifar_poll { return_label }))
Expand Down Expand Up @@ -1090,7 +1091,8 @@ let emit_instr i =
` ldr {emit_reg reg_tmp1}, [sp, #8]\n`;
` ldr {emit_reg reg_trap_ptr}, [sp], 16\n`;
` br {emit_reg reg_tmp1}\n`
end
end
| Lstackcheck _ -> assert false (* not supported *)

(* Emission of an instruction sequence *)

Expand Down
1 change: 1 addition & 0 deletions backend/arm64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ let destroyed_at_basic (basic : Cfg_intf.S.basic) =
[| reg_d7 |]
| Op _ | Poptrap | Prologue ->
[||]
| Stack_check _ -> assert false (* not supported *)

(* note: keep this function in sync with `destroyed_at_oper` above,
and `is_destruction_point` below. *)
Expand Down
32 changes: 32 additions & 0 deletions backend/arm64/stack_check.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* Copyright 2024 Jane Street Group LLC *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)

[@@@ocaml.warning "+a-30-40-41-42"]

let stack_threshold_size = 0

let frame_size
: stack_offset:int -> frame_required:bool -> num_stack_slots:int array -> int
= fun ~stack_offset:_ ~frame_required:_ ~num_stack_slots:_ ->
Misc.fatal_error "stack checks are not supported on arm64"

let linear
: Linear.fundecl -> Linear.fundecl
= fun fundecl ->
match Config.runtime5 with
| false -> fundecl
| true ->
Misc.fatal_error "stack checks are not supported on arm64"
1 change: 1 addition & 0 deletions backend/asmgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ let compile_fundecl ~ppf_dump ~funcnames fd_cmm =
++ Profile.record ~accumulate:true "scheduling" Scheduling.fundecl
++ pass_dump_linear_if ppf_dump dump_scheduling "After instruction scheduling"
++ Profile.record ~accumulate:true "save_linear" save_linear
++ Stack_check.linear
++ Profile.record ~accumulate:true "emit_fundecl" emit_fundecl

let compile_data dl =
Expand Down
7 changes: 6 additions & 1 deletion backend/cfg/cfg.ml
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,8 @@ let dump_basic ppf (basic : basic) =
| Pushtrap { lbl_handler } -> fprintf ppf "Pushtrap handler=%d" lbl_handler
| Poptrap -> fprintf ppf "Poptrap"
| Prologue -> fprintf ppf "Prologue"
| Stack_check { max_frame_size_bytes } ->
fprintf ppf "Stack_check size=%d" max_frame_size_bytes

let dump_terminator' ?(print_reg = Printmach.reg) ?(res = [||]) ?(args = [||])
?(specific_can_raise = fun ppf _ -> Format.fprintf ppf "specific_can_raise")
Expand Down Expand Up @@ -507,6 +509,9 @@ let is_pure_basic : basic -> bool = function
ensured that it wouldn't modify the stack pointer (e.g. there are no used
local stack slots nor calls). *)
false
| Stack_check _ ->
(* May reallocate the stack. *)
false
let same_location (r1 : Reg.t) (r2 : Reg.t) =
Reg.same_loc r1 r2
Expand Down Expand Up @@ -534,7 +539,7 @@ let is_noop_move instr =
| Intop_atomic _ | Floatop _ | Opaque | Valueofint | Intofvalue
| Scalarcast _ | Probe_is_enabled _ | Specific _ | Name_for_debugger _
| Begin_region | End_region | Dls_get | Poll | Alloc _ )
| Reloadretaddr | Pushtrap _ | Poptrap | Prologue ->
| Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ ->
false
let set_stack_offset (instr : _ instruction) stack_offset =
Expand Down
4 changes: 2 additions & 2 deletions backend/cfg/cfg_comballoc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ let rec find_next_allocation : cell option -> allocation option =
| Intofvalue | Vectorcast _ | Scalarcast _ | Probe_is_enabled _ | Opaque
| Begin_region | End_region | Specific _ | Name_for_debugger _ | Dls_get
| Poll )
| Reloadretaddr | Pushtrap _ | Poptrap | Prologue ->
| Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ ->
find_next_allocation (DLL.next cell))

(* [find_compatible_allocations cell ~curr_mode ~curr_size] returns the
Expand Down Expand Up @@ -92,7 +92,7 @@ let find_compatible_allocations :
| Lambda.Alloc_heap ->
loop allocations (DLL.next cell) ~curr_mode ~curr_size)
| Op Poll -> return ()
| Reloadretaddr | Poptrap | Prologue | Pushtrap _ ->
| Reloadretaddr | Poptrap | Prologue | Pushtrap _ | Stack_check _ ->
(* CR-soon xclerc for xclerc: is it too conservative? (note: only the
`Pushtrap` case may be too conservative) *)
{ allocations = List.rev allocations; next_cell = Some cell }
Expand Down
2 changes: 1 addition & 1 deletion backend/cfg/cfg_cse.ml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class cse_generic =
fun state n cell ->
let i = DLL.value cell in
match i.desc with
| Reloadretaddr | Pushtrap _ | Poptrap | Prologue -> n
| Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> n
| Op (Move | Spill | Reload) ->
(* For moves, we associate the same value number to the result reg as
to the argument reg. *)
Expand Down
Loading

0 comments on commit b985dfa

Please sign in to comment.