From e0535973aa072f17807559e8bd3049f1dd7e52c3 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 Apr 2023 08:44:56 -0700 Subject: [PATCH] Cranelift: remove non-egraphs optimization pipeline and `use_egraphs` option. This PR removes the LICM, GVN, and preopt passes, and associated support pieces, from `cranelift-codegen`. Not to worry, we still have optimizations: the egraph framework subsumes all of these, and has been on by default since #5181. A few decision points: - Filetests for the legacy LICM, GVN and simple_preopt were removed too. As we built optimizations in the egraph framework we wrote new tests for the equivalent functionality, and many of the old tests were testing specific behaviors in the old implementations that may not be relevant anymore. However if folks prefer I could take a different approach here and try to port over all of the tests. - The corresponding filetest modes (commands) were deleted too. The `test alias_analysis` mode remains, but no longer invokes a separate GVN first (since there is no separate GVN that will not also do alias analysis) so the tests were tweaked slightly to work with that. The egraph testsuite also covers alias analysis. - The `divconst_magic_numbers` module is removed since it's unused without `simple_preopt`, though this is the one remaining optimization we still need to build in the egraphs framework, pending #5908. The magic numbers will live forever in git history so removing this in the meantime is not a major issue IMHO. - The `use_egraphs` setting itself was removed at both the Cranelift and Wasmtime levels. It has been marked deprecated for a few releases now (Wasmtime 6.0, 7.0, upcoming 8.0, and corresponding Cranelift versions) so I think this is probably OK. As an alternative if anyone feels strongly, we could leave the setting and make it a no-op. 
--- cranelift/codegen/meta/src/shared/settings.rs | 13 - cranelift/codegen/src/context.rs | 46 +- .../codegen/src/divconst_magic_numbers.rs | 1083 ----------------- cranelift/codegen/src/fx.rs | 5 - cranelift/codegen/src/lib.rs | 4 - cranelift/codegen/src/licm.rs | 241 ---- cranelift/codegen/src/machinst/isle.rs | 23 - cranelift/codegen/src/settings.rs | 1 - cranelift/codegen/src/simple_gvn.rs | 149 --- cranelift/codegen/src/simple_preopt.rs | 796 ------------ .../filetests/alias/simple-alias.clif | 10 +- .../filetests/filetests/egraph/algebraic.clif | 1 - .../filetests/egraph/alias_analysis.clif | 1 - .../filetests/filetests/egraph/basic-gvn.clif | 1 - .../filetests/filetests/egraph/bitselect.clif | 1 - .../filetests/egraph/cprop-splat.clif | 1 - .../filetests/filetests/egraph/cprop.clif | 1 - .../filetests/filetests/egraph/i128-opts.clif | 1 - .../filetests/egraph/icmp-parameterized.clif | 1 - .../filetests/filetests/egraph/icmp.clif | 1 - .../filetests/filetests/egraph/isplit.clif | 1 - .../filetests/egraph/issue-5405.clif | 1 - .../filetests/egraph/issue-5417.clif | 1 - .../filetests/egraph/issue-5437.clif | 1 - .../filetests/filetests/egraph/licm.clif | 1 - .../egraph/make-icmp-parameterized-tests.sh | 1 - .../filetests/filetests/egraph/misc.clif | 1 - .../filetests/filetests/egraph/mul-pow-2.clif | 1 - .../filetests/egraph/multivalue.clif | 1 - .../filetests/egraph/not_a_load.clif | 1 - .../filetests/filetests/egraph/remat.clif | 1 - .../filetests/filetests/egraph/select.clif | 1 - .../filetests/isa/x64/amode-opt.clif | 1 - cranelift/filetests/filetests/licm/basic.clif | 39 - .../filetests/filetests/licm/br-table.clif | 19 - .../filetests/filetests/licm/complex.clif | 88 -- .../filetests/licm/critical-edge.clif | 50 - .../filetests/filetests/licm/encoding.clif | 40 - .../filetests/licm/load_readonly_notrap.clif | 51 - .../filetests/licm/multiple-blocks.clif | 55 - .../filetests/licm/nested_loops.clif | 58 - .../filetests/filetests/licm/reject.clif | 65 - 
.../filetests/licm/reject_load_notrap.clif | 52 - .../filetests/licm/reject_load_readonly.clif | 52 - .../filetests/licm/rewrite-jump-table.clif | 23 - .../filetests/runtests/issue5569.clif | 1 - .../filetests/filetests/simple_gvn/basic.clif | 43 - .../simple_gvn/idempotent-trapping.clif | 68 -- .../filetests/simple_gvn/readonly.clif | 25 - .../filetests/simple_gvn/reject.clif | 27 - .../filetests/simple_gvn/scopes.clif | 80 -- .../filetests/simple_preopt/branch.clif | 53 - .../simple_preopt/div_by_const_indirect.clif | 60 - .../div_by_const_non_power_of_2.clif | 267 ---- .../div_by_const_power_of_2.clif | 293 ----- ...order_instructions_when_transplanting.clif | 23 - .../fold-extended-move-wraparound.clif | 15 - .../filetests/simple_preopt/i128.clif | 28 - .../rem_by_const_non_power_of_2.clif | 286 ----- .../rem_by_const_power_of_2.clif | 292 ----- ...ing_instructions_and_cfg_predecessors.clif | 18 - .../filetests/simple_preopt/sign_extend.clif | 40 - .../filetests/simple_preopt/simplify32.clif | 62 - .../filetests/simple_preopt/simplify64.clif | 294 ----- .../duplicate-loads-dynamic-memory-egraph.wat | 88 -- .../wasm/duplicate-loads-dynamic-memory.wat | 33 +- .../duplicate-loads-static-memory-egraph.wat | 74 -- .../wasm/duplicate-loads-static-memory.wat | 17 +- ...re-access-same-index-different-offsets.wat | 1 - ...re-access-same-index-different-offsets.wat | 1 - cranelift/filetests/src/lib.rs | 6 - .../filetests/src/test_alias_analysis.rs | 3 - cranelift/filetests/src/test_licm.rs | 51 - cranelift/filetests/src/test_simple_gvn.rs | 44 - cranelift/filetests/src/test_simple_preopt.rs | 46 - cranelift/fuzzgen/src/lib.rs | 1 - crates/fuzzing/src/generators/config.rs | 3 - crates/wasmtime/src/config.rs | 24 - crates/wasmtime/src/engine.rs | 1 - 79 files changed, 13 insertions(+), 5340 deletions(-) delete mode 100644 cranelift/codegen/src/divconst_magic_numbers.rs delete mode 100644 cranelift/codegen/src/licm.rs delete mode 100644 cranelift/codegen/src/simple_gvn.rs 
delete mode 100644 cranelift/codegen/src/simple_preopt.rs delete mode 100644 cranelift/filetests/filetests/licm/basic.clif delete mode 100644 cranelift/filetests/filetests/licm/br-table.clif delete mode 100644 cranelift/filetests/filetests/licm/complex.clif delete mode 100644 cranelift/filetests/filetests/licm/critical-edge.clif delete mode 100644 cranelift/filetests/filetests/licm/encoding.clif delete mode 100644 cranelift/filetests/filetests/licm/load_readonly_notrap.clif delete mode 100644 cranelift/filetests/filetests/licm/multiple-blocks.clif delete mode 100644 cranelift/filetests/filetests/licm/nested_loops.clif delete mode 100644 cranelift/filetests/filetests/licm/reject.clif delete mode 100644 cranelift/filetests/filetests/licm/reject_load_notrap.clif delete mode 100644 cranelift/filetests/filetests/licm/reject_load_readonly.clif delete mode 100644 cranelift/filetests/filetests/licm/rewrite-jump-table.clif delete mode 100644 cranelift/filetests/filetests/simple_gvn/basic.clif delete mode 100644 cranelift/filetests/filetests/simple_gvn/idempotent-trapping.clif delete mode 100644 cranelift/filetests/filetests/simple_gvn/readonly.clif delete mode 100644 cranelift/filetests/filetests/simple_gvn/reject.clif delete mode 100644 cranelift/filetests/filetests/simple_gvn/scopes.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/branch.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/div_by_const_non_power_of_2.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/div_by_const_power_of_2.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/do_not_reorder_instructions_when_transplanting.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/fold-extended-move-wraparound.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/i128.clif delete mode 100644 
cranelift/filetests/filetests/simple_preopt/rem_by_const_non_power_of_2.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/rem_by_const_power_of_2.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/replace_branching_instructions_and_cfg_predecessors.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/sign_extend.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/simplify32.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/simplify64.clif delete mode 100644 cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat delete mode 100644 cranelift/filetests/filetests/wasm/duplicate-loads-static-memory-egraph.wat delete mode 100644 cranelift/filetests/src/test_licm.rs delete mode 100644 cranelift/filetests/src/test_simple_gvn.rs delete mode 100644 cranelift/filetests/src/test_simple_preopt.rs diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index e68412183aee..d9689b4fd1a8 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -53,19 +53,6 @@ pub(crate) fn define() -> SettingGroup { true, ); - settings.add_bool( - "use_egraphs", - "Enable egraph-based optimization.", - r#" - This enables an optimization phase that converts CLIF to an egraph (equivalence graph) - representation, performs various rewrites, and then converts it back. This should result in - better optimization, but the traditional optimization pass structure is also still - available by setting this to `false`. The `false` setting will eventually be - deprecated and removed. 
- "#, - true, - ); - settings.add_bool( "enable_verifier", "Run the Cranelift IR verifier at strategic times during compilation.", diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index b5b10eaa76c7..dc2ab0b4bdb2 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -17,15 +17,12 @@ use crate::flowgraph::ControlFlowGraph; use crate::ir::Function; use crate::isa::TargetIsa; use crate::legalizer::simple_legalize; -use crate::licm::do_licm; use crate::loop_analysis::LoopAnalysis; use crate::machinst::{CompiledCode, CompiledCodeStencil}; use crate::nan_canonicalization::do_nan_canonicalization; use crate::remove_constant_phis::do_remove_constant_phis; use crate::result::{CodegenResult, CompileResult}; use crate::settings::{FlagsOrIsa, OptLevel}; -use crate::simple_gvn::do_simple_gvn; -use crate::simple_preopt::do_preopt; use crate::trace; use crate::unreachable_code::eliminate_unreachable_code; use crate::verifier::{verify_context, VerifierErrors, VerifierResult}; @@ -172,22 +169,12 @@ impl Context { ); self.compute_cfg(); - if !isa.flags().use_egraphs() && opt_level != OptLevel::None { - self.preopt(isa)?; - } if isa.flags().enable_nan_canonicalization() { self.canonicalize_nans(isa)?; } self.legalize(isa)?; - if !isa.flags().use_egraphs() && opt_level != OptLevel::None { - self.compute_domtree(); - self.compute_loop_analysis(); - self.licm(isa)?; - self.simple_gvn(isa)?; - } - self.compute_domtree(); self.eliminate_unreachable_code(isa)?; @@ -198,14 +185,7 @@ impl Context { self.remove_constant_phis(isa)?; if opt_level != OptLevel::None { - if isa.flags().use_egraphs() { - self.egraph_pass()?; - } else if isa.flags().enable_alias_analysis() { - for _ in 0..2 { - self.replace_redundant_loads()?; - self.simple_gvn(isa)?; - } - } + self.egraph_pass()?; } Ok(()) @@ -294,13 +274,6 @@ impl Context { Ok(()) } - /// Perform pre-legalization rewrites on the function. 
- pub fn preopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - do_preopt(&mut self.func, isa); - self.verify_if(isa)?; - Ok(()) - } - /// Perform NaN canonicalizing rewrites on the function. pub fn canonicalize_nans(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { do_nan_canonicalization(&mut self.func); @@ -341,23 +314,6 @@ impl Context { self.compute_domtree() } - /// Perform simple GVN on the function. - pub fn simple_gvn<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CodegenResult<()> { - do_simple_gvn(&mut self.func, &mut self.domtree); - self.verify_if(fisa) - } - - /// Perform LICM on the function. - pub fn licm(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - do_licm( - &mut self.func, - &mut self.cfg, - &mut self.domtree, - &mut self.loop_analysis, - ); - self.verify_if(isa) - } - /// Perform unreachable code elimination. pub fn eliminate_unreachable_code<'a, FOI>(&mut self, fisa: FOI) -> CodegenResult<()> where diff --git a/cranelift/codegen/src/divconst_magic_numbers.rs b/cranelift/codegen/src/divconst_magic_numbers.rs deleted file mode 100644 index af45444c4030..000000000000 --- a/cranelift/codegen/src/divconst_magic_numbers.rs +++ /dev/null @@ -1,1083 +0,0 @@ -//! Compute "magic numbers" for division-by-constants transformations. -//! -//! Math helpers for division by (non-power-of-2) constants. This is based -//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There -//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size -//! makes little difference, but the signed-vs-unsigned aspect has a large -//! effect. Therefore everything is presented in the order U32 U64 S32 S64 -//! so as to emphasise the similarity of the U32 and U64 cases and the S32 -//! and S64 cases. - -// Structures to hold the "magic numbers" computed. 
- -#[derive(PartialEq, Debug)] -pub struct MU32 { - pub mul_by: u32, - pub do_add: bool, - pub shift_by: i32, -} - -#[derive(PartialEq, Debug)] -pub struct MU64 { - pub mul_by: u64, - pub do_add: bool, - pub shift_by: i32, -} - -#[derive(PartialEq, Debug)] -pub struct MS32 { - pub mul_by: i32, - pub shift_by: i32, -} - -#[derive(PartialEq, Debug)] -pub struct MS64 { - pub mul_by: i64, - pub shift_by: i32, -} - -// The actual "magic number" generators follow. - -pub fn magic_u32(d: u32) -> MU32 { - debug_assert_ne!(d, 0); - debug_assert_ne!(d, 1); // d==1 generates out of range shifts. - - let mut do_add: bool = false; - let mut p: i32 = 31; - let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d; - let mut q1: u32 = 0x80000000u32 / nc; - let mut r1: u32 = 0x80000000u32 - q1 * nc; - let mut q2: u32 = 0x7FFFFFFFu32 / d; - let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d; - loop { - p = p + 1; - if r1 >= nc - r1 { - q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1); - r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc); - } else { - q1 = u32::wrapping_mul(2, q1); - r1 = 2 * r1; - } - if r2 + 1 >= d - r2 { - if q2 >= 0x7FFFFFFFu32 { - do_add = true; - } - q2 = 2 * q2 + 1; - r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d); - } else { - if q2 >= 0x80000000u32 { - do_add = true; - } - q2 = u32::wrapping_mul(2, q2); - r2 = 2 * r2 + 1; - } - let delta: u32 = d - 1 - r2; - if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) { - break; - } - } - - MU32 { - mul_by: q2 + 1, - do_add, - shift_by: p - 32, - } -} - -pub fn magic_u64(d: u64) -> MU64 { - debug_assert_ne!(d, 0); - debug_assert_ne!(d, 1); // d==1 generates out of range shifts. 
- - let mut do_add: bool = false; - let mut p: i32 = 63; - let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d; - let mut q1: u64 = 0x8000000000000000u64 / nc; - let mut r1: u64 = 0x8000000000000000u64 - q1 * nc; - let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d; - let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d; - loop { - p = p + 1; - if r1 >= nc - r1 { - q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1); - r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc); - } else { - q1 = u64::wrapping_mul(2, q1); - r1 = 2 * r1; - } - if r2 + 1 >= d - r2 { - if q2 >= 0x7FFFFFFFFFFFFFFFu64 { - do_add = true; - } - q2 = 2 * q2 + 1; - r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d); - } else { - if q2 >= 0x8000000000000000u64 { - do_add = true; - } - q2 = u64::wrapping_mul(2, q2); - r2 = 2 * r2 + 1; - } - let delta: u64 = d - 1 - r2; - if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) { - break; - } - } - - MU64 { - mul_by: q2 + 1, - do_add, - shift_by: p - 64, - } -} - -pub fn magic_s32(d: i32) -> MS32 { - debug_assert_ne!(d, -1); - debug_assert_ne!(d, 0); - debug_assert_ne!(d, 1); - let two31: u32 = 0x80000000u32; - let mut p: i32 = 31; - let ad: u32 = i32::wrapping_abs(d) as u32; - let t: u32 = two31 + ((d as u32) >> 31); - let anc: u32 = u32::wrapping_sub(t - 1, t % ad); - let mut q1: u32 = two31 / anc; - let mut r1: u32 = two31 - q1 * anc; - let mut q2: u32 = two31 / ad; - let mut r2: u32 = two31 - q2 * ad; - loop { - p = p + 1; - q1 = 2 * q1; - r1 = 2 * r1; - if r1 >= anc { - q1 = q1 + 1; - r1 = r1 - anc; - } - q2 = 2 * q2; - r2 = 2 * r2; - if r2 >= ad { - q2 = q2 + 1; - r2 = r2 - ad; - } - let delta: u32 = ad - r2; - if !(q1 < delta || (q1 == delta && r1 == 0)) { - break; - } - } - - MS32 { - mul_by: (if d < 0 { - u32::wrapping_neg(q2 + 1) - } else { - q2 + 1 - }) as i32, - shift_by: p - 32, - } -} - -pub fn magic_s64(d: i64) -> MS64 { - debug_assert_ne!(d, -1); - debug_assert_ne!(d, 0); - debug_assert_ne!(d, 1); - let two63: 
u64 = 0x8000000000000000u64; - let mut p: i32 = 63; - let ad: u64 = i64::wrapping_abs(d) as u64; - let t: u64 = two63 + ((d as u64) >> 63); - let anc: u64 = u64::wrapping_sub(t - 1, t % ad); - let mut q1: u64 = two63 / anc; - let mut r1: u64 = two63 - q1 * anc; - let mut q2: u64 = two63 / ad; - let mut r2: u64 = two63 - q2 * ad; - loop { - p = p + 1; - q1 = 2 * q1; - r1 = 2 * r1; - if r1 >= anc { - q1 = q1 + 1; - r1 = r1 - anc; - } - q2 = 2 * q2; - r2 = 2 * r2; - if r2 >= ad { - q2 = q2 + 1; - r2 = r2 - ad; - } - let delta: u64 = ad - r2; - if !(q1 < delta || (q1 == delta && r1 == 0)) { - break; - } - } - - MS64 { - mul_by: (if d < 0 { - u64::wrapping_neg(q2 + 1) - } else { - q2 + 1 - }) as i64, - shift_by: p - 64, - } -} - -#[cfg(test)] -mod tests { - use super::{magic_s32, magic_s64, magic_u32, magic_u64}; - use super::{MS32, MS64, MU32, MU64}; - - fn make_mu32(mul_by: u32, do_add: bool, shift_by: i32) -> MU32 { - MU32 { - mul_by, - do_add, - shift_by, - } - } - - fn make_mu64(mul_by: u64, do_add: bool, shift_by: i32) -> MU64 { - MU64 { - mul_by, - do_add, - shift_by, - } - } - - fn make_ms32(mul_by: i32, shift_by: i32) -> MS32 { - MS32 { mul_by, shift_by } - } - - fn make_ms64(mul_by: i64, shift_by: i32) -> MS64 { - MS64 { mul_by, shift_by } - } - - #[test] - fn test_magic_u32() { - assert_eq!(magic_u32(2u32), make_mu32(0x80000000u32, false, 0)); - assert_eq!(magic_u32(3u32), make_mu32(0xaaaaaaabu32, false, 1)); - assert_eq!(magic_u32(4u32), make_mu32(0x40000000u32, false, 0)); - assert_eq!(magic_u32(5u32), make_mu32(0xcccccccdu32, false, 2)); - assert_eq!(magic_u32(6u32), make_mu32(0xaaaaaaabu32, false, 2)); - assert_eq!(magic_u32(7u32), make_mu32(0x24924925u32, true, 3)); - assert_eq!(magic_u32(9u32), make_mu32(0x38e38e39u32, false, 1)); - assert_eq!(magic_u32(10u32), make_mu32(0xcccccccdu32, false, 3)); - assert_eq!(magic_u32(11u32), make_mu32(0xba2e8ba3u32, false, 3)); - assert_eq!(magic_u32(12u32), make_mu32(0xaaaaaaabu32, false, 3)); - 
assert_eq!(magic_u32(25u32), make_mu32(0x51eb851fu32, false, 3)); - assert_eq!(magic_u32(125u32), make_mu32(0x10624dd3u32, false, 3)); - assert_eq!(magic_u32(625u32), make_mu32(0xd1b71759u32, false, 9)); - assert_eq!(magic_u32(1337u32), make_mu32(0x88233b2bu32, true, 11)); - assert_eq!(magic_u32(65535u32), make_mu32(0x80008001u32, false, 15)); - assert_eq!(magic_u32(65536u32), make_mu32(0x00010000u32, false, 0)); - assert_eq!(magic_u32(65537u32), make_mu32(0xffff0001u32, false, 16)); - assert_eq!(magic_u32(31415927u32), make_mu32(0x445b4553u32, false, 23)); - assert_eq!( - magic_u32(0xdeadbeefu32), - make_mu32(0x93275ab3u32, false, 31) - ); - assert_eq!( - magic_u32(0xfffffffdu32), - make_mu32(0x40000001u32, false, 30) - ); - assert_eq!(magic_u32(0xfffffffeu32), make_mu32(0x00000003u32, true, 32)); - assert_eq!( - magic_u32(0xffffffffu32), - make_mu32(0x80000001u32, false, 31) - ); - } - - #[test] - fn test_magic_u64() { - assert_eq!(magic_u64(2u64), make_mu64(0x8000000000000000u64, false, 0)); - assert_eq!(magic_u64(3u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 1)); - assert_eq!(magic_u64(4u64), make_mu64(0x4000000000000000u64, false, 0)); - assert_eq!(magic_u64(5u64), make_mu64(0xcccccccccccccccdu64, false, 2)); - assert_eq!(magic_u64(6u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 2)); - assert_eq!(magic_u64(7u64), make_mu64(0x2492492492492493u64, true, 3)); - assert_eq!(magic_u64(9u64), make_mu64(0xe38e38e38e38e38fu64, false, 3)); - assert_eq!(magic_u64(10u64), make_mu64(0xcccccccccccccccdu64, false, 3)); - assert_eq!(magic_u64(11u64), make_mu64(0x2e8ba2e8ba2e8ba3u64, false, 1)); - assert_eq!(magic_u64(12u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 3)); - assert_eq!(magic_u64(25u64), make_mu64(0x47ae147ae147ae15u64, true, 5)); - assert_eq!(magic_u64(125u64), make_mu64(0x0624dd2f1a9fbe77u64, true, 7)); - assert_eq!( - magic_u64(625u64), - make_mu64(0x346dc5d63886594bu64, false, 7) - ); - assert_eq!( - magic_u64(1337u64), - make_mu64(0xc4119d952866a139u64, 
false, 10) - ); - assert_eq!( - magic_u64(31415927u64), - make_mu64(0x116d154b9c3d2f85u64, true, 25) - ); - assert_eq!( - magic_u64(0x00000000deadbeefu64), - make_mu64(0x93275ab2dfc9094bu64, false, 31) - ); - assert_eq!( - magic_u64(0x00000000fffffffdu64), - make_mu64(0x8000000180000005u64, false, 31) - ); - assert_eq!( - magic_u64(0x00000000fffffffeu64), - make_mu64(0x0000000200000005u64, true, 32) - ); - assert_eq!( - magic_u64(0x00000000ffffffffu64), - make_mu64(0x8000000080000001u64, false, 31) - ); - assert_eq!( - magic_u64(0x0000000100000000u64), - make_mu64(0x0000000100000000u64, false, 0) - ); - assert_eq!( - magic_u64(0x0000000100000001u64), - make_mu64(0xffffffff00000001u64, false, 32) - ); - assert_eq!( - magic_u64(0x0ddc0ffeebadf00du64), - make_mu64(0x2788e9d394b77da1u64, true, 60) - ); - assert_eq!( - magic_u64(0xfffffffffffffffdu64), - make_mu64(0x4000000000000001u64, false, 62) - ); - assert_eq!( - magic_u64(0xfffffffffffffffeu64), - make_mu64(0x0000000000000003u64, true, 64) - ); - assert_eq!( - magic_u64(0xffffffffffffffffu64), - make_mu64(0x8000000000000001u64, false, 63) - ); - } - - #[test] - fn test_magic_s32() { - assert_eq!( - magic_s32(-0x80000000i32), - make_ms32(0x7fffffffu32 as i32, 30) - ); - assert_eq!( - magic_s32(-0x7FFFFFFFi32), - make_ms32(0xbfffffffu32 as i32, 29) - ); - assert_eq!( - magic_s32(-0x7FFFFFFEi32), - make_ms32(0x7ffffffdu32 as i32, 30) - ); - assert_eq!(magic_s32(-31415927i32), make_ms32(0xbba4baadu32 as i32, 23)); - assert_eq!(magic_s32(-1337i32), make_ms32(0x9df73135u32 as i32, 9)); - assert_eq!(magic_s32(-256i32), make_ms32(0x7fffffffu32 as i32, 7)); - assert_eq!(magic_s32(-5i32), make_ms32(0x99999999u32 as i32, 1)); - assert_eq!(magic_s32(-3i32), make_ms32(0x55555555u32 as i32, 1)); - assert_eq!(magic_s32(-2i32), make_ms32(0x7fffffffu32 as i32, 0)); - assert_eq!(magic_s32(2i32), make_ms32(0x80000001u32 as i32, 0)); - assert_eq!(magic_s32(3i32), make_ms32(0x55555556u32 as i32, 0)); - assert_eq!(magic_s32(4i32), 
make_ms32(0x80000001u32 as i32, 1)); - assert_eq!(magic_s32(5i32), make_ms32(0x66666667u32 as i32, 1)); - assert_eq!(magic_s32(6i32), make_ms32(0x2aaaaaabu32 as i32, 0)); - assert_eq!(magic_s32(7i32), make_ms32(0x92492493u32 as i32, 2)); - assert_eq!(magic_s32(9i32), make_ms32(0x38e38e39u32 as i32, 1)); - assert_eq!(magic_s32(10i32), make_ms32(0x66666667u32 as i32, 2)); - assert_eq!(magic_s32(11i32), make_ms32(0x2e8ba2e9u32 as i32, 1)); - assert_eq!(magic_s32(12i32), make_ms32(0x2aaaaaabu32 as i32, 1)); - assert_eq!(magic_s32(25i32), make_ms32(0x51eb851fu32 as i32, 3)); - assert_eq!(magic_s32(125i32), make_ms32(0x10624dd3u32 as i32, 3)); - assert_eq!(magic_s32(625i32), make_ms32(0x68db8badu32 as i32, 8)); - assert_eq!(magic_s32(1337i32), make_ms32(0x6208cecbu32 as i32, 9)); - assert_eq!(magic_s32(31415927i32), make_ms32(0x445b4553u32 as i32, 23)); - assert_eq!( - magic_s32(0x7ffffffei32), - make_ms32(0x80000003u32 as i32, 30) - ); - assert_eq!( - magic_s32(0x7fffffffi32), - make_ms32(0x40000001u32 as i32, 29) - ); - } - - #[test] - fn test_magic_s64() { - assert_eq!( - magic_s64(-0x8000000000000000i64), - make_ms64(0x7fffffffffffffffu64 as i64, 62) - ); - assert_eq!( - magic_s64(-0x7FFFFFFFFFFFFFFFi64), - make_ms64(0xbfffffffffffffffu64 as i64, 61) - ); - assert_eq!( - magic_s64(-0x7FFFFFFFFFFFFFFEi64), - make_ms64(0x7ffffffffffffffdu64 as i64, 62) - ); - assert_eq!( - magic_s64(-0x0ddC0ffeeBadF00di64), - make_ms64(0x6c3b8b1635a4412fu64 as i64, 59) - ); - assert_eq!( - magic_s64(-0x100000001i64), - make_ms64(0x800000007fffffffu64 as i64, 31) - ); - assert_eq!( - magic_s64(-0x100000000i64), - make_ms64(0x7fffffffffffffffu64 as i64, 31) - ); - assert_eq!( - magic_s64(-0xFFFFFFFFi64), - make_ms64(0x7fffffff7fffffffu64 as i64, 31) - ); - assert_eq!( - magic_s64(-0xFFFFFFFEi64), - make_ms64(0x7ffffffefffffffdu64 as i64, 31) - ); - assert_eq!( - magic_s64(-0xFFFFFFFDi64), - make_ms64(0x7ffffffe7ffffffbu64 as i64, 31) - ); - assert_eq!( - magic_s64(-0xDeadBeefi64), - 
make_ms64(0x6cd8a54d2036f6b5u64 as i64, 31) - ); - assert_eq!( - magic_s64(-31415927i64), - make_ms64(0x7749755a31e1683du64 as i64, 24) - ); - assert_eq!( - magic_s64(-1337i64), - make_ms64(0x9df731356bccaf63u64 as i64, 9) - ); - assert_eq!( - magic_s64(-256i64), - make_ms64(0x7fffffffffffffffu64 as i64, 7) - ); - assert_eq!(magic_s64(-5i64), make_ms64(0x9999999999999999u64 as i64, 1)); - assert_eq!(magic_s64(-3i64), make_ms64(0x5555555555555555u64 as i64, 1)); - assert_eq!(magic_s64(-2i64), make_ms64(0x7fffffffffffffffu64 as i64, 0)); - assert_eq!(magic_s64(2i64), make_ms64(0x8000000000000001u64 as i64, 0)); - assert_eq!(magic_s64(3i64), make_ms64(0x5555555555555556u64 as i64, 0)); - assert_eq!(magic_s64(4i64), make_ms64(0x8000000000000001u64 as i64, 1)); - assert_eq!(magic_s64(5i64), make_ms64(0x6666666666666667u64 as i64, 1)); - assert_eq!(magic_s64(6i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 0)); - assert_eq!(magic_s64(7i64), make_ms64(0x4924924924924925u64 as i64, 1)); - assert_eq!(magic_s64(9i64), make_ms64(0x1c71c71c71c71c72u64 as i64, 0)); - assert_eq!(magic_s64(10i64), make_ms64(0x6666666666666667u64 as i64, 2)); - assert_eq!(magic_s64(11i64), make_ms64(0x2e8ba2e8ba2e8ba3u64 as i64, 1)); - assert_eq!(magic_s64(12i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 1)); - assert_eq!(magic_s64(25i64), make_ms64(0xa3d70a3d70a3d70bu64 as i64, 4)); - assert_eq!( - magic_s64(125i64), - make_ms64(0x20c49ba5e353f7cfu64 as i64, 4) - ); - assert_eq!( - magic_s64(625i64), - make_ms64(0x346dc5d63886594bu64 as i64, 7) - ); - assert_eq!( - magic_s64(1337i64), - make_ms64(0x6208ceca9433509du64 as i64, 9) - ); - assert_eq!( - magic_s64(31415927i64), - make_ms64(0x88b68aa5ce1e97c3u64 as i64, 24) - ); - assert_eq!( - magic_s64(0x00000000deadbeefi64), - make_ms64(0x93275ab2dfc9094bu64 as i64, 31) - ); - assert_eq!( - magic_s64(0x00000000fffffffdi64), - make_ms64(0x8000000180000005u64 as i64, 31) - ); - assert_eq!( - magic_s64(0x00000000fffffffei64), - 
make_ms64(0x8000000100000003u64 as i64, 31) - ); - assert_eq!( - magic_s64(0x00000000ffffffffi64), - make_ms64(0x8000000080000001u64 as i64, 31) - ); - assert_eq!( - magic_s64(0x0000000100000000i64), - make_ms64(0x8000000000000001u64 as i64, 31) - ); - assert_eq!( - magic_s64(0x0000000100000001i64), - make_ms64(0x7fffffff80000001u64 as i64, 31) - ); - assert_eq!( - magic_s64(0x0ddc0ffeebadf00di64), - make_ms64(0x93c474e9ca5bbed1u64 as i64, 59) - ); - assert_eq!( - magic_s64(0x7ffffffffffffffdi64), - make_ms64(0x2000000000000001u64 as i64, 60) - ); - assert_eq!( - magic_s64(0x7ffffffffffffffei64), - make_ms64(0x8000000000000003u64 as i64, 62) - ); - assert_eq!( - magic_s64(0x7fffffffffffffffi64), - make_ms64(0x4000000000000001u64 as i64, 61) - ); - } - - #[test] - fn test_magic_generators_dont_panic() { - // The point of this is to check that the magic number generators - // don't panic with integer wraparounds, especially at boundary cases - // for their arguments. The actual results are thrown away, although - // we force `total` to be used, so that rustc can't optimise the - // entire computation away. 
- - // Testing UP magic_u32 - let mut total: u64 = 0; - for x in 2..(200 * 1000u32) { - let m = magic_u32(x); - total = total ^ (m.mul_by as u64); - total = total + (m.shift_by as u64); - total = total + (if m.do_add { 123 } else { 456 }); - } - assert_eq!(total, 2481999609); - - total = 0; - // Testing MIDPOINT magic_u32 - for x in 0x8000_0000u32 - 10 * 1000u32..0x8000_0000u32 + 10 * 1000u32 { - let m = magic_u32(x); - total = total ^ (m.mul_by as u64); - total = total + (m.shift_by as u64); - total = total + (if m.do_add { 123 } else { 456 }); - } - assert_eq!(total, 2399809723); - - total = 0; - // Testing DOWN magic_u32 - for x in 0..(200 * 1000u32) { - let m = magic_u32(0xFFFF_FFFFu32 - x); - total = total ^ (m.mul_by as u64); - total = total + (m.shift_by as u64); - total = total + (if m.do_add { 123 } else { 456 }); - } - assert_eq!(total, 271138267); - - // Testing UP magic_u64 - total = 0; - for x in 2..(200 * 1000u64) { - let m = magic_u64(x); - total = total ^ m.mul_by; - total = total + (m.shift_by as u64); - total = total + (if m.do_add { 123 } else { 456 }); - } - assert_eq!(total, 7430004086976261161); - - total = 0; - // Testing MIDPOINT magic_u64 - for x in 0x8000_0000_0000_0000u64 - 10 * 1000u64..0x8000_0000_0000_0000u64 + 10 * 1000u64 { - let m = magic_u64(x); - total = total ^ m.mul_by; - total = total + (m.shift_by as u64); - total = total + (if m.do_add { 123 } else { 456 }); - } - assert_eq!(total, 10312117246769520603); - - // Testing DOWN magic_u64 - total = 0; - for x in 0..(200 * 1000u64) { - let m = magic_u64(0xFFFF_FFFF_FFFF_FFFFu64 - x); - total = total ^ m.mul_by; - total = total + (m.shift_by as u64); - total = total + (if m.do_add { 123 } else { 456 }); - } - assert_eq!(total, 1126603594357269734); - - // Testing UP magic_s32 - total = 0; - for x in 0..(200 * 1000i32) { - let m = magic_s32(-0x8000_0000i32 + x); - total = total ^ (m.mul_by as u64); - total = total + (m.shift_by as u64); - } - assert_eq!(total, 18446744069953376812); 
- - total = 0; - // Testing MIDPOINT magic_s32 - for x in 0..(200 * 1000i32) { - let x2 = -100 * 1000i32 + x; - if x2 != -1 && x2 != 0 && x2 != 1 { - let m = magic_s32(x2); - total = total ^ (m.mul_by as u64); - total = total + (m.shift_by as u64); - } - } - assert_eq!(total, 351839350); - - // Testing DOWN magic_s32 - total = 0; - for x in 0..(200 * 1000i32) { - let m = magic_s32(0x7FFF_FFFFi32 - x); - total = total ^ (m.mul_by as u64); - total = total + (m.shift_by as u64); - } - assert_eq!(total, 18446744072916880714); - - // Testing UP magic_s64 - total = 0; - for x in 0..(200 * 1000i64) { - let m = magic_s64(-0x8000_0000_0000_0000i64 + x); - total = total ^ (m.mul_by as u64); - total = total + (m.shift_by as u64); - } - assert_eq!(total, 17929885647724831014); - - total = 0; - // Testing MIDPOINT magic_s64 - for x in 0..(200 * 1000i64) { - let x2 = -100 * 1000i64 + x; - if x2 != -1 && x2 != 0 && x2 != 1 { - let m = magic_s64(x2); - total = total ^ (m.mul_by as u64); - total = total + (m.shift_by as u64); - } - } - assert_eq!(total, 18106042338125661964); - - // Testing DOWN magic_s64 - total = 0; - for x in 0..(200 * 1000i64) { - let m = magic_s64(0x7FFF_FFFF_FFFF_FFFFi64 - x); - total = total ^ (m.mul_by as u64); - total = total + (m.shift_by as u64); - } - assert_eq!(total, 563301797155560970); - } - - #[test] - fn test_magic_generators_give_correct_numbers() { - // For a variety of values for both `n` and `d`, compute the magic - // numbers for `d`, and in effect interpret them so as to compute - // `n / d`. Check that that equals the value of `n / d` computed - // directly by the hardware. This serves to check that the magic - // number generates work properly. In total, 50,148,000 tests are - // done. 
- - // Some constants - const MIN_U32: u32 = 0; - const MAX_U32: u32 = 0xFFFF_FFFFu32; - const MAX_U32_HALF: u32 = 0x8000_0000u32; // more or less - - const MIN_S32: i32 = 0x8000_0000u32 as i32; - const MAX_S32: i32 = 0x7FFF_FFFFu32 as i32; - - const MIN_U64: u64 = 0; - const MAX_U64: u64 = 0xFFFF_FFFF_FFFF_FFFFu64; - const MAX_U64_HALF: u64 = 0x8000_0000_0000_0000u64; // ditto - - const MIN_S64: i64 = 0x8000_0000_0000_0000u64 as i64; - const MAX_S64: i64 = 0x7FFF_FFFF_FFFF_FFFFu64 as i64; - - // These generate reference results for signed/unsigned 32/64 bit - // division, rounding towards zero. - fn div_u32(x: u32, y: u32) -> u32 { - return x / y; - } - fn div_s32(x: i32, y: i32) -> i32 { - return x / y; - } - fn div_u64(x: u64, y: u64) -> u64 { - return x / y; - } - fn div_s64(x: i64, y: i64) -> i64 { - return x / y; - } - - // Returns the high half of a 32 bit unsigned widening multiply. - fn mulhw_u32(x: u32, y: u32) -> u32 { - let x64: u64 = x as u64; - let y64: u64 = y as u64; - let r64: u64 = x64 * y64; - (r64 >> 32) as u32 - } - - // Returns the high half of a 32 bit signed widening multiply. - fn mulhw_s32(x: i32, y: i32) -> i32 { - let x64: i64 = x as i64; - let y64: i64 = y as i64; - let r64: i64 = x64 * y64; - (r64 >> 32) as i32 - } - - // Returns the high half of a 64 bit unsigned widening multiply. - fn mulhw_u64(x: u64, y: u64) -> u64 { - let t0: u64 = x & 0xffffffffu64; - let t1: u64 = x >> 32; - let t2: u64 = y & 0xffffffffu64; - let t3: u64 = y >> 32; - let t4: u64 = t0 * t2; - let t5: u64 = t1 * t2 + (t4 >> 32); - let t6: u64 = t5 & 0xffffffffu64; - let t7: u64 = t5 >> 32; - let t8: u64 = t0 * t3 + t6; - let t9: u64 = t1 * t3 + t7 + (t8 >> 32); - t9 - } - - // Returns the high half of a 64 bit signed widening multiply. 
- fn mulhw_s64(x: i64, y: i64) -> i64 { - let t0: u64 = x as u64 & 0xffffffffu64; - let t1: i64 = x >> 32; - let t2: u64 = y as u64 & 0xffffffffu64; - let t3: i64 = y >> 32; - let t4: u64 = t0 * t2; - let t5: i64 = t1 * t2 as i64 + (t4 >> 32) as i64; - let t6: u64 = t5 as u64 & 0xffffffffu64; - let t7: i64 = t5 >> 32; - let t8: i64 = t0 as i64 * t3 + t6 as i64; - let t9: i64 = t1 * t3 + t7 + (t8 >> 32); - t9 - } - - // Compute the magic numbers for `d` and then use them to compute and - // check `n / d` for around 1000 values of `n`, using unsigned 32-bit - // division. - fn test_magic_u32_inner(d: u32, n_tests_done: &mut i32) { - // Advance the numerator (the `n` in `n / d`) so as to test - // densely near the range ends (and, in the signed variants, near - // zero) but not so densely away from those regions. - fn advance_n_u32(x: u32) -> u32 { - if x < MIN_U32 + 110 { - return x + 1; - } - if x < MIN_U32 + 1700 { - return x + 23; - } - if x < MAX_U32 - 1700 { - let xd: f64 = (x as f64) * 1.06415927; - return if xd >= (MAX_U32 - 1700) as f64 { - MAX_U32 - 1700 - } else { - xd as u32 - }; - } - if x < MAX_U32 - 110 { - return x + 23; - } - u32::wrapping_add(x, 1) - } - - let magic: MU32 = magic_u32(d); - let mut n: u32 = MIN_U32; - loop { - *n_tests_done += 1; - // Compute and check `q = n / d` using `magic`. - let mut q: u32 = mulhw_u32(n, magic.mul_by); - if magic.do_add { - assert!(magic.shift_by >= 1 && magic.shift_by <= 32); - let mut t: u32 = n - q; - t >>= 1; - t = t + q; - q = t >> (magic.shift_by - 1); - } else { - assert!(magic.shift_by >= 0 && magic.shift_by <= 31); - q >>= magic.shift_by; - } - - assert_eq!(q, div_u32(n, d)); - - n = advance_n_u32(n); - if n == MIN_U32 { - break; - } - } - } - - // Compute the magic numbers for `d` and then use them to compute and - // check `n / d` for around 1000 values of `n`, using signed 32-bit - // division. - fn test_magic_s32_inner(d: i32, n_tests_done: &mut i32) { - // See comment on advance_n_u32 above. 
- fn advance_n_s32(x: i32) -> i32 { - if x >= 0 && x <= 29 { - return x + 1; - } - if x < MIN_S32 + 110 { - return x + 1; - } - if x < MIN_S32 + 1700 { - return x + 23; - } - if x < MAX_S32 - 1700 { - let mut xd: f64 = x as f64; - xd = if xd < 0.0 { - xd / 1.06415927 - } else { - xd * 1.06415927 - }; - return if xd >= (MAX_S32 - 1700) as f64 { - MAX_S32 - 1700 - } else { - xd as i32 - }; - } - if x < MAX_S32 - 110 { - return x + 23; - } - if x == MAX_S32 { - return MIN_S32; - } - x + 1 - } - - let magic: MS32 = magic_s32(d); - let mut n: i32 = MIN_S32; - loop { - *n_tests_done += 1; - // Compute and check `q = n / d` using `magic`. - let mut q: i32 = mulhw_s32(n, magic.mul_by); - if d > 0 && magic.mul_by < 0 { - q = q + n; - } else if d < 0 && magic.mul_by > 0 { - q = q - n; - } - assert!(magic.shift_by >= 0 && magic.shift_by <= 31); - q = q >> magic.shift_by; - let mut t: u32 = q as u32; - t = t >> 31; - q = q + (t as i32); - - assert_eq!(q, div_s32(n, d)); - - n = advance_n_s32(n); - if n == MIN_S32 { - break; - } - } - } - - // Compute the magic numbers for `d` and then use them to compute and - // check `n / d` for around 1000 values of `n`, using unsigned 64-bit - // division. - fn test_magic_u64_inner(d: u64, n_tests_done: &mut i32) { - // See comment on advance_n_u32 above. - fn advance_n_u64(x: u64) -> u64 { - if x < MIN_U64 + 110 { - return x + 1; - } - if x < MIN_U64 + 1700 { - return x + 23; - } - if x < MAX_U64 - 1700 { - let xd: f64 = (x as f64) * 1.06415927; - return if xd >= (MAX_U64 - 1700) as f64 { - MAX_U64 - 1700 - } else { - xd as u64 - }; - } - if x < MAX_U64 - 110 { - return x + 23; - } - u64::wrapping_add(x, 1) - } - - let magic: MU64 = magic_u64(d); - let mut n: u64 = MIN_U64; - loop { - *n_tests_done += 1; - // Compute and check `q = n / d` using `magic`. 
- let mut q = mulhw_u64(n, magic.mul_by); - if magic.do_add { - assert!(magic.shift_by >= 1 && magic.shift_by <= 64); - let mut t: u64 = n - q; - t >>= 1; - t = t + q; - q = t >> (magic.shift_by - 1); - } else { - assert!(magic.shift_by >= 0 && magic.shift_by <= 63); - q >>= magic.shift_by; - } - - assert_eq!(q, div_u64(n, d)); - - n = advance_n_u64(n); - if n == MIN_U64 { - break; - } - } - } - - // Compute the magic numbers for `d` and then use them to compute and - // check `n / d` for around 1000 values of `n`, using signed 64-bit - // division. - fn test_magic_s64_inner(d: i64, n_tests_done: &mut i32) { - // See comment on advance_n_u32 above. - fn advance_n_s64(x: i64) -> i64 { - if x >= 0 && x <= 29 { - return x + 1; - } - if x < MIN_S64 + 110 { - return x + 1; - } - if x < MIN_S64 + 1700 { - return x + 23; - } - if x < MAX_S64 - 1700 { - let mut xd: f64 = x as f64; - xd = if xd < 0.0 { - xd / 1.06415927 - } else { - xd * 1.06415927 - }; - return if xd >= (MAX_S64 - 1700) as f64 { - MAX_S64 - 1700 - } else { - xd as i64 - }; - } - if x < MAX_S64 - 110 { - return x + 23; - } - if x == MAX_S64 { - return MIN_S64; - } - x + 1 - } - - let magic: MS64 = magic_s64(d); - let mut n: i64 = MIN_S64; - loop { - *n_tests_done += 1; - // Compute and check `q = n / d` using `magic`. */ - let mut q: i64 = mulhw_s64(n, magic.mul_by); - if d > 0 && magic.mul_by < 0 { - q = q + n; - } else if d < 0 && magic.mul_by > 0 { - q = q - n; - } - assert!(magic.shift_by >= 0 && magic.shift_by <= 63); - q = q >> magic.shift_by; - let mut t: u64 = q as u64; - t = t >> 63; - q = q + (t as i64); - - assert_eq!(q, div_s64(n, d)); - - n = advance_n_s64(n); - if n == MIN_S64 { - break; - } - } - } - - // Using all the above support machinery, actually run the tests. - - let mut n_tests_done: i32 = 0; - - // u32 division tests - { - // 2 .. 3k - let mut d: u32 = 2; - for _ in 0..3 * 1000 { - test_magic_u32_inner(d, &mut n_tests_done); - d += 1; - } - - // across the midpoint: midpoint - 3k .. 
midpoint + 3k - d = MAX_U32_HALF - 3 * 1000; - for _ in 0..2 * 3 * 1000 { - test_magic_u32_inner(d, &mut n_tests_done); - d += 1; - } - - // MAX_U32 - 3k .. MAX_U32 (in reverse order) - d = MAX_U32; - for _ in 0..3 * 1000 { - test_magic_u32_inner(d, &mut n_tests_done); - d -= 1; - } - } - - // s32 division tests - { - // MIN_S32 .. MIN_S32 + 3k - let mut d: i32 = MIN_S32; - for _ in 0..3 * 1000 { - test_magic_s32_inner(d, &mut n_tests_done); - d += 1; - } - - // -3k .. -2 (in reverse order) - d = -2; - for _ in 0..3 * 1000 { - test_magic_s32_inner(d, &mut n_tests_done); - d -= 1; - } - - // 2 .. 3k - d = 2; - for _ in 0..3 * 1000 { - test_magic_s32_inner(d, &mut n_tests_done); - d += 1; - } - - // MAX_S32 - 3k .. MAX_S32 (in reverse order) - d = MAX_S32; - for _ in 0..3 * 1000 { - test_magic_s32_inner(d, &mut n_tests_done); - d -= 1; - } - } - - // u64 division tests - { - // 2 .. 3k - let mut d: u64 = 2; - for _ in 0..3 * 1000 { - test_magic_u64_inner(d, &mut n_tests_done); - d += 1; - } - - // across the midpoint: midpoint - 3k .. midpoint + 3k - d = MAX_U64_HALF - 3 * 1000; - for _ in 0..2 * 3 * 1000 { - test_magic_u64_inner(d, &mut n_tests_done); - d += 1; - } - - // mAX_U64 - 3000 .. mAX_U64 (in reverse order) - d = MAX_U64; - for _ in 0..3 * 1000 { - test_magic_u64_inner(d, &mut n_tests_done); - d -= 1; - } - } - - // s64 division tests - { - // MIN_S64 .. MIN_S64 + 3k - let mut d: i64 = MIN_S64; - for _ in 0..3 * 1000 { - test_magic_s64_inner(d, &mut n_tests_done); - d += 1; - } - - // -3k .. -2 (in reverse order) - d = -2; - for _ in 0..3 * 1000 { - test_magic_s64_inner(d, &mut n_tests_done); - d -= 1; - } - - // 2 .. 3k - d = 2; - for _ in 0..3 * 1000 { - test_magic_s64_inner(d, &mut n_tests_done); - d += 1; - } - - // MAX_S64 - 3k .. 
MAX_S64 (in reverse order) - d = MAX_S64; - for _ in 0..3 * 1000 { - test_magic_s64_inner(d, &mut n_tests_done); - d -= 1; - } - } - assert_eq!(n_tests_done, 50_148_000); - } -} diff --git a/cranelift/codegen/src/fx.rs b/cranelift/codegen/src/fx.rs index 36eb62df9013..bb1a9e59e6c6 100644 --- a/cranelift/codegen/src/fx.rs +++ b/cranelift/codegen/src/fx.rs @@ -23,11 +23,6 @@ pub fn FxHashMap() -> FxHashMap { HashMap::default() } -#[allow(non_snake_case)] -pub fn FxHashSet() -> FxHashSet { - HashSet::default() -} - /// A speedy hash algorithm for use within rustc. The hashmap in liballoc /// by default uses SipHash which isn't quite as speedy as we want. In the /// compiler we're not really worried about DOS attempts, so we use a fast diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 39c0f14809bb..d9f917cc5bfd 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -102,21 +102,17 @@ mod constant_hash; mod context; mod ctxhash; mod dce; -mod divconst_magic_numbers; mod egraph; mod fx; mod inst_predicates; mod isle_prelude; mod iterators; mod legalizer; -mod licm; mod nan_canonicalization; mod opts; mod remove_constant_phis; mod result; mod scoped_hash_map; -mod simple_gvn; -mod simple_preopt; mod unionfind; mod unreachable_code; mod value_label; diff --git a/cranelift/codegen/src/licm.rs b/cranelift/codegen/src/licm.rs deleted file mode 100644 index 9f543392cd1c..000000000000 --- a/cranelift/codegen/src/licm.rs +++ /dev/null @@ -1,241 +0,0 @@ -//! 
A Loop Invariant Code Motion optimization pass - -use crate::cursor::{Cursor, FuncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::entity::{EntityList, ListPool}; -use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; -use crate::fx::FxHashSet; -use crate::ir::{ - Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value, -}; -use crate::loop_analysis::{Loop, LoopAnalysis}; -use crate::timing; -use alloc::vec::Vec; - -/// Performs the LICM pass by detecting loops within the CFG and moving -/// loop-invariant instructions out of them. -/// Changes the CFG and domtree in-place during the operation. -pub fn do_licm( - func: &mut Function, - cfg: &mut ControlFlowGraph, - domtree: &mut DominatorTree, - loop_analysis: &mut LoopAnalysis, -) { - let _tt = timing::licm(); - debug_assert!(cfg.is_valid()); - debug_assert!(domtree.is_valid()); - debug_assert!(loop_analysis.is_valid()); - - for lp in loop_analysis.loops() { - // For each loop that we want to optimize we determine the set of loop-invariant - // instructions - let invariant_insts = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis); - // Then we create the loop's pre-header and fill it with the invariant instructions - // Then we remove the invariant instructions from the loop body - if !invariant_insts.is_empty() { - // If the loop has a natural pre-header we use it, otherwise we create it. - let mut pos; - match has_pre_header(&func.layout, cfg, domtree, loop_analysis.loop_header(lp)) { - None => { - let pre_header = - create_pre_header(loop_analysis.loop_header(lp), func, cfg, domtree); - pos = FuncCursor::new(func).at_last_inst(pre_header); - } - // If there is a natural pre-header we insert new instructions just before the - // related jumping instruction (which is not necessarily at the end). 
- Some((_, last_inst)) => { - pos = FuncCursor::new(func).at_inst(last_inst); - } - }; - // The last instruction of the pre-header is the termination instruction (usually - // a jump) so we need to insert just before this. - for inst in invariant_insts { - pos.insert_inst(inst); - } - } - } - // We have to recompute the domtree to account for the changes - cfg.compute(func); - domtree.compute(func, cfg); -} - -/// Insert a pre-header before the header, modifying the function layout and CFG to reflect it. -/// A jump instruction to the header is placed at the end of the pre-header. -fn create_pre_header( - header: Block, - func: &mut Function, - cfg: &mut ControlFlowGraph, - domtree: &DominatorTree, -) -> Block { - let pool = &mut ListPool::::new(); - let header_args_values = func.dfg.block_params(header).to_vec(); - let header_args_types: Vec = header_args_values - .into_iter() - .map(|val| func.dfg.value_type(val)) - .collect(); - let pre_header = func.dfg.make_block(); - let mut pre_header_args_value: EntityList = EntityList::new(); - for typ in header_args_types { - pre_header_args_value.push(func.dfg.append_block_param(pre_header, typ), pool); - } - - for BlockPredecessor { - inst: last_inst, .. - } in cfg.pred_iter(header) - { - // We only follow normal edges (not the back edges) - if !domtree.dominates(header, last_inst, &func.layout) { - func.rewrite_branch_destination(last_inst, header, pre_header); - } - } - - // Inserts the pre-header at the right place in the layout. - let mut pos = FuncCursor::new(func).at_top(header); - pos.insert_block(pre_header); - pos.next_inst(); - pos.ins().jump(header, pre_header_args_value.as_slice(pool)); - - pre_header -} - -/// Detects if a loop header has a natural pre-header. -/// -/// A loop header has a pre-header if there is only one predecessor that the header doesn't -/// dominate. -/// Returns the pre-header Block and the instruction jumping to the header. 
-fn has_pre_header( - layout: &Layout, - cfg: &ControlFlowGraph, - domtree: &DominatorTree, - header: Block, -) -> Option<(Block, Inst)> { - let mut result = None; - for BlockPredecessor { - block: pred_block, - inst: branch_inst, - } in cfg.pred_iter(header) - { - // We only count normal edges (not the back edges) - if !domtree.dominates(header, branch_inst, layout) { - if result.is_some() { - // We have already found one, there are more than one - return None; - } - if branch_inst != layout.last_inst(pred_block).unwrap() - || cfg.succ_iter(pred_block).nth(1).is_some() - { - // It's along a critical edge, so don't use it. - return None; - } - result = Some((pred_block, branch_inst)); - } - } - result -} - -/// Test whether the given opcode is unsafe to even consider for LICM. -fn trivially_unsafe_for_licm(opcode: Opcode) -> bool { - opcode.can_store() - || opcode.is_call() - || opcode.is_branch() - || opcode.is_terminator() - || opcode.is_return() - || opcode.can_trap() - || opcode.other_side_effects() -} - -fn is_unsafe_load(inst_data: &InstructionData) -> bool { - match *inst_data { - InstructionData::Load { flags, .. } => !flags.readonly() || !flags.notrap(), - _ => inst_data.opcode().can_load(), - } -} - -/// Test whether the given instruction is loop-invariant. -fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet) -> bool { - if trivially_unsafe_for_licm(dfg.insts[inst].opcode()) { - return false; - } - - if is_unsafe_load(&dfg.insts[inst]) { - return false; - } - - for arg in dfg.inst_values(inst) { - let arg = dfg.resolve_aliases(arg); - if loop_values.contains(&arg) { - return false; - } - } - true -} - -/// Traverses a loop in reverse post-order from a header block and identify loop-invariant -/// instructions. These loop-invariant instructions are then removed from the code and returned -/// (in reverse post-order) for later use. 
-fn remove_loop_invariant_instructions( - lp: Loop, - func: &mut Function, - cfg: &ControlFlowGraph, - loop_analysis: &LoopAnalysis, -) -> Vec { - let mut loop_values: FxHashSet = FxHashSet(); - let mut invariant_insts: Vec = Vec::new(); - let mut pos = FuncCursor::new(func); - // We traverse the loop block in reverse post-order. - for block in postorder_blocks_loop(loop_analysis, cfg, lp).iter().rev() { - // Arguments of the block are loop values - for val in pos.func.dfg.block_params(*block) { - loop_values.insert(*val); - } - pos.goto_top(*block); - #[cfg_attr(feature = "cargo-clippy", allow(clippy::block_in_if_condition_stmt))] - while let Some(inst) = pos.next_inst() { - if is_loop_invariant(inst, &pos.func.dfg, &loop_values) { - // If all the instruction's argument are defined outside the loop - // then this instruction is loop-invariant - invariant_insts.push(inst); - // We remove it from the loop - pos.remove_inst_and_step_back(); - } else { - // If the instruction is not loop-invariant we push its results in the set of - // loop values - for out in pos.func.dfg.inst_results(inst) { - loop_values.insert(*out); - } - } - } - } - invariant_insts -} - -/// Return blocks from a loop in post-order, starting from an entry point in the block. -fn postorder_blocks_loop( - loop_analysis: &LoopAnalysis, - cfg: &ControlFlowGraph, - lp: Loop, -) -> Vec { - let mut grey = FxHashSet(); - let mut black = FxHashSet(); - let mut stack = vec![loop_analysis.loop_header(lp)]; - let mut postorder = Vec::new(); - - while !stack.is_empty() { - let node = stack.pop().unwrap(); - if !grey.contains(&node) { - // This is a white node. Mark it as gray. - grey.insert(node); - stack.push(node); - // Get any children we've never seen before. 
- for child in cfg.succ_iter(node) { - if loop_analysis.is_in_loop(child, lp) && !grey.contains(&child) { - stack.push(child); - } - } - } else if !black.contains(&node) { - postorder.push(node); - black.insert(node); - } - } - postorder -} diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 867102c32d6a..ce4bdd241dd5 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -128,29 +128,6 @@ macro_rules! isle_lower_prelude_methods { #[inline] fn put_in_regs(&mut self, val: Value) -> ValueRegs { - // If the value is a constant, then (re)materialize it at each - // use. This lowers register pressure. (Only do this if we are - // not using egraph-based compilation; the egraph framework - // more efficiently rematerializes constants where needed.) - if !(self.backend.flags().use_egraphs() - && self.backend.flags().opt_level() != OptLevel::None) - { - let inputs = self.lower_ctx.get_value_as_source_or_const(val); - if inputs.constant.is_some() { - let insn = match inputs.inst { - InputSourceInst::UniqueUse(insn, 0) => Some(insn), - InputSourceInst::Use(insn, 0) => Some(insn), - _ => None, - }; - if let Some(insn) = insn { - if let Some(regs) = self.backend.lower(self.lower_ctx, insn) { - assert!(regs.len() == 1); - return regs[0]; - } - } - } - } - self.lower_ctx.put_value_in_regs(val) } diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs index 44d2504dff75..d48e1cbaa79f 100644 --- a/cranelift/codegen/src/settings.rs +++ b/cranelift/codegen/src/settings.rs @@ -528,7 +528,6 @@ probestack_strategy = "outline" regalloc_checker = false regalloc_verbose_logs = false enable_alias_analysis = true -use_egraphs = true enable_verifier = true is_pic = false use_colocated_libcalls = false diff --git a/cranelift/codegen/src/simple_gvn.rs b/cranelift/codegen/src/simple_gvn.rs deleted file mode 100644 index 6b09ae96b226..000000000000 --- 
a/cranelift/codegen/src/simple_gvn.rs +++ /dev/null @@ -1,149 +0,0 @@ -//! A simple GVN pass. - -use crate::cursor::{Cursor, FuncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::ir::{Function, Inst, InstructionData, Opcode, Type}; -use crate::scoped_hash_map::ScopedHashMap; -use crate::timing; -use alloc::vec::Vec; -use core::cell::{Ref, RefCell}; -use core::hash::{Hash, Hasher}; - -/// Test whether the given opcode is unsafe to even consider for GVN. -fn trivially_unsafe_for_gvn(opcode: Opcode) -> bool { - opcode.is_call() - || opcode.is_branch() - || opcode.is_terminator() - || opcode.is_return() - || opcode.can_store() - || (opcode.can_trap() && !opcode.side_effects_idempotent()) - || (opcode.other_side_effects() && !opcode.side_effects_idempotent()) -} - -/// Test that, if the specified instruction is a load, it doesn't have the `readonly` memflag. -fn is_load_and_not_readonly(inst_data: &InstructionData) -> bool { - match *inst_data { - InstructionData::Load { flags, .. } => !flags.readonly(), - _ => inst_data.opcode().can_load(), - } -} - -/// Wrapper around `InstructionData` which implements `Eq` and `Hash` -#[derive(Clone)] -struct HashKey<'a, 'f: 'a> { - inst: InstructionData, - ty: Type, - pos: &'a RefCell>, -} -impl<'a, 'f: 'a> Hash for HashKey<'a, 'f> { - fn hash(&self, state: &mut H) { - let pool = &self.pos.borrow().func.dfg.value_lists; - self.inst.hash(state, pool, |value| value); - self.ty.hash(state); - } -} -impl<'a, 'f: 'a> PartialEq for HashKey<'a, 'f> { - fn eq(&self, other: &Self) -> bool { - let pool = &self.pos.borrow().func.dfg.value_lists; - self.inst.eq(&other.inst, pool, |value| value) && self.ty == other.ty - } -} -impl<'a, 'f: 'a> Eq for HashKey<'a, 'f> {} - -/// Perform simple GVN on `func`. -/// -pub fn do_simple_gvn(func: &mut Function, domtree: &mut DominatorTree) { - let _tt = timing::gvn(); - debug_assert!(domtree.is_valid()); - - // Visit blocks in a reverse post-order. 
- // - // The RefCell here is a bit ugly since the HashKeys in the ScopedHashMap - // need a reference to the function. - let pos = RefCell::new(FuncCursor::new(func)); - - let mut visible_values: ScopedHashMap = ScopedHashMap::new(); - let mut scope_stack: Vec = Vec::new(); - - for &block in domtree.cfg_postorder().iter().rev() { - { - // Pop any scopes that we just exited. - let layout = &pos.borrow().func.layout; - loop { - if let Some(current) = scope_stack.last() { - if domtree.dominates(*current, block, layout) { - break; - } - } else { - break; - } - scope_stack.pop(); - visible_values.decrement_depth(); - } - - // Push a scope for the current block. - scope_stack.push(layout.first_inst(block).unwrap()); - visible_values.increment_depth(); - } - - pos.borrow_mut().goto_top(block); - while let Some(inst) = { - let mut pos = pos.borrow_mut(); - pos.next_inst() - } { - // Resolve aliases, particularly aliases we created earlier. - pos.borrow_mut().func.dfg.resolve_aliases_in_arguments(inst); - - let func = Ref::map(pos.borrow(), |pos| &pos.func); - - let opcode = func.dfg.insts[inst].opcode(); - - if opcode.is_branch() && !opcode.is_terminator() { - scope_stack.push(func.layout.next_inst(inst).unwrap()); - visible_values.increment_depth(); - } - - if trivially_unsafe_for_gvn(opcode) { - continue; - } - - // These are split up to separate concerns. 
- if is_load_and_not_readonly(&func.dfg.insts[inst]) { - continue; - } - - let ctrl_typevar = func.dfg.ctrl_typevar(inst); - let key = HashKey { - inst: func.dfg.insts[inst], - ty: ctrl_typevar, - pos: &pos, - }; - use crate::scoped_hash_map::Entry::*; - match visible_values.entry(key) { - Occupied(entry) => { - #[allow(clippy::debug_assert_with_mut_call)] - { - // Clippy incorrectly believes `&func.layout` should not be used here: - // https://github.com/rust-lang/rust-clippy/issues/4737 - debug_assert!(domtree.dominates(*entry.get(), inst, &func.layout)); - } - - // If the redundant instruction is representing the current - // scope, pick a new representative. - let old = scope_stack.last_mut().unwrap(); - if *old == inst { - *old = func.layout.next_inst(inst).unwrap(); - } - // Replace the redundant instruction and remove it. - drop(func); - let mut pos = pos.borrow_mut(); - pos.func.dfg.replace_with_aliases(inst, *entry.get()); - pos.remove_inst_and_step_back(); - } - Vacant(entry) => { - entry.insert(inst); - } - } - } - } -} diff --git a/cranelift/codegen/src/simple_preopt.rs b/cranelift/codegen/src/simple_preopt.rs deleted file mode 100644 index f1e05d7e74cd..000000000000 --- a/cranelift/codegen/src/simple_preopt.rs +++ /dev/null @@ -1,796 +0,0 @@ -//! A pre-legalization rewriting pass. -//! -//! This module provides early-stage optimizations. The optimizations found -//! should be useful for already well-optimized code. 
- -use crate::cursor::{Cursor, FuncCursor}; -use crate::divconst_magic_numbers::{magic_s32, magic_s64, magic_u32, magic_u64}; -use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64}; -use crate::ir::{ - condcodes::IntCC, - instructions::Opcode, - types::{I128, I32, I64}, - DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value, -}; -use crate::isa::TargetIsa; -use crate::timing; - -#[inline] -/// Replaces the unique result of the instruction inst to an alias of the given value, and -/// replaces the instruction with a nop. Can be used only on instructions producing one unique -/// result, otherwise will assert. -fn replace_single_result_with_alias(dfg: &mut DataFlowGraph, inst: Inst, value: Value) { - // Replace the result value by an alias. - let results = dfg.detach_results(inst); - debug_assert!(results.len(&dfg.value_lists) == 1); - let result = results.get(0, &dfg.value_lists).unwrap(); - dfg.change_to_alias(result, value); - - // Replace instruction by a nop. - dfg.replace(inst).nop(); -} - -//---------------------------------------------------------------------- -// -// Pattern-match helpers and transformation for div and rem by constants. - -// Simple math helpers - -/// if `x` is a power of two, or the negation thereof, return the power along -/// with a boolean that indicates whether `x` is negative. Else return None. -#[inline] -fn i32_is_power_of_two(x: i32) -> Option<(bool, u32)> { - // We have to special-case this because abs(x) isn't representable. - if x == -0x8000_0000 { - return Some((true, 31)); - } - let abs_x = i32::wrapping_abs(x) as u32; - if abs_x.is_power_of_two() { - return Some((x < 0, abs_x.trailing_zeros())); - } - None -} - -/// Same comments as for i32_is_power_of_two apply. -#[inline] -fn i64_is_power_of_two(x: i64) -> Option<(bool, u32)> { - // We have to special-case this because abs(x) isn't representable. 
- if x == -0x8000_0000_0000_0000 { - return Some((true, 63)); - } - let abs_x = i64::wrapping_abs(x) as u64; - if abs_x.is_power_of_two() { - return Some((x < 0, abs_x.trailing_zeros())); - } - None -} - -/// Representation of an instruction that can be replaced by a single division/remainder operation -/// between a left Value operand and a right immediate operand. -#[derive(Debug)] -enum DivRemByConstInfo { - DivU32(Value, u32), - DivU64(Value, u64), - DivS32(Value, i32), - DivS64(Value, i64), - RemU32(Value, u32), - RemU64(Value, u64), - RemS32(Value, i32), - RemS64(Value, i64), -} - -/// Possibly create a DivRemByConstInfo from the given components, by figuring out which, if any, -/// of the 8 cases apply, and also taking care to sanity-check the immediate. -fn package_up_divrem_info( - value: Value, - value_type: Type, - imm_i64: i64, - is_signed: bool, - is_rem: bool, -) -> Option { - let imm_u64 = imm_i64 as u64; - - match (is_signed, value_type) { - (false, I32) => { - if imm_u64 < 0x1_0000_0000 { - if is_rem { - Some(DivRemByConstInfo::RemU32(value, imm_u64 as u32)) - } else { - Some(DivRemByConstInfo::DivU32(value, imm_u64 as u32)) - } - } else { - None - } - } - - (false, I64) => { - // unsigned 64, no range constraint. - if is_rem { - Some(DivRemByConstInfo::RemU64(value, imm_u64)) - } else { - Some(DivRemByConstInfo::DivU64(value, imm_u64)) - } - } - - (true, I32) => { - if imm_u64 <= 0x7fff_ffff || imm_u64 >= 0xffff_ffff_8000_0000 { - if is_rem { - Some(DivRemByConstInfo::RemS32(value, imm_u64 as i32)) - } else { - Some(DivRemByConstInfo::DivS32(value, imm_u64 as i32)) - } - } else { - None - } - } - - (true, I64) => { - // signed 64, no range constraint. 
- if is_rem { - Some(DivRemByConstInfo::RemS64(value, imm_u64 as i64)) - } else { - Some(DivRemByConstInfo::DivS64(value, imm_u64 as i64)) - } - } - - _ => None, - } -} - -/// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands, -/// signedness, operation size and div-vs-rem-ness in a handy bundle. -fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option { - if let InstructionData::BinaryImm64 { opcode, arg, imm } = dfg.insts[inst] { - let (is_signed, is_rem) = match opcode { - Opcode::UdivImm => (false, false), - Opcode::UremImm => (false, true), - Opcode::SdivImm => (true, false), - Opcode::SremImm => (true, true), - _ => return None, - }; - return package_up_divrem_info(arg, dfg.value_type(arg), imm.into(), is_signed, is_rem); - } - - None -} - -/// Actually do the transformation given a bundle containing the relevant information. -/// `divrem_info` describes a div or rem by a constant, that `pos` currently points at, and `inst` -/// is the associated instruction. `inst` is replaced by a sequence of other operations that -/// calculate the same result. Note that there are various `divrem_info` cases where we cannot do -/// any transformation, in which case `inst` is left unchanged. 
-fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) { - let is_rem = match *divrem_info { - DivRemByConstInfo::DivU32(_, _) - | DivRemByConstInfo::DivU64(_, _) - | DivRemByConstInfo::DivS32(_, _) - | DivRemByConstInfo::DivS64(_, _) => false, - DivRemByConstInfo::RemU32(_, _) - | DivRemByConstInfo::RemU64(_, _) - | DivRemByConstInfo::RemS32(_, _) - | DivRemByConstInfo::RemS64(_, _) => true, - }; - - match *divrem_info { - // -------------------- U32 -------------------- - - // U32 div, rem by zero: ignore - DivRemByConstInfo::DivU32(_n1, 0) | DivRemByConstInfo::RemU32(_n1, 0) => {} - - // U32 div by 1: identity - // U32 rem by 1: zero - DivRemByConstInfo::DivU32(n1, 1) | DivRemByConstInfo::RemU32(n1, 1) => { - if is_rem { - pos.func.dfg.replace(inst).iconst(I32, 0); - } else { - replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); - } - } - - // U32 div, rem by a power-of-2 - DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d) - if d.is_power_of_two() => - { - debug_assert!(d >= 2); - // compute k where d == 2^k - let k = d.trailing_zeros(); - debug_assert!(k >= 1 && k <= 31); - if is_rem { - let mask = (1u64 << k) - 1; - pos.func.dfg.replace(inst).band_imm(n1, mask as i64); - } else { - pos.func.dfg.replace(inst).ushr_imm(n1, k as i64); - } - } - - // U32 div, rem by non-power-of-2 - DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d) => { - debug_assert!(d >= 3); - let MU32 { - mul_by, - do_add, - shift_by, - } = magic_u32(d); - let qf; // final quotient - let q0 = pos.ins().iconst(I32, mul_by as i64); - let q1 = pos.ins().umulhi(n1, q0); - if do_add { - debug_assert!(shift_by >= 1 && shift_by <= 32); - let t1 = pos.ins().isub(n1, q1); - let t2 = pos.ins().ushr_imm(t1, 1); - let t3 = pos.ins().iadd(t2, q1); - // I never found any case where shift_by == 1 here. - // So there's no attempt to fold out a zero shift. 
- debug_assert_ne!(shift_by, 1); - qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64); - } else { - debug_assert!(shift_by >= 0 && shift_by <= 31); - // Whereas there are known cases here for shift_by == 0. - if shift_by > 0 { - qf = pos.ins().ushr_imm(q1, shift_by as i64); - } else { - qf = q1; - } - } - // Now qf holds the final quotient. If necessary calculate the - // remainder instead. - if is_rem { - let tt = pos.ins().imul_imm(qf, d as i64); - pos.func.dfg.replace(inst).isub(n1, tt); - } else { - replace_single_result_with_alias(&mut pos.func.dfg, inst, qf); - } - } - - // -------------------- U64 -------------------- - - // U64 div, rem by zero: ignore - DivRemByConstInfo::DivU64(_n1, 0) | DivRemByConstInfo::RemU64(_n1, 0) => {} - - // U64 div by 1: identity - // U64 rem by 1: zero - DivRemByConstInfo::DivU64(n1, 1) | DivRemByConstInfo::RemU64(n1, 1) => { - if is_rem { - pos.func.dfg.replace(inst).iconst(I64, 0); - } else { - replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); - } - } - - // U64 div, rem by a power-of-2 - DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d) - if d.is_power_of_two() => - { - debug_assert!(d >= 2); - // compute k where d == 2^k - let k = d.trailing_zeros(); - debug_assert!(k >= 1 && k <= 63); - if is_rem { - let mask = (1u64 << k) - 1; - pos.func.dfg.replace(inst).band_imm(n1, mask as i64); - } else { - pos.func.dfg.replace(inst).ushr_imm(n1, k as i64); - } - } - - // U64 div, rem by non-power-of-2 - DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d) => { - debug_assert!(d >= 3); - let MU64 { - mul_by, - do_add, - shift_by, - } = magic_u64(d); - let qf; // final quotient - let q0 = pos.ins().iconst(I64, mul_by as i64); - let q1 = pos.ins().umulhi(n1, q0); - if do_add { - debug_assert!(shift_by >= 1 && shift_by <= 64); - let t1 = pos.ins().isub(n1, q1); - let t2 = pos.ins().ushr_imm(t1, 1); - let t3 = pos.ins().iadd(t2, q1); - // I never found any case where shift_by == 1 here. 
- // So there's no attempt to fold out a zero shift. - debug_assert_ne!(shift_by, 1); - qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64); - } else { - debug_assert!(shift_by >= 0 && shift_by <= 63); - // Whereas there are known cases here for shift_by == 0. - if shift_by > 0 { - qf = pos.ins().ushr_imm(q1, shift_by as i64); - } else { - qf = q1; - } - } - // Now qf holds the final quotient. If necessary calculate the - // remainder instead. - if is_rem { - let tt = pos.ins().imul_imm(qf, d as i64); - pos.func.dfg.replace(inst).isub(n1, tt); - } else { - replace_single_result_with_alias(&mut pos.func.dfg, inst, qf); - } - } - - // -------------------- S32 -------------------- - - // S32 div, rem by zero or -1: ignore - DivRemByConstInfo::DivS32(_n1, -1) - | DivRemByConstInfo::RemS32(_n1, -1) - | DivRemByConstInfo::DivS32(_n1, 0) - | DivRemByConstInfo::RemS32(_n1, 0) => {} - - // S32 div by 1: identity - // S32 rem by 1: zero - DivRemByConstInfo::DivS32(n1, 1) | DivRemByConstInfo::RemS32(n1, 1) => { - if is_rem { - pos.func.dfg.replace(inst).iconst(I32, 0); - } else { - replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); - } - } - - DivRemByConstInfo::DivS32(n1, d) | DivRemByConstInfo::RemS32(n1, d) => { - if let Some((is_negative, k)) = i32_is_power_of_two(d) { - // k can be 31 only in the case that d is -2^31. - debug_assert!(k >= 1 && k <= 31); - let t1 = if k - 1 == 0 { - n1 - } else { - pos.ins().sshr_imm(n1, (k - 1) as i64) - }; - let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64); - let t3 = pos.ins().iadd(n1, t2); - if is_rem { - // S32 rem by a power-of-2 - let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64); - // Curiously, we don't care here what the sign of d is. 
- pos.func.dfg.replace(inst).isub(n1, t4); - } else { - // S32 div by a power-of-2 - let t4 = pos.ins().sshr_imm(t3, k as i64); - if is_negative { - pos.func.dfg.replace(inst).irsub_imm(t4, 0); - } else { - replace_single_result_with_alias(&mut pos.func.dfg, inst, t4); - } - } - } else { - // S32 div, rem by a non-power-of-2 - debug_assert!(d < -2 || d > 2); - let MS32 { mul_by, shift_by } = magic_s32(d); - let q0 = pos.ins().iconst(I32, mul_by as i64); - let q1 = pos.ins().smulhi(n1, q0); - let q2 = if d > 0 && mul_by < 0 { - pos.ins().iadd(q1, n1) - } else if d < 0 && mul_by > 0 { - pos.ins().isub(q1, n1) - } else { - q1 - }; - debug_assert!(shift_by >= 0 && shift_by <= 31); - let q3 = if shift_by == 0 { - q2 - } else { - pos.ins().sshr_imm(q2, shift_by as i64) - }; - let t1 = pos.ins().ushr_imm(q3, 31); - let qf = pos.ins().iadd(q3, t1); - // Now qf holds the final quotient. If necessary calculate - // the remainder instead. - if is_rem { - let tt = pos.ins().imul_imm(qf, d as i64); - pos.func.dfg.replace(inst).isub(n1, tt); - } else { - replace_single_result_with_alias(&mut pos.func.dfg, inst, qf); - } - } - } - - // -------------------- S64 -------------------- - - // S64 div, rem by zero or -1: ignore - DivRemByConstInfo::DivS64(_n1, -1) - | DivRemByConstInfo::RemS64(_n1, -1) - | DivRemByConstInfo::DivS64(_n1, 0) - | DivRemByConstInfo::RemS64(_n1, 0) => {} - - // S64 div by 1: identity - // S64 rem by 1: zero - DivRemByConstInfo::DivS64(n1, 1) | DivRemByConstInfo::RemS64(n1, 1) => { - if is_rem { - pos.func.dfg.replace(inst).iconst(I64, 0); - } else { - replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); - } - } - - DivRemByConstInfo::DivS64(n1, d) | DivRemByConstInfo::RemS64(n1, d) => { - if let Some((is_negative, k)) = i64_is_power_of_two(d) { - // k can be 63 only in the case that d is -2^63. 
- debug_assert!(k >= 1 && k <= 63); - let t1 = if k - 1 == 0 { - n1 - } else { - pos.ins().sshr_imm(n1, (k - 1) as i64) - }; - let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64); - let t3 = pos.ins().iadd(n1, t2); - if is_rem { - // S64 rem by a power-of-2 - let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k)); - // Curiously, we don't care here what the sign of d is. - pos.func.dfg.replace(inst).isub(n1, t4); - } else { - // S64 div by a power-of-2 - let t4 = pos.ins().sshr_imm(t3, k as i64); - if is_negative { - pos.func.dfg.replace(inst).irsub_imm(t4, 0); - } else { - replace_single_result_with_alias(&mut pos.func.dfg, inst, t4); - } - } - } else { - // S64 div, rem by a non-power-of-2 - debug_assert!(d < -2 || d > 2); - let MS64 { mul_by, shift_by } = magic_s64(d); - let q0 = pos.ins().iconst(I64, mul_by); - let q1 = pos.ins().smulhi(n1, q0); - let q2 = if d > 0 && mul_by < 0 { - pos.ins().iadd(q1, n1) - } else if d < 0 && mul_by > 0 { - pos.ins().isub(q1, n1) - } else { - q1 - }; - debug_assert!(shift_by >= 0 && shift_by <= 63); - let q3 = if shift_by == 0 { - q2 - } else { - pos.ins().sshr_imm(q2, shift_by as i64) - }; - let t1 = pos.ins().ushr_imm(q3, 63); - let qf = pos.ins().iadd(q3, t1); - // Now qf holds the final quotient. If necessary calculate - // the remainder instead. 
- if is_rem { - let tt = pos.ins().imul_imm(qf, d); - pos.func.dfg.replace(inst).isub(n1, tt); - } else { - replace_single_result_with_alias(&mut pos.func.dfg, inst, qf); - } - } - } - } -} - -mod simplify { - use super::*; - use crate::ir::{ - dfg::ValueDef, - immediates, - instructions::Opcode, - types::{I16, I32, I8}, - }; - use std::marker::PhantomData; - - pub struct PeepholeOptimizer<'a, 'b> { - phantom: PhantomData<(&'a (), &'b ())>, - } - - pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> { - PeepholeOptimizer { - phantom: PhantomData, - } - } - - pub fn apply_all<'a, 'b>( - _optimizer: &mut PeepholeOptimizer<'a, 'b>, - pos: &mut FuncCursor<'a>, - inst: Inst, - native_word_width: u32, - ) { - simplify(pos, inst, native_word_width); - branch_opt(pos, inst); - } - - #[inline] - fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option { - if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) { - if let InstructionData::UnaryImm { - opcode: Opcode::Iconst, - imm, - } = dfg.insts[candidate_inst] - { - return Some(imm); - } - } - None - } - - /// Try to transform [(x << N) >> N] into a (un)signed-extending move. - /// Returns true if the final instruction has been converted to such a move. 
- fn try_fold_extended_move( - pos: &mut FuncCursor, - inst: Inst, - opcode: Opcode, - arg: Value, - imm: immediates::Imm64, - ) -> bool { - if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) { - if let InstructionData::BinaryImm64 { - opcode: Opcode::IshlImm, - arg: prev_arg, - imm: prev_imm, - } = &pos.func.dfg.insts[arg_inst] - { - if imm != *prev_imm { - return false; - } - - let dest_ty = pos.func.dfg.ctrl_typevar(inst); - if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() { - return false; - } - - let imm_bits: i64 = imm.into(); - let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) { - 8 => I8, - 16 => I16, - 32 => I32, - _ => return false, - }; - let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap(); - - // This becomes a no-op, since ireduce_ty has a smaller lane width than - // the argument type (also the destination type). - let arg = *prev_arg; - let narrower_arg = pos.ins().ireduce(ireduce_ty, arg); - - if opcode == Opcode::UshrImm { - pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg); - } else { - pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg); - } - return true; - } - } - false - } - - /// Apply basic simplifications. - /// - /// This folds constants with arithmetic to form `_imm` instructions, and other minor - /// simplifications. - /// - /// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the - /// controlling type's width of the instruction. This would result in an illegal instruction that - /// would likely be expanded back into an instruction on smaller types with the same initial - /// opcode, creating unnecessary churn. 
- fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) { - match pos.func.dfg.insts[inst] { - InstructionData::Binary { opcode, args } => { - if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) { - let new_opcode = match opcode { - Opcode::Iadd => Opcode::IaddImm, - Opcode::Imul => Opcode::ImulImm, - Opcode::Sdiv => Opcode::SdivImm, - Opcode::Udiv => Opcode::UdivImm, - Opcode::Srem => Opcode::SremImm, - Opcode::Urem => Opcode::UremImm, - Opcode::Band => Opcode::BandImm, - Opcode::Bor => Opcode::BorImm, - Opcode::Bxor => Opcode::BxorImm, - Opcode::Rotl => Opcode::RotlImm, - Opcode::Rotr => Opcode::RotrImm, - Opcode::Ishl => Opcode::IshlImm, - Opcode::Ushr => Opcode::UshrImm, - Opcode::Sshr => Opcode::SshrImm, - Opcode::Isub => { - imm = imm.wrapping_neg(); - Opcode::IaddImm - } - _ => return, - }; - let ty = pos.func.dfg.ctrl_typevar(inst); - if ty.bytes() <= native_word_width { - pos.func - .dfg - .replace(inst) - .BinaryImm64(new_opcode, ty, imm, args[0]); - - // Repeat for BinaryImm simplification. 
- simplify(pos, inst, native_word_width); - } - } else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) { - let new_opcode = match opcode { - Opcode::Iadd => Opcode::IaddImm, - Opcode::Imul => Opcode::ImulImm, - Opcode::Band => Opcode::BandImm, - Opcode::Bor => Opcode::BorImm, - Opcode::Bxor => Opcode::BxorImm, - Opcode::Isub => Opcode::IrsubImm, - _ => return, - }; - let ty = pos.func.dfg.ctrl_typevar(inst); - if ty.bytes() <= native_word_width { - pos.func - .dfg - .replace(inst) - .BinaryImm64(new_opcode, ty, imm, args[1]); - } - } - } - - InstructionData::BinaryImm64 { opcode, arg, imm } => { - let ty = pos.func.dfg.ctrl_typevar(inst); - - let mut arg = arg; - let mut imm = imm; - match opcode { - Opcode::IaddImm - | Opcode::ImulImm - | Opcode::BorImm - | Opcode::BandImm - | Opcode::BxorImm => { - // Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x) - if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) { - if let InstructionData::BinaryImm64 { - opcode: prev_opcode, - arg: prev_arg, - imm: prev_imm, - } = &pos.func.dfg.insts[arg_inst] - { - if opcode == *prev_opcode - && ty == pos.func.dfg.ctrl_typevar(arg_inst) - { - let lhs: i64 = imm.into(); - let rhs: i64 = (*prev_imm).into(); - let new_imm = match opcode { - Opcode::BorImm => lhs | rhs, - Opcode::BandImm => lhs & rhs, - Opcode::BxorImm => lhs ^ rhs, - Opcode::IaddImm => lhs.wrapping_add(rhs), - Opcode::ImulImm => lhs.wrapping_mul(rhs), - _ => panic!("can't happen"), - }; - let new_imm = immediates::Imm64::from(new_imm); - let new_arg = *prev_arg; - pos.func - .dfg - .replace(inst) - .BinaryImm64(opcode, ty, new_imm, new_arg); - imm = new_imm; - arg = new_arg; - } - } - } - } - - Opcode::UshrImm | Opcode::SshrImm => { - if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width - && try_fold_extended_move(pos, inst, opcode, arg, imm) - { - return; - } - } - - _ => {} - }; - - // Replace operations that are no-ops. 
- match (opcode, imm.into(), ty) { - (Opcode::IaddImm, 0, _) - | (Opcode::ImulImm, 1, _) - | (Opcode::SdivImm, 1, _) - | (Opcode::UdivImm, 1, _) - | (Opcode::BorImm, 0, _) - | (Opcode::BandImm, -1, _) - | (Opcode::BxorImm, 0, _) - | (Opcode::RotlImm, 0, _) - | (Opcode::RotrImm, 0, _) - | (Opcode::IshlImm, 0, _) - | (Opcode::UshrImm, 0, _) - | (Opcode::SshrImm, 0, _) => { - // Alias the result value with the original argument. - replace_single_result_with_alias(&mut pos.func.dfg, inst, arg); - } - (Opcode::ImulImm, 0, ty) | (Opcode::BandImm, 0, ty) if ty != I128 => { - // Replace by zero. - pos.func.dfg.replace(inst).iconst(ty, 0); - } - (Opcode::BorImm, -1, ty) if ty != I128 => { - // Replace by minus one. - pos.func.dfg.replace(inst).iconst(ty, -1); - } - _ => {} - } - } - - InstructionData::IntCompare { opcode, cond, args } => { - debug_assert_eq!(opcode, Opcode::Icmp); - if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) { - if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width { - pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm); - } - } - } - - _ => {} - } - } - - /// Fold comparisons into branch operations when possible. - /// - /// This matches against operations which compare against zero, then use the - /// result in a conditional branch. - fn branch_opt(pos: &mut FuncCursor, inst: Inst) { - let (cmp_arg, new_then, new_else) = if let InstructionData::Brif { - arg: first_arg, - blocks: [block_then, block_else], - .. 
- } = pos.func.dfg.insts[inst] - { - let icmp_inst = - if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) { - icmp_inst - } else { - return; - }; - - if let InstructionData::IntCompareImm { - opcode: Opcode::IcmpImm, - arg: cmp_arg, - cond: cmp_cond, - imm: cmp_imm, - } = pos.func.dfg.insts[icmp_inst] - { - let cmp_imm: i64 = cmp_imm.into(); - if cmp_imm != 0 { - return; - } - - let (new_then, new_else) = match cmp_cond { - IntCC::Equal => (block_else, block_then), - IntCC::NotEqual => (block_then, block_else), - _ => return, - }; - - (cmp_arg, new_then, new_else) - } else { - return; - } - } else { - return; - }; - - if let InstructionData::Brif { arg, blocks, .. } = &mut pos.func.dfg.insts[inst] { - *arg = cmp_arg; - blocks[0] = new_then; - blocks[1] = new_else; - } else { - unreachable!(); - } - } -} - -/// The main pre-opt pass. -pub fn do_preopt(func: &mut Function, isa: &dyn TargetIsa) { - let _tt = timing::preopt(); - - let mut pos = FuncCursor::new(func); - let native_word_width = isa.pointer_bytes() as u32; - let mut optimizer = simplify::peephole_optimizer(isa); - - while let Some(_) = pos.next_block() { - while let Some(inst) = pos.next_inst() { - simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width); - - // Try to transform divide-by-constant into simpler operations. - if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) { - do_divrem_transformation(&divrem_info, &mut pos, inst); - continue; - } - } - } -} diff --git a/cranelift/filetests/filetests/alias/simple-alias.clif b/cranelift/filetests/filetests/alias/simple-alias.clif index ba3722bdf7d5..2994373265a5 100644 --- a/cranelift/filetests/filetests/alias/simple-alias.clif +++ b/cranelift/filetests/filetests/alias/simple-alias.clif @@ -15,16 +15,15 @@ block0(v0: i64, v1: i32): v2 = global_value.i64 gv1 v3 = load.i32 v2+8 ;; This should reuse the load above. 
- v4 = global_value.i64 gv1 - v5 = load.i32 v4+8 + v5 = load.i32 v2+8 ; check: v5 -> v3 call fn0(v0) ;; The second load is redundant wrt the first, but the call above ;; is a barrier that prevents reusing v3 or v5. - v6 = load.i32 v4+8 - v7 = load.i32 v4+8 + v6 = load.i32 v2+8 + v7 = load.i32 v2+8 ; check: v7 -> v6 return v3, v5, v6, v7 @@ -44,8 +43,7 @@ block0(v0: i64, v1: i32): store.i32 v1, v2+8 ;; This load should pick up the store above. - v3 = global_value.i64 gv1 - v4 = load.i32 v3+8 + v4 = load.i32 v2+8 ; check: v4 -> v1 return v4 diff --git a/cranelift/filetests/filetests/egraph/algebraic.clif b/cranelift/filetests/filetests/egraph/algebraic.clif index 02e2456ed9a4..b31af79e9326 100644 --- a/cranelift/filetests/filetests/egraph/algebraic.clif +++ b/cranelift/filetests/filetests/egraph/algebraic.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 function %f0(i32) -> i32 { diff --git a/cranelift/filetests/filetests/egraph/alias_analysis.clif b/cranelift/filetests/filetests/egraph/alias_analysis.clif index 87bc5073638b..83fa31de618a 100644 --- a/cranelift/filetests/filetests/egraph/alias_analysis.clif +++ b/cranelift/filetests/filetests/egraph/alias_analysis.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 function %f(i64) -> i64 { diff --git a/cranelift/filetests/filetests/egraph/basic-gvn.clif b/cranelift/filetests/filetests/egraph/basic-gvn.clif index 3d74a31b1e52..a983df692a49 100644 --- a/cranelift/filetests/filetests/egraph/basic-gvn.clif +++ b/cranelift/filetests/filetests/egraph/basic-gvn.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 function %f(i32, i32) -> i32 { diff --git a/cranelift/filetests/filetests/egraph/bitselect.clif b/cranelift/filetests/filetests/egraph/bitselect.clif index 91797bb39777..51c3294583ea 100644 --- a/cranelift/filetests/filetests/egraph/bitselect.clif +++ 
b/cranelift/filetests/filetests/egraph/bitselect.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 target aarch64 target s390x diff --git a/cranelift/filetests/filetests/egraph/cprop-splat.clif b/cranelift/filetests/filetests/egraph/cprop-splat.clif index 549663480ed7..cae8ec1c5882 100644 --- a/cranelift/filetests/filetests/egraph/cprop-splat.clif +++ b/cranelift/filetests/filetests/egraph/cprop-splat.clif @@ -1,6 +1,5 @@ test optimize precise-output set opt_level=speed -set use_egraphs=true target x86_64 function %i8x16_1() -> i8x16 { diff --git a/cranelift/filetests/filetests/egraph/cprop.clif b/cranelift/filetests/filetests/egraph/cprop.clif index 736e6c5c2bec..8e1fe569ccbb 100644 --- a/cranelift/filetests/filetests/egraph/cprop.clif +++ b/cranelift/filetests/filetests/egraph/cprop.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 function %f0() -> i8 { diff --git a/cranelift/filetests/filetests/egraph/i128-opts.clif b/cranelift/filetests/filetests/egraph/i128-opts.clif index f30b80bd25c1..40ef77f76763 100644 --- a/cranelift/filetests/filetests/egraph/i128-opts.clif +++ b/cranelift/filetests/filetests/egraph/i128-opts.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 ; This it a regression test to ensure that we don't insert a iconst.i128 when optimizing bxor. diff --git a/cranelift/filetests/filetests/egraph/icmp-parameterized.clif b/cranelift/filetests/filetests/egraph/icmp-parameterized.clif index ec1679c2c2a1..315f42481009 100644 --- a/cranelift/filetests/filetests/egraph/icmp-parameterized.clif +++ b/cranelift/filetests/filetests/egraph/icmp-parameterized.clif @@ -1,6 +1,5 @@ test optimize precise-output set opt_level=speed -set use_egraphs=true target x86_64 ;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
diff --git a/cranelift/filetests/filetests/egraph/icmp.clif b/cranelift/filetests/filetests/egraph/icmp.clif index eda94bb31eec..af37fa443a50 100644 --- a/cranelift/filetests/filetests/egraph/icmp.clif +++ b/cranelift/filetests/filetests/egraph/icmp.clif @@ -1,6 +1,5 @@ test optimize precise-output set opt_level=speed -set use_egraphs=true target x86_64 ;; Masking the result of a comparison with 1 always results in the comparison diff --git a/cranelift/filetests/filetests/egraph/isplit.clif b/cranelift/filetests/filetests/egraph/isplit.clif index e40c32fef84a..8c964c15e617 100644 --- a/cranelift/filetests/filetests/egraph/isplit.clif +++ b/cranelift/filetests/filetests/egraph/isplit.clif @@ -1,7 +1,6 @@ test interpret test run set opt_level=speed -set use_egraphs=true set enable_llvm_abi_extensions=true target x86_64 target aarch64 diff --git a/cranelift/filetests/filetests/egraph/issue-5405.clif b/cranelift/filetests/filetests/egraph/issue-5405.clif index db6f582ec7bf..90071a18b59b 100644 --- a/cranelift/filetests/filetests/egraph/issue-5405.clif +++ b/cranelift/filetests/filetests/egraph/issue-5405.clif @@ -1,7 +1,6 @@ test interpret test run set opt_level=speed -set use_egraphs=true target aarch64 function %a(i64) -> i8 system_v { diff --git a/cranelift/filetests/filetests/egraph/issue-5417.clif b/cranelift/filetests/filetests/egraph/issue-5417.clif index 98cb16eac157..40f0e8256f84 100644 --- a/cranelift/filetests/filetests/egraph/issue-5417.clif +++ b/cranelift/filetests/filetests/egraph/issue-5417.clif @@ -1,6 +1,5 @@ test compile set opt_level=speed -set use_egraphs=true target x86_64 target aarch64 target s390x diff --git a/cranelift/filetests/filetests/egraph/issue-5437.clif b/cranelift/filetests/filetests/egraph/issue-5437.clif index d20d8d207279..46959ec379c8 100644 --- a/cranelift/filetests/filetests/egraph/issue-5437.clif +++ b/cranelift/filetests/filetests/egraph/issue-5437.clif @@ -1,6 +1,5 @@ test compile set opt_level=speed -set use_egraphs=true 
target x86_64 target aarch64 target s390x diff --git a/cranelift/filetests/filetests/egraph/licm.clif b/cranelift/filetests/filetests/egraph/licm.clif index 8d6a5ec329cb..f2f84c58302c 100644 --- a/cranelift/filetests/filetests/egraph/licm.clif +++ b/cranelift/filetests/filetests/egraph/licm.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 function %f(i32, i32) -> i32 { diff --git a/cranelift/filetests/filetests/egraph/make-icmp-parameterized-tests.sh b/cranelift/filetests/filetests/egraph/make-icmp-parameterized-tests.sh index 9913c528e20e..1b495080a27b 100755 --- a/cranelift/filetests/filetests/egraph/make-icmp-parameterized-tests.sh +++ b/cranelift/filetests/filetests/egraph/make-icmp-parameterized-tests.sh @@ -9,7 +9,6 @@ function main { cat << EOF > $out test optimize precise-output set opt_level=speed -set use_egraphs=true target x86_64 ;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! diff --git a/cranelift/filetests/filetests/egraph/misc.clif b/cranelift/filetests/filetests/egraph/misc.clif index 5eb42631c823..8ac3adc20933 100644 --- a/cranelift/filetests/filetests/egraph/misc.clif +++ b/cranelift/filetests/filetests/egraph/misc.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 function %stack_load(i64) -> i64 { diff --git a/cranelift/filetests/filetests/egraph/mul-pow-2.clif b/cranelift/filetests/filetests/egraph/mul-pow-2.clif index e81ae49364ea..f3cee9986c7d 100644 --- a/cranelift/filetests/filetests/egraph/mul-pow-2.clif +++ b/cranelift/filetests/filetests/egraph/mul-pow-2.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 function %f0(i32) -> i32 { diff --git a/cranelift/filetests/filetests/egraph/multivalue.clif b/cranelift/filetests/filetests/egraph/multivalue.clif index 65c34c477c29..664f5809178f 100644 --- a/cranelift/filetests/filetests/egraph/multivalue.clif +++ b/cranelift/filetests/filetests/egraph/multivalue.clif @@ 
-1,6 +1,5 @@ test compile precise-output set opt_level=speed -set use_egraphs=true set machine_code_cfg_info=true target x86_64 diff --git a/cranelift/filetests/filetests/egraph/not_a_load.clif b/cranelift/filetests/filetests/egraph/not_a_load.clif index 6f40dfecf690..7eac0e98e9fb 100644 --- a/cranelift/filetests/filetests/egraph/not_a_load.clif +++ b/cranelift/filetests/filetests/egraph/not_a_load.clif @@ -1,6 +1,5 @@ test compile precise-output set opt_level=speed -set use_egraphs=true target x86_64 ;; `atomic_rmw` is not a load, but it reports `true` to `.can_load()`. We want diff --git a/cranelift/filetests/filetests/egraph/remat.clif b/cranelift/filetests/filetests/egraph/remat.clif index 5d43c71febe2..861e4a8cdd9e 100644 --- a/cranelift/filetests/filetests/egraph/remat.clif +++ b/cranelift/filetests/filetests/egraph/remat.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 function %f(i32) -> i32 { diff --git a/cranelift/filetests/filetests/egraph/select.clif b/cranelift/filetests/filetests/egraph/select.clif index 12096ce8f180..e8a225b4de73 100644 --- a/cranelift/filetests/filetests/egraph/select.clif +++ b/cranelift/filetests/filetests/egraph/select.clif @@ -1,6 +1,5 @@ test optimize set opt_level=speed -set use_egraphs=true target x86_64 target aarch64 target s390x diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif index e3d689998897..116ab61a082c 100644 --- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif +++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif @@ -1,5 +1,4 @@ test compile precise-output -set use_egraphs=true set opt_level=speed target x86_64 diff --git a/cranelift/filetests/filetests/licm/basic.clif b/cranelift/filetests/filetests/licm/basic.clif deleted file mode 100644 index 60f2f3c2ea22..000000000000 --- a/cranelift/filetests/filetests/licm/basic.clif +++ /dev/null @@ -1,39 +0,0 @@ -test licm -target riscv32 - -function 
%simple_loop(i32) -> i32 { - -block0(v0: i32): - jump block1(v0) - -block1(v1: i32): - v2 = iconst.i32 1 - v3 = iconst.i32 2 - v4 = iadd v2, v3 - brif v1, block2, block3(v1) - -block2: - v5 = isub v1, v2 - jump block1(v5) - -block3(v6: i32): - return v6 - -} -; sameln: function %simple_loop -; nextln: block0(v0: i32): -; nextln: v2 = iconst.i32 1 -; nextln: v3 = iconst.i32 2 -; nextln: v4 = iadd v2, v3 -; nextln: jump block1(v0) -; nextln: -; nextln: block1(v1: i32): -; nextln: brif v1, block2, block3(v1) -; nextln: -; nextln: block2: -; nextln: v5 = isub.i32 v1, v2 -; nextln: jump block1(v5) -; nextln: -; nextln: block3(v6: i32): -; nextln: return v6 -; nextln: } diff --git a/cranelift/filetests/filetests/licm/br-table.clif b/cranelift/filetests/filetests/licm/br-table.clif deleted file mode 100644 index 3710f7421047..000000000000 --- a/cranelift/filetests/filetests/licm/br-table.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -set opt_level=speed_and_size -target aarch64 -target x86_64 - -function %br_table_opt() { - - block0: - v0 = iconst.i32 1 - br_table v0, block2, [block1, block2] - - block1: - return - - block2: - v1 = iconst.i32 1 - jump block2 - -} diff --git a/cranelift/filetests/filetests/licm/complex.clif b/cranelift/filetests/filetests/licm/complex.clif deleted file mode 100644 index 31efa820ae9e..000000000000 --- a/cranelift/filetests/filetests/licm/complex.clif +++ /dev/null @@ -1,88 +0,0 @@ -test licm -target riscv32 - -function %complex(i32) -> i32 system_v { -block0(v0: i32): -[UJ#1b] jump block1(v0) - - block1(v1: i32): - v2 = iconst.i32 1 - v3 = iconst.i32 4 - v4 = iadd v2, v1 -[SBzero#18] brif v1, block4(v4), block2(v2) - - block2(v5: i32): - v6 = iconst.i32 2 - v7 = iadd v5, v4 - v8 = iadd v6, v1 -[UJ#1b] jump block3(v8) - - block3(v9: i32): - v10 = iadd v9, v5 - v11 = iadd.i32 v1, v4 -[SBzero#18] brif.i32 v1, block6(v10), block2(v9) - - block4(v12: i32): - v13 = iconst.i32 3 - v14 = iadd v12, v13 - v15 = iadd.i32 v4, v13 -[UJ#1b] jump 
block5(v13) - - block5(v16: i32): - v17 = iadd.i32 v14, v4 -[SBzero#18] brif.i32 v1, block6(v16), block4(v16) - - block6(v18: i32): - v19 = iadd v18, v2 - v20 = iadd.i32 v2, v3 -[SBzero#18] brif.i32 v1, block7, block1(v20) - - block7: -[Iret#19] return v19 -} - -; sameln: function %complex -; nextln: block0(v0: i32): -; nextln: v2 = iconst.i32 1 -; nextln: v3 = iconst.i32 4 -; nextln: v6 = iconst.i32 2 -; nextln: v13 = iconst.i32 3 -; nextln: v20 = iadd v2, v3 -; nextln: jump block1(v0) -; nextln: -; nextln: block1(v1: i32): -; nextln: v4 = iadd.i32 v2, v1 -; nextln: brif v1, block9(v4), block8(v2) -; nextln: -; nextln: block8(v21: i32): -; nextln: v8 = iadd.i32 v6, v1 -; nextln: v11 = iadd.i32 v1, v4 -; nextln: jump block2(v21) -; nextln: -; nextln: block2(v5: i32): -; nextln: v7 = iadd v5, v4 -; nextln: jump block3(v8) -; nextln: -; nextln: block3(v9: i32): -; nextln: v10 = iadd v9, v5 -; nextln: brif.i32 v1, block6(v10), block2(v9) -; nextln: -; nextln: block9(v22: i32): -; nextln: v15 = iadd.i32 v4, v13 -; nextln: jump block4(v22) -; nextln: -; nextln: block4(v12: i32): -; nextln: v14 = iadd v12, v13 -; nextln: jump block5(v13) -; nextln: -; nextln: block5(v16: i32): -; nextln: v17 = iadd.i32 v14, v4 -; nextln: brif.i32 v1, block6(v16), block4(v16) -; nextln: -; nextln: block6(v18: i32): -; nextln: v19 = iadd v18, v2 -; nextln: brif.i32 v1, block7, block1(v20) -; nextln: -; nextln: block7: -; nextln: return v19 -; nextln: } diff --git a/cranelift/filetests/filetests/licm/critical-edge.clif b/cranelift/filetests/filetests/licm/critical-edge.clif deleted file mode 100644 index ae8862b644ae..000000000000 --- a/cranelift/filetests/filetests/licm/critical-edge.clif +++ /dev/null @@ -1,50 +0,0 @@ -test licm -target riscv32 - -; The loop in this function is entered from a critical edge. 
- -function %critical_edge(i32, i32) -> i32 { - - block0(v0: i32, v7: i32): -[SBzero#38] brif v7, block2(v0), block1 - - block1: -[Iret#19] return v0 - - block2(v1: i32): - v2 = iconst.i32 1 - v3 = iconst.i32 2 - v4 = iadd v2, v3 -[SBzero#18] brif v1, block3, block4(v1) - - block3: - v5 = isub v1, v2 -[UJ#1b] jump block2(v5) - - block4(v6: i32): -[Iret#19] return v6 - -} -; sameln: function %critical_edge -; nextln: block0(v0: i32, v7: i32): -; nextln: brif v7, block5(v0), block1 -; nextln: -; nextln: block1: -; nextln: return v0 -; nextln: -; nextln: block5(v8: i32): -; nextln: v2 = iconst.i32 1 -; nextln: v3 = iconst.i32 2 -; nextln: v4 = iadd v2, v3 -; nextln: jump block2(v8) -; nextln: -; nextln: block2(v1: i32): -; nextln: brif v1, block3, block4(v1) -; nextln: -; nextln: block3: -; nextln: v5 = isub.i32 v1, v2 -; nextln: jump block2(v5) -; nextln: -; nextln: block4(v6: i32): -; nextln: return v6 -; nextln: } diff --git a/cranelift/filetests/filetests/licm/encoding.clif b/cranelift/filetests/filetests/licm/encoding.clif deleted file mode 100644 index 7e262bebadd7..000000000000 --- a/cranelift/filetests/filetests/licm/encoding.clif +++ /dev/null @@ -1,40 +0,0 @@ -test licm -target riscv32 - -; Ensure that instructions emitted by LICM get encodings. 
- -function %simple_loop(i32) -> i32 { - block0(v0: i32): -[UJ#1b] jump block1(v0) - - block1(v1: i32): -[Iz#04,%x0] v2 = iconst.i32 1 -[Iz#04,%x1] v3 = iconst.i32 2 -[R#0c,%x2] v4 = iadd v2, v3 -[SBzero#18] brif v1, block2, block3(v1) - - block2: -[R#200c,%x5] v5 = isub v1, v2 -[UJ#1b] jump block1(v5) - - block3(v6: i32): -[Iret#19] return v6 -} - -; check: function -; nextln: block0(v0: i32): -; nextln: [Iz#04,%x0] v2 = iconst.i32 1 -; nextln: [Iz#04,%x1] v3 = iconst.i32 2 -; nextln: [R#0c,%x2] v4 = iadd v2, v3 -; nextln: [UJ#1b] jump block1(v0) -; nextln: -; nextln: block1(v1: i32): -; nextln: [SBzero#18] brif v1, block2, block3(v1) -; nextln: -; nextln: block2: -; nextln: [R#200c,%x5] v5 = isub.i32 v1, v2 -; nextln: [UJ#1b] jump block1(v5) -; nextln: -; nextln: block3(v6: i32): -; nextln: [Iret#19] return v6 -; nextln: } diff --git a/cranelift/filetests/filetests/licm/load_readonly_notrap.clif b/cranelift/filetests/filetests/licm/load_readonly_notrap.clif deleted file mode 100644 index cd5fc03e4746..000000000000 --- a/cranelift/filetests/filetests/licm/load_readonly_notrap.clif +++ /dev/null @@ -1,51 +0,0 @@ -test licm - -target aarch64 -target x86_64 - -;; Nontrapping readonly load from address that is not loop-dependent -;; should be hoisted out of loop. 
- -function %hoist_load(i32, i64 vmctx) -> i32 { - gv0 = vmctx - gv1 = load.i64 notrap aligned readonly gv0 - -block0(v0: i32, v1: i64): - jump block1(v0, v1) - -block1(v2: i32, v3: i64): - v4 = iconst.i32 1 - v5 = global_value.i64 gv1 - v6 = load.i32 notrap aligned readonly v5 - v7 = iadd v2, v6 - brif v2, block2, block3(v2) - -block2: - v8 = isub v2, v4 - jump block1(v8, v3) - -block3(v9: i32): - return v9 -} - -; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { -; nextln: gv0 = vmctx -; nextln: gv1 = load.i64 notrap aligned readonly gv0 -; nextln: -; nextln: block0(v0: i32, v1: i64): -; nextln: v4 = iconst.i32 1 -; nextln: v5 = global_value.i64 gv1 -; nextln: v6 = load.i32 notrap aligned readonly v5 -; nextln: jump block1(v0, v1) -; nextln: -; nextln: block1(v2: i32, v3: i64): -; nextln: v7 = iadd v2, v6 -; nextln: brif v2, block2, block3(v2) -; nextln: -; nextln: block2: -; nextln: v8 = isub.i32 v2, v4 -; nextln: jump block1(v8, v3) -; nextln: -; nextln: block3(v9: i32): -; nextln: return v9 -; nextln: } diff --git a/cranelift/filetests/filetests/licm/multiple-blocks.clif b/cranelift/filetests/filetests/licm/multiple-blocks.clif deleted file mode 100644 index f2fddb706e3a..000000000000 --- a/cranelift/filetests/filetests/licm/multiple-blocks.clif +++ /dev/null @@ -1,55 +0,0 @@ -test licm -target riscv32 - -function %multiple_blocks(i32) -> i32 { - -block0(v0: i32): - jump block1(v0) - -block1(v10: i32): - v11 = iconst.i32 1 - v12 = iconst.i32 2 - v13 = iadd v11, v12 - brif v10, block2, block4(v10) - -block2: - v15 = isub v10, v11 - brif v15, block3, block5(v15) - -block3: - v14 = isub v10, v11 - jump block1(v14) - -block4(v20: i32): - return v20 - -block5(v30: i32): - v31 = iadd v11, v13 - jump block1(v30) - -} -; sameln:function %multiple_blocks(i32) -> i32 { -; nextln: block0(v0: i32): -; nextln: v11 = iconst.i32 1 -; nextln: v12 = iconst.i32 2 -; nextln: v13 = iadd v11, v12 -; nextln: v31 = iadd v11, v13 -; nextln: jump block1(v0) -; nextln: -; 
nextln: block1(v10: i32): -; nextln: brif v10, block2, block4(v10) -; nextln: -; nextln: block2: -; nextln: v15 = isub.i32 v10, v11 -; nextln: brif v15, block3, block5(v15) -; nextln: -; nextln: block3: -; nextln: v14 = isub.i32 v10, v11 -; nextln: jump block1(v14) -; nextln: -; nextln: block4(v20: i32): -; nextln: return v20 -; nextln: -; nextln: block5(v30: i32): -; nextln: jump block1(v30) -; nextln: } diff --git a/cranelift/filetests/filetests/licm/nested_loops.clif b/cranelift/filetests/filetests/licm/nested_loops.clif deleted file mode 100644 index 1fb04cf7c54a..000000000000 --- a/cranelift/filetests/filetests/licm/nested_loops.clif +++ /dev/null @@ -1,58 +0,0 @@ -test licm -target riscv32 - -function %nested_loops(i32) -> i32 { - -block0(v0: i32): - jump block1(v0) - -block1(v1: i32): - v2 = iconst.i32 1 - v3 = iconst.i32 2 - v4 = iadd v2, v3 - v5 = isub v1, v2 - jump block2(v5, v5) - -block2(v10: i32, v11: i32): - brif v11, block3, block4(v10) - -block3: - v12 = iconst.i32 1 - v15 = iadd v12, v5 - v13 = isub v11, v12 - jump block2(v10,v13) - -block4(v20: i32): - brif v20, block1(v20), block5(v20) - -block5(v30: i32): - return v30 - -} - -; sameln:function %nested_loops(i32) -> i32 { -; nextln: block0(v0: i32): -; nextln: v2 = iconst.i32 1 -; nextln: v3 = iconst.i32 2 -; nextln: v4 = iadd v2, v3 -; nextln: v12 = iconst.i32 1 -; nextln: jump block1(v0) -; nextln: -; nextln: block1(v1: i32): -; nextln: v5 = isub v1, v2 -; nextln: v15 = iadd.i32 v12, v5 -; nextln: jump block2(v5, v5) -; nextln: -; nextln: block2(v10: i32, v11: i32): -; nextln: brif v11, block3, block4(v10) -; nextln: -; nextln: block3: -; nextln: v13 = isub.i32 v11, v12 -; nextln: jump block2(v10, v13) -; nextln: -; nextln: block4(v20: i32): -; nextln: brif v20, block1(v20), block5(v20) -; nextln: -; nextln: block5(v30: i32): -; nextln: return v30 -; nextln: } diff --git a/cranelift/filetests/filetests/licm/reject.clif b/cranelift/filetests/filetests/licm/reject.clif deleted file mode 100644 
index 52933507694d..000000000000 --- a/cranelift/filetests/filetests/licm/reject.clif +++ /dev/null @@ -1,65 +0,0 @@ -test licm -target riscv32 - -function %other_side_effects(i32) -> i32 { - -block0(v0: i32): - jump block1(v0) - -block1(v1: i32): - v2 = iconst.i32 1 - brif v1, block2, block3(v1) - -block2: - v5 = isub v1, v2 - jump block1(v5) - -block3(v6: i32): - return v6 - -} - -function %spill(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = spill.i32 v0 - jump block1(v0, v1) - -block1(v3: i32, v4: i32): - v5 = spill.i32 v1 - v6 = fill.i32 v2 - v7 = fill.i32 v5 -; check: block1(v3: i32, v4: i32): -; check: v5 = spill.i32 v1 -; check: v6 = fill.i32 v2 -; check: v7 = fill v5 - brif v1, block2, block3(v1) - -block2: - v9 = isub v1, v4 - jump block1(v9, v3) - -block3(v10: i32): - return v10 -} - -function %non_invariant_aliases(i32) -> i32 { - -block0(v0: i32): - jump block1(v0) - -block1(v1: i32): - v8 -> v1 - v9 -> v1 - v2 = iadd v8, v9 -; check: block1(v1: i32): -; check: v2 = iadd v8, v9 - brif v1, block2, block3(v1) - -block2: - v5 = isub v1, v2 - jump block1(v5) - -block3(v6: i32): - return v6 - -} diff --git a/cranelift/filetests/filetests/licm/reject_load_notrap.clif b/cranelift/filetests/filetests/licm/reject_load_notrap.clif deleted file mode 100644 index 904382bc78b0..000000000000 --- a/cranelift/filetests/filetests/licm/reject_load_notrap.clif +++ /dev/null @@ -1,52 +0,0 @@ -test licm - -target aarch64 -target x86_64 - -;; Nontrapping possibly-not-readonly load from address that is not -;; loop-dependent should *not* be hoisted out of loop, though the -;; address computation can be. 
- -function %hoist_load(i32, i64 vmctx) -> i32 { - gv0 = vmctx - gv1 = load.i64 notrap aligned readonly gv0 - -block0(v0: i32, v1: i64): - v4 = iconst.i32 1 - v5 = global_value.i64 gv1 - jump block1(v0, v1) - -block1(v2: i32, v3: i64): - v6 = load.i32 notrap aligned v5 - v7 = iadd v2, v6 - brif v2, block2, block3(v2) - -block2: - v8 = isub v2, v4 - jump block1(v8, v3) - -block3(v9: i32): - return v9 -} - -; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { -; nextln: gv0 = vmctx -; nextln: gv1 = load.i64 notrap aligned readonly gv0 -; nextln: -; nextln: block0(v0: i32, v1: i64): -; nextln: v4 = iconst.i32 1 -; nextln: v5 = global_value.i64 gv1 -; nextln: jump block1(v0, v1) -; nextln: -; nextln: block1(v2: i32, v3: i64): -; nextln: v6 = load.i32 notrap aligned v5 -; nextln: v7 = iadd v2, v6 -; nextln: brif v2, block2, block3(v2) -; nextln: -; nextln: block2: -; nextln: v8 = isub.i32 v2, v4 ; v4 = 1 -; nextln: jump block1(v8, v3) -; nextln: -; nextln: block3(v9: i32): -; nextln: return v9 -; nextln: } diff --git a/cranelift/filetests/filetests/licm/reject_load_readonly.clif b/cranelift/filetests/filetests/licm/reject_load_readonly.clif deleted file mode 100644 index d5cc40dfb6ca..000000000000 --- a/cranelift/filetests/filetests/licm/reject_load_readonly.clif +++ /dev/null @@ -1,52 +0,0 @@ -test licm - -target aarch64 -target x86_64 - -;; Maybe-trapping readonly load from address that is not -;; loop-dependent should *not* be hoisted out of loop, though the -;; address computation can be hoisted. 
- -function %hoist_load(i32, i64 vmctx) -> i32 { - gv0 = vmctx - gv1 = load.i64 notrap aligned readonly gv0 - -block0(v0: i32, v1: i64): - jump block1(v0, v1) - -block1(v2: i32, v3: i64): - v4 = iconst.i32 1 - v5 = global_value.i64 gv1 - v6 = load.i32 aligned readonly v5 - v7 = iadd v2, v6 - brif v2, block2, block3(v2) - -block2: - v8 = isub v2, v4 - jump block1(v8, v3) - -block3(v9: i32): - return v9 -} - -; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { -; nextln: gv0 = vmctx -; nextln: gv1 = load.i64 notrap aligned readonly gv0 -; nextln: -; nextln: block0(v0: i32, v1: i64): -; nextln: v4 = iconst.i32 1 -; nextln: v5 = global_value.i64 gv1 -; nextln: jump block1(v0, v1) -; nextln: -; nextln: block1(v2: i32, v3: i64): -; nextln: v6 = load.i32 aligned readonly v5 -; nextln: v7 = iadd v2, v6 -; nextln: brif v2, block2, block3(v2) -; nextln: -; nextln: block2: -; nextln: v8 = isub.i32 v2, v4 -; nextln: jump block1(v8, v3) -; nextln: -; nextln: block3(v9: i32): -; nextln: return v9 -; nextln: } diff --git a/cranelift/filetests/filetests/licm/rewrite-jump-table.clif b/cranelift/filetests/filetests/licm/rewrite-jump-table.clif deleted file mode 100644 index a4a00c62c571..000000000000 --- a/cranelift/filetests/filetests/licm/rewrite-jump-table.clif +++ /dev/null @@ -1,23 +0,0 @@ -test licm -target aarch64 - -function %rewrite_jump_table() { - - block0: - v0 = iconst.i32 1 - br_table v0, block1, [block1, block2] - - block1: - return - - block2: - v4 = iconst.i8 0 - jump block2 -} - -; sameln: function -; check: block3: -; nextln: v4 = iconst.i8 0 -; nextln: jump block2 -; check: block2: -; nextln: jump block2 diff --git a/cranelift/filetests/filetests/runtests/issue5569.clif b/cranelift/filetests/filetests/runtests/issue5569.clif index 3e2966498801..73b6c8b00cdf 100644 --- a/cranelift/filetests/filetests/runtests/issue5569.clif +++ b/cranelift/filetests/filetests/runtests/issue5569.clif @@ -1,5 +1,4 @@ test run -set use_egraphs=true target riscv64 function 
%a(i16, f64, i32, i64, i16, i128, f32) -> i16 { diff --git a/cranelift/filetests/filetests/simple_gvn/basic.clif b/cranelift/filetests/filetests/simple_gvn/basic.clif deleted file mode 100644 index 2462a8935705..000000000000 --- a/cranelift/filetests/filetests/simple_gvn/basic.clif +++ /dev/null @@ -1,43 +0,0 @@ -test simple-gvn - -function %simple_redundancy(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iadd v0, v1 - v3 = iadd v0, v1 - v4 = imul v2, v3 -; check: v4 = imul v2, v2 - return v4 -} - -function %cascading_redundancy(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iadd v0, v1 - v3 = iadd v0, v1 - v4 = imul v2, v3 - v5 = imul v2, v2 - v6 = iadd v4, v5 -; check: v6 = iadd v4, v4 - return v6 -} - -function %redundancies_on_some_paths(i32, i32, i32) -> i32 { -block0(v0: i32, v1: i32, v2: i32): - v3 = iadd v0, v1 - brif v3, block3, block1 - -block3: - v4 = iadd v0, v1 - jump block2(v4) -; check: jump block2(v3) - -block1: - v5 = iadd v0, v1 - jump block2(v5) -; check: jump block2(v3) - -block2(v6: i32): - v7 = iadd v0, v1 - v8 = iadd v6, v7 -; check: v8 = iadd v6, v3 - return v8 -} diff --git a/cranelift/filetests/filetests/simple_gvn/idempotent-trapping.clif b/cranelift/filetests/filetests/simple_gvn/idempotent-trapping.clif deleted file mode 100644 index d9b320c31fa1..000000000000 --- a/cranelift/filetests/filetests/simple_gvn/idempotent-trapping.clif +++ /dev/null @@ -1,68 +0,0 @@ -;; Test that we GVN instructions that can trap (which is idempotent as long as -;; it isn't a resumable trap), but which are still otherwise pure functions of -;; their inputs. 
- -test simple-gvn - -function %udiv(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = udiv v0, v1 - v3 = udiv v0, v1 - v4 = iadd v2, v3 -; check: v4 = iadd v2, v2 - return v4 -} - -function %sdiv(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = sdiv v0, v1 - v3 = sdiv v0, v1 - v4 = iadd v2, v3 -; check: v4 = iadd v2, v2 - return v4 -} - -function %urem(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = urem v0, v1 - v3 = urem v0, v1 - v4 = iadd v2, v3 -; check: v4 = iadd v2, v2 - return v4 -} - -function %srem(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = srem v0, v1 - v3 = srem v0, v1 - v4 = iadd v2, v3 -; check: v4 = iadd v2, v2 - return v4 -} - -function %uadd_overflow_trap(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = uadd_overflow_trap v0, v1, heap_oob - v3 = uadd_overflow_trap v0, v1, heap_oob - v4 = iadd v2, v3 -; check: v4 = iadd v2, v2 - return v4 -} - -function %fcvt_to_uint(f32) -> i32 { -block0(v0: f32): - v1 = fcvt_to_uint.i32 v0 - v2 = fcvt_to_uint.i32 v0 - v3 = iadd v1, v2 -; check: v3 = iadd v1, v1 - return v3 -} - -function %fcvt_to_sint(f32) -> i32 { -block0(v0: f32): - v1 = fcvt_to_sint.i32 v0 - v2 = fcvt_to_sint.i32 v0 - v3 = iadd v1, v2 -; check: v3 = iadd v1, v1 - return v3 -} diff --git a/cranelift/filetests/filetests/simple_gvn/readonly.clif b/cranelift/filetests/filetests/simple_gvn/readonly.clif deleted file mode 100644 index 322ea275d64d..000000000000 --- a/cranelift/filetests/filetests/simple_gvn/readonly.clif +++ /dev/null @@ -1,25 +0,0 @@ -test simple-gvn - -target aarch64 -target x86_64 - -function %eliminate_redundant_global_loads(i32, i64 vmctx) { - gv0 = vmctx - gv1 = load.i64 notrap aligned readonly gv0 - -block0(v0: i32, v1: i64): - v2 = global_value.i64 gv1 - v3 = global_value.i64 gv1 - - v4 = iconst.i32 0 - store.i32 notrap aligned v4, v2 - store.i32 notrap aligned v4, v3 - - return -} -; check: v2 = global_value.i64 gv1 -; check: v3 -> v2 -; check: v4 = iconst.i32 0 -; check: store notrap aligned v4, v2 
-; check: store notrap aligned v4, v2 -; check: return diff --git a/cranelift/filetests/filetests/simple_gvn/reject.clif b/cranelift/filetests/filetests/simple_gvn/reject.clif deleted file mode 100644 index 21286996dfbe..000000000000 --- a/cranelift/filetests/filetests/simple_gvn/reject.clif +++ /dev/null @@ -1,27 +0,0 @@ -test simple-gvn - -function %differing_typevars() -> i64 { -block0: - v0 = iconst.i32 7 - v1 = iconst.i64 7 - v2 = iconst.i64 8 -; check: v0 = iconst.i32 7 -; check: v1 = iconst.i64 7 -; check: v2 = iconst.i64 8 - v3 = uextend.i64 v0 - v4 = iadd v2, v1 - v5 = iadd v4, v3 - return v5 -} - -function %cpu_flags() -> i8 { -block0: - v0 = iconst.i32 7 - v1 = iconst.i32 8 - v2 = icmp eq v0, v1 - v3 = icmp eq v0, v1 - v4 = bor v2, v3 -; check: v2 = icmp eq v0, v1 -; check: v4 = bor v2, v2 - return v4 -} diff --git a/cranelift/filetests/filetests/simple_gvn/scopes.clif b/cranelift/filetests/filetests/simple_gvn/scopes.clif deleted file mode 100644 index 8ec95a777742..000000000000 --- a/cranelift/filetests/filetests/simple_gvn/scopes.clif +++ /dev/null @@ -1,80 +0,0 @@ -test simple-gvn - -function %two_diamonds(i32, i32, i32, i32, i32) { -block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32): - v5 = iconst.i32 16 - ; check: v5 = iconst.i32 16 - brif v0, block5, block1 - -block5: - v6 = iconst.i32 17 - ; check: v6 = iconst.i32 17 - v7 = iconst.i32 16 - ; not: v7 = iconst.i32 16 - jump block2 - -block1: - v8 = iconst.i32 18 - ; check: v8 = iconst.i32 18 - v9 = iconst.i32 17 - ; check: v9 = iconst.i32 17 - v10 = iconst.i32 16 - ; not: v10 = iconst.i32 16 - jump block2 - -block2: - v11 = iconst.i32 19 - ; check: v11 = iconst.i32 19 - v12 = iconst.i32 18 - ; check: v12 = iconst.i32 18 - v13 = iconst.i32 17 - ; check: v13 = iconst.i32 17 - v14 = iconst.i32 16 - ; not: v14 = iconst.i32 16 - brif v1, block6, block3 - -block6: - v15 = iconst.i32 20 - ; check: v15 = iconst.i32 20 - v16 = iconst.i32 19 - ; not: v16 = iconst.i32 19 - v17 = iconst.i32 18 - ; not: v17 = 
iconst.i32 18 - v18 = iconst.i32 17 - ; not: v18 = iconst.i32 17 - v19 = iconst.i32 16 - ; not: v19 = iconst.i32 16 - jump block4 - -block3: - v20 = iconst.i32 21 - ; check: v20 = iconst.i32 21 - v21 = iconst.i32 20 - ; check: v21 = iconst.i32 20 - v22 = iconst.i32 19 - ; not: v22 = iconst.i32 19 - v23 = iconst.i32 18 - ; not: v23 = iconst.i32 18 - v24 = iconst.i32 17 - ; not: v24 = iconst.i32 17 - v25 = iconst.i32 16 - ; not: v25 = iconst.i32 16 - jump block4 - -block4: - v26 = iconst.i32 22 - ; check: v26 = iconst.i32 22 - v27 = iconst.i32 21 - ; check: v27 = iconst.i32 21 - v28 = iconst.i32 20 - ; check: v28 = iconst.i32 20 - v29 = iconst.i32 19 - ; not: v29 = iconst.i32 19 - v30 = iconst.i32 18 - ; not: v30 = iconst.i32 18 - v31 = iconst.i32 17 - ; not: v31 = iconst.i32 17 - v32 = iconst.i32 16 - ; not: v32 = iconst.i32 16 - return -} diff --git a/cranelift/filetests/filetests/simple_preopt/branch.clif b/cranelift/filetests/filetests/simple_preopt/branch.clif deleted file mode 100644 index c710ba843fc2..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/branch.clif +++ /dev/null @@ -1,53 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 - -function %icmp_to_brif_false_fold(i32) -> i32 { -block0(v0: i32): - v1 = icmp_imm eq v0, 0 - brif v1, block1, block2 -block1: - v3 = iconst.i32 1 - return v3 -block2: - v4 = iconst.i32 2 - return v4 -} -; sameln: function %icmp_to_brif_false_fold -; nextln: block0(v0: i32): -; nextln: v1 = icmp_imm eq v0, 0 -; nextln: brif v0, block2, block1 -; nextln: -; nextln: block1: -; nextln: v3 = iconst.i32 1 -; nextln: return v3 -; nextln: -; nextln: block2: -; nextln: v4 = iconst.i32 2 -; nextln: return v4 -; nextln: } - -function %icmp_to_brif_false_inverted_fold(i32) -> i32 { -block0(v0: i32): - v1 = icmp_imm ne v0, 0 - brif v1, block2, block1 -block1: - v3 = iconst.i32 1 - return v3 -block2: - v4 = iconst.i32 2 - return v4 -} -; sameln: function %icmp_to_brif_false_inverted_fold -; nextln: block0(v0: i32): -; 
nextln: v1 = icmp_imm ne v0, 0 -; nextln: brif v0, block2, block1 -; nextln: -; nextln: block1: -; nextln: v3 = iconst.i32 1 -; nextln: return v3 -; nextln: -; nextln: block2: -; nextln: v4 = iconst.i32 2 -; nextln: return v4 -; nextln: } diff --git a/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif b/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif deleted file mode 100644 index 7b09c24d0489..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif +++ /dev/null @@ -1,60 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 baseline - -; Cases where the denominator is created by an iconst - -function %indir_udiv32(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 7 - v2 = udiv v0, v1 - ; check: iconst.i32 7 - ; check: iconst.i32 0x2492_4925 - ; check: umulhi v0, v3 - ; check: isub v0, v4 - ; check: ushr_imm v5, 1 - ; check: iadd v6, v4 - ; check: v8 = ushr_imm v7, 2 - ; check: v2 -> v8 - return v2 -} - -function %indir_sdiv32(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 -17 - v2 = sdiv v0, v1 - ; check: iconst.i32 -17 - ; check: iconst.i32 0xffff_ffff_8787_8787 - ; check: smulhi v0, v3 - ; check: sshr_imm v4, 3 - ; check: ushr_imm v5, 31 - ; check: v7 = iadd v5, v6 - ; check: v2 -> v7 - return v2 -} - -function %indir_udiv64(i64) -> i64 { -block0(v0: i64): - v1 = iconst.i64 1337 - v2 = udiv v0, v1 - ; check: iconst.i64 1337 - ; check: iconst.i64 0xc411_9d95_2866_a139 - ; check: umulhi v0, v3 - ; check: v5 = ushr_imm v4, 10 - ; check: v2 -> v5 - return v2 -} - -function %indir_sdiv64(i64) -> i64 { -block0(v0: i64): - v1 = iconst.i64 -90210 - v2 = sdiv v0, v1 - ; check: iconst.i64 0xffff_ffff_fffe_9f9e - ; check: iconst.i64 0xd181_4ee8_939c_b8bb - ; check: smulhi v0, v3 - ; check: sshr_imm v4, 14 - ; check: ushr_imm v5, 63 - ; check: v7 = iadd v5, v6 - ; check: v2 -> v7 - return v2 -} diff --git a/cranelift/filetests/filetests/simple_preopt/div_by_const_non_power_of_2.clif 
b/cranelift/filetests/filetests/simple_preopt/div_by_const_non_power_of_2.clif deleted file mode 100644 index f22577771859..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/div_by_const_non_power_of_2.clif +++ /dev/null @@ -1,267 +0,0 @@ -test simple_preopt -target aarch64 -target i686 baseline - -; -------- U32 -------- - -; complex case (mul, sub, shift, add, shift) -function %t_udiv32_p7(i32) -> i32 { -block0(v0: i32): - v1 = udiv_imm v0, 7 - ; check: iconst.i32 0x2492_4925 - ; check: umulhi v0, v2 - ; check: isub v0, v3 - ; check: ushr_imm v4, 1 - ; check: iadd v5, v3 - ; check: v7 = ushr_imm v6, 2 - ; check: v1 -> v7 - return v1 -} - -; simple case (mul, shift) -function %t_udiv32_p125(i32) -> i32 { -block0(v0: i32): - v1 = udiv_imm v0, 125 - ; check: iconst.i32 0x1062_4dd3 - ; check: umulhi v0, v2 - ; check: v4 = ushr_imm v3, 3 - ; check: v1 -> v4 - return v1 -} - -; simple case w/ shift by zero (mul) -function %t_udiv32_p641(i32) -> i32 { -block0(v0: i32): - v1 = udiv_imm v0, 641 - ; check: iconst.i32 0x0066_3d81 - ; check: v3 = umulhi v0, v2 - ; check: v1 -> v3 - return v1 -} - - -; -------- S32 -------- - -; simple case w/ shift by zero (mul, add-sign-bit) -function %t_sdiv32_n6(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, -6 - ; check: iconst.i32 0xffff_ffff_d555_5555 - ; check: smulhi v0, v2 - ; check: ushr_imm v3, 31 - ; check: v5 = iadd v3, v4 - ; check: v1 -> v5 - return v1 -} - -; simple case (mul, shift, add-sign-bit) -function %t_sdiv32_n5(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, -5 - ; check: iconst.i32 0xffff_ffff_9999_9999 - ; check: smulhi v0, v2 - ; check: sshr_imm v3, 1 - ; check: ushr_imm v4, 31 - ; check: v6 = iadd v4, v5 - ; check: v1 -> v6 - return v1 -} - -; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) -function %t_sdiv32_n3(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, -3 - ; check: iconst.i32 0x5555_5555 - ; check: smulhi v0, v2 - ; check: isub v3, v0 - ; check: sshr_imm v4, 1 - ; check: 
ushr_imm v5, 31 - ; check: v7 = iadd v5, v6 - ; check: v1 -> v7 - return v1 -} - -; simple case w/ shift by zero (mul, add-sign-bit) -function %t_sdiv32_p6(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, 6 - ; check: iconst.i32 0x2aaa_aaab - ; check: smulhi v0, v2 - ; check: ushr_imm v3, 31 - ; check: v5 = iadd v3, v4 - ; check: v1 -> v5 - return v1 -} - -; case d > 0 && M < 0 (mull, add, shift, add-sign-bit) -function %t_sdiv32_p7(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, 7 - ; check: iconst.i32 0xffff_ffff_9249_2493 - ; check: smulhi v0, v2 - ; check: iadd v3, v0 - ; check: sshr_imm v4, 2 - ; check: ushr_imm v5, 31 - ; check: v7 = iadd v5, v6 - ; check: v1 -> v7 - return v1 -} - -; simple case (mul, shift, add-sign-bit) -function %t_sdiv32_p625(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, 625 - ; check: iconst.i32 0x68db_8bad - ; check: smulhi v0, v2 - ; check: sshr_imm v3, 8 - ; check: ushr_imm v4, 31 - ; check: v6 = iadd v4, v5 - ; check: v1 -> v6 - return v1 -} - - -; -------- U64 -------- - -; complex case (mul, sub, shift, add, shift) -function %t_udiv64_p7(i64) -> i64 { -block0(v0: i64): - v1 = udiv_imm v0, 7 - ; check: iconst.i64 0x2492_4924_9249_2493 - ; check: umulhi v0, v2 - ; check: isub v0, v3 - ; check: ushr_imm v4, 1 - ; check: iadd v5, v3 - ; check: v7 = ushr_imm v6, 2 - ; check: v1 -> v7 - return v1 -} - -; simple case (mul, shift) -function %t_udiv64_p9(i64) -> i64 { -block0(v0: i64): - v1 = udiv_imm v0, 9 - ; check: iconst.i64 0xe38e_38e3_8e38_e38f - ; check: umulhi v0, v2 - ; check: v4 = ushr_imm v3, 3 - ; check: v1 -> v4 - return v1 -} - -; complex case (mul, sub, shift, add, shift) -function %t_udiv64_p125(i64) -> i64 { -block0(v0: i64): - v1 = udiv_imm v0, 125 - ; check: iconst.i64 0x0624_dd2f_1a9f_be77 - ; check: umulhi v0, v2 - ; check: isub v0, v3 - ; check: ushr_imm v4, 1 - ; check: iadd v5, v3 - ; check: v7 = ushr_imm v6, 6 - ; check: v1 -> v7 - return v1 -} - -; simple case w/ shift by zero (mul) -function 
%t_udiv64_p274177(i64) -> i64 { -block0(v0: i64): - v1 = udiv_imm v0, 274177 - ; check: iconst.i64 0x3d30_f19c_d101 - ; check: v3 = umulhi v0, v2 - ; check: v1 -> v3 - return v1 -} - - -; -------- S64 -------- - -; simple case (mul, shift, add-sign-bit) -function %t_sdiv64_n625(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, -625 - ; check: iconst.i64 0xcb92_3a29_c779_a6b5 - ; check: smulhi v0, v2 - ; check: sshr_imm v3, 7 - ; check: ushr_imm v4, 63 - ; check: v6 = iadd v4, v5 - ; check: v1 -> v6 - return v1 -} - -; simple case w/ zero shift (mul, add-sign-bit) -function %t_sdiv64_n6(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, -6 - ; check: iconst.i64 0xd555_5555_5555_5555 - ; check: smulhi v0, v2 - ; check: ushr_imm v3, 63 - ; check: v5 = iadd v3, v4 - ; check: v1 -> v5 - return v1 -} - -; simple case w/ zero shift (mul, add-sign-bit) -function %t_sdiv64_n5(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, -5 - ; check: iconst.i64 0x9999_9999_9999_9999 - ; check: smulhi v0, v2 - ; check: sshr_imm v3, 1 - ; check: ushr_imm v4, 63 - ; check: v6 = iadd v4, v5 - ; check: v1 -> v6 - return v1 -} - -; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) -function %t_sdiv64_n3(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, -3 - ; check: iconst.i64 0x5555_5555_5555_5555 - ; check: smulhi v0, v2 - ; check: isub v3, v0 - ; check: sshr_imm v4, 1 - ; check: ushr_imm v5, 63 - ; check: v7 = iadd v5, v6 - ; check: v1 -> v7 - return v1 -} - -; simple case w/ zero shift (mul, add-sign-bit) -function %t_sdiv64_p6(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, 6 - ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab - ; check: smulhi v0, v2 - ; check: ushr_imm v3, 63 - ; check: v5 = iadd v3, v4 - ; check: v1 -> v5 - return v1 -} - -; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) -function %t_sdiv64_p15(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, 15 - ; check: iconst.i64 0x8888_8888_8888_8889 - ; check: smulhi v0, v2 - ; check: iadd v3, v0 - ; check: 
sshr_imm v4, 3 - ; check: ushr_imm v5, 63 - ; check: v7 = iadd v5, v6 - ; check: v1 -> v7 - return v1 -} - -; simple case (mul, shift, add-sign-bit) -function %t_sdiv64_p625(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, 625 - ; check: iconst.i64 0x346d_c5d6_3886_594b - ; check: smulhi v0, v2 - ; check: sshr_imm v3, 7 - ; check: ushr_imm v4, 63 - ; check: v6 = iadd v4, v5 - ; check: v1 -> v6 - return v1 -} diff --git a/cranelift/filetests/filetests/simple_preopt/div_by_const_power_of_2.clif b/cranelift/filetests/filetests/simple_preopt/div_by_const_power_of_2.clif deleted file mode 100644 index 09e0aa180cb5..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/div_by_const_power_of_2.clif +++ /dev/null @@ -1,293 +0,0 @@ -test simple_preopt -target aarch64 -target i686 baseline - -; -------- U32 -------- - -; ignored -function %t_udiv32_p0(i32) -> i32 { -block0(v0: i32): - v1 = udiv_imm v0, 0 - ; check: udiv_imm v0, 0 - return v1 -} - -; converted to a nop -function %t_udiv32_p1(i32) -> i32 { -block0(v0: i32): - v1 = udiv_imm v0, 1 - ; check: nop - return v1 -} - -; shift -function %t_udiv32_p2(i32) -> i32 { -block0(v0: i32): - v1 = udiv_imm v0, 2 - ; check: ushr_imm v0, 1 - return v1 -} - -; shift -function %t_udiv32_p2p31(i32) -> i32 { -block0(v0: i32): - v1 = udiv_imm v0, 0x8000_0000 - ; check: ushr_imm v0, 31 - return v1 -} - - -; -------- U64 -------- - -; ignored -function %t_udiv64_p0(i64) -> i64 { -block0(v0: i64): - v1 = udiv_imm v0, 0 - ; check: udiv_imm v0, 0 - return v1 -} - -; converted to a nop -function %t_udiv64_p1(i64) -> i64 { -block0(v0: i64): - v1 = udiv_imm v0, 1 - ; check: nop - return v1 -} - -; shift -function %t_udiv64_p2(i64) -> i64 { -block0(v0: i64): - v1 = udiv_imm v0, 2 - ; check: ushr_imm v0, 1 - return v1 -} - -; shift -function %t_udiv64_p2p63(i64) -> i64 { -block0(v0: i64): - v1 = udiv_imm v0, 0x8000_0000_0000_0000 - ; check: ushr_imm v0, 63 - return v1 -} - - -; -------- S32 -------- - -; ignored -function 
%t_sdiv32_p0(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, 0 - ; check: sdiv_imm v0, 0 - return v1 -} - -; converted to a nop -function %t_sdiv32_p1(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, 1 - ; check: nop - return v1 -} - -; ignored -function %t_sdiv32_n1(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, -1 - ; check: sdiv_imm v0, -1 - return v1 -} - -; shift -function %t_sdiv32_p2(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, 2 - ; check: ushr_imm v0, 31 - ; check: iadd v0, v2 - ; check: sshr_imm v3, 1 - ; check: v1 -> v4 - return v1 -} - -; shift -function %t_sdiv32_n2(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, -2 - ; check: ushr_imm v0, 31 - ; check: iadd v0, v2 - ; check: sshr_imm v3, 1 - ; check: irsub_imm v4, 0 - return v1 -} - -; shift -function %t_sdiv32_p4(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, 4 - ; check: v2 = sshr_imm v0, 1 - ; check: ushr_imm v2, 30 - ; check: iadd v0, v3 - ; check: v5 = sshr_imm v4, 2 - ; check: v1 -> v5 - - return v1 -} - -; shift -function %t_sdiv32_n4(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, -4 - ; check: sshr_imm v0, 1 - ; check: ushr_imm v2, 30 - ; check: iadd v0, v3 - ; check: sshr_imm v4, 2 - ; check: irsub_imm v5, 0 - return v1 -} - -; shift -function %t_sdiv32_p2p30(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, 0x4000_0000 - ; check: sshr_imm v0, 29 - ; check: ushr_imm v2, 2 - ; check: iadd v0, v3 - ; check: v5 = sshr_imm v4, 30 - ; check: v1 -> v5 - return v1 -} - -; shift -function %t_sdiv32_n2p30(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, -0x4000_0000 - ; check: sshr_imm v0, 29 - ; check: ushr_imm v2, 2 - ; check: iadd v0, v3 - ; check: sshr_imm v4, 30 - ; check: irsub_imm v5, 0 - return v1 -} - -; there's no positive version of this, since -(-0x8000_0000) isn't -; representable. 
-function %t_sdiv32_n2p31(i32) -> i32 { -block0(v0: i32): - v1 = sdiv_imm v0, -0x8000_0000 - ; check: sshr_imm v0, 30 - ; check: ushr_imm v2, 1 - ; check: iadd v0, v3 - ; check: sshr_imm v4, 31 - ; check: irsub_imm v5, 0 - return v1 -} - - -; -------- S64 -------- - -; ignored -function %t_sdiv64_p0(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, 0 - ; check: sdiv_imm v0, 0 - return v1 -} - -; converted to a nop -function %t_sdiv64_p1(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, 1 - ; check: nop - return v1 -} - -; ignored -function %t_sdiv64_n1(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, -1 - ; check: sdiv_imm v0, -1 - return v1 -} - -; shift -function %t_sdiv64_p2(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, 2 - ; check: ushr_imm v0, 63 - ; check: iadd v0, v2 - ; check: v4 = sshr_imm v3, 1 - ; check: v1 -> v4 - return v1 -} - -; shift -function %t_sdiv64_n2(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, -2 - ; check: ushr_imm v0, 63 - ; check: iadd v0, v2 - ; check: sshr_imm v3, 1 - ; check: irsub_imm v4, 0 - return v1 -} - -; shift -function %t_sdiv64_p4(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, 4 - ; check: sshr_imm v0, 1 - ; check: ushr_imm v2, 62 - ; check: iadd v0, v3 - ; check: v5 = sshr_imm v4, 2 - ; check: v1 -> v5 - return v1 -} - -; shift -function %t_sdiv64_n4(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, -4 - ; check: sshr_imm v0, 1 - ; check: ushr_imm v2, 62 - ; check: iadd v0, v3 - ; check: sshr_imm v4, 2 - ; check: irsub_imm v5, 0 - return v1 -} - -; shift -function %t_sdiv64_p2p62(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, 0x4000_0000_0000_0000 - ; check: sshr_imm v0, 61 - ; check: ushr_imm v2, 2 - ; check: iadd v0, v3 - ; check: v5 = sshr_imm v4, 62 - ; check: v1 -> v5 - return v1 -} - -; shift -function %t_sdiv64_n2p62(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, -0x4000_0000_0000_0000 - ; check: sshr_imm v0, 61 - ; check: ushr_imm v2, 2 - ; check: iadd v0, v3 - ; check: sshr_imm v4, 
62 - ; check: irsub_imm v5, 0 - return v1 -} - -; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't -; representable. -function %t_sdiv64_n2p63(i64) -> i64 { -block0(v0: i64): - v1 = sdiv_imm v0, -0x8000_0000_0000_0000 - ; check: sshr_imm v0, 62 - ; check: ushr_imm v2, 1 - ; check: iadd v0, v3 - ; check: sshr_imm v4, 63 - ; check: irsub_imm v5, 0 - return v1 -} diff --git a/cranelift/filetests/filetests/simple_preopt/do_not_reorder_instructions_when_transplanting.clif b/cranelift/filetests/filetests/simple_preopt/do_not_reorder_instructions_when_transplanting.clif deleted file mode 100644 index 90517d2288d4..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/do_not_reorder_instructions_when_transplanting.clif +++ /dev/null @@ -1,23 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 - -;; Test that although v5 can be replaced with v1, we don't transplant `load.i32 -;; v0` on top of `iadd v3, v4`, because that would move the load past other uses -;; of its result. 
- -function %foo(i64) -> i32 { -block0(v0: i64): - v1 = load.i32 v0 - v2 = iconst.i32 16 - v3 = iadd_imm v1, -16 - v4 = iconst.i32 16 - v5 = iadd v3, v4 - ; check: v1 = load.i32 v0 - ; nextln: v5 -> v1 - ; nextln: v2 = iconst.i32 16 - ; nextln: v3 = iadd_imm v1, -16 - ; nextln: v4 = iconst.i32 16 - ; nextln: nop - return v5 -} diff --git a/cranelift/filetests/filetests/simple_preopt/fold-extended-move-wraparound.clif b/cranelift/filetests/filetests/simple_preopt/fold-extended-move-wraparound.clif deleted file mode 100644 index 13d77d7cfa53..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/fold-extended-move-wraparound.clif +++ /dev/null @@ -1,15 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 - -function %wraparound(i64 vmctx) -> f32 system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 48 - -block35(v0: i64): - v88 = iconst.i64 0 - v89 = iconst.i64 0x8000_0000_0000_0000 - v90 = ishl_imm v88, 0x8000_0000_0000_0000 - v91 = sshr v90, v89; check: sshr_imm v90, 0x8000_0000_0000_0000 - trap user0 -} diff --git a/cranelift/filetests/filetests/simple_preopt/i128.clif b/cranelift/filetests/filetests/simple_preopt/i128.clif deleted file mode 100644 index b3bc2d666916..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/i128.clif +++ /dev/null @@ -1,28 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 -target s390x -target riscv64 - -function %imul_imm_zero(i128) -> i128 { -block0(v0: i128): - v1 = imul_imm v0, 0 - return v1 -} -; sameln: function %imul_imm_zero -; nextln: block0(v0: i128): -; nextln: v1 = imul_imm v0, 0 -; nextln: return v1 -; nextln: } - - -function %band_imm_zero(i128) -> i128 { -block0(v0: i128): - v1 = band_imm v0, 0 - return v1 -} -; check: function %band_imm_zero -; nextln: block0(v0: i128): -; nextln: v1 = band_imm v0, 0 -; nextln: return v1 -; nextln: } diff --git a/cranelift/filetests/filetests/simple_preopt/rem_by_const_non_power_of_2.clif 
b/cranelift/filetests/filetests/simple_preopt/rem_by_const_non_power_of_2.clif deleted file mode 100644 index a7cd49246ed3..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/rem_by_const_non_power_of_2.clif +++ /dev/null @@ -1,286 +0,0 @@ -test simple_preopt -target aarch64 -target i686 baseline - -; -------- U32 -------- - -; complex case (mul, sub, shift, add, shift) -function %t_urem32_p7(i32) -> i32 { -block0(v0: i32): - v1 = urem_imm v0, 7 - ; check: iconst.i32 0x2492_4925 - ; check: umulhi v0, v2 - ; check: isub v0, v3 - ; check: ushr_imm v4, 1 - ; check: iadd v5, v3 - ; check: ushr_imm v6, 2 - ; check: imul_imm v7, 7 - ; check: isub v0, v8 - return v1 -} - -; simple case (mul, shift) -function %t_urem32_p125(i32) -> i32 { -block0(v0: i32): - v1 = urem_imm v0, 125 - ; check: iconst.i32 0x1062_4dd3 - ; check: umulhi v0, v2 - ; check: ushr_imm v3, 3 - ; check: imul_imm v4, 125 - ; check: isub v0, v5 - return v1 -} - -; simple case w/ shift by zero (mul) -function %t_urem32_p641(i32) -> i32 { -block0(v0: i32): - v1 = urem_imm v0, 641 - ; check: iconst.i32 0x0066_3d81 - ; check: umulhi v0, v2 - ; check: imul_imm v3, 641 - ; check: isub v0, v4 - return v1 -} - - -; -------- S32 -------- - -; simple case w/ shift by zero (mul, add-sign-bit) -function %t_srem32_n6(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, -6 - ; check: iconst.i32 0xffff_ffff_d555_5555 - ; check: smulhi v0, v2 - ; check: ushr_imm v3, 31 - ; check: iadd v3, v4 - ; check: imul_imm v5, -6 - ; check: isub v0, v6 - return v1 -} - -; simple case (mul, shift, add-sign-bit) -function %t_srem32_n5(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, -5 - ; check: iconst.i32 0xffff_ffff_9999_9999 - ; check: smulhi v0, v2 - ; check: sshr_imm v3, 1 - ; check: ushr_imm v4, 31 - ; check: iadd v4, v5 - ; check: imul_imm v6, -5 - ; check: isub v0, v7 - return v1 -} - -; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) -function %t_srem32_n3(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, 
-3 - ; check: iconst.i32 0x5555_5555 - ; check: smulhi v0, v2 - ; check: isub v3, v0 - ; check: sshr_imm v4, 1 - ; check: ushr_imm v5, 31 - ; check: iadd v5, v6 - ; check: imul_imm v7, -3 - ; check: isub v0, v8 - return v1 -} - -; simple case w/ shift by zero (mul, add-sign-bit) -function %t_srem32_p6(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, 6 - ; check: iconst.i32 0x2aaa_aaab - ; check: smulhi v0, v2 - ; check: ushr_imm v3, 31 - ; check: iadd v3, v4 - ; check: imul_imm v5, 6 - ; check: isub v0, v6 - return v1 -} - -; case d > 0 && M < 0 (mull, add, shift, add-sign-bit) -function %t_srem32_p7(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, 7 - ; check: iconst.i32 0xffff_ffff_9249_2493 - ; check: smulhi v0, v2 - ; check: iadd v3, v0 - ; check: sshr_imm v4, 2 - ; check: ushr_imm v5, 31 - ; check: iadd v5, v6 - ; check: imul_imm v7, 7 - ; check: isub v0, v8 - return v1 -} - -; simple case (mul, shift, add-sign-bit) -function %t_srem32_p625(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, 625 - ; check: iconst.i32 0x68db_8bad - ; check: smulhi v0, v2 - ; check: sshr_imm v3, 8 - ; check: ushr_imm v4, 31 - ; check: iadd v4, v5 - ; check: imul_imm v6, 625 - ; check: isub v0, v7 - return v1 -} - - -; -------- U64 -------- - -; complex case (mul, sub, shift, add, shift) -function %t_urem64_p7(i64) -> i64 { -block0(v0: i64): - v1 = urem_imm v0, 7 - ; check: umulhi v0, v2 - ; check: isub v0, v3 - ; check: ushr_imm v4, 1 - ; check: iadd v5, v3 - ; check: ushr_imm v6, 2 - ; check: imul_imm v7, 7 - ; check: isub v0, v8 - return v1 -} - -; simple case (mul, shift) -function %t_urem64_p9(i64) -> i64 { -block0(v0: i64): - v1 = urem_imm v0, 9 - ; check: iconst.i64 0xe38e_38e3_8e38_e38f - ; check: umulhi v0, v2 - ; check: ushr_imm v3, 3 - ; check: imul_imm v4, 9 - ; check: isub v0, v5 - return v1 -} - -; complex case (mul, sub, shift, add, shift) -function %t_urem64_p125(i64) -> i64 { -block0(v0: i64): - v1 = urem_imm v0, 125 - ; check: iconst.i64 
0x0624_dd2f_1a9f_be77 - ; check: umulhi v0, v2 - ; check: isub v0, v3 - ; check: ushr_imm v4, 1 - ; check: iadd v5, v3 - ; check: ushr_imm v6, 6 - ; check: imul_imm v7, 125 - ; check: isub v0, v8 - return v1 -} - -; simple case w/ shift by zero (mul) -function %t_urem64_p274177(i64) -> i64 { -block0(v0: i64): - v1 = urem_imm v0, 274177 - ; check: iconst.i64 0x3d30_f19c_d101 - ; check: umulhi v0, v2 - ; check: imul_imm v3, 0x0004_2f01 - ; check: isub v0, v4 - return v1 -} - - -; -------- S64 -------- - -; simple case (mul, shift, add-sign-bit) -function %t_srem64_n625(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, -625 - ; check: iconst.i64 0xcb92_3a29_c779_a6b5 - ; check: smulhi v0, v2 - ; check: sshr_imm v3, 7 - ; check: ushr_imm v4, 63 - ; check: iadd v4, v5 - ; check: imul_imm v6, -625 - ; check: isub v0, v7 - return v1 -} - -; simple case w/ zero shift (mul, add-sign-bit) -function %t_srem64_n6(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, -6 - ; check: iconst.i64 0xd555_5555_5555_5555 - ; check: smulhi v0, v2 - ; check: ushr_imm v3, 63 - ; check: iadd v3, v4 - ; check: imul_imm v5, -6 - ; check: isub v0, v6 - return v1 -} - -; simple case w/ zero shift (mul, add-sign-bit) -function %t_srem64_n5(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, -5 - ; check: iconst.i64 0x9999_9999_9999_9999 - ; check: smulhi v0, v2 - ; check: sshr_imm v3, 1 - ; check: ushr_imm v4, 63 - ; check: iadd v4, v5 - ; check: imul_imm v6, -5 - ; check: isub v0, v7 - return v1 -} - -; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) -function %t_srem64_n3(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, -3 - ; check: iconst.i64 0x5555_5555_5555_5555 - ; check: smulhi v0, v2 - ; check: isub v3, v0 - ; check: sshr_imm v4, 1 - ; check: ushr_imm v5, 63 - ; check: iadd v5, v6 - ; check: imul_imm v7, -3 - ; check: isub v0, v8 - return v1 -} - -; simple case w/ zero shift (mul, add-sign-bit) -function %t_srem64_p6(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, 6 - ; 
check: iconst.i64 0x2aaa_aaaa_aaaa_aaab - ; check: smulhi v0, v2 - ; check: ushr_imm v3, 63 - ; check: iadd v3, v4 - ; check: imul_imm v5, 6 - ; check: isub v0, v6 - return v1 -} - -; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) -function %t_srem64_p15(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, 15 - ; check: iconst.i64 0x8888_8888_8888_8889 - ; check: smulhi v0, v2 - ; check: iadd v3, v0 - ; check: sshr_imm v4, 3 - ; check: ushr_imm v5, 63 - ; check: iadd v5, v6 - ; check: imul_imm v7, 15 - ; check: isub v0, v8 - return v1 -} - -; simple case (mul, shift, add-sign-bit) -function %t_srem64_p625(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, 625 - ; check: iconst.i64 0x346d_c5d6_3886_594b - ; check: smulhi v0, v2 - ; check: sshr_imm v3, 7 - ; check: ushr_imm v4, 63 - ; check: iadd v4, v5 - ; check: imul_imm v6, 625 - ; check: isub v0, v7 - return v1 -} diff --git a/cranelift/filetests/filetests/simple_preopt/rem_by_const_power_of_2.clif b/cranelift/filetests/filetests/simple_preopt/rem_by_const_power_of_2.clif deleted file mode 100644 index 19cc5e82b523..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/rem_by_const_power_of_2.clif +++ /dev/null @@ -1,292 +0,0 @@ -test simple_preopt -target aarch64 -target i686 baseline - -; -------- U32 -------- - -; ignored -function %t_urem32_p0(i32) -> i32 { -block0(v0: i32): - v1 = urem_imm v0, 0 - ; check: urem_imm v0, 0 - return v1 -} - -; converted to constant zero -function %t_urem32_p1(i32) -> i32 { -block0(v0: i32): - v1 = urem_imm v0, 1 - ; check: iconst.i32 0 - return v1 -} - -; shift -function %t_urem32_p2(i32) -> i32 { -block0(v0: i32): - v1 = urem_imm v0, 2 - ; check: band_imm v0, 1 - return v1 -} - -; shift -function %t_urem32_p2p31(i32) -> i32 { -block0(v0: i32): - v1 = urem_imm v0, 0x8000_0000 - ; check: band_imm v0, 0x7fff_ffff - return v1 -} - - -; -------- U64 -------- - -; ignored -function %t_urem64_p0(i64) -> i64 { -block0(v0: i64): - v1 = urem_imm v0, 0 - ; check: urem_imm 
v0, 0 - return v1 -} - -; converted to constant zero -function %t_urem64_p1(i64) -> i64 { -block0(v0: i64): - v1 = urem_imm v0, 1 - ; check: iconst.i64 0 - return v1 -} - -; shift -function %t_urem64_p2(i64) -> i64 { -block0(v0: i64): - v1 = urem_imm v0, 2 - ; check: band_imm v0, 1 - return v1 -} - -; shift -function %t_urem64_p2p63(i64) -> i64 { -block0(v0: i64): - v1 = urem_imm v0, 0x8000_0000_0000_0000 - ; check: band_imm v0, 0x7fff_ffff_ffff_ffff - return v1 -} - - -; -------- S32 -------- - -; ignored -function %t_srem32_n1(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, -1 - ; check: srem_imm v0, -1 - return v1 -} - -; ignored -function %t_srem32_p0(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, 0 - ; check: srem_imm v0, 0 - return v1 -} - -; converted to constant zero -function %t_srem32_p1(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, 1 - ; check: iconst.i32 0 - return v1 -} - -; shift -function %t_srem32_p2(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, 2 - ; check: ushr_imm v0, 31 - ; check: iadd v0, v2 - ; check: band_imm v3, -2 - ; check: isub v0, v4 - return v1 -} - -; shift -function %t_srem32_n2(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, -2 - ; check: ushr_imm v0, 31 - ; check: iadd v0, v2 - ; check: band_imm v3, -2 - ; check: isub v0, v4 - return v1 -} - -; shift -function %t_srem32_p4(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, 4 - ; check: sshr_imm v0, 1 - ; check: ushr_imm v2, 30 - ; check: iadd v0, v3 - ; check: band_imm v4, -4 - ; check: isub v0, v5 - return v1 -} - -; shift -function %t_srem32_n4(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, -4 - ; check: sshr_imm v0, 1 - ; check: ushr_imm v2, 30 - ; check: iadd v0, v3 - ; check: band_imm v4, -4 - ; check: isub v0, v5 - return v1 -} - -; shift -function %t_srem32_p2p30(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, 0x4000_0000 - ; check: sshr_imm v0, 29 - ; check: ushr_imm v2, 2 - ; check: iadd v0, v3 - ; check: band_imm v4, 0xffff_ffff_c000_0000 
- ; check: isub v0, v5 - return v1 -} - -; shift -function %t_srem32_n2p30(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, -0x4000_0000 - ; check: sshr_imm v0, 29 - ; check: ushr_imm v2, 2 - ; check: iadd v0, v3 - ; check: band_imm v4, 0xffff_ffff_c000_0000 - ; check: isub v0, v5 - return v1 -} - -; there's no positive version of this, since -(-0x8000_0000) isn't -; representable. -function %t_srem32_n2p31(i32) -> i32 { -block0(v0: i32): - v1 = srem_imm v0, -0x8000_0000 - ; check: sshr_imm v0, 30 - ; check: ushr_imm v2, 1 - ; check: iadd v0, v3 - ; check: band_imm v4, 0xffff_ffff_8000_0000 - ; check: isub v0, v5 - return v1 -} - - -; -------- S64 -------- - -; ignored -function %t_srem64_n1(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, -1 - ; check: srem_imm v0, -1 - return v1 -} - -; ignored -function %t_srem64_p0(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, 0 - ; check: srem_imm v0, 0 - return v1 -} - -; converted to constant zero -function %t_srem64_p1(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, 1 - ; check: iconst.i64 0 - return v1 -} - -; shift -function %t_srem64_p2(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, 2 - ; check: ushr_imm v0, 63 - ; check: iadd v0, v2 - ; check: band_imm v3, -2 - ; check: isub v0, v4 - return v1 -} - -; shift -function %t_srem64_n2(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, -2 - ; check: ushr_imm v0, 63 - ; check: iadd v0, v2 - ; check: band_imm v3, -2 - ; check: isub v0, v4 - return v1 -} - -; shift -function %t_srem64_p4(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, 4 - ; check: sshr_imm v0, 1 - ; check: ushr_imm v2, 62 - ; check: iadd v0, v3 - ; check: band_imm v4, -4 - ; check: isub v0, v5 - return v1 -} - -; shift -function %t_srem64_n4(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, -4 - ; check: sshr_imm v0, 1 - ; check: ushr_imm v2, 62 - ; check: iadd v0, v3 - ; check: band_imm v4, -4 - ; check: isub v0, v5 - return v1 -} - -; shift -function %t_srem64_p2p62(i64) -> i64 { 
-block0(v0: i64): - v1 = srem_imm v0, 0x4000_0000_0000_0000 - ; check: sshr_imm v0, 61 - ; check: ushr_imm v2, 2 - ; check: iadd v0, v3 - ; check: band_imm v4, 0xc000_0000_0000_0000 - ; check: isub v0, v5 - return v1 -} - -; shift -function %t_srem64_n2p62(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, -0x4000_0000_0000_0000 - ; check: sshr_imm v0, 61 - ; check: ushr_imm v2, 2 - ; check: iadd v0, v3 - ; check: band_imm v4, 0xc000_0000_0000_0000 - ; check: isub v0, v5 - return v1 -} - -; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't -; representable. -function %t_srem64_n2p63(i64) -> i64 { -block0(v0: i64): - v1 = srem_imm v0, -0x8000_0000_0000_0000 - ; check: sshr_imm v0, 62 - ; check: ushr_imm v2, 1 - ; check: iadd v0, v3 - ; check: band_imm v4, 0x8000_0000_0000_0000 - ; check: isub v0, v5 - return v1 -} diff --git a/cranelift/filetests/filetests/simple_preopt/replace_branching_instructions_and_cfg_predecessors.clif b/cranelift/filetests/filetests/simple_preopt/replace_branching_instructions_and_cfg_predecessors.clif deleted file mode 100644 index 89a576fab635..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/replace_branching_instructions_and_cfg_predecessors.clif +++ /dev/null @@ -1,18 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 - -function u0:2(i64 , i64) { - block0(v0: i64, v1: i64): - v18 = load.i32 v0 - v19 = iconst.i32 4 - v20 = icmp ne v18, v19 - v21 = uextend.i32 v20 - brif v21, block2, block4 - block4: - jump block1 - block2: - jump block1 - block1: - return -} diff --git a/cranelift/filetests/filetests/simple_preopt/sign_extend.clif b/cranelift/filetests/filetests/simple_preopt/sign_extend.clif deleted file mode 100644 index 6fccf8553e62..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/sign_extend.clif +++ /dev/null @@ -1,40 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 - -;; Tests for sign-extending immediates. 
- -function %sign_extend_signed_icmp(i8) -> i8 { -block0(v0: i8): - ; 255 = -1 as u8 - v1 = iconst.i8 255 - v2 = icmp sge v0, v1 - ; check: v2 = icmp_imm sge v0, -1 - return v2 -} - -function %do_not_sign_extend_unsigned_icmp(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 255 - v2 = icmp uge v0, v1 - ; check: v2 = icmp_imm uge v0, 255 - return v2 -} - -function %sign_extend_sdiv(i8) -> i8 { -block0(v0: i8): - ; 255 = -1 as u8 - v1 = iconst.i8 255 - v2 = sdiv v0, v1 - ; check: v2 = sdiv_imm v0, -1 - return v2 -} - -function %sign_extend_srem(i8) -> i8 { -block0(v0: i8): - ; 255 = -1 as u8 - v1 = iconst.i8 255 - v2 = srem v0, v1 - ; check: v2 = srem_imm v0, -1 - return v2 -} diff --git a/cranelift/filetests/filetests/simple_preopt/simplify32.clif b/cranelift/filetests/filetests/simple_preopt/simplify32.clif deleted file mode 100644 index 80fb1363e5d3..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/simplify32.clif +++ /dev/null @@ -1,62 +0,0 @@ -test simple_preopt -target aarch64 -target i686 - -;; 32-bits platforms. - -function %iadd_imm(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 2 - v2 = iadd v0, v1 - return v2 -} -; sameln: function %iadd_imm -; nextln: block0(v0: i32): -; nextln: v1 = iconst.i32 2 -; nextln: v2 = iadd_imm v0, 2 -; nextln: return v2 -; nextln: } - -function %isub_imm(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 2 - v2 = isub v0, v1 - return v2 -} -; sameln: function %isub_imm -; nextln: block0(v0: i32): -; nextln: v1 = iconst.i32 2 -; nextln: v2 = iadd_imm v0, -2 -; nextln: return v2 -; nextln: } - -function %icmp_imm(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 2 - v2 = icmp slt v0, v1 - v3 = uextend.i32 v2 - return v3 -} -; sameln: function %icmp_imm -; nextln: block0(v0: i32): -; nextln: v1 = iconst.i32 2 -; nextln: v2 = icmp_imm slt v0, 2 -; nextln: v3 = uextend.i32 v2 -; nextln: return v3 -; nextln: } - -;; Don't simplify operations that would get illegal because of lack of native -;; support. 
-function %iadd_imm(i64) -> i64 { -block0(v0: i64): - v1 = iconst.i64 2 - v2 = iadd v0, v1 - return v2 -} -; sameln: function %iadd_imm -; nextln: block0(v0: i64): -; nextln: v1 = iconst.i64 2 -; nextln: v2 = iadd v0, v1 -; nextln: return v2 -; nextln: } - diff --git a/cranelift/filetests/filetests/simple_preopt/simplify64.clif b/cranelift/filetests/filetests/simple_preopt/simplify64.clif deleted file mode 100644 index dc850f715ecc..000000000000 --- a/cranelift/filetests/filetests/simple_preopt/simplify64.clif +++ /dev/null @@ -1,294 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 - -;; 64-bits platforms. - -function %iadd_imm(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 2 - v2 = iadd v0, v1 - return v2 -} -; sameln: function %iadd_imm -; nextln: block0(v0: i32): -; nextln: v1 = iconst.i32 2 -; nextln: v2 = iadd_imm v0, 2 -; nextln: return v2 -; nextln: } - -function %isub_imm(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 2 - v2 = isub v0, v1 - return v2 -} -; sameln: function %isub_imm -; nextln: block0(v0: i32): -; nextln: v1 = iconst.i32 2 -; nextln: v2 = iadd_imm v0, -2 -; nextln: return v2 -; nextln: } - -function %icmp_imm(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 2 - v2 = icmp slt v0, v1 - v3 = uextend.i32 v2 - return v3 -} -; sameln: function %icmp_imm -; nextln: block0(v0: i32): -; nextln: v1 = iconst.i32 2 -; nextln: v2 = icmp_imm slt v0, 2 -; nextln: v3 = uextend.i32 v2 -; nextln: return v3 -; nextln: } - -function %brif_false_uextend(i32) { -block0(v0: i32): - v3 = icmp_imm slt v0, 0 - v1 = uextend.i32 v3 - v2 = select v1, v1, v1 - trapz v1, user0 - brif v1, block2, block1 - -block1: - return - -block2: - return -} -; sameln: function %brif_false_uextend -; nextln: (v0: i32): -; nextln: v3 = icmp_imm slt v0, 0 -; nextln: v1 = uextend.i32 v3 -; nextln: v2 = select v1, v1, v1 -; nextln: trapz v1, user0 -; nextln: brif v1, block2, block1 - -function %irsub_imm(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 2 - v2 = isub v1, 
v0 - return v2 -} -; sameln: function %irsub_imm -; nextln: block0(v0: i32): -; nextln: v1 = iconst.i32 2 -; nextln: v2 = irsub_imm v0, 2 -; nextln: return v2 -; nextln: } - -;; Sign-extensions. - -;; 8 -> 16 -function %uextend_8_16() -> i16 { -block0: - v0 = iconst.i16 37 - v1 = ishl_imm v0, 8 - v2 = ushr_imm v1, 8 - return v2 -} -; sameln: function %uextend_8_16 -; nextln: block0: -; nextln: v0 = iconst.i16 37 -; nextln: v1 = ishl_imm v0, 8 -; nextln: v3 = ireduce.i8 v0 -; nextln: v2 = uextend.i16 v3 -; nextln: return v2 -; nextln: } - -function %sextend_8_16() -> i16 { -block0: - v0 = iconst.i16 37 - v1 = ishl_imm v0, 8 - v2 = sshr_imm v1, 8 - return v2 -} -; sameln: function %sextend_8_16 -; nextln: block0: -; nextln: v0 = iconst.i16 37 -; nextln: v1 = ishl_imm v0, 8 -; nextln: v3 = ireduce.i8 v0 -; nextln: v2 = sextend.i16 v3 -; nextln: return v2 -; nextln: } - -;; 8 -> 32 -function %uextend_8_32() -> i32 { -block0: - v0 = iconst.i32 37 - v1 = ishl_imm v0, 24 - v2 = ushr_imm v1, 24 - return v2 -} -; sameln: function %uextend_8_32 -; nextln: block0: -; nextln: v0 = iconst.i32 37 -; nextln: v1 = ishl_imm v0, 24 -; nextln: v3 = ireduce.i8 v0 -; nextln: v2 = uextend.i32 v3 -; nextln: return v2 -; nextln: } - -function %sextend_8_32() -> i32 { -block0: - v0 = iconst.i32 37 - v1 = ishl_imm v0, 24 - v2 = sshr_imm v1, 24 - return v2 -} -; sameln: function %sextend_8_32 -; nextln: block0: -; nextln: v0 = iconst.i32 37 -; nextln: v1 = ishl_imm v0, 24 -; nextln: v3 = ireduce.i8 v0 -; nextln: v2 = sextend.i32 v3 -; nextln: return v2 -; nextln: } - -;; 16 -> 32 -function %uextend_16_32() -> i32 { -block0: - v0 = iconst.i32 37 - v1 = ishl_imm v0, 16 - v2 = ushr_imm v1, 16 - return v2 -} -; sameln: function %uextend_16_32 -; nextln: block0: -; nextln: v0 = iconst.i32 37 -; nextln: v1 = ishl_imm v0, 16 -; nextln: v3 = ireduce.i16 v0 -; nextln: v2 = uextend.i32 v3 -; nextln: return v2 -; nextln: } - -function %sextend_16_32() -> i32 { -block0: - v0 = iconst.i32 37 - v1 = 
ishl_imm v0, 16 - v2 = sshr_imm v1, 16 - return v2 -} -; sameln: function %sextend_16_32 -; nextln: block0: -; nextln: v0 = iconst.i32 37 -; nextln: v1 = ishl_imm v0, 16 -; nextln: v3 = ireduce.i16 v0 -; nextln: v2 = sextend.i32 v3 -; nextln: return v2 -; nextln: } - -;; 8 -> 64 -function %uextend_8_64() -> i64 { -block0: - v0 = iconst.i64 37 - v1 = ishl_imm v0, 56 - v2 = ushr_imm v1, 56 - return v2 -} -; sameln: function %uextend_8_64 -; nextln: block0: -; nextln: v0 = iconst.i64 37 -; nextln: v1 = ishl_imm v0, 56 -; nextln: v3 = ireduce.i8 v0 -; nextln: v2 = uextend.i64 v3 -; nextln: return v2 -; nextln: } - -function %sextend_8_64() -> i64 { -block0: - v0 = iconst.i64 37 - v1 = ishl_imm v0, 56 - v2 = sshr_imm v1, 56 - return v2 -} -; sameln: function %sextend_8_64 -; nextln: block0: -; nextln: v0 = iconst.i64 37 -; nextln: v1 = ishl_imm v0, 56 -; nextln: v3 = ireduce.i8 v0 -; nextln: v2 = sextend.i64 v3 -; nextln: return v2 -; nextln: } - -;; 16 -> 64 -function %uextend_16_64() -> i64 { -block0: - v0 = iconst.i64 37 - v1 = ishl_imm v0, 48 - v2 = ushr_imm v1, 48 - return v2 -} -; sameln: function %uextend_16_64 -; nextln: block0: -; nextln: v0 = iconst.i64 37 -; nextln: v1 = ishl_imm v0, 48 -; nextln: v3 = ireduce.i16 v0 -; nextln: v2 = uextend.i64 v3 -; nextln: return v2 -; nextln: } - -function %sextend_16_64() -> i64 { -block0: - v0 = iconst.i64 37 - v1 = ishl_imm v0, 48 - v2 = sshr_imm v1, 48 - return v2 -} -; sameln: function %sextend_16_64 -; nextln: block0: -; nextln: v0 = iconst.i64 37 -; nextln: v1 = ishl_imm v0, 48 -; nextln: v3 = ireduce.i16 v0 -; nextln: v2 = sextend.i64 v3 -; nextln: return v2 -; nextln: } - -;; 32 -> 64 -function %uextend_32_64() -> i64 { -block0: - v0 = iconst.i64 37 - v1 = ishl_imm v0, 32 - v2 = ushr_imm v1, 32 - return v2 -} -; sameln: function %uextend_32_64 -; nextln: block0: -; nextln: v0 = iconst.i64 37 -; nextln: v1 = ishl_imm v0, 32 -; nextln: v3 = ireduce.i32 v0 -; nextln: v2 = uextend.i64 v3 -; nextln: return v2 -; 
nextln: } - -function %sextend_32_64() -> i64 { -block0: - v0 = iconst.i64 37 - v1 = ishl_imm v0, 32 - v2 = sshr_imm v1, 32 - return v2 -} -; sameln: function %sextend_32_64 -; nextln: block0: -; nextln: v0 = iconst.i64 37 -; nextln: v1 = ishl_imm v0, 32 -; nextln: v3 = ireduce.i32 v0 -; nextln: v2 = sextend.i64 v3 -; nextln: return v2 -; nextln: } - -function %add_imm_fold(i32) -> i32 { -block0(v0: i32): - v1 = iadd_imm v0, 42 - v2 = iadd_imm v1, -42 - return v2 -} -; sameln: function %add_imm_fold(i32) -; nextln: block0(v0: i32): -; nextln: v2 -> v0 -; nextln: v1 = iadd_imm v0, 42 -; nextln: nop -; nextln: return v2 diff --git a/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat b/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat deleted file mode 100644 index 73aaccd829e4..000000000000 --- a/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat +++ /dev/null @@ -1,88 +0,0 @@ -;;! target = "x86_64" -;;! -;;! optimize = true -;;! -;;! settings = [ -;;! "enable_heap_access_spectre_mitigation=true", -;;! "opt_level=speed_and_size", -;;! "use_egraphs=true" -;;! ] -;;! -;;! [globals.vmctx] -;;! type = "i64" -;;! vmctx = true -;;! -;;! [globals.heap_base] -;;! type = "i64" -;;! load = { base = "vmctx", offset = 0 } -;;! -;;! [globals.heap_bound] -;;! type = "i64" -;;! load = { base = "vmctx", offset = 8 } -;;! -;;! [[heaps]] -;;! base = "heap_base" -;;! min_size = 0 -;;! offset_guard_size = 0xffffffff -;;! index_type = "i32" -;;! 
style = { kind = "dynamic", bound = "heap_bound" } - -(module - (memory (export "memory") 0) - (func (export "load-without-offset") (param i32) (result i32 i32) - local.get 0 - i32.load - local.get 0 - i32.load - ) - (func (export "load-with-offset") (param i32) (result i32 i32) - local.get 0 - i32.load offset=1234 - local.get 0 - i32.load offset=1234 - ) -) - -;; function u0:0(i32, i64 vmctx) -> i32, i32 fast { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned gv0+8 -;; gv2 = load.i64 notrap aligned gv0 -;; -;; block0(v0: i32, v1: i64): -;; @0057 v5 = load.i64 notrap aligned v1+8 -;; @0057 v7 = load.i64 notrap aligned v1 -;; @0057 v4 = uextend.i64 v0 -;; @0057 v6 = icmp ugt v4, v5 -;; @0057 v9 = iconst.i64 0 -;; @0057 v8 = iadd v7, v4 -;; @0057 v10 = select_spectre_guard v6, v9, v8 ; v9 = 0 -;; @0057 v11 = load.i32 little heap v10 -;; v2 -> v11 -;; @005f jump block1 -;; -;; block1: -;; @005f return v11, v11 -;; } -;; -;; function u0:1(i32, i64 vmctx) -> i32, i32 fast { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned gv0+8 -;; gv2 = load.i64 notrap aligned gv0 -;; -;; block0(v0: i32, v1: i64): -;; @0064 v5 = load.i64 notrap aligned v1+8 -;; @0064 v7 = load.i64 notrap aligned v1 -;; @0064 v4 = uextend.i64 v0 -;; @0064 v6 = icmp ugt v4, v5 -;; @0064 v10 = iconst.i64 0 -;; @0064 v8 = iadd v7, v4 -;; v22 = iconst.i64 1234 -;; @0064 v9 = iadd v8, v22 ; v22 = 1234 -;; @0064 v11 = select_spectre_guard v6, v10, v9 ; v10 = 0 -;; @0064 v12 = load.i32 little heap v11 -;; v2 -> v12 -;; @006e jump block1 -;; -;; block1: -;; @006e return v12, v12 -;; } diff --git a/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory.wat b/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory.wat index 53b97ceb5143..7d00c81bfdd5 100644 --- a/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory.wat +++ b/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory.wat @@ -5,7 +5,6 @@ ;;! settings = [ ;;! 
"enable_heap_access_spectre_mitigation=true", ;;! "opt_level=speed_and_size", -;;! "use_egraphs=false" ;;! ] ;;! ;;! [globals.vmctx] @@ -49,24 +48,15 @@ ;; gv2 = load.i64 notrap aligned gv0 ;; ;; block0(v0: i32, v1: i64): -;; @0057 v4 = uextend.i64 v0 -;; v12 -> v4 ;; @0057 v5 = load.i64 notrap aligned v1+8 -;; v13 -> v5 -;; @0057 v6 = icmp ugt v4, v5 -;; v14 -> v6 ;; @0057 v7 = load.i64 notrap aligned v1 -;; v15 -> v7 -;; @0057 v8 = iadd v7, v4 -;; v16 -> v8 +;; @0057 v4 = uextend.i64 v0 +;; @0057 v6 = icmp ugt v4, v5 ;; @0057 v9 = iconst.i64 0 -;; v17 -> v9 +;; @0057 v8 = iadd v7, v4 ;; @0057 v10 = select_spectre_guard v6, v9, v8 ; v9 = 0 -;; v18 -> v10 ;; @0057 v11 = load.i32 little heap v10 ;; v2 -> v11 -;; v19 -> v11 -;; v3 -> v19 ;; @005f jump block1 ;; ;; block1: @@ -79,28 +69,17 @@ ;; gv2 = load.i64 notrap aligned gv0 ;; ;; block0(v0: i32, v1: i64): -;; @0064 v4 = uextend.i64 v0 -;; v13 -> v4 ;; @0064 v5 = load.i64 notrap aligned v1+8 -;; v14 -> v5 -;; @0064 v6 = icmp ugt v4, v5 -;; v15 -> v6 ;; @0064 v7 = load.i64 notrap aligned v1 -;; v16 -> v7 +;; @0064 v4 = uextend.i64 v0 +;; @0064 v6 = icmp ugt v4, v5 +;; @0064 v10 = iconst.i64 0 ;; @0064 v8 = iadd v7, v4 -;; v17 -> v8 ;; v22 = iconst.i64 1234 -;; v23 -> v22 ;; @0064 v9 = iadd v8, v22 ; v22 = 1234 -;; v18 -> v9 -;; @0064 v10 = iconst.i64 0 -;; v19 -> v10 ;; @0064 v11 = select_spectre_guard v6, v10, v9 ; v10 = 0 -;; v20 -> v11 ;; @0064 v12 = load.i32 little heap v11 ;; v2 -> v12 -;; v21 -> v12 -;; v3 -> v21 ;; @006e jump block1 ;; ;; block1: diff --git a/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory-egraph.wat b/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory-egraph.wat deleted file mode 100644 index d434d5a33a61..000000000000 --- a/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory-egraph.wat +++ /dev/null @@ -1,74 +0,0 @@ -;;! target = "x86_64" -;;! -;;! optimize = true -;;! -;;! settings = [ -;;! "enable_heap_access_spectre_mitigation=true", -;;! 
"opt_level=speed_and_size", -;;! "use_egraphs=true" -;;! ] -;;! -;;! [globals.vmctx] -;;! type = "i64" -;;! vmctx = true -;;! -;;! [globals.heap_base] -;;! type = "i64" -;;! load = { base = "vmctx", offset = 0, readonly = true } -;;! -;;! [[heaps]] -;;! base = "heap_base" -;;! min_size = 0x10000 -;;! offset_guard_size = 0xffffffff -;;! index_type = "i32" -;;! style = { kind = "static", bound = 0x10000000 } - -(module - (memory (export "memory") 1) - (func (export "load-without-offset") (param i32) (result i32 i32) - local.get 0 - i32.load - local.get 0 - i32.load - ) - (func (export "load-with-offset") (param i32) (result i32 i32) - local.get 0 - i32.load offset=1234 - local.get 0 - i32.load offset=1234 - ) -) - -;; function u0:0(i32, i64 vmctx) -> i32, i32 fast { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0 -;; -;; block0(v0: i32, v1: i64): -;; @0057 v5 = load.i64 notrap aligned readonly v1 -;; @0057 v4 = uextend.i64 v0 -;; @0057 v6 = iadd v5, v4 -;; @0057 v7 = load.i32 little heap v6 -;; v2 -> v7 -;; @005f jump block1 -;; -;; block1: -;; @005f return v7, v7 -;; } -;; -;; function u0:1(i32, i64 vmctx) -> i32, i32 fast { -;; gv0 = vmctx -;; gv1 = load.i64 notrap aligned readonly gv0 -;; -;; block0(v0: i32, v1: i64): -;; @0064 v5 = load.i64 notrap aligned readonly v1 -;; @0064 v4 = uextend.i64 v0 -;; @0064 v6 = iadd v5, v4 -;; v14 = iconst.i64 1234 -;; @0064 v7 = iadd v6, v14 ; v14 = 1234 -;; @0064 v8 = load.i32 little heap v7 -;; v2 -> v8 -;; @006e jump block1 -;; -;; block1: -;; @006e return v8, v8 -;; } diff --git a/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory.wat b/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory.wat index bef294915803..16a4fe413335 100644 --- a/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory.wat +++ b/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory.wat @@ -5,7 +5,6 @@ ;;! settings = [ ;;! "enable_heap_access_spectre_mitigation=true", ;;! 
"opt_level=speed_and_size", -;;! "use_egraphs=false" ;;! ] ;;! ;;! [globals.vmctx] @@ -44,16 +43,11 @@ ;; gv1 = load.i64 notrap aligned readonly gv0 ;; ;; block0(v0: i32, v1: i64): -;; @0057 v4 = uextend.i64 v0 -;; v8 -> v4 ;; @0057 v5 = load.i64 notrap aligned readonly v1 -;; v9 -> v5 +;; @0057 v4 = uextend.i64 v0 ;; @0057 v6 = iadd v5, v4 -;; v10 -> v6 ;; @0057 v7 = load.i32 little heap v6 ;; v2 -> v7 -;; v11 -> v7 -;; v3 -> v11 ;; @005f jump block1 ;; ;; block1: @@ -65,20 +59,13 @@ ;; gv1 = load.i64 notrap aligned readonly gv0 ;; ;; block0(v0: i32, v1: i64): -;; @0064 v4 = uextend.i64 v0 -;; v9 -> v4 ;; @0064 v5 = load.i64 notrap aligned readonly v1 -;; v10 -> v5 +;; @0064 v4 = uextend.i64 v0 ;; @0064 v6 = iadd v5, v4 -;; v11 -> v6 ;; v14 = iconst.i64 1234 -;; v15 -> v14 ;; @0064 v7 = iadd v6, v14 ; v14 = 1234 -;; v12 -> v7 ;; @0064 v8 = load.i32 little heap v7 ;; v2 -> v8 -;; v13 -> v8 -;; v3 -> v13 ;; @006e jump block1 ;; ;; block1: diff --git a/cranelift/filetests/filetests/wasm/dynamic-memory-no-spectre-access-same-index-different-offsets.wat b/cranelift/filetests/filetests/wasm/dynamic-memory-no-spectre-access-same-index-different-offsets.wat index 14da1eb48373..d3f19dfe5266 100644 --- a/cranelift/filetests/filetests/wasm/dynamic-memory-no-spectre-access-same-index-different-offsets.wat +++ b/cranelift/filetests/filetests/wasm/dynamic-memory-no-spectre-access-same-index-different-offsets.wat @@ -5,7 +5,6 @@ ;;! settings = [ ;;! "enable_heap_access_spectre_mitigation=false", ;;! "opt_level=speed_and_size", -;;! "use_egraphs=true" ;;! ] ;;! ;;! 
[globals.vmctx] diff --git a/cranelift/filetests/filetests/wasm/dynamic-memory-yes-spectre-access-same-index-different-offsets.wat b/cranelift/filetests/filetests/wasm/dynamic-memory-yes-spectre-access-same-index-different-offsets.wat index 4dbe53e807bd..c31b82338eb6 100644 --- a/cranelift/filetests/filetests/wasm/dynamic-memory-yes-spectre-access-same-index-different-offsets.wat +++ b/cranelift/filetests/filetests/wasm/dynamic-memory-yes-spectre-access-same-index-different-offsets.wat @@ -5,7 +5,6 @@ ;;! settings = [ ;;! "enable_heap_access_spectre_mitigation=true", ;;! "opt_level=speed_and_size", -;;! "use_egraphs=true" ;;! ] ;;! ;;! [globals.vmctx] diff --git a/cranelift/filetests/src/lib.rs b/cranelift/filetests/src/lib.rs index c82e452809cb..3314aae81c1d 100644 --- a/cranelift/filetests/src/lib.rs +++ b/cranelift/filetests/src/lib.rs @@ -41,13 +41,10 @@ mod test_dce; mod test_domtree; mod test_interpret; mod test_legalizer; -mod test_licm; mod test_optimize; mod test_print_cfg; mod test_run; mod test_safepoint; -mod test_simple_gvn; -mod test_simple_preopt; mod test_unwind; mod test_verifier; mod test_wasm; @@ -114,13 +111,10 @@ fn new_subtest(parsed: &TestCommand) -> anyhow::Result "domtree" => test_domtree::subtest(parsed), "interpret" => test_interpret::subtest(parsed), "legalizer" => test_legalizer::subtest(parsed), - "licm" => test_licm::subtest(parsed), "optimize" => test_optimize::subtest(parsed), "print-cfg" => test_print_cfg::subtest(parsed), "run" => test_run::subtest(parsed), "safepoint" => test_safepoint::subtest(parsed), - "simple-gvn" => test_simple_gvn::subtest(parsed), - "simple_preopt" => test_simple_preopt::subtest(parsed), "unwind" => test_unwind::subtest(parsed), "verifier" => test_verifier::subtest(parsed), _ => anyhow::bail!("unknown test command '{}'", parsed.command), diff --git a/cranelift/filetests/src/test_alias_analysis.rs b/cranelift/filetests/src/test_alias_analysis.rs index 8d155811ba1c..5fbf86dfefcf 100644 --- 
a/cranelift/filetests/src/test_alias_analysis.rs +++ b/cranelift/filetests/src/test_alias_analysis.rs @@ -35,9 +35,6 @@ impl SubTest for TestAliasAnalysis { let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); comp_ctx.flowgraph(); - comp_ctx - .simple_gvn(context.flags_or_isa()) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, Into::into(e)))?; comp_ctx .replace_redundant_loads() .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, Into::into(e)))?; diff --git a/cranelift/filetests/src/test_licm.rs b/cranelift/filetests/src/test_licm.rs deleted file mode 100644 index b02bac1e74c6..000000000000 --- a/cranelift/filetests/src/test_licm.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Test command for testing the LICM pass. -//! -//! The `licm` test command runs each function through the LICM pass after ensuring -//! that all instructions are legal for the target. -//! -//! The resulting function is sent to `filecheck`. - -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::ir::Function; -use cranelift_reader::TestCommand; -use std::borrow::Cow; - -struct TestLICM; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "licm"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestLICM)) -} - -impl SubTest for TestLICM { - fn name(&self) -> &'static str { - "licm" - } - - fn needs_isa(&self) -> bool { - true - } - - fn is_mutating(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let isa = context.isa.expect("LICM needs an ISA"); - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - - comp_ctx.flowgraph(); - comp_ctx.compute_loop_analysis(); - comp_ctx - .licm(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, Into::into(e)))?; - - let text = comp_ctx.func.display().to_string(); - log::debug!("Post-LICM 
CLIF:\n{}", text); - run_filecheck(&text, context) - } -} diff --git a/cranelift/filetests/src/test_simple_gvn.rs b/cranelift/filetests/src/test_simple_gvn.rs deleted file mode 100644 index bb563f4315cb..000000000000 --- a/cranelift/filetests/src/test_simple_gvn.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! Test command for testing the simple GVN pass. -//! -//! The `simple-gvn` test command runs each function through the simple GVN pass after ensuring -//! that all instructions are legal for the target. -//! -//! The resulting function is sent to `filecheck`. - -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::ir::Function; -use cranelift_reader::TestCommand; -use std::borrow::Cow; - -struct TestSimpleGVN; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "simple-gvn"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestSimpleGVN)) -} - -impl SubTest for TestSimpleGVN { - fn name(&self) -> &'static str { - "simple-gvn" - } - - fn is_mutating(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - - comp_ctx.flowgraph(); - comp_ctx - .simple_gvn(context.flags_or_isa()) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, Into::into(e)))?; - - let text = comp_ctx.func.display().to_string(); - run_filecheck(&text, context) - } -} diff --git a/cranelift/filetests/src/test_simple_preopt.rs b/cranelift/filetests/src/test_simple_preopt.rs deleted file mode 100644 index 9a591ef023f0..000000000000 --- a/cranelift/filetests/src/test_simple_preopt.rs +++ /dev/null @@ -1,46 +0,0 @@ -//! Test command for testing the preopt pass. -//! -//! The resulting function is sent to `filecheck`. 
- -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::ir::Function; -use cranelift_reader::TestCommand; -use std::borrow::Cow; - -struct TestSimplePreopt; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "simple_preopt"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestSimplePreopt)) -} - -impl SubTest for TestSimplePreopt { - fn name(&self) -> &'static str { - "simple_preopt" - } - - fn needs_isa(&self) -> bool { - true - } - - fn is_mutating(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - let isa = context.isa.expect("preopt needs an ISA"); - - comp_ctx.compute_cfg(); - comp_ctx - .preopt(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?; - let text = &comp_ctx.func.display().to_string(); - log::debug!("After simple_preopt:\n{}", text); - run_filecheck(&text, context) - } -} diff --git a/cranelift/fuzzgen/src/lib.rs b/cranelift/fuzzgen/src/lib.rs index e8e649314611..4a759583bd4f 100644 --- a/cranelift/fuzzgen/src/lib.rs +++ b/cranelift/fuzzgen/src/lib.rs @@ -183,7 +183,6 @@ where "enable_incremental_compilation_cache_checks", "regalloc_checker", "enable_llvm_abi_extensions", - "use_egraphs", ]; for flag_name in bool_settings { let enabled = self diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index 11e52f94694a..d6891583262d 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -141,7 +141,6 @@ impl Config { } /// Converts this to a `wasmtime::Config` object - #[allow(deprecated)] // Allow use of `cranelift_use_egraphs` below. 
pub fn to_wasmtime(&self) -> wasmtime::Config { crate::init_fuzzing(); log::debug!("creating wasmtime config with {:#?}", self.wasmtime); @@ -157,7 +156,6 @@ impl Config { .native_unwind_info(self.wasmtime.native_unwind_info) .cranelift_nan_canonicalization(self.wasmtime.canonicalize_nans) .cranelift_opt_level(self.wasmtime.opt_level.to_wasmtime()) - .cranelift_use_egraphs(self.wasmtime.use_egraphs) .consume_fuel(self.wasmtime.consume_fuel) .epoch_interruption(self.wasmtime.epoch_interruption) .memory_init_cow(self.wasmtime.memory_init_cow) @@ -377,7 +375,6 @@ impl<'a> Arbitrary<'a> for Config { #[derive(Arbitrary, Clone, Debug, Eq, Hash, PartialEq)] pub struct WasmtimeConfig { opt_level: OptLevel, - use_egraphs: bool, debug_info: bool, canonicalize_nans: bool, interruptable: bool, diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index c21428392169..504d8d22b067 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -884,30 +884,6 @@ impl Config { self } - /// Configures the Cranelift code generator to use its - /// "egraph"-based mid-end optimizer. - /// - /// This optimizer has replaced the compiler's more traditional - /// pipeline of optimization passes with a unified code-rewriting - /// system. It is on by default, but the traditional optimization - /// pass structure is still available for now (it is deprecrated and - /// will be removed in a future version). - /// - /// The default value for this is `true`. - #[cfg(compiler)] - #[cfg_attr(nightlydoc, doc(cfg(any(feature = "cranelift", feature = "winch"))))] // see build.rs - #[deprecated( - since = "5.0.0", - note = "egraphs will be the default and this method will be removed in a future version." 
- )] - pub fn cranelift_use_egraphs(&mut self, enable: bool) -> &mut Self { - let val = if enable { "true" } else { "false" }; - self.compiler_config - .settings - .insert("use_egraphs".to_string(), val.to_string()); - self - } - /// Configures whether Cranelift should perform a NaN-canonicalization pass. /// /// When Cranelift is used as a code generation backend this will configure diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 6153194186d6..a900a435fe3a 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -412,7 +412,6 @@ impl Engine { | "machine_code_cfg_info" | "tls_model" // wasmtime doesn't use tls right now | "opt_level" // opt level doesn't change semantics - | "use_egraphs" // optimizing with egraphs doesn't change semantics | "enable_alias_analysis" // alias analysis-based opts don't change semantics | "probestack_func_adjusts_sp" // probestack above asserted disabled | "probestack_size_log2" // probestack above asserted disabled