From 663c58d00dc2c16b10f8d7e8210c97bfadd0ee1f Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 29 Jun 2023 00:12:17 +0200 Subject: [PATCH] Change SIMD Loop from Fast to only reassoc/contract (#49405) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses #49387 Co-authored-by: Mosè Giordano --- NEWS.md | 4 ++++ base/simdloop.jl | 2 +- src/llvm-muladd.cpp | 8 ++++---- src/llvm-simdloop.cpp | 3 ++- test/llvmpasses/loopinfo.jl | 6 +++--- test/llvmpasses/simdloop.ll | 4 ++-- 6 files changed, 16 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5dd9f2999de5c..50418ffe7309b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,6 +21,10 @@ Language changes that significantly improves load and inference times for heavily overloaded methods that dispatch on Types (such as traits and constructors). * The "h bar" `ℏ` (`\hslash` U+210F) character is now treated as equivalent to `ħ` (`\hbar` U+0127). +* The `@simd` macro now has more limited and clearer semantics: it only enables reordering and contraction + of floating-point operations, instead of turning on all "fastmath" optimizations. + If you observe performance regressions due to this change, you can recover the previous behavior with `@fastmath @simd`, + provided you are OK with all the optimizations enabled by the `@fastmath` macro. ([#49405]) * When a method with keyword arguments is displayed in the stack trace view, the textual representation of the keyword arguments' types is simplified using the new `@Kwargs{key1::Type1, ...}` macro syntax ([#49959]). 
diff --git a/base/simdloop.jl b/base/simdloop.jl index 29e2382cf39aa..797b77ed75a99 100644 --- a/base/simdloop.jl +++ b/base/simdloop.jl @@ -100,7 +100,7 @@ The object iterated over in a `@simd for` loop should be a one-dimensional range By using `@simd`, you are asserting several properties of the loop: * It is safe to execute iterations in arbitrary or overlapping order, with special consideration for reduction variables. -* Floating-point operations on reduction variables can be reordered, possibly causing different results than without `@simd`. +* Floating-point operations on reduction variables can be reordered or contracted, possibly causing different results than without `@simd`. In many cases, Julia is able to automatically vectorize inner for loops without the use of `@simd`. Using `@simd` gives the compiler a little extra leeway to make it possible in more situations. In diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp index 98e56e344f7af..29c0f7e2b10d6 100644 --- a/src/llvm-muladd.cpp +++ b/src/llvm-muladd.cpp @@ -40,10 +40,10 @@ STATISTIC(TotalContracted, "Total number of multiplies marked for FMA"); * Combine * ``` * %v0 = fmul ... %a, %b - * %v = fadd fast ... %v0, %c + * %v = fadd contract ... %v0, %c * ``` * to - * `%v = call fast @llvm.fmuladd.<...>(... %a, ... %b, ... %c)` + * `%v = call contract @llvm.fmuladd.<...>(... %a, ... %b, ... 
%c)` * when `%v0` has no other use */ @@ -87,13 +87,13 @@ static bool combineMulAdd(Function &F) JL_NOTSAFEPOINT it++; switch (I.getOpcode()) { case Instruction::FAdd: { - if (!I.isFast()) + if (!I.hasAllowContract()) continue; modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE); break; } case Instruction::FSub: { - if (!I.isFast()) + if (!I.hasAllowContract()) continue; modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE); break; diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 9a7f61410ba1d..21e2ec574d650 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -149,7 +149,8 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe return OptimizationRemark(DEBUG_TYPE, "MarkedUnsafeAlgebra", *K) << "marked unsafe algebra on " << ore::NV("Instruction", *K); }); - (*K)->setFast(true); + (*K)->setHasAllowReassoc(true); + (*K)->setHasAllowContract(true); ++length; } ReductionChainLength += length; diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index 18661ea6fde67..b9b388c73d0c5 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -29,10 +29,10 @@ function simdf(X) acc += x # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO:![0-9]+]] # LOWER-NOT: llvm.mem.parallel_loop_access -# LOWER: fadd fast double +# LOWER: fadd reassoc contract double # LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] -# FINAL: fadd fast <{{(vscale x )?}}{{[0-9]+}} x double> +# FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double> end acc end @@ -46,7 +46,7 @@ function simdf2(X) # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO2:![0-9]+]] # LOWER: llvm.mem.parallel_loop_access # LOWER-NOT: call void @julia.loopinfo_marker() -# LOWER: fadd fast double +# LOWER: fadd reassoc contract double # LOWER: br {{.*}}, 
!llvm.loop [[LOOPID2:![0-9]+]] end acc diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll index bc4b2da007dc2..929fbeea2c3f5 100644 --- a/test/llvmpasses/simdloop.ll +++ b/test/llvmpasses/simdloop.ll @@ -40,7 +40,7 @@ loop: ; CHECK: llvm.mem.parallel_loop_access %aval = load double, double *%aptr %nextv = fsub double %v, %aval -; CHECK: fsub fast double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 @@ -59,7 +59,7 @@ loop: %aptr = getelementptr double, double *%a, i64 %i %aval = load double, double *%aptr %nextv = fsub double %v, %aval -; CHECK: fsub fast double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 call void @julia.loopinfo_marker(), !julia.loopinfo !2 %done = icmp sgt i64 %nexti, 500