From 806f49a91d0f67f15fa94983178998947e267123 Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Wed, 27 Sep 2023 11:12:32 +0100
Subject: [PATCH 01/11] Implement missing math builtins for scalar data types

---
 libclc/x86_64-unknown-linux/libspirv/SOURCES  |   1 +
 .../libspirv/integer/helpers.h                |  13 +
 .../libspirv/integer/popcount.cl              |   3 +
 .../libspirv/math/ceil.cl                     |   3 +
 .../libspirv/math/fabs.cl                     |   3 +
 .../libspirv/math/floor.cl                    |   3 +
 .../x86_64-unknown-linux/libspirv/math/fma.cl |   4 +
 .../libspirv/math/helpers.h                   |  23 ++
 .../libspirv/math/native_cos.cl               |   3 +
 .../libspirv/math/native_exp.cl               |   3 +
 .../libspirv/math/native_exp2.cl              |   3 +
 .../libspirv/math/native_log.cl               |   3 +
 .../libspirv/math/native_log10.cl             |   4 +
 .../libspirv/math/native_log2.cl              |   4 +
 .../libspirv/math/native_sin.cl               |   4 +
 .../libspirv/math/native_sqrt.cl              |   3 +
 .../libspirv/math/rint.cl                     |   3 +
 .../libspirv/math/round.cl                    |   3 +
 .../libspirv/math/sqrt.cl                     |   3 +
 .../libspirv/math/trunc.cl                    |   3 +
 .../libspirv/shared/helpers.ll                | 270 ++++++++++++++++++
 llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp |   2 -
 .../SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp |   8 +-
 sycl/test/native_cpu/math_builtins.cpp        | 143 ++++++++++
 24 files changed, 512 insertions(+), 3 deletions(-)
 create mode 100644 libclc/x86_64-unknown-linux/libspirv/integer/helpers.h
 create mode 100644 libclc/x86_64-unknown-linux/libspirv/math/helpers.h
 create mode 100644 libclc/x86_64-unknown-linux/libspirv/shared/helpers.ll
 create mode 100644 sycl/test/native_cpu/math_builtins.cpp

diff --git a/libclc/x86_64-unknown-linux/libspirv/SOURCES b/libclc/x86_64-unknown-linux/libspirv/SOURCES
index ba0b2c7df78e8..5a5536f318379 100644
--- a/libclc/x86_64-unknown-linux/libspirv/SOURCES
+++ b/libclc/x86_64-unknown-linux/libspirv/SOURCES
@@ -16,3 +16,4 @@ math/native_sqrt.cl
 math/rint.cl
 math/round.cl
 math/trunc.cl
+shared/helpers.ll
diff --git a/libclc/x86_64-unknown-linux/libspirv/integer/helpers.h b/libclc/x86_64-unknown-linux/libspirv/integer/helpers.h
new file mode 100644
index 0000000000000..e760e518bf5cc
--- /dev/null
+++ b/libclc/x86_64-unknown-linux/libspirv/integer/helpers.h
@@ -0,0 +1,13 @@
+#include "func.h"
+
+#define GEN_UNARY_BUILTIN_T(NAME, TYPE) \
+_CLC_OVERLOAD TYPE __##NAME##_helper(TYPE); \
+_CLC_OVERLOAD TYPE __spirv_ocl_##NAME(TYPE n) { \
+  return __##NAME##_helper(n); \
+} 
+
+
+#define GEN_UNARY_BUILTIN(NAME) \
+  GEN_UNARY_BUILTIN_T(NAME, int) \
+  GEN_UNARY_BUILTIN_T(NAME, signed char) 
+
diff --git a/libclc/x86_64-unknown-linux/libspirv/integer/popcount.cl b/libclc/x86_64-unknown-linux/libspirv/integer/popcount.cl
index e69de29bb2d1d..fae953de0c340 100644
--- a/libclc/x86_64-unknown-linux/libspirv/integer/popcount.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/integer/popcount.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(popcount)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/ceil.cl b/libclc/x86_64-unknown-linux/libspirv/math/ceil.cl
index e69de29bb2d1d..80d2a9c399bd1 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/ceil.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/ceil.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(ceil)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/fabs.cl b/libclc/x86_64-unknown-linux/libspirv/math/fabs.cl
index e69de29bb2d1d..73f91386ff951 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/fabs.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/fabs.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(fabs)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/floor.cl b/libclc/x86_64-unknown-linux/libspirv/math/floor.cl
index e69de29bb2d1d..793efec8d5685 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/floor.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/floor.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(floor)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/fma.cl b/libclc/x86_64-unknown-linux/libspirv/math/fma.cl
index e69de29bb2d1d..3569576e8c5c9 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/fma.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/fma.cl
@@ -0,0 +1,4 @@
+#include "helpers.h"
+
+GEN_TERNARY_BUILTIN(fma);
+
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/helpers.h b/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
new file mode 100644
index 0000000000000..852f2191022cb
--- /dev/null
+++ b/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
@@ -0,0 +1,23 @@
+#include "func.h"
+
+#define GEN_UNARY_BUILTIN_T(NAME, TYPE) \
+_CLC_OVERLOAD TYPE __##NAME##_helper(TYPE); \
+_CLC_OVERLOAD TYPE __spirv_ocl_##NAME(TYPE n) { \
+  return __##NAME##_helper(n); \
+} 
+
+#define GEN_TERNARY_BUILTIN_T(NAME, TYPE) \
+_CLC_OVERLOAD TYPE __##NAME##_helper(TYPE, TYPE, TYPE); \
+_CLC_OVERLOAD TYPE __spirv_ocl_##NAME(TYPE a, TYPE b, TYPE c) { \
+  return __##NAME##_helper(a, b, c); \
+} 
+
+
+#define GEN_UNARY_BUILTIN(NAME) \
+  GEN_UNARY_BUILTIN_T(NAME, float) \
+  GEN_UNARY_BUILTIN_T(NAME, double) 
+
+
+#define GEN_TERNARY_BUILTIN(NAME) \
+GEN_TERNARY_BUILTIN_T(NAME, float) \
+GEN_TERNARY_BUILTIN_T(NAME, double) \
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl
index e69de29bb2d1d..bf314782388c4 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(native_cos)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl
index e69de29bb2d1d..ef26cad9d2f3a 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(native_exp)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl
index e69de29bb2d1d..ddbe490e13800 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(native_exp2)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl
index e69de29bb2d1d..ec298f50d0eb2 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(native_log)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl
index e69de29bb2d1d..a97e14a53c9e2 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl
@@ -0,0 +1,4 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(native_log10)
+
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl
index e69de29bb2d1d..a90a925b036d5 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl
@@ -0,0 +1,4 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(native_log2)
+
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl
index e69de29bb2d1d..2c63c1c250b4d 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl
@@ -0,0 +1,4 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(native_sin)
+
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl
index e69de29bb2d1d..fe26ec8d9ef6f 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(native_sqrt)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/rint.cl b/libclc/x86_64-unknown-linux/libspirv/math/rint.cl
index e69de29bb2d1d..73f13e29b2181 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/rint.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/rint.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(rint)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/round.cl b/libclc/x86_64-unknown-linux/libspirv/math/round.cl
index e69de29bb2d1d..2ba3048cc41db 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/round.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/round.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(round)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/sqrt.cl b/libclc/x86_64-unknown-linux/libspirv/math/sqrt.cl
index e69de29bb2d1d..b4deb11ea4e31 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/sqrt.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/sqrt.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(sqrt)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/trunc.cl b/libclc/x86_64-unknown-linux/libspirv/math/trunc.cl
index e69de29bb2d1d..1c61212279771 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/trunc.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/trunc.cl
@@ -0,0 +1,3 @@
+#include "helpers.h"
+
+GEN_UNARY_BUILTIN(trunc)
diff --git a/libclc/x86_64-unknown-linux/libspirv/shared/helpers.ll b/libclc/x86_64-unknown-linux/libspirv/shared/helpers.ll
new file mode 100644
index 0000000000000..c19c38634c4b6
--- /dev/null
+++ b/libclc/x86_64-unknown-linux/libspirv/shared/helpers.ll
@@ -0,0 +1,270 @@
+declare float @llvm.sqrt.f32(float %n)
+declare double @llvm.sqrt.f64(double %n)
+declare float @llvm.fabs.f32(float %n)
+declare double @llvm.fabs.f64(double %n)
+declare float @llvm.trunc.f32(float %n)
+declare double @llvm.trunc.f64(double %n)
+declare float @llvm.ceil.f32(float %n)
+declare double @llvm.ceil.f64(double %n)
+declare float @llvm.floor.f32(float %n)
+declare double @llvm.floor.f64(double %n)
+declare float @llvm.round.f32(float %n)
+declare double @llvm.round.f64(double %n)
+declare float @llvm.rint.f32(float %n)
+declare double @llvm.rint.f64(double %n)
+declare float @llvm.cos.f32(float %n)
+declare double @llvm.cos.f64(double %n)
+declare float @llvm.sin.f32(float %n)
+declare double @llvm.sin.f64(double %n)
+declare float @llvm.exp2.f32(float %n)
+declare double @llvm.exp2.f64(double %n)
+declare float @llvm.exp.f32(float %n)
+declare double @llvm.exp.f64(double %n)
+declare float @llvm.log10.f32(float %n)
+declare double @llvm.log10.f64(double %n)
+declare float @llvm.log.f32(float %n)
+declare double @llvm.log.f64(double %n)
+declare float @llvm.log2.f32(float %n)
+declare double @llvm.log2.f64(double %n)
+declare float @llvm.fma.f32(float %n1, float %n2, float %n3)
+declare double @llvm.fma.f64(double %n1, double %n2, double %n3)
+declare i32 @llvm.ctpop.i32(i32 %n)
+declare i8 @llvm.ctpop.i8(i8 %n)
+
+define dso_local float @_Z13__sqrt_helperf(float %x) {
+entry:
+  %call = call float @llvm.sqrt.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z13__sqrt_helperd(double %x) {
+entry:
+  %call = call double @llvm.sqrt.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z13__fabs_helperf(float %x) {
+entry:
+  %call = call float @llvm.fabs.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z13__fabs_helperd(double %x) {
+entry:
+  %call = call double @llvm.fabs.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z14__trunc_helperf(float %x) {
+entry:
+  %call = call float @llvm.trunc.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z14__trunc_helperd(double %x) {
+entry:
+  %call = call double @llvm.trunc.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z13__ceil_helperf(float %x) {
+entry:
+  %call = call float @llvm.ceil.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z13__ceil_helperd(double %x) {
+entry:
+  %call = call double @llvm.ceil.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z14__floor_helperf(float %x) {
+entry:
+  %call = call float @llvm.floor.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z14__floor_helperd(double %x) {
+entry:
+  %call = call double @llvm.floor.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z14__round_helperf(float %x) {
+entry:
+  %call = call float @llvm.round.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z14__round_helperd(double %x) {
+entry:
+  %call = call double @llvm.round.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z13__rint_helperf(float %x) {
+entry:
+  %call = call float @llvm.rint.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z13__rint_helperd(double %x) {
+entry:
+  %call = call double @llvm.rint.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z20__native_sqrt_helperf(float %x) {
+entry:
+  %call = call float @llvm.sqrt.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z20__native_sqrt_helperd(double %x) {
+entry:
+  %call = call double @llvm.sqrt.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z19__native_cos_helperf(float %x) {
+entry:
+  %call = call float @llvm.cos.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z19__native_cos_helperd(double %x) {
+entry:
+  %call = call double @llvm.cos.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z19__native_sin_helperf(float %x) {
+entry:
+  %call = call float @llvm.sin.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z19__native_sin_helperd(double %x) {
+entry:
+  %call = call double @llvm.sin.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z20__native_exp2_helperf(float %x) {
+entry:
+  %call = call float @llvm.exp2.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z20__native_exp2_helperd(double %x) {
+entry:
+  %call = call double @llvm.exp2.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z19__native_exp_helperf(float %x) {
+entry:
+  %call = call float @llvm.exp.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z19__native_exp_helperd(double %x) {
+entry:
+  %call = call double @llvm.exp.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z21__native_log10_helperf(float %x) {
+entry:
+  %call = call float @llvm.log10.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z21__native_log10_helperd(double %x) {
+entry:
+  %call = call double @llvm.log10.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z19__native_log_helperf(float %x) {
+entry:
+  %call = call float @llvm.log.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z19__native_log_helperd(double %x) {
+entry:
+  %call = call double @llvm.log.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z20__native_log2_helperf(float %x) {
+entry:
+  %call = call float @llvm.log2.f32(float %x) 
+  ret float %call
+}
+
+
+define dso_local double @_Z20__native_log2_helperd(double %x) {
+entry:
+  %call = call double @llvm.log2.f64(double %x) 
+  ret double %call
+}
+
+
+define dso_local float @_Z12__fma_helperfff(float %a, float %b, float %c) {
+entry:
+  %call = call float @llvm.fma.f32(float %a, float %b, float %c) 
+  ret float %call
+}
+
+
+define dso_local double @_Z12__fma_helperddd(double %a, double %b, double %c) {
+entry:
+  %call = call double @llvm.fma.f64(double %a, double %b, double %c) 
+  ret double %call
+}
+
+
+define dso_local i32 @_Z17__popcount_helperi(i32 %x) {
+entry:
+  %call = call i32 @llvm.ctpop.i32(i32 %x) 
+  ret i32 %call
+}
+
+
+define dso_local i8 @_Z17__popcount_helpera(i8 %x) {
+entry:
+  %call = call i8 @llvm.ctpop.i8(i8 %x) 
+  ret i8 %call
+}
+
diff --git a/llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp b/llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp
index 8f6452b76a7d7..228f7d9e4836c 100644
--- a/llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp
+++ b/llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp
@@ -245,8 +245,6 @@ Value *getStateArg(const Function *F) {
   return F->getArg(FT->getNumParams() - 1);
 }
 
-static constexpr unsigned int NativeCPUGlobalAS = 1;
-
 } // namespace
 
 PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
diff --git a/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp b/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp
index 220373c4128a2..545701449750d 100644
--- a/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp
+++ b/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp
@@ -60,7 +60,13 @@ RenameKernelSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) {
   }
 
   for (auto &F : CalledSet) {
-    F->setName(sycl::utils::addSYCLNativeCPUSuffix(F->getName()));
+    auto NewName = sycl::utils::addSYCLNativeCPUSuffix(F->getName());
+    F->setName(NewName);
+    auto Comdat = F->getComdat();
+    if (Comdat) {
+      auto NewComdat = M.getOrInsertComdat(NewName.str());
+      F->setComdat(NewComdat);
+    }
     ModuleChanged |= true;
   }
   return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
diff --git a/sycl/test/native_cpu/math_builtins.cpp b/sycl/test/native_cpu/math_builtins.cpp
new file mode 100644
index 0000000000000..5b065c3ab4e0f
--- /dev/null
+++ b/sycl/test/native_cpu/math_builtins.cpp
@@ -0,0 +1,143 @@
+// REQUIRES: native_cpu_be
+// RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t -g
+// RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t
+
+#include "sycl/builtins_marray_gen.hpp"
+#include "sycl/builtins_vector_gen.hpp"
+#include <sycl/sycl.hpp>
+
+#include <array>
+#include <iostream>
+
+using namespace sycl;
+constexpr sycl::access::mode sycl_read_write = sycl::access::mode::read_write;
+
+template <typename T>
+class Test;
+
+template <typename T>
+class TestInt;
+
+static constexpr int NumMathBuiltins = 16; 
+static constexpr float eps = 0.01;
+
+template <typename T>
+using ResultT = std::array<T, NumMathBuiltins>;
+
+template <typename T>
+ResultT<T> do_test(T in) {
+  ResultT<T> res;
+  unsigned i = 0;
+  res[i++] = sycl::native::sqrt(in);
+  res[i++] = sycl::sqrt(in);
+  res[i++] = sycl::fabs(in);
+  res[i++] = sycl::fma(in,in,in);
+  res[i++] = sycl::trunc(in);
+  res[i++] = sycl::rint(in);
+  res[i++] = sycl::round(in);
+  res[i++] = sycl::ceil(in);
+  res[i++] = sycl::floor(in);
+  res[i++] = sycl::native::cos(in);
+  res[i++] = sycl::native::sin(in);
+  res[i++] = sycl::native::exp2(in);
+  res[i++] = sycl::native::exp(in);
+  res[i++] = sycl::native::log10(in);
+  res[i++] = sycl::native::log(in);
+  res[i++] = sycl::native::log2(in);
+  return res;
+}
+
+template <typename T>
+bool check(T& res, T& exp) {
+  bool correct = std::abs(static_cast<float>(res) - static_cast<float>(exp)) < eps;
+  if(!correct) {
+    std::cout << "Value mismatch; Expected: " << exp << " actual: " << res << "\n";
+    return false;
+  }
+  return true;
+}
+
+template <typename T, int N>
+bool check(sycl::vec<T,N>& res, sycl::vec<T, N>& exp) {
+  bool correct = true;
+  for(int i = 0; i < N; i++) {
+    correct &= check(res[i], exp[i]);
+  }
+  return correct;
+}
+
+
+template<typename T>
+bool test(queue deviceQueue) {
+  const size_t N = 1;
+  const T Init{1};
+  std::array<T, N> A = {Init};
+  std::array<ResultT<T>, 1> Res;
+  sycl::range<1> numOfItems{N};
+  {
+    sycl::buffer<T, 1> bufferA(A.data(), numOfItems);
+    sycl::buffer<ResultT<T>, 1> bufferRes(Res.data(), numOfItems);
+
+    deviceQueue
+        .submit([&](sycl::handler &cgh) {
+          auto accessorA = bufferA.template get_access<sycl_read_write>(cgh);
+          auto accessorRes = bufferRes.template get_access<sycl_read_write>(cgh);
+
+          auto kern = [=]() {
+            accessorRes[0] = do_test(accessorA[0]);
+          };
+          cgh.single_task<Test<T>>(kern);
+        })
+        .wait();
+  }
+  ResultT<T> expected = do_test(Init);
+  for(int i = 0; i < NumMathBuiltins; i++) {
+     if(!check(Res[0][i], expected[i])) {
+       return false;
+     }
+  }
+ return true; 
+}
+
+template <typename T>
+bool test_int(queue deviceQueue) {
+  const size_t N = 1;
+  const T Init{10};
+  std::array<T, N> A = {Init};
+  sycl::range<1> numOfItems{N};
+  {
+    sycl::buffer<T, 1> bufferA(A.data(), numOfItems);
+
+    deviceQueue
+        .submit([&](sycl::handler &cgh) {
+          auto accessorA = bufferA.template get_access<sycl_read_write>(cgh);
+
+          auto kern = [=]() {
+            accessorA[0] = sycl::popcount<T>(accessorA[0]);
+          };
+          cgh.single_task<TestInt<T>>(kern);
+        })
+        .wait();
+  }
+  T expected = sycl::popcount(Init);
+  if(!(A[0] == expected)) {
+    return false;
+  }
+ return true; 
+}
+
+int main() {
+  queue q;
+  bool success = true;
+  success &= test<float>(q);
+  success &= test<double>(q);
+  success &= test_int<int>(q);
+  success &= test_int<char>(q);
+
+  if(!success) {
+    std::cout << "Test failed\n";
+    return 1;
+  }
+  std::cout << "Test passed\n";
+  return 0;
+}

From ec01ed21a8e747b7558e4420ba1ba6c4296788db Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Wed, 27 Sep 2023 11:19:28 +0100
Subject: [PATCH 02/11] formatting

---
 .../libspirv/integer/helpers.h                | 16 ++---
 .../libspirv/math/helpers.h                   | 32 ++++-----
 sycl/test/native_cpu/math_builtins.cpp        | 65 ++++++++-----------
 3 files changed, 48 insertions(+), 65 deletions(-)

diff --git a/libclc/x86_64-unknown-linux/libspirv/integer/helpers.h b/libclc/x86_64-unknown-linux/libspirv/integer/helpers.h
index e760e518bf5cc..50e7c39cb3d23 100644
--- a/libclc/x86_64-unknown-linux/libspirv/integer/helpers.h
+++ b/libclc/x86_64-unknown-linux/libspirv/integer/helpers.h
@@ -1,13 +1,9 @@
 #include "func.h"
 
-#define GEN_UNARY_BUILTIN_T(NAME, TYPE) \
-_CLC_OVERLOAD TYPE __##NAME##_helper(TYPE); \
-_CLC_OVERLOAD TYPE __spirv_ocl_##NAME(TYPE n) { \
-  return __##NAME##_helper(n); \
-} 
-
-
-#define GEN_UNARY_BUILTIN(NAME) \
-  GEN_UNARY_BUILTIN_T(NAME, int) \
-  GEN_UNARY_BUILTIN_T(NAME, signed char) 
+#define GEN_UNARY_BUILTIN_T(NAME, TYPE)                                        \
+  _CLC_OVERLOAD TYPE __##NAME##_helper(TYPE);                                  \
+  _CLC_OVERLOAD TYPE __spirv_ocl_##NAME(TYPE n) { return __##NAME##_helper(n); }
 
+#define GEN_UNARY_BUILTIN(NAME)                                                \
+  GEN_UNARY_BUILTIN_T(NAME, int)                                               \
+  GEN_UNARY_BUILTIN_T(NAME, signed char)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/helpers.h b/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
index 852f2191022cb..1619617a36944 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
+++ b/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
@@ -1,23 +1,19 @@
 #include "func.h"
 
-#define GEN_UNARY_BUILTIN_T(NAME, TYPE) \
-_CLC_OVERLOAD TYPE __##NAME##_helper(TYPE); \
-_CLC_OVERLOAD TYPE __spirv_ocl_##NAME(TYPE n) { \
-  return __##NAME##_helper(n); \
-} 
+#define GEN_UNARY_BUILTIN_T(NAME, TYPE)                                        \
+  _CLC_OVERLOAD TYPE __##NAME##_helper(TYPE);                                  \
+  _CLC_OVERLOAD TYPE __spirv_ocl_##NAME(TYPE n) { return __##NAME##_helper(n); }
 
-#define GEN_TERNARY_BUILTIN_T(NAME, TYPE) \
-_CLC_OVERLOAD TYPE __##NAME##_helper(TYPE, TYPE, TYPE); \
-_CLC_OVERLOAD TYPE __spirv_ocl_##NAME(TYPE a, TYPE b, TYPE c) { \
-  return __##NAME##_helper(a, b, c); \
-} 
+#define GEN_TERNARY_BUILTIN_T(NAME, TYPE)                                      \
+  _CLC_OVERLOAD TYPE __##NAME##_helper(TYPE, TYPE, TYPE);                      \
+  _CLC_OVERLOAD TYPE __spirv_ocl_##NAME(TYPE a, TYPE b, TYPE c) {              \
+    return __##NAME##_helper(a, b, c);                                         \
+  }
 
+#define GEN_UNARY_BUILTIN(NAME)                                                \
+  GEN_UNARY_BUILTIN_T(NAME, float)                                             \
+  GEN_UNARY_BUILTIN_T(NAME, double)
 
-#define GEN_UNARY_BUILTIN(NAME) \
-  GEN_UNARY_BUILTIN_T(NAME, float) \
-  GEN_UNARY_BUILTIN_T(NAME, double) 
-
-
-#define GEN_TERNARY_BUILTIN(NAME) \
-GEN_TERNARY_BUILTIN_T(NAME, float) \
-GEN_TERNARY_BUILTIN_T(NAME, double) \
+#define GEN_TERNARY_BUILTIN(NAME)                                              \
+  GEN_TERNARY_BUILTIN_T(NAME, float)                                           \
+  GEN_TERNARY_BUILTIN_T(NAME, double)\
diff --git a/sycl/test/native_cpu/math_builtins.cpp b/sycl/test/native_cpu/math_builtins.cpp
index 5b065c3ab4e0f..2eded252b1d19 100644
--- a/sycl/test/native_cpu/math_builtins.cpp
+++ b/sycl/test/native_cpu/math_builtins.cpp
@@ -12,26 +12,22 @@
 using namespace sycl;
 constexpr sycl::access::mode sycl_read_write = sycl::access::mode::read_write;
 
-template <typename T>
-class Test;
+template <typename T> class Test;
 
-template <typename T>
-class TestInt;
+template <typename T> class TestInt;
 
-static constexpr int NumMathBuiltins = 16; 
+static constexpr int NumMathBuiltins = 16;
 static constexpr float eps = 0.01;
 
-template <typename T>
-using ResultT = std::array<T, NumMathBuiltins>;
+template <typename T> using ResultT = std::array<T, NumMathBuiltins>;
 
-template <typename T>
-ResultT<T> do_test(T in) {
+template <typename T> ResultT<T> do_test(T in) {
   ResultT<T> res;
   unsigned i = 0;
   res[i++] = sycl::native::sqrt(in);
   res[i++] = sycl::sqrt(in);
   res[i++] = sycl::fabs(in);
-  res[i++] = sycl::fma(in,in,in);
+  res[i++] = sycl::fma(in, in, in);
   res[i++] = sycl::trunc(in);
   res[i++] = sycl::rint(in);
   res[i++] = sycl::round(in);
@@ -47,28 +43,27 @@ ResultT<T> do_test(T in) {
   return res;
 }
 
-template <typename T>
-bool check(T& res, T& exp) {
-  bool correct = std::abs(static_cast<float>(res) - static_cast<float>(exp)) < eps;
-  if(!correct) {
-    std::cout << "Value mismatch; Expected: " << exp << " actual: " << res << "\n";
+template <typename T> bool check(T &res, T &exp) {
+  bool correct =
+      std::abs(static_cast<float>(res) - static_cast<float>(exp)) < eps;
+  if (!correct) {
+    std::cout << "Value mismatch; Expected: " << exp << " actual: " << res
+              << "\n";
     return false;
   }
   return true;
 }
 
 template <typename T, int N>
-bool check(sycl::vec<T,N>& res, sycl::vec<T, N>& exp) {
+bool check(sycl::vec<T, N> &res, sycl::vec<T, N> &exp) {
   bool correct = true;
-  for(int i = 0; i < N; i++) {
+  for (int i = 0; i < N; i++) {
     correct &= check(res[i], exp[i]);
   }
   return correct;
 }
 
-
-template<typename T>
-bool test(queue deviceQueue) {
+template <typename T> bool test(queue deviceQueue) {
   const size_t N = 1;
   const T Init{1};
   std::array<T, N> A = {Init};
@@ -81,26 +76,24 @@ bool test(queue deviceQueue) {
     deviceQueue
         .submit([&](sycl::handler &cgh) {
           auto accessorA = bufferA.template get_access<sycl_read_write>(cgh);
-          auto accessorRes = bufferRes.template get_access<sycl_read_write>(cgh);
+          auto accessorRes =
+              bufferRes.template get_access<sycl_read_write>(cgh);
 
-          auto kern = [=]() {
-            accessorRes[0] = do_test(accessorA[0]);
-          };
+          auto kern = [=]() { accessorRes[0] = do_test(accessorA[0]); };
           cgh.single_task<Test<T>>(kern);
         })
         .wait();
   }
   ResultT<T> expected = do_test(Init);
-  for(int i = 0; i < NumMathBuiltins; i++) {
-     if(!check(Res[0][i], expected[i])) {
-       return false;
-     }
+  for (int i = 0; i < NumMathBuiltins; i++) {
+    if (!check(Res[0][i], expected[i])) {
+      return false;
+    }
   }
- return true; 
+  return true;
 }
 
-template <typename T>
-bool test_int(queue deviceQueue) {
+template <typename T> bool test_int(queue deviceQueue) {
   const size_t N = 1;
   const T Init{10};
   std::array<T, N> A = {Init};
@@ -112,18 +105,16 @@ bool test_int(queue deviceQueue) {
         .submit([&](sycl::handler &cgh) {
           auto accessorA = bufferA.template get_access<sycl_read_write>(cgh);
 
-          auto kern = [=]() {
-            accessorA[0] = sycl::popcount<T>(accessorA[0]);
-          };
+          auto kern = [=]() { accessorA[0] = sycl::popcount<T>(accessorA[0]); };
           cgh.single_task<TestInt<T>>(kern);
         })
         .wait();
   }
   T expected = sycl::popcount(Init);
-  if(!(A[0] == expected)) {
+  if (!(A[0] == expected)) {
     return false;
   }
- return true; 
+  return true;
 }
 
 int main() {
@@ -134,7 +125,7 @@ int main() {
   success &= test_int<int>(q);
   success &= test_int<char>(q);
 
-  if(!success) {
+  if (!success) {
     std::cout << "Test failed\n";
     return 1;
   }

From 908425c4381c99f9d43341ee4a0a5e2fadfb927e Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Mon, 2 Oct 2023 14:21:30 +0100
Subject: [PATCH 03/11] use clang builtins

---
 .../libspirv/math/fabs.cl                     |   1 +
 .../libspirv/math/helpers.h                   |  53 +++-
 .../libspirv/math/native_cos.cl               |   3 +-
 .../libspirv/math/native_exp.cl               |   3 +-
 .../libspirv/math/native_exp2.cl              |   3 +-
 .../libspirv/math/native_log.cl               |   3 +-
 .../libspirv/math/native_log10.cl             |   3 +-
 .../libspirv/math/native_log2.cl              |   3 +-
 .../libspirv/math/native_sin.cl               |   3 +-
 .../libspirv/math/native_sqrt.cl              |   3 +-
 .../libspirv/shared/helpers.ll                | 253 ------------------
 sycl/test/native_cpu/math_builtins.cpp        | 101 +++++--
 12 files changed, 151 insertions(+), 281 deletions(-)

diff --git a/libclc/x86_64-unknown-linux/libspirv/math/fabs.cl b/libclc/x86_64-unknown-linux/libspirv/math/fabs.cl
index 73f91386ff951..878d9bbc97c7f 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/fabs.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/fabs.cl
@@ -1,3 +1,4 @@
+#define IS_FABS
 #include "helpers.h"
 
 GEN_UNARY_BUILTIN(fabs)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/helpers.h b/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
index 1619617a36944..0e3b294128aae 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
+++ b/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
@@ -1,4 +1,8 @@
 #include "func.h"
+#include "types.h"
+
+
+#ifdef NO_CLANG_BUILTINS
 
 #define GEN_UNARY_BUILTIN_T(NAME, TYPE)                                        \
   _CLC_OVERLOAD TYPE __##NAME##_helper(TYPE);                                  \
@@ -9,7 +13,6 @@
   _CLC_OVERLOAD TYPE __spirv_ocl_##NAME(TYPE a, TYPE b, TYPE c) {              \
     return __##NAME##_helper(a, b, c);                                         \
   }
-
 #define GEN_UNARY_BUILTIN(NAME)                                                \
   GEN_UNARY_BUILTIN_T(NAME, float)                                             \
   GEN_UNARY_BUILTIN_T(NAME, double)
@@ -17,3 +20,51 @@
 #define GEN_TERNARY_BUILTIN(NAME)                                              \
   GEN_TERNARY_BUILTIN_T(NAME, float)                                           \
   GEN_TERNARY_BUILTIN_T(NAME, double)\
+
+#else
+
+#ifndef IS_NATIVE
+#define GETNAME(ID) __spirv_ocl_##ID
+#else
+#define GETNAME(ID) __spirv_ocl_native_##ID
+#endif
+
+// Todo: fabs is the only builtin whose vector version is not named __builtin_elementwise_##NAME
+#ifndef IS_FABS
+#define GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, NUM) \
+  _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n) { return __builtin_elementwise_##NAME(n); } 
+#else
+#define GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, NUM) \
+  _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n) { return __builtin_elementwise_abs(n); } 
+#endif
+
+#define GEN_UNARY_VECTOR_BUILTIN_T(NAME, TYPE)  \
+  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 2) \
+  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 3) \
+  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 4) \
+  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 8) \
+  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 16) 
+
+#define GEN_UNARY_BUILTIN(NAME)                                                \
+  _CLC_OVERLOAD float GETNAME(NAME)(float n) { return __builtin_##NAME##f(n); } \
+  _CLC_OVERLOAD double GETNAME(NAME)(double n) { return __builtin_##NAME(n); } \
+  GEN_UNARY_VECTOR_BUILTIN_T(NAME, float) \
+  GEN_UNARY_VECTOR_BUILTIN_T(NAME, double) 
+
+#define GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, NUM) \
+  _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n1, TYPE##NUM n2, TYPE##NUM n3) { return __builtin_elementwise_##NAME(n1, n2, n3); } 
+
+#define GEN_TERNARY_VECTOR_BUILTIN_T(NAME, TYPE)  \
+  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 2) \
+  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 3) \
+  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 4) \
+  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 8) \
+  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 16) 
+
+#define GEN_TERNARY_BUILTIN(NAME)                                              \
+  _CLC_OVERLOAD float GETNAME(NAME)(float n1, float n2, float n3) { return __builtin_##NAME##f(n1, n2, n3); } \
+  _CLC_OVERLOAD double GETNAME(NAME)(double n1, double n2, double n3) { return __builtin_##NAME(n1, n2, n3); } \
+  GEN_TERNARY_VECTOR_BUILTIN_T(NAME, float) \
+  GEN_TERNARY_VECTOR_BUILTIN_T(NAME, double) 
+#endif
+
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl
index bf314782388c4..42ec1eb7ac6fd 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl
@@ -1,3 +1,4 @@
+#define IS_NATIVE
 #include "helpers.h"
 
-GEN_UNARY_BUILTIN(native_cos)
+GEN_UNARY_BUILTIN(cos)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl
index ef26cad9d2f3a..2e0abd4c58114 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl
@@ -1,3 +1,4 @@
+#define IS_NATIVE 
 #include "helpers.h"
 
-GEN_UNARY_BUILTIN(native_exp)
+GEN_UNARY_BUILTIN(exp)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl
index ddbe490e13800..69f6eb8467d24 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl
@@ -1,3 +1,4 @@
+#define IS_NATIVE
 #include "helpers.h"
 
-GEN_UNARY_BUILTIN(native_exp2)
+GEN_UNARY_BUILTIN(exp2)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl
index ec298f50d0eb2..d86a3fa492cac 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl
@@ -1,3 +1,4 @@
+#define IS_NATIVE
 #include "helpers.h"
 
-GEN_UNARY_BUILTIN(native_log)
+GEN_UNARY_BUILTIN(log)
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl
index a97e14a53c9e2..f4c14348a2f4a 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl
@@ -1,4 +1,5 @@
+#define IS_NATIVE
 #include "helpers.h"
 
-GEN_UNARY_BUILTIN(native_log10)
+GEN_UNARY_BUILTIN(log10)
 
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl
index a90a925b036d5..b4fdc6017b27d 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl
@@ -1,4 +1,5 @@
+#define IS_NATIVE
 #include "helpers.h"
 
-GEN_UNARY_BUILTIN(native_log2)
+GEN_UNARY_BUILTIN(log2)
 
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl
index 2c63c1c250b4d..1967988781a01 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl
@@ -1,4 +1,5 @@
+#define IS_NATIVE
 #include "helpers.h"
 
-GEN_UNARY_BUILTIN(native_sin)
+GEN_UNARY_BUILTIN(sin)
 
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl
index fe26ec8d9ef6f..c1b9f041b1e43 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl
@@ -1,3 +1,4 @@
+#define IS_NATIVE
 #include "helpers.h"
 
-GEN_UNARY_BUILTIN(native_sqrt)
+GEN_UNARY_BUILTIN(sqrt)
diff --git a/libclc/x86_64-unknown-linux/libspirv/shared/helpers.ll b/libclc/x86_64-unknown-linux/libspirv/shared/helpers.ll
index c19c38634c4b6..b3d7d5e2daa9f 100644
--- a/libclc/x86_64-unknown-linux/libspirv/shared/helpers.ll
+++ b/libclc/x86_64-unknown-linux/libspirv/shared/helpers.ll
@@ -1,259 +1,6 @@
-declare float @llvm.sqrt.f32(float %n)
-declare double @llvm.sqrt.f64(double %n)
-declare float @llvm.fabs.f32(float %n)
-declare double @llvm.fabs.f64(double %n)
-declare float @llvm.trunc.f32(float %n)
-declare double @llvm.trunc.f64(double %n)
-declare float @llvm.ceil.f32(float %n)
-declare double @llvm.ceil.f64(double %n)
-declare float @llvm.floor.f32(float %n)
-declare double @llvm.floor.f64(double %n)
-declare float @llvm.round.f32(float %n)
-declare double @llvm.round.f64(double %n)
-declare float @llvm.rint.f32(float %n)
-declare double @llvm.rint.f64(double %n)
-declare float @llvm.cos.f32(float %n)
-declare double @llvm.cos.f64(double %n)
-declare float @llvm.sin.f32(float %n)
-declare double @llvm.sin.f64(double %n)
-declare float @llvm.exp2.f32(float %n)
-declare double @llvm.exp2.f64(double %n)
-declare float @llvm.exp.f32(float %n)
-declare double @llvm.exp.f64(double %n)
-declare float @llvm.log10.f32(float %n)
-declare double @llvm.log10.f64(double %n)
-declare float @llvm.log.f32(float %n)
-declare double @llvm.log.f64(double %n)
-declare float @llvm.log2.f32(float %n)
-declare double @llvm.log2.f64(double %n)
-declare float @llvm.fma.f32(float %n1, float %n2, float %n3)
-declare double @llvm.fma.f64(double %n1, double %n2, double %n3)
 declare i32 @llvm.ctpop.i32(i32 %n)
 declare i8 @llvm.ctpop.i8(i8 %n)
 
-define dso_local float @_Z13__sqrt_helperf(float %x) {
-entry:
-  %call = call float @llvm.sqrt.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z13__sqrt_helperd(double %x) {
-entry:
-  %call = call double @llvm.sqrt.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z13__fabs_helperf(float %x) {
-entry:
-  %call = call float @llvm.fabs.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z13__fabs_helperd(double %x) {
-entry:
-  %call = call double @llvm.fabs.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z14__trunc_helperf(float %x) {
-entry:
-  %call = call float @llvm.trunc.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z14__trunc_helperd(double %x) {
-entry:
-  %call = call double @llvm.trunc.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z13__ceil_helperf(float %x) {
-entry:
-  %call = call float @llvm.ceil.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z13__ceil_helperd(double %x) {
-entry:
-  %call = call double @llvm.ceil.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z14__floor_helperf(float %x) {
-entry:
-  %call = call float @llvm.floor.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z14__floor_helperd(double %x) {
-entry:
-  %call = call double @llvm.floor.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z14__round_helperf(float %x) {
-entry:
-  %call = call float @llvm.round.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z14__round_helperd(double %x) {
-entry:
-  %call = call double @llvm.round.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z13__rint_helperf(float %x) {
-entry:
-  %call = call float @llvm.rint.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z13__rint_helperd(double %x) {
-entry:
-  %call = call double @llvm.rint.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z20__native_sqrt_helperf(float %x) {
-entry:
-  %call = call float @llvm.sqrt.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z20__native_sqrt_helperd(double %x) {
-entry:
-  %call = call double @llvm.sqrt.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z19__native_cos_helperf(float %x) {
-entry:
-  %call = call float @llvm.cos.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z19__native_cos_helperd(double %x) {
-entry:
-  %call = call double @llvm.cos.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z19__native_sin_helperf(float %x) {
-entry:
-  %call = call float @llvm.sin.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z19__native_sin_helperd(double %x) {
-entry:
-  %call = call double @llvm.sin.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z20__native_exp2_helperf(float %x) {
-entry:
-  %call = call float @llvm.exp2.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z20__native_exp2_helperd(double %x) {
-entry:
-  %call = call double @llvm.exp2.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z19__native_exp_helperf(float %x) {
-entry:
-  %call = call float @llvm.exp.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z19__native_exp_helperd(double %x) {
-entry:
-  %call = call double @llvm.exp.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z21__native_log10_helperf(float %x) {
-entry:
-  %call = call float @llvm.log10.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z21__native_log10_helperd(double %x) {
-entry:
-  %call = call double @llvm.log10.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z19__native_log_helperf(float %x) {
-entry:
-  %call = call float @llvm.log.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z19__native_log_helperd(double %x) {
-entry:
-  %call = call double @llvm.log.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z20__native_log2_helperf(float %x) {
-entry:
-  %call = call float @llvm.log2.f32(float %x) 
-  ret float %call
-}
-
-
-define dso_local double @_Z20__native_log2_helperd(double %x) {
-entry:
-  %call = call double @llvm.log2.f64(double %x) 
-  ret double %call
-}
-
-
-define dso_local float @_Z12__fma_helperfff(float %a, float %b, float %c) {
-entry:
-  %call = call float @llvm.fma.f32(float %a, float %b, float %c) 
-  ret float %call
-}
-
-
-define dso_local double @_Z12__fma_helperddd(double %a, double %b, double %c) {
-entry:
-  %call = call double @llvm.fma.f64(double %a, double %b, double %c) 
-  ret double %call
-}
-
 
 define dso_local i32 @_Z17__popcount_helperi(i32 %x) {
 entry:
diff --git a/sycl/test/native_cpu/math_builtins.cpp b/sycl/test/native_cpu/math_builtins.cpp
index 2eded252b1d19..76b7eb3a47883 100644
--- a/sycl/test/native_cpu/math_builtins.cpp
+++ b/sycl/test/native_cpu/math_builtins.cpp
@@ -2,8 +2,6 @@
 // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t -g
 // RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t
 
-#include "sycl/builtins_marray_gen.hpp"
-#include "sycl/builtins_vector_gen.hpp"
 #include <sycl/sycl.hpp>
 
 #include <array>
@@ -16,15 +14,30 @@ template <typename T> class Test;
 
 template <typename T> class TestInt;
 
-static constexpr int NumMathBuiltins = 16;
+static constexpr int NumMathBuiltins = 8;
+static constexpr int NumNativeBuiltins = 8;
 static constexpr float eps = 0.01;
 
-template <typename T> using ResultT = std::array<T, NumMathBuiltins>;
+template <typename T> using MathResultT = std::array<T, NumMathBuiltins>;
+template <typename T> using NativeResultT = std::array<T, NumMathBuiltins>;
 
-template <typename T> ResultT<T> do_test(T in) {
-  ResultT<T> res;
+template <typename T> NativeResultT<T> do_test_native(T in) {
+  NativeResultT<T> res;
   unsigned i = 0;
   res[i++] = sycl::native::sqrt(in);
+  res[i++] = sycl::native::cos(in);
+  res[i++] = sycl::native::sin(in);
+  res[i++] = sycl::native::exp2(in);
+  res[i++] = sycl::native::exp(in);
+  res[i++] = sycl::native::log10(in);
+  res[i++] = sycl::native::log(in);
+  res[i++] = sycl::native::log2(in);
+  return res;
+}
+
+template <typename T> MathResultT<T> do_test_math(T in) {
+  NativeResultT<T> res;
+  unsigned i = 0;
   res[i++] = sycl::sqrt(in);
   res[i++] = sycl::fabs(in);
   res[i++] = sycl::fma(in, in, in);
@@ -33,13 +46,6 @@ template <typename T> ResultT<T> do_test(T in) {
   res[i++] = sycl::round(in);
   res[i++] = sycl::ceil(in);
   res[i++] = sycl::floor(in);
-  res[i++] = sycl::native::cos(in);
-  res[i++] = sycl::native::sin(in);
-  res[i++] = sycl::native::exp2(in);
-  res[i++] = sycl::native::exp(in);
-  res[i++] = sycl::native::log10(in);
-  res[i++] = sycl::native::log(in);
-  res[i++] = sycl::native::log2(in);
   return res;
 }
 
@@ -63,15 +69,45 @@ bool check(sycl::vec<T, N> &res, sycl::vec<T, N> &exp) {
   return correct;
 }
 
-template <typename T> bool test(queue deviceQueue) {
+template <typename T> bool test_native(queue deviceQueue) {
+  const size_t N = 1;
+  const T Init{1};
+  std::array<T, N> A = {Init};
+  std::array<NativeResultT<T>, 1> Res;
+  sycl::range<1> numOfItems{N};
+  {
+    sycl::buffer<T, 1> bufferA(A.data(), numOfItems);
+    sycl::buffer<NativeResultT<T>, 1> bufferRes(Res.data(), numOfItems);
+
+    deviceQueue
+        .submit([&](sycl::handler &cgh) {
+          auto accessorA = bufferA.template get_access<sycl_read_write>(cgh);
+          auto accessorRes =
+              bufferRes.template get_access<sycl_read_write>(cgh);
+
+          auto kern = [=]() { accessorRes[0] = do_test_native(accessorA[0]); };
+          cgh.single_task(kern);
+        })
+        .wait();
+  }
+  NativeResultT<T> expected = do_test_native(Init);
+  for (int i = 0; i < NumNativeBuiltins; i++) {
+    if (!check(Res[0][i], expected[i])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+template <typename T> bool test_math(queue deviceQueue) {
   const size_t N = 1;
   const T Init{1};
   std::array<T, N> A = {Init};
-  std::array<ResultT<T>, 1> Res;
+  std::array<MathResultT<T>, 1> Res;
   sycl::range<1> numOfItems{N};
   {
     sycl::buffer<T, 1> bufferA(A.data(), numOfItems);
-    sycl::buffer<ResultT<T>, 1> bufferRes(Res.data(), numOfItems);
+    sycl::buffer<MathResultT<T>, 1> bufferRes(Res.data(), numOfItems);
 
     deviceQueue
         .submit([&](sycl::handler &cgh) {
@@ -79,12 +115,12 @@ template <typename T> bool test(queue deviceQueue) {
           auto accessorRes =
               bufferRes.template get_access<sycl_read_write>(cgh);
 
-          auto kern = [=]() { accessorRes[0] = do_test(accessorA[0]); };
-          cgh.single_task<Test<T>>(kern);
+          auto kern = [=]() { accessorRes[0] = do_test_math(accessorA[0]); };
+          cgh.single_task(kern);
         })
         .wait();
   }
-  ResultT<T> expected = do_test(Init);
+  MathResultT<T> expected = do_test_math(Init);
   for (int i = 0; i < NumMathBuiltins; i++) {
     if (!check(Res[0][i], expected[i])) {
       return false;
@@ -117,6 +153,33 @@ template <typename T> bool test_int(queue deviceQueue) {
   return true;
 }
 
+template <typename T>
+bool test_vec(queue q) {
+  bool success = true;
+  success &= test_math<sycl::vec<T,2>>(q);
+  if constexpr (std::is_same<T, float>::value) {
+    // these fail on double with wrong values
+    success &= test_math<sycl::vec<T,3>>(q);
+    success &= test_math<sycl::vec<T,4>>(q);
+
+
+    success &= test_native<sycl::vec<T,2>>(q);
+    success &= test_native<sycl::vec<T,3>>(q);
+    success &= test_native<sycl::vec<T,4>>(q);
+  }
+  // vector sizes greater than 4 are currently unsupported
+  return success;
+}
+
+template <typename T>
+bool test(queue q) {
+  bool success = true;
+  success &= test_math<T>(q);
+  success &= test_native<T>(q);
+  success &= test_vec<T>(q);
+  return success;
+}
+
 int main() {
   queue q;
   bool success = true;

From c9d1bda5a812ea3b130c5571c61c363205ad72cd Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Mon, 2 Oct 2023 14:48:18 +0100
Subject: [PATCH 04/11] formatting

---
 .../libspirv/math/helpers.h                   | 70 +++++++++++--------
 sycl/test/native_cpu/math_builtins.cpp        | 19 +++--
 2 files changed, 49 insertions(+), 40 deletions(-)

diff --git a/libclc/x86_64-unknown-linux/libspirv/math/helpers.h b/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
index 0e3b294128aae..0178a74ad6c96 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
+++ b/libclc/x86_64-unknown-linux/libspirv/math/helpers.h
@@ -1,7 +1,6 @@
 #include "func.h"
 #include "types.h"
 
-
 #ifdef NO_CLANG_BUILTINS
 
 #define GEN_UNARY_BUILTIN_T(NAME, TYPE)                                        \
@@ -19,7 +18,7 @@
 
 #define GEN_TERNARY_BUILTIN(NAME)                                              \
   GEN_TERNARY_BUILTIN_T(NAME, float)                                           \
-  GEN_TERNARY_BUILTIN_T(NAME, double)\
+  GEN_TERNARY_BUILTIN_T(NAME, double)
 
 #else
 
@@ -29,42 +28,55 @@
 #define GETNAME(ID) __spirv_ocl_native_##ID
 #endif
 
-// Todo: fabs is the only builtin whose vector version is not named __builtin_elementwise_##NAME
+// Todo: fabs is the only builtin whose vector version is not named
+// __builtin_elementwise_##NAME
 #ifndef IS_FABS
-#define GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, NUM) \
-  _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n) { return __builtin_elementwise_##NAME(n); } 
+#define GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, NUM)                              \
+  _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n) {                         \
+    return __builtin_elementwise_##NAME(n);                                    \
+  }
 #else
-#define GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, NUM) \
-  _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n) { return __builtin_elementwise_abs(n); } 
+#define GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, NUM)                              \
+  _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n) {                         \
+    return __builtin_elementwise_abs(n);                                       \
+  }
 #endif
 
-#define GEN_UNARY_VECTOR_BUILTIN_T(NAME, TYPE)  \
-  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 2) \
-  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 3) \
-  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 4) \
-  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 8) \
-  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 16) 
+#define GEN_UNARY_VECTOR_BUILTIN_T(NAME, TYPE)                                 \
+  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 2)                                      \
+  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 3)                                      \
+  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 4)                                      \
+  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 8)                                      \
+  GEN_UNARY_VECTOR_BUILTIN(NAME, TYPE, 16)
 
 #define GEN_UNARY_BUILTIN(NAME)                                                \
-  _CLC_OVERLOAD float GETNAME(NAME)(float n) { return __builtin_##NAME##f(n); } \
+  _CLC_OVERLOAD float GETNAME(NAME)(float n) {                                 \
+    return __builtin_##NAME##f(n);                                             \
+  }                                                                            \
   _CLC_OVERLOAD double GETNAME(NAME)(double n) { return __builtin_##NAME(n); } \
-  GEN_UNARY_VECTOR_BUILTIN_T(NAME, float) \
-  GEN_UNARY_VECTOR_BUILTIN_T(NAME, double) 
+  GEN_UNARY_VECTOR_BUILTIN_T(NAME, float)                                      \
+  GEN_UNARY_VECTOR_BUILTIN_T(NAME, double)
 
-#define GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, NUM) \
-  _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n1, TYPE##NUM n2, TYPE##NUM n3) { return __builtin_elementwise_##NAME(n1, n2, n3); } 
+#define GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, NUM)                            \
+  _CLC_OVERLOAD TYPE##NUM GETNAME(NAME)(TYPE##NUM n1, TYPE##NUM n2,            \
+                                        TYPE##NUM n3) {                        \
+    return __builtin_elementwise_##NAME(n1, n2, n3);                           \
+  }
 
-#define GEN_TERNARY_VECTOR_BUILTIN_T(NAME, TYPE)  \
-  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 2) \
-  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 3) \
-  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 4) \
-  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 8) \
-  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 16) 
+#define GEN_TERNARY_VECTOR_BUILTIN_T(NAME, TYPE)                               \
+  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 2)                                    \
+  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 3)                                    \
+  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 4)                                    \
+  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 8)                                    \
+  GEN_TERNARY_VECTOR_BUILTIN(NAME, TYPE, 16)
 
 #define GEN_TERNARY_BUILTIN(NAME)                                              \
-  _CLC_OVERLOAD float GETNAME(NAME)(float n1, float n2, float n3) { return __builtin_##NAME##f(n1, n2, n3); } \
-  _CLC_OVERLOAD double GETNAME(NAME)(double n1, double n2, double n3) { return __builtin_##NAME(n1, n2, n3); } \
-  GEN_TERNARY_VECTOR_BUILTIN_T(NAME, float) \
-  GEN_TERNARY_VECTOR_BUILTIN_T(NAME, double) 
+  _CLC_OVERLOAD float GETNAME(NAME)(float n1, float n2, float n3) {            \
+    return __builtin_##NAME##f(n1, n2, n3);                                    \
+  }                                                                            \
+  _CLC_OVERLOAD double GETNAME(NAME)(double n1, double n2, double n3) {        \
+    return __builtin_##NAME(n1, n2, n3);                                       \
+  }                                                                            \
+  GEN_TERNARY_VECTOR_BUILTIN_T(NAME, float)                                    \
+  GEN_TERNARY_VECTOR_BUILTIN_T(NAME, double)
 #endif
-
diff --git a/sycl/test/native_cpu/math_builtins.cpp b/sycl/test/native_cpu/math_builtins.cpp
index 76b7eb3a47883..23ce81ca178c8 100644
--- a/sycl/test/native_cpu/math_builtins.cpp
+++ b/sycl/test/native_cpu/math_builtins.cpp
@@ -153,26 +153,23 @@ template <typename T> bool test_int(queue deviceQueue) {
   return true;
 }
 
-template <typename T>
-bool test_vec(queue q) {
+template <typename T> bool test_vec(queue q) {
   bool success = true;
-  success &= test_math<sycl::vec<T,2>>(q);
+  success &= test_math<sycl::vec<T, 2>>(q);
   if constexpr (std::is_same<T, float>::value) {
     // these fail on double with wrong values
-    success &= test_math<sycl::vec<T,3>>(q);
-    success &= test_math<sycl::vec<T,4>>(q);
+    success &= test_math<sycl::vec<T, 3>>(q);
+    success &= test_math<sycl::vec<T, 4>>(q);
 
-
-    success &= test_native<sycl::vec<T,2>>(q);
-    success &= test_native<sycl::vec<T,3>>(q);
-    success &= test_native<sycl::vec<T,4>>(q);
+    success &= test_native<sycl::vec<T, 2>>(q);
+    success &= test_native<sycl::vec<T, 3>>(q);
+    success &= test_native<sycl::vec<T, 4>>(q);
   }
   // vector sizes greater than 4 are currently unsupported
   return success;
 }
 
-template <typename T>
-bool test(queue q) {
+template <typename T> bool test(queue q) {
   bool success = true;
   success &= test_math<T>(q);
   success &= test_native<T>(q);

From a08c279df839cdeae0616b7092fe7bf0cdb7d677 Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Mon, 9 Oct 2023 09:10:38 +0100
Subject: [PATCH 05/11] formatting

---
 .../SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp b/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp
index 8b6ed1c7c8a83..a35f679227f10 100644
--- a/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp
+++ b/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp
@@ -45,18 +45,18 @@ RenameKernelSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) {
     }
   }
 
-//<<<<<<< HEAD
-//  for (auto &F : CalledSet) {
-//    auto NewName = sycl::utils::addSYCLNativeCPUSuffix(F->getName());
-//    F->setName(NewName);
-//    auto Comdat = F->getComdat();
-//    if (Comdat) {
-//      auto NewComdat = M.getOrInsertComdat(NewName.str());
-//      F->setComdat(NewComdat);
-//    }
-//    ModuleChanged |= true;
-//  }
-//=======
-//>>>>>>> sycl
+  //<<<<<<< HEAD
+  //  for (auto &F : CalledSet) {
+  //    auto NewName = sycl::utils::addSYCLNativeCPUSuffix(F->getName());
+  //    F->setName(NewName);
+  //    auto Comdat = F->getComdat();
+  //    if (Comdat) {
+  //      auto NewComdat = M.getOrInsertComdat(NewName.str());
+  //      F->setComdat(NewComdat);
+  //    }
+  //    ModuleChanged |= true;
+  //  }
+  //=======
+  //>>>>>>> sycl
   return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
 }

From b8f667aa5f06c367287f8f0610148a15526f2371 Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Mon, 9 Oct 2023 09:53:49 +0100
Subject: [PATCH 06/11] Remove commented code

---
 llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp b/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp
index a35f679227f10..f9a06457b4eea 100644
--- a/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp
+++ b/llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp
@@ -45,18 +45,5 @@ RenameKernelSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) {
     }
   }
 
-  //<<<<<<< HEAD
-  //  for (auto &F : CalledSet) {
-  //    auto NewName = sycl::utils::addSYCLNativeCPUSuffix(F->getName());
-  //    F->setName(NewName);
-  //    auto Comdat = F->getComdat();
-  //    if (Comdat) {
-  //      auto NewComdat = M.getOrInsertComdat(NewName.str());
-  //      F->setComdat(NewComdat);
-  //    }
-  //    ModuleChanged |= true;
-  //  }
-  //=======
-  //>>>>>>> sycl
   return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
 }

From a9d6d094b7aa138cc8de1a1d514b35ca1133250b Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Mon, 9 Oct 2023 17:06:07 +0100
Subject: [PATCH 07/11] Remove math builtins test

---
 sycl/test/native_cpu/math_builtins.cpp | 194 -------------------------
 1 file changed, 194 deletions(-)
 delete mode 100644 sycl/test/native_cpu/math_builtins.cpp

diff --git a/sycl/test/native_cpu/math_builtins.cpp b/sycl/test/native_cpu/math_builtins.cpp
deleted file mode 100644
index 23ce81ca178c8..0000000000000
--- a/sycl/test/native_cpu/math_builtins.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-// REQUIRES: native_cpu_be
-// RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t -g
-// RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t
-
-#include <sycl/sycl.hpp>
-
-#include <array>
-#include <iostream>
-
-using namespace sycl;
-constexpr sycl::access::mode sycl_read_write = sycl::access::mode::read_write;
-
-template <typename T> class Test;
-
-template <typename T> class TestInt;
-
-static constexpr int NumMathBuiltins = 8;
-static constexpr int NumNativeBuiltins = 8;
-static constexpr float eps = 0.01;
-
-template <typename T> using MathResultT = std::array<T, NumMathBuiltins>;
-template <typename T> using NativeResultT = std::array<T, NumMathBuiltins>;
-
-template <typename T> NativeResultT<T> do_test_native(T in) {
-  NativeResultT<T> res;
-  unsigned i = 0;
-  res[i++] = sycl::native::sqrt(in);
-  res[i++] = sycl::native::cos(in);
-  res[i++] = sycl::native::sin(in);
-  res[i++] = sycl::native::exp2(in);
-  res[i++] = sycl::native::exp(in);
-  res[i++] = sycl::native::log10(in);
-  res[i++] = sycl::native::log(in);
-  res[i++] = sycl::native::log2(in);
-  return res;
-}
-
-template <typename T> MathResultT<T> do_test_math(T in) {
-  NativeResultT<T> res;
-  unsigned i = 0;
-  res[i++] = sycl::sqrt(in);
-  res[i++] = sycl::fabs(in);
-  res[i++] = sycl::fma(in, in, in);
-  res[i++] = sycl::trunc(in);
-  res[i++] = sycl::rint(in);
-  res[i++] = sycl::round(in);
-  res[i++] = sycl::ceil(in);
-  res[i++] = sycl::floor(in);
-  return res;
-}
-
-template <typename T> bool check(T &res, T &exp) {
-  bool correct =
-      std::abs(static_cast<float>(res) - static_cast<float>(exp)) < eps;
-  if (!correct) {
-    std::cout << "Value mismatch; Expected: " << exp << " actual: " << res
-              << "\n";
-    return false;
-  }
-  return true;
-}
-
-template <typename T, int N>
-bool check(sycl::vec<T, N> &res, sycl::vec<T, N> &exp) {
-  bool correct = true;
-  for (int i = 0; i < N; i++) {
-    correct &= check(res[i], exp[i]);
-  }
-  return correct;
-}
-
-template <typename T> bool test_native(queue deviceQueue) {
-  const size_t N = 1;
-  const T Init{1};
-  std::array<T, N> A = {Init};
-  std::array<NativeResultT<T>, 1> Res;
-  sycl::range<1> numOfItems{N};
-  {
-    sycl::buffer<T, 1> bufferA(A.data(), numOfItems);
-    sycl::buffer<NativeResultT<T>, 1> bufferRes(Res.data(), numOfItems);
-
-    deviceQueue
-        .submit([&](sycl::handler &cgh) {
-          auto accessorA = bufferA.template get_access<sycl_read_write>(cgh);
-          auto accessorRes =
-              bufferRes.template get_access<sycl_read_write>(cgh);
-
-          auto kern = [=]() { accessorRes[0] = do_test_native(accessorA[0]); };
-          cgh.single_task(kern);
-        })
-        .wait();
-  }
-  NativeResultT<T> expected = do_test_native(Init);
-  for (int i = 0; i < NumNativeBuiltins; i++) {
-    if (!check(Res[0][i], expected[i])) {
-      return false;
-    }
-  }
-  return true;
-}
-
-template <typename T> bool test_math(queue deviceQueue) {
-  const size_t N = 1;
-  const T Init{1};
-  std::array<T, N> A = {Init};
-  std::array<MathResultT<T>, 1> Res;
-  sycl::range<1> numOfItems{N};
-  {
-    sycl::buffer<T, 1> bufferA(A.data(), numOfItems);
-    sycl::buffer<MathResultT<T>, 1> bufferRes(Res.data(), numOfItems);
-
-    deviceQueue
-        .submit([&](sycl::handler &cgh) {
-          auto accessorA = bufferA.template get_access<sycl_read_write>(cgh);
-          auto accessorRes =
-              bufferRes.template get_access<sycl_read_write>(cgh);
-
-          auto kern = [=]() { accessorRes[0] = do_test_math(accessorA[0]); };
-          cgh.single_task(kern);
-        })
-        .wait();
-  }
-  MathResultT<T> expected = do_test_math(Init);
-  for (int i = 0; i < NumMathBuiltins; i++) {
-    if (!check(Res[0][i], expected[i])) {
-      return false;
-    }
-  }
-  return true;
-}
-
-template <typename T> bool test_int(queue deviceQueue) {
-  const size_t N = 1;
-  const T Init{10};
-  std::array<T, N> A = {Init};
-  sycl::range<1> numOfItems{N};
-  {
-    sycl::buffer<T, 1> bufferA(A.data(), numOfItems);
-
-    deviceQueue
-        .submit([&](sycl::handler &cgh) {
-          auto accessorA = bufferA.template get_access<sycl_read_write>(cgh);
-
-          auto kern = [=]() { accessorA[0] = sycl::popcount<T>(accessorA[0]); };
-          cgh.single_task<TestInt<T>>(kern);
-        })
-        .wait();
-  }
-  T expected = sycl::popcount(Init);
-  if (!(A[0] == expected)) {
-    return false;
-  }
-  return true;
-}
-
-template <typename T> bool test_vec(queue q) {
-  bool success = true;
-  success &= test_math<sycl::vec<T, 2>>(q);
-  if constexpr (std::is_same<T, float>::value) {
-    // these fail on double with wrong values
-    success &= test_math<sycl::vec<T, 3>>(q);
-    success &= test_math<sycl::vec<T, 4>>(q);
-
-    success &= test_native<sycl::vec<T, 2>>(q);
-    success &= test_native<sycl::vec<T, 3>>(q);
-    success &= test_native<sycl::vec<T, 4>>(q);
-  }
-  // vector sizes greater than 4 are currently unsupported
-  return success;
-}
-
-template <typename T> bool test(queue q) {
-  bool success = true;
-  success &= test_math<T>(q);
-  success &= test_native<T>(q);
-  success &= test_vec<T>(q);
-  return success;
-}
-
-int main() {
-  queue q;
-  bool success = true;
-  success &= test<float>(q);
-  success &= test<double>(q);
-  success &= test_int<int>(q);
-  success &= test_int<char>(q);
-
-  if (!success) {
-    std::cout << "Test failed\n";
-    return 1;
-  }
-  std::cout << "Test passed\n";
-  return 0;
-}

From 75564441e2493539c5f897b650af0ef0cbb13387 Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Mon, 9 Oct 2023 17:06:46 +0100
Subject: [PATCH 08/11] Use NativeCPU AS map when building libclc

---
 clang/lib/Basic/TargetInfo.cpp | 6 ++++--
 libclc/CMakeLists.txt          | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 245af0fd580b6..5c68c6d526cff 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -517,9 +517,11 @@ void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
   if (Opts.FakeAddressSpaceMap)
     AddrSpaceMap = &FakeAddrSpaceMap;
 
-  if (Opts.SYCLIsDevice && Opts.SYCLIsNativeCPU) {
+  if ((Opts.SYCLIsDevice || Opts.OpenCL) && Opts.SYCLIsNativeCPU) {
     // For SYCL Native CPU we use the NVPTXAddrSpaceMap because
-    // we need builtins to be mangled with AS information
+    // we need builtins to be mangled with AS information.
+    // This is also enabled in OpenCL mode so that mangling
+    // matches when building libclc.
 
     static const unsigned SYCLNativeCPUASMap[] = {
         0,  // Default
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 586ca238ddd52..43e91af7f4221 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -345,8 +345,9 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
 				# clang builtins need to be accessible
 				set( flags "SHELL:-mcpu=gfx940")
       elseif( ${ARCH}  STREQUAL x86_64)
-        # TODO: This is used by native cpu, we should define an option to set this flags
+        # TODO: This is used by SYCL Native Cpu, we should define an option to set this flags
         set( flags "SHELL:-Xclang -target-feature -Xclang +avx"
+            "SHELL:-Xclang -fsycl-is-native-cpu"
             "SHELL:-Xclang -target-feature -Xclang +avx512f")
 			else()
 				set ( flags )

From edfe05805765db0eefb445b7251a7941c3a8281e Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Wed, 11 Oct 2023 15:43:07 +0100
Subject: [PATCH 09/11] Add FE test

---
 clang/test/CodeGenSYCL/native_cpu_as.cpp | 34 ++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 clang/test/CodeGenSYCL/native_cpu_as.cpp

diff --git a/clang/test/CodeGenSYCL/native_cpu_as.cpp b/clang/test/CodeGenSYCL/native_cpu_as.cpp
new file mode 100644
index 0000000000000..887f13648982d
--- /dev/null
+++ b/clang/test/CodeGenSYCL/native_cpu_as.cpp
@@ -0,0 +1,34 @@
+// Checks that name mangling matches between SYCL Native CPU and OpenCL when -fsycl-is-native-cpu is set
+// RUN: %clang_cc1 -DCPP -fsycl-is-device -S -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_sycl.ll %s 
+// RUN: FileCheck -input-file=%t_sycl.ll %s 
+
+// RUN: %clang_cc1 -x cl -DOCL -S -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_ocl.ll %s 
+// RUN: FileCheck -input-file=%t_ocl.ll %s 
+
+#ifdef CPP
+#define AS_LOCAL __attribute((address_space(3)))
+#define AS_GLOBAL __attribute((address_space(1)))
+#define AS_PRIVATE __attribute((address_space(0)))
+#define ATTRS [[intel::device_indirectly_callable]]
+#define ATTRS2 SYCL_EXTERNAL
+#else 
+#ifdef OCL
+#define AS_LOCAL __local
+#define AS_GLOBAL __global
+#define AS_PRIVATE __private
+#define ATTRS __attribute((overloadable))
+#define ATTRS2 __attribute((overloadable))
+#endif
+#endif
+
+
+ATTRS2 void use_private(int *p);
+ATTRS  void func(AS_LOCAL int *p1, AS_GLOBAL int *p2, AS_PRIVATE int *p3){
+  int private_var;
+  use_private(&private_var);
+}
+// CHECK: define dso_local void @_Z4funcPU3AS3iPU3AS1iPi(
+// CHECK: call void @_Z11use_privatePi(
+
+
+

From 5009f36d53b3fda592daac55bec8d0ae4437f5c5 Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Wed, 11 Oct 2023 17:03:23 +0100
Subject: [PATCH 10/11] Skip test on windows

---
 clang/test/CodeGenSYCL/native_cpu_as.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/test/CodeGenSYCL/native_cpu_as.cpp b/clang/test/CodeGenSYCL/native_cpu_as.cpp
index 887f13648982d..479da411a02f4 100644
--- a/clang/test/CodeGenSYCL/native_cpu_as.cpp
+++ b/clang/test/CodeGenSYCL/native_cpu_as.cpp
@@ -1,3 +1,5 @@
+// This test is temporarily disabled for SYCL Native CPU on Windows
+// UNSUPPORTED: system-windows
 // Checks that name mangling matches between SYCL Native CPU and OpenCL when -fsycl-is-native-cpu is set
 // RUN: %clang_cc1 -DCPP -fsycl-is-device -S -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t_sycl.ll %s 
 // RUN: FileCheck -input-file=%t_sycl.ll %s 

From 45b75c5f1d47d44bb1cc5809c742a561e47a869d Mon Sep 17 00:00:00 2001
From: "pietro.ghiglio" <pietro.ghiglio@codeplay.com>
Date: Tue, 24 Oct 2023 12:13:16 +0100
Subject: [PATCH 11/11] undef IS_NATIVE

---
 libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl   | 1 +
 libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl   | 1 +
 libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl  | 1 +
 libclc/x86_64-unknown-linux/libspirv/math/native_log.cl   | 1 +
 libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl | 1 +
 libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl  | 1 +
 libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl   | 2 +-
 libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl  | 1 +
 8 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl
index 42ec1eb7ac6fd..05c72fe235dc2 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_cos.cl
@@ -2,3 +2,4 @@
 #include "helpers.h"
 
 GEN_UNARY_BUILTIN(cos)
+#undef IS_NATIVE
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl
index 2e0abd4c58114..0d852d3b770f1 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_exp.cl
@@ -2,3 +2,4 @@
 #include "helpers.h"
 
 GEN_UNARY_BUILTIN(exp)
+#undef IS_NATIVE
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl
index 69f6eb8467d24..0b5bed5e57515 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_exp2.cl
@@ -2,3 +2,4 @@
 #include "helpers.h"
 
 GEN_UNARY_BUILTIN(exp2)
+#undef IS_NATIVE
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl
index d86a3fa492cac..43b55feaf78e1 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_log.cl
@@ -2,3 +2,4 @@
 #include "helpers.h"
 
 GEN_UNARY_BUILTIN(log)
+#undef IS_NATIVE
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl
index f4c14348a2f4a..5251e58feb2ff 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_log10.cl
@@ -2,4 +2,5 @@
 #include "helpers.h"
 
 GEN_UNARY_BUILTIN(log10)
+#undef IS_NATIVE
 
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl
index b4fdc6017b27d..59f6cd88ac9d6 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_log2.cl
@@ -2,4 +2,5 @@
 #include "helpers.h"
 
 GEN_UNARY_BUILTIN(log2)
+#undef IS_NATIVE
 
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl
index 1967988781a01..342c5b8594ceb 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_sin.cl
@@ -2,4 +2,4 @@
 #include "helpers.h"
 
 GEN_UNARY_BUILTIN(sin)
-
+#undef IS_NATIVE
diff --git a/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl b/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl
index c1b9f041b1e43..88af2fb0b3267 100644
--- a/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl
+++ b/libclc/x86_64-unknown-linux/libspirv/math/native_sqrt.cl
@@ -2,3 +2,4 @@
 #include "helpers.h"
 
 GEN_UNARY_BUILTIN(sqrt)
+#undef IS_NATIVE