Merge from 'main' to 'sycl-web' (#1)

CONFLICT (content): Merge conflict in clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
vladimirlaz · Jun 10, 2021 · 54cd04d · 54cd04d
2 parents d24e228 + 5fc2673
commit 54cd04d
Show file tree

Hide file tree

Showing 113 changed files with 7,226 additions and 3,385 deletions.
diff --git a/.mailmap b/.mailmap
@@ -22,7 +22,10 @@
 #
 # Please keep this file sorted.
 
-Martin Storsjö <[email protected]>
+<[email protected]> <[email protected]>
 <[email protected]> <[email protected]>
 <[email protected]> <[email protected]>
-<[email protected]> <[email protected]>
+Jon Roelofs <[email protected]> Jon Roelofs <[email protected]>
+Jon Roelofs <[email protected]> Jonathan Roelofs <[email protected]>
+Jon Roelofs <[email protected]> Jonathan Roelofs <[email protected]>
+Martin Storsjö <[email protected]>
diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp
@@ -100,7 +100,7 @@ static bool isInitializingVariableImmutable(const VarDecl &InitializingVar,
   if (!isOnlyUsedAsConst(InitializingVar, BlockStmt, Context))
     return false;
 
-  QualType T = InitializingVar.getType();
+  QualType T = InitializingVar.getType().getCanonicalType();
   // The variable is a value type and we know it is only used as const. Safe
   // to reference it and avoid the copy.
   if (!isa<ReferenceType, PointerType>(T))

diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-copy-initialization.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance-unnecessary-copy-initialization.cpp
@@ -4,6 +4,7 @@ struct ExpensiveToCopyType {
   ExpensiveToCopyType();
   virtual ~ExpensiveToCopyType();
   const ExpensiveToCopyType &reference() const;
+  const ExpensiveToCopyType *pointer() const;
   void nonConstMethod();
   bool constMethod() const;
 };
@@ -548,6 +549,25 @@ void negativeCopiedFromGetterOfReferenceToModifiedVar() {
   Orig.nonConstMethod();
 }
 
+void negativeAliasNonCanonicalPointerType() {
+  ExpensiveToCopyType Orig;
+  // The use of auto here hides that the type is a pointer type. The check needs
+  // to look at the canonical type to detect the aliasing through this pointer.
+  const auto Pointer = Orig.pointer();
+  const auto NecessaryCopy = Pointer->reference();
+  Orig.nonConstMethod();
+}
+
+void negativeAliasTypedefedType() {
+  typedef const ExpensiveToCopyType &ReferenceType;
+  ExpensiveToCopyType Orig;
+  // The typedef hides the fact that this is a reference type. The check needs
+  // to look at the canonical type to detect the aliasing.
+  ReferenceType Ref = Orig.reference();
+  const auto NecessaryCopy = Ref.reference();
+  Orig.nonConstMethod();
+}
+
 void positiveCopiedFromGetterOfReferenceToConstVar() {
   ExpensiveToCopyType Orig;
   const auto &Ref = Orig.reference();

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
@@ -1008,6 +1008,10 @@ def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">,
 def fgpu_sanitize : Flag<["-"], "fgpu-sanitize">, Group<f_Group>,
   HelpText<"Enable sanitizer for AMDGPU target">;
 def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
+def gpu_bundle_output : Flag<["--"], "gpu-bundle-output">,
+  Group<f_Group>, HelpText<"Bundle output files of HIP device compilation">;
+def no_gpu_bundle_output : Flag<["--"], "no-gpu-bundle-output">,
+  Group<f_Group>, HelpText<"Do not bundle output files of HIP device compilation">;
 def cuid_EQ : Joined<["-"], "cuid=">, Flags<[CC1Option]>,
   HelpText<"An ID for compilation unit, which should be the same for the same "
            "compilation unit but different for different compilation units. "

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
@@ -3429,6 +3429,12 @@ class OffloadingActionBuilder final {
     /// The linker inputs obtained for each device arch.
     SmallVector<ActionList, 8> DeviceLinkerInputs;
     bool GPUSanitize;
+    // The default bundling behavior depends on the type of output, therefore
+    // BundleOutput needs to be tri-value: None, true, or false.
+    // Bundle code objects unless --no-gpu-bundle-output is specified for
+    // device-only compilation. Bundle other types of output files only if
+    // --gpu-bundle-output is specified for device-only compilation.
+    Optional<bool> BundleOutput;
 
   public:
     HIPActionBuilder(Compilation &C, DerivedArgList &Args,
@@ -3437,6 +3443,10 @@ class OffloadingActionBuilder final {
       DefaultCudaArch = CudaArch::GFX803;
       GPUSanitize = Args.hasFlag(options::OPT_fgpu_sanitize,
                                  options::OPT_fno_gpu_sanitize, false);
+      if (Args.hasArg(options::OPT_gpu_bundle_output,
+                      options::OPT_no_gpu_bundle_output))
+        BundleOutput = Args.hasFlag(options::OPT_gpu_bundle_output,
+                                    options::OPT_no_gpu_bundle_output);
     }
 
     bool canUseBundlerUnbundler() const override { return true; }
@@ -3526,22 +3536,25 @@ class OffloadingActionBuilder final {
           CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
               DDep, CudaDeviceActions[I]->getType());
         }
-        // Create HIP fat binary with a special "link" action.
-        CudaFatBinary =
-            C.MakeAction<LinkJobAction>(CudaDeviceActions,
-                types::TY_HIP_FATBIN);
 
-        if (!CompileDeviceOnly) {
-          DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
-                 AssociatedOffloadKind);
-          // Clear the fat binary, it is already a dependence to an host
-          // action.
-          CudaFatBinary = nullptr;
-        }
+        if (!CompileDeviceOnly || !BundleOutput.hasValue() ||
+            BundleOutput.getValue()) {
+          // Create HIP fat binary with a special "link" action.
+          CudaFatBinary = C.MakeAction<LinkJobAction>(CudaDeviceActions,
+                                                      types::TY_HIP_FATBIN);
 
-        // Remove the CUDA actions as they are already connected to an host
-        // action or fat binary.
-        CudaDeviceActions.clear();
+          if (!CompileDeviceOnly) {
+            DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
+                   AssociatedOffloadKind);
+            // Clear the fat binary, it is already a dependence to an host
+            // action.
+            CudaFatBinary = nullptr;
+          }
+
+          // Remove the CUDA actions as they are already connected to an host
+          // action or fat binary.
+          CudaDeviceActions.clear();
+        }
 
         return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
       } else if (CurPhase == phases::Link) {
@@ -3568,6 +3581,20 @@ class OffloadingActionBuilder final {
         A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
                                                AssociatedOffloadKind);
 
+      if (CompileDeviceOnly && CurPhase == FinalPhase &&
+          BundleOutput.hasValue() && BundleOutput.getValue()) {
+        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+          OffloadAction::DeviceDependences DDep;
+          DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I],
+                   AssociatedOffloadKind);
+          CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
+              DDep, CudaDeviceActions[I]->getType());
+        }
+        CudaFatBinary =
+            C.MakeAction<OffloadBundlingJobAction>(CudaDeviceActions);
+        CudaDeviceActions.clear();
+      }
+
       return (CompileDeviceOnly && CurPhase == FinalPhase) ? ABRT_Ignore_Host
                                                            : ABRT_Success;
     }

diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
@@ -856,9 +856,7 @@ CompilerInstance::createOutputFileImpl(StringRef OutputPath, bool Binary,
       consumeError(std::move(E));
     } else {
       Temp = std::move(ExpectedFile.get());
-      OS.reset(new llvm::raw_fd_ostream(Temp->FD, /*shouldClose=*/false,
-                                        Binary ? llvm::sys::fs::OF_None
-                                               : llvm::sys::fs::OF_Text));
+      OS.reset(new llvm::raw_fd_ostream(Temp->FD, /*shouldClose=*/false));
       OSFile = Temp->TmpName;
     }
     // If we failed to create the temporary, fallback to writing to the file

diff --git a/clang/test/Driver/clang-offload-bundler.c b/clang/test/Driver/clang-offload-bundler.c
@@ -427,6 +427,21 @@
 // CKLST2-NOT: openmp-powerpc64le-ibm-linux-gnu
 // CKLST2-NOT: openmp-x86_64-pc-linux-gnu
 
+//
+// Check bundling without host target is allowed for HIP.
+//
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa-gfx900,hip-amdgcn-amd-amdhsa-gfx906 \
+// RUN:   -inputs=%t.tgt1,%t.tgt2 -outputs=%t.hip.bundle.bc
+// RUN: clang-offload-bundler -type=bc -list -inputs=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa-gfx900,hip-amdgcn-amd-amdhsa-gfx906 \
+// RUN:   -outputs=%t.res.tgt1,%t.res.tgt2 -inputs=%t.hip.bundle.bc -unbundle
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+//
+// NOHOST-NOT: host-
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa-gfx900
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa-gfx906
+
 // Some code so that we can create a binary out of this file.
 int A = 0;
 void test_func(void) {

diff --git a/clang/test/Driver/hip-device-compile.hip b/clang/test/Driver/hip-device-compile.hip
@@ -3,28 +3,56 @@
 // REQUIRES: amdgpu-registered-target
 
 // If -emit-llvm and/or -S is used in device only compilation,
-// the output should not be bundled.
+// the output should not be bundled unless --gpu-bundle-output
+// is specified.
 
+// Output unbundled bitcode.
 // RUN: %clang -c -emit-llvm --cuda-device-only -### -target x86_64-linux-gnu \
-// RUN:   -o a.bc -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   -o a.bc -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,BC %s
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,BC,NBUN %s
 
+// Output bundled bitcode.
+// RUN: %clang -c -emit-llvm --cuda-device-only -### -target x86_64-linux-gnu \
+// RUN:   -o a.bc -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
+// RUN:   --hip-device-lib=lib1.bc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu --gpu-bundle-output \
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,BCBUN %s
+
+// Output unbundled LLVM IR.
 // RUN: %clang -c -S -emit-llvm --cuda-device-only -### -target x86_64-linux-gnu \
-// RUN:   -o a.ll -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   -o a.ll -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LL %s
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LL,NBUN %s
+
+// Output bundled LLVM IR.
+// RUN: %clang -c -S -emit-llvm --cuda-device-only -### -target x86_64-linux-gnu \
+// RUN:   -o a.ll -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
+// RUN:   --hip-device-lib=lib1.bc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu --gpu-bundle-output \
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LLBUN %s
 
+// Output unbundled assembly.
 // RUN: %clang -c -S --cuda-device-only -### -target x86_64-linux-gnu \
-// RUN:   -o a.s -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   -o a.s -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASM %s
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASM,NBUN %s
+
+// Output bundled assembly.
+// RUN: %clang -c -S --cuda-device-only -### -target x86_64-linux-gnu \
+// RUN:   -o a.s -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
+// RUN:   --hip-device-lib=lib1.bc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu --gpu-bundle-output \
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASMBUN %s
 
 // CHECK: {{".*clang.*"}} "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
@@ -36,38 +64,64 @@
 // CHECK-SAME: {{".*lib1.bc"}}
 // CHECK-SAME: "-target-cpu" "gfx900"
 // BC-SAME: "-o" "a.bc"
+// BCBUN-SAME: "-o" "{{.*}}.bc"
 // LL-SAME: "-o" "a.ll"
+// LLBUN-SAME: "-o" "{{.*}}.ll"
 // ASM-SAME: "-o" "a.s"
+// ASMBUN-SAME: "-o" "{{.*}}.s"
 // CHECK-SAME: {{".*a.cu"}}
 
 // CHECK-NOT: {{"*.llvm-link"}}
 // CHECK-NOT: {{".*opt"}}
 // CHECK-NOT: {{".*llc"}}
 // CHECK-NOT: {{".*lld.*"}}
-// CHECK-NOT: {{".*clang-offload-bundler"}}
+// NBUN-NOT: {{".*clang-offload-bundler"}}
+// BCBUN: {{".*clang-offload-bundler"}}{{.*}}"-outputs=a.bc"
+// LLBUN: {{".*clang-offload-bundler"}}{{.*}}"-outputs=a.ll"
+// ASMBUN: {{".*clang-offload-bundler"}}{{.*}}"-outputs=a.s"
 // CHECK-NOT: {{".*ld.*"}}
 
 // If neither -emit-llvm nor -S is used in device only compilation,
-// the output should be bundled.
+// the output should be bundled unless --no-gpu-bundle-output is
+// specified.
 
+// Output bundled code objects.
 // RUN: %clang -c --cuda-device-only -### -target x86_64-linux-gnu \
-// RUN:   -o a.s -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   -o a.o -x hip --cuda-gpu-arch=gfx900 \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN: 2>&1 | FileCheck -check-prefixes=BUNDLE %s
+// RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-BUN %s
 
+// Output unbundled code objects.
+// RUN: %clang -c --cuda-device-only -### -target x86_64-linux-gnu \
+// RUN:   -o a.o -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   --hip-device-lib=lib1.bc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu --no-gpu-bundle-output \
+// RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-UBUN %s
+
+// Output bundled code objects.
 // RUN: %clang --cuda-device-only -### -target x86_64-linux-gnu \
-// RUN:   -o a.s -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   -o a.o -x hip --cuda-gpu-arch=gfx900 \
 // RUN:   --hip-device-lib=lib1.bc \
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN: 2>&1 | FileCheck -check-prefixes=BUNDLE %s
+// RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-BUN %s
 
-// BUNDLE: {{"*.clang.*"}} {{.*}} "-emit-obj"
-// BUNDLE-NOT: {{"*.llvm-link"}}
-// BUNDLE-NOT: {{".*opt"}}
-// BUNDLE-NOT: {{".*llc"}}
-// BUNDLE: {{".*lld.*"}}
-// BUNDLE: {{".*clang-offload-bundler"}}
+// Output unbundled code objects.
+// RUN: %clang --cuda-device-only -### -target x86_64-linux-gnu \
+// RUN:   -o a.o -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   --hip-device-lib=lib1.bc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu --no-gpu-bundle-output \
+// RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-UBUN %s
 
+// OBJ: {{"*.clang.*"}} {{.*}} "-emit-obj"
+// OBJ-NOT: {{"*.llvm-link"}}
+// OBJ-NOT: {{".*opt"}}
+// OBJ-NOT: {{".*llc"}}
+// OBJ-BUN: {{".*lld.*"}}{{.*}}"-o" "{{.*}}.o"
+// OBJ-UBUN: {{".*lld.*"}}{{.*}}"-o" "a.o"
+// OBJ-BUN: {{".*clang-offload-bundler"}}{{.*}}"-outputs=a.o"
+// OBJ-UBUN-NOT: {{".*clang-offload-bundler"}}