diff --git a/buildbot/configure.py b/buildbot/configure.py index 2134a566229be..fbc1160715d98 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -18,6 +18,8 @@ def do_configure(args): else: icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "OpenCL.lib") + install_dir = os.path.join(args.obj_dir, "install") + cmake_cmd = ["cmake", "-G", "Ninja", "-DCMAKE_BUILD_TYPE={}".format(args.build_type), @@ -30,6 +32,7 @@ def do_configure(args): "-DLLVM_BUILD_TOOLS=OFF", "-DSYCL_ENABLE_WERROR=ON", "-DLLVM_ENABLE_ASSERTIONS=ON", + "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), llvm_dir] print(cmake_cmd) diff --git a/llvm-spirv/CMakeLists.txt b/llvm-spirv/CMakeLists.txt index 915fd670840a2..9a5cf99631691 100644 --- a/llvm-spirv/CMakeLists.txt +++ b/llvm-spirv/CMakeLists.txt @@ -2,6 +2,10 @@ cmake_minimum_required(VERSION 3.3) set(LLVM_SPIRV_VERSION 0.2.1.0) +option(LLVM_SPIRV_INCLUDE_TESTS + "Generate build targets for the llvm-spirv lit tests." + ${LLVM_INCLUDE_TESTS}) + # check if we build inside llvm or not if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(BUILD_EXTERNAL YES) @@ -12,17 +16,17 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) CXX ) - set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) - if(LLVM_INCLUDE_TESTS) + if(LLVM_SPIRV_INCLUDE_TESTS) set(LLVM_TEST_COMPONENTS llvm-as llvm-dis ) - endif(LLVM_INCLUDE_TESTS) + endif(LLVM_SPIRV_INCLUDE_TESTS) - find_package(LLVM 9.0.0 REQUIRED + find_package(LLVM 10.0.0 REQUIRED COMPONENTS Analysis BitReader @@ -56,9 +60,9 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) add_subdirectory(lib/SPIRV) add_subdirectory(tools/llvm-spirv) -if(LLVM_INCLUDE_TESTS) +if(LLVM_SPIRV_INCLUDE_TESTS) add_subdirectory(test) -endif(LLVM_INCLUDE_TESTS) +endif(LLVM_SPIRV_INCLUDE_TESTS) install( FILES diff --git a/llvm-spirv/README.md b/llvm-spirv/README.md index 17fb769ec38c2..6fa5526be4698 100644 --- a/llvm-spirv/README.md +++ b/llvm-spirv/README.md @@ -26,7 +26,7 @@ The translator can be built with the latest(nightly) package of LLVM. For Ubuntu ``` sudo add-apt-repository "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial main" sudo apt-get update -sudo apt-get install llvm-9-dev llvm-9-tools clang-9 libclang-9-dev +sudo apt-get install llvm-10-dev llvm-10-tools clang-10 libclang-10-dev ``` The installed version of LLVM will be used by default for out-of-tree build of the translator. ``` @@ -73,9 +73,12 @@ Execute the following command inside the build directory to run translator tests make test ``` This requires that the `-DLLVM_INCLUDE_TESTS=ON` and -`-DLLVM_EXTERNAL_LIT="/usr/lib/llvm-9/build/utils/lit/lit.py"` arguments were +`-DLLVM_EXTERNAL_LIT="/usr/lib/llvm-10/build/utils/lit/lit.py"` arguments were passed to CMake during the build step. +The translator test suite can be disabled by passing +`-DLLVM_SPIRV_INCLUDE_TESTS=OFF` to cmake. + ## Run Instructions for `llvm-spirv` @@ -100,6 +103,11 @@ To translate between LLVM IR and SPIR-V: * `-spirv-text` - read/write SPIR-V in an internal textual format for debugging purpose. The textual format is not defined by SPIR-V spec. * `-help` - to see full list of options +Translation from LLVM IR to SPIR-V and then back to LLVM IR is not guaranteed to +produce the original LLVM IR. In particular, LLVM intrinsic call instructions +may get replaced by function calls to OpenCL builtins and metadata may be +dropped. + ### Handling SPIR-V versions generated by the translator There is one option to control the behavior of the translator with respect to diff --git a/llvm-spirv/include/LLVMSPIRVOpts.h b/llvm-spirv/include/LLVMSPIRVOpts.h index 1f5aa39855d91..17c452c98aae4 100644 --- a/llvm-spirv/include/LLVMSPIRVOpts.h +++ b/llvm-spirv/include/LLVMSPIRVOpts.h @@ -71,8 +71,9 @@ class TranslatorOpts { TranslatorOpts() = default; - TranslatorOpts(VersionNumber Max, const ExtensionsStatusMap &Map = {}) - : MaxVersion(Max), ExtStatusMap(Map) {} + TranslatorOpts(VersionNumber Max, const ExtensionsStatusMap &Map = {}, + bool ArgNameMD = false) + : MaxVersion(Max), ExtStatusMap(Map), GenKernelArgNameMD(ArgNameMD) {} bool isAllowedToUseVersion(VersionNumber RequestedVersion) const { return RequestedVersion <= MaxVersion; @@ -88,15 +89,22 @@ class TranslatorOpts { VersionNumber getMaxVersion() const { return MaxVersion; } + bool isGenArgNameMDEnabled() const { return GenKernelArgNameMD; } + void enableAllExtensions() { #define EXT(X) ExtStatusMap[ExtensionID::X] = true; #include "LLVMSPIRVExtensions.inc" #undef EXT } + void enableGenArgNameMD() { GenKernelArgNameMD = true; } + private: + // Common translation options VersionNumber MaxVersion = VersionNumber::MaximumVersion; ExtensionsStatusMap ExtStatusMap; + // SPIR-V to LLVM translation options + bool GenKernelArgNameMD; }; } // namespace SPIRV diff --git a/llvm-spirv/lib/SPIRV/LLVMToSPIRVDbgTran.cpp b/llvm-spirv/lib/SPIRV/LLVMToSPIRVDbgTran.cpp index 226582471cb9b..a8e8eed69bce3 100644 --- a/llvm-spirv/lib/SPIRV/LLVMToSPIRVDbgTran.cpp +++ b/llvm-spirv/lib/SPIRV/LLVMToSPIRVDbgTran.cpp @@ -205,6 +205,15 @@ void LLVMToSPIRVDbgTran::transLocationInfo() { LineNo = DL.getLine(); Col = DL.getCol(); V = SPIRVWriter->getTranslatedValue(&I); + // According to the spec, OpLine for an OpBranch/OpBranchConditional + // must precede the merge instruction and not the branch instruction + auto *VPrev = static_cast(V)->getPrevious(); + if (VPrev->getOpCode() == OpLoopMerge || + VPrev->getOpCode() == OpLoopControlINTEL) { + assert(V->getOpCode() == OpBranch || + V->getOpCode() == OpBranchConditional); + V = VPrev; + } BM->addLine(V, File ? File->getId() : getDebugInfoNone()->getId(), LineNo, Col); } diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.cpp b/llvm-spirv/lib/SPIRV/OCLUtil.cpp index ff32960f78bc1..a3a7b109bfa5b 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.cpp +++ b/llvm-spirv/lib/SPIRV/OCLUtil.cpp @@ -777,7 +777,7 @@ bool isSamplerTy(Type *Ty) { return STy && STy->hasName() && STy->getName() == kSPR2TypeName::Sampler; } -bool isPipeBI(const StringRef MangledName) { +bool isPipeOrAddressSpaceCastBI(const StringRef MangledName) { return MangledName == "write_pipe_2" || MangledName == "read_pipe_2" || MangledName == "write_pipe_2_bl" || MangledName == "read_pipe_2_bl" || MangledName == "write_pipe_4" || MangledName == "read_pipe_4" || @@ -796,7 +796,9 @@ bool isPipeBI(const StringRef MangledName) { MangledName == "sub_group_reserve_write_pipe" || MangledName == "sub_group_reserve_read_pipe" || MangledName == "sub_group_commit_write_pipe" || - MangledName == "sub_group_commit_read_pipe"; + MangledName == "sub_group_commit_read_pipe" || + MangledName == "to_global" || MangledName == "to_local" || + MangledName == "to_private"; } bool isEnqueueKernelBI(const StringRef MangledName) { diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.h b/llvm-spirv/lib/SPIRV/OCLUtil.h index b246e0f4b3236..09b8db646d12c 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.h +++ b/llvm-spirv/lib/SPIRV/OCLUtil.h @@ -416,7 +416,7 @@ bool isPipeStorageInitializer(Instruction *Inst); /// Check (isSamplerInitializer || isPipeStorageInitializer) bool isSpecialTypeInitializer(Instruction *Inst); -bool isPipeBI(const StringRef MangledName); +bool isPipeOrAddressSpaceCastBI(const StringRef MangledName); bool isEnqueueKernelBI(const StringRef MangledName); bool isKernelQueryBI(const StringRef MangledName); @@ -751,7 +751,7 @@ template <> inline void SPIRVMap::init() { } template <> inline void SPIRVMap::init() { -#define _SPIRV_OP(x, y) add("atomic_" #x, Op##y); +#define _SPIRV_OP(x, y) add(#x, Op##y); _SPIRV_OP(add, AtomicIAdd) _SPIRV_OP(sub, AtomicISub) _SPIRV_OP(xchg, AtomicExchange) diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index 6315f5bac6acc..cb27067e3067b 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -51,6 +51,7 @@ #include "SPIRVValue.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/CFG.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -90,11 +91,6 @@ cl::opt SPIRVEnableStepExpansion( "spirv-expand-step", cl::init(true), cl::desc("Enable expansion of OpenCL step and smoothstep function")); -cl::opt SPIRVGenKernelArgNameMD( - "spirv-gen-kernel-arg-name-md", cl::init(false), - cl::desc("Enable generating OpenCL kernel argument name " - "metadata")); - // Prefix for placeholder global variable name. const char *KPlaceholderPrefix = "placeholder."; @@ -391,6 +387,9 @@ Type *SPIRVToLLVM::transType(SPIRVType *T, bool IsClassMember) { case OpTypeVector: return mapType(T, VectorType::get(transType(T->getVectorComponentType()), T->getVectorComponentCount())); + case OpTypeMatrix: + return mapType(T, ArrayType::get(transType(T->getMatrixColumnType()), + T->getMatrixColumnCount())); case OpTypeOpaque: return mapType(T, StructType::create(*Context, T->getName())); case OpTypeFunction: { @@ -516,6 +515,9 @@ std::string SPIRVToLLVM::transTypeToOCLTypeName(SPIRVType *T, bool IsSigned) { case OpTypeVector: return transTypeToOCLTypeName(T->getVectorComponentType()) + T->getVectorComponentCount(); + case OpTypeMatrix: + return transTypeToOCLTypeName(T->getMatrixColumnType()) + + T->getMatrixColumnCount(); case OpTypeOpaque: return T->getName(); case OpTypeFunction: @@ -588,9 +590,12 @@ SPIRVToLLVM::getMetadataFromNameAndParameter(std::string Name, } template -void SPIRVToLLVM::setLLVMLoopMetadata(LoopInstType *LM, BranchInst *BI) { +void SPIRVToLLVM::setLLVMLoopMetadata(const LoopInstType *LM, Instruction *BI) { if (!LM) return; + + assert(BI && isa(BI)); + auto Temp = MDNode::getTemporary(*Context, None); auto Self = MDNode::get(*Context, Temp.get()); Self->replaceOperandWith(0, Self); @@ -693,6 +698,40 @@ void SPIRVToLLVM::setLLVMLoopMetadata(LoopInstType *LM, BranchInst *BI) { BI->setMetadata("llvm.loop", Node); } +void SPIRVToLLVM::transLLVMLoopMetadata(const Function *F) { + assert(F); + + if (!FuncLoopMetadataMap.empty()) { + // In SPIRV loop metadata is linked to a header basic block of a loop + // whilst in LLVM IR it is linked to a latch basic block (the one + // whose back edge goes to a header basic block) of the loop. + + using Edge = std::pair; + SmallVector Edges; + FindFunctionBackedges(*F, Edges); + + for (const auto &BkEdge : Edges) { + // Check that loop header BB contains loop metadata. + const auto LMDItr = FuncLoopMetadataMap.find(BkEdge.second); + if (LMDItr == FuncLoopMetadataMap.end()) + continue; + + auto *BI = const_cast(BkEdge.first->getTerminator()); + const auto *LMD = LMDItr->second; + if (LMD->getOpCode() == OpLoopMerge) { + const auto *LM = static_cast(LMD); + setLLVMLoopMetadata(LM, BI); + } else if (LMD->getOpCode() == OpLoopControlINTEL) { + const auto *LCI = static_cast(LMD); + setLLVMLoopMetadata(LCI, BI); + } + } + + // Loop metadata map should be re-filled during each function translation. + FuncLoopMetadataMap.clear(); + } +} + void SPIRVToLLVM::insertImageNameAccessQualifier(SPIRV::SPIRVTypeImage *ST, std::string &Name) { SPIRVAccessQualifierKind Acc = ST->hasAccessQualifier() @@ -1233,6 +1272,7 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, switch (BV->getType()->getOpCode()) { case OpTypeVector: return mapValue(BV, ConstantVector::get(CV)); + case OpTypeMatrix: case OpTypeArray: return mapValue( BV, ConstantArray::get(dyn_cast(transType(BCC->getType())), @@ -1364,34 +1404,20 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, // Translation of instructions switch (BV->getOpCode()) { case OpBranch: { - auto BR = static_cast(BV); - auto BI = BranchInst::Create( - dyn_cast(transValue(BR->getTargetLabel(), F, BB)), BB); - auto Prev = BR->getPrevious(); - if (Prev && Prev->getOpCode() == OpLoopMerge) { - auto LM = static_cast(Prev); - setLLVMLoopMetadata(LM, BI); - } else if (Prev && Prev->getOpCode() == OpLoopControlINTEL) { - auto LCI = static_cast(Prev); - setLLVMLoopMetadata(LCI, BI); - } + auto *BR = static_cast(BV); + auto *BI = BranchInst::Create( + cast(transValue(BR->getTargetLabel(), F, BB)), BB); + // Loop metadata will be translated in the end of function translation. return mapValue(BV, BI); } case OpBranchConditional: { - auto BR = static_cast(BV); - auto BC = BranchInst::Create( - dyn_cast(transValue(BR->getTrueLabel(), F, BB)), - dyn_cast(transValue(BR->getFalseLabel(), F, BB)), + auto *BR = static_cast(BV); + auto *BC = BranchInst::Create( + cast(transValue(BR->getTrueLabel(), F, BB)), + cast(transValue(BR->getFalseLabel(), F, BB)), transValue(BR->getCondition(), F, BB), BB); - auto Prev = BR->getPrevious(); - if (Prev && Prev->getOpCode() == OpLoopMerge) { - auto LM = static_cast(Prev); - setLLVMLoopMetadata(LM, BC); - } else if (Prev && Prev->getOpCode() == OpLoopControlINTEL) { - auto LCI = static_cast(Prev); - setLLVMLoopMetadata(LCI, BC); - } + // Loop metadata will be translated in the end of function translation. return mapValue(BV, BC); } @@ -1518,8 +1544,11 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, case OpVmeImageINTEL: case OpLine: case OpSelectionMerge: // OpenCL Compiler does not use this instruction - case OpLoopMerge: // Should be translated at OpBranch or - case OpLoopControlINTEL: // OpBranchConditional cases + return nullptr; + + case OpLoopMerge: // Will be translated after all other function's + case OpLoopControlINTEL: // instructions are translated. + FuncLoopMetadataMap[BB] = BV; return nullptr; case OpSwitch: { @@ -1557,6 +1586,67 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, return mapValue(BV, Scale); } + case OpMatrixTimesScalar: { + auto MTS = static_cast(BV); + IRBuilder<> Builder(BB); + auto Scalar = transValue(MTS->getScalar(), F, BB); + auto Matrix = transValue(MTS->getMatrix(), F, BB); + uint64_t ColNum = Matrix->getType()->getArrayNumElements(); + auto ColType = cast(Matrix->getType())->getElementType(); + auto VecSize = ColType->getVectorNumElements(); + auto NewVec = Builder.CreateVectorSplat(VecSize, Scalar, Scalar->getName()); + NewVec->takeName(Scalar); + + Value *V = UndefValue::get(Matrix->getType()); + for (uint64_t Idx = 0; Idx != ColNum; Idx++) { + auto Col = Builder.CreateExtractValue(Matrix, Idx); + auto I = Builder.CreateFMul(Col, NewVec); + V = Builder.CreateInsertValue(V, I, Idx); + } + + return mapValue(BV, V); + } + + case OpMatrixTimesVector: { + auto *MTV = static_cast(BV); + IRBuilder<> Builder(BB); + Value *Mat = transValue(MTV->getMatrix(), F, BB); + Value *Vec = transValue(MTV->getVector(), F, BB); + + // Result is similar to Matrix * Matrix + // Mat is of M columns and N rows. + // Mat consists of vectors: V_1, V_2, ..., V_M + // where each vector is of size N. + // + // Vec is of size M. + // The product is a vector of size N. + // + // |------- N ----------| + // Result = sum ( {Vec_1, Vec_1, ..., Vec_1} * V_1, + // {Vec_2, Vec_2, ..., Vec_2} * V_2, + // ... + // {Vec_M, Vec_M, ..., Vec_M} * V_N ); + // + // where sum is defined as vector sum. + + unsigned M = Mat->getType()->getArrayNumElements(); + VectorType *VTy = + cast(cast(Mat->getType())->getElementType()); + unsigned N = VTy->getVectorNumElements(); + auto ETy = VTy->getElementType(); + Value *V = Builder.CreateVectorSplat(N, ConstantFP::get(ETy, 0.0)); + + for (unsigned Idx = 0; Idx != M; ++Idx) { + Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(Idx)); + Value *Lhs = Builder.CreateVectorSplat(N, S); + Value *Vx = Builder.CreateExtractValue(Mat, Idx); + Value *Mul = Builder.CreateFMul(Lhs, Vx); + V = Builder.CreateFAdd(V, Mul); + } + + return mapValue(BV, V); + } + case OpCopyObject: { SPIRVCopyObject *CO = static_cast(BV); AllocaInst *AI = @@ -1680,9 +1770,9 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, case OpBitReverse: { auto *BR = static_cast(BV); - IntegerType *Int32Ty = IntegerType::get(*Context, 32); + auto Ty = transType(BV->getType()); Function *intr = - Intrinsic::getDeclaration(M, llvm::Intrinsic::bitreverse, Int32Ty); + Intrinsic::getDeclaration(M, llvm::Intrinsic::bitreverse, Ty); auto *Call = CallInst::Create(intr, transValue(BR->getOperand(0), F, BB), BR->getName(), BB); return mapValue(BV, Call); @@ -1739,36 +1829,44 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, } case OpFMod: { - // translate OpFMod(a, b) to copysign(frem(a, b), b) + // translate OpFMod(a, b) to: + // r = frem(a, b) + // c = copysign(r, b) + // needs_fixing = islessgreater(r, c) + // result = needs_fixing ? r + b : c + IRBuilder<> Builder(BB); SPIRVFMod *FMod = static_cast(BV); auto Dividend = transValue(FMod->getDividend(), F, BB); auto Divisor = transValue(FMod->getDivisor(), F, BB); - auto FRem = BinaryOperator::CreateFRem(Dividend, Divisor, "frem.res", BB); - - std::string UnmangledName = OCLExtOpMap::map(OpenCLLIB::Copysign); - std::string MangledName = "copysign"; - - std::vector ArgTypes; - ArgTypes.push_back(FRem->getType()); - ArgTypes.push_back(Divisor->getType()); - mangleOpenClBuiltin(UnmangledName, ArgTypes, MangledName); - - auto FT = FunctionType::get(transType(BV->getType()), ArgTypes, false); - auto Func = - Function::Create(FT, GlobalValue::ExternalLinkage, MangledName, M); - Func->setCallingConv(CallingConv::SPIR_FUNC); - if (isFuncNoUnwind()) - Func->addFnAttr(Attribute::NoUnwind); - - std::vector Args; - Args.push_back(FRem); - Args.push_back(Divisor); - - auto Call = CallInst::Create(Func, Args, "copysign", BB); - setCallingConv(Call); - addFnAttr(Call, Attribute::NoUnwind); - return mapValue(BV, Call); + auto FRem = Builder.CreateFRem(Dividend, Divisor, "frem.res"); + auto CopySign = Builder.CreateBinaryIntrinsic( + llvm::Intrinsic::copysign, FRem, Divisor, nullptr, "copysign.res"); + auto FAdd = Builder.CreateFAdd(FRem, Divisor, "fadd.res"); + auto Cmp = Builder.CreateFCmpONE(FRem, CopySign, "cmp.res"); + auto Select = Builder.CreateSelect(Cmp, FAdd, CopySign); + return mapValue(BV, Select); + } + + case OpSMod: { + // translate OpSMod(a, b) to: + // r = srem(a, b) + // needs_fixing = ((a < 0) != (b < 0) && r != 0) + // result = needs_fixing ? r + b : r + IRBuilder<> Builder(BB); + SPIRVSMod *SMod = static_cast(BV); + auto Dividend = transValue(SMod->getDividend(), F, BB); + auto Divisor = transValue(SMod->getDivisor(), F, BB); + auto SRem = Builder.CreateSRem(Dividend, Divisor, "srem.res"); + auto Xor = Builder.CreateXor(Dividend, Divisor, "xor.res"); + auto Zero = ConstantInt::getNullValue(Dividend->getType()); + auto CmpSign = Builder.CreateICmpSLT(Xor, Zero, "cmpsign.res"); + auto CmpSRem = Builder.CreateICmpNE(SRem, Zero, "cmpsrem.res"); + auto Add = Builder.CreateNSWAdd(SRem, Divisor, "add.res"); + auto Cmp = Builder.CreateAnd(CmpSign, CmpSRem, "cmp.res"); + auto Select = Builder.CreateSelect(Cmp, Add, SRem); + return mapValue(BV, Select); } + case OpFNegate: { SPIRVUnary *BC = static_cast(BV); return mapValue( @@ -1953,6 +2051,9 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF) { transValue(BInst, F, BB, false); } } + + transLLVMLoopMetadata(F); + return F; } @@ -2085,26 +2186,49 @@ Instruction *SPIRVToLLVM::transEnqueueKernelBI(SPIRVInstruction *BI, // Find or create enqueue kernel BI declaration auto Ops = BI->getOperands(); bool HasVaargs = Ops.size() > 10; + bool HasEvents = true; + SPIRVValue *EventRet = Ops[5]; + if (EventRet->getOpCode() == OpConstantNull) { + SPIRVValue *NumEvents = Ops[3]; + if (NumEvents->getOpCode() == OpConstant) { + SPIRVConstant *NE = static_cast(NumEvents); + HasEvents = NE->getZExtIntValue() != 0; + } else if (NumEvents->getOpCode() == OpConstantNull) + HasEvents = false; + } + + std::string FName = ""; + if (!HasVaargs && !HasEvents) + FName = "__enqueue_kernel_basic"; + else if (!HasVaargs && HasEvents) + FName = "__enqueue_kernel_basic_events"; + else if (HasVaargs && !HasEvents) + FName = "__enqueue_kernel_varargs"; + else + FName = "__enqueue_kernel_events_varargs"; - std::string FName = HasVaargs ? "__enqueue_kernel_events_varargs" - : "__enqueue_kernel_basic_events"; Function *F = M->getFunction(FName); if (!F) { - Type *EventTy = PointerType::get( - getOrCreateOpaquePtrType(M, SPIR_TYPE_NAME_CLK_EVENT_T, - getOCLOpaqueTypeAddrSpace(OpTypeDeviceEvent)), - SPIRAS_Generic); - SmallVector Tys = { transType(Ops[0]->getType()), // queue Int32Ty, // flags transType(Ops[2]->getType()), // ndrange - Int32Ty, - EventTy, - EventTy, // events - Type::getInt8PtrTy(*Context, SPIRAS_Generic), // block_invoke - Type::getInt8PtrTy(*Context, SPIRAS_Generic) // block_literal }; + if (HasEvents) { + Type *EventTy = + PointerType::get(getOrCreateOpaquePtrType( + M, SPIR_TYPE_NAME_CLK_EVENT_T, + getOCLOpaqueTypeAddrSpace(OpTypeDeviceEvent)), + SPIRAS_Generic); + + Tys.push_back(Int32Ty); + Tys.push_back(EventTy); + Tys.push_back(EventTy); + } + + Tys.push_back(Type::getInt8PtrTy(*Context, SPIRAS_Generic)); + Tys.push_back(Type::getInt8PtrTy(*Context, SPIRAS_Generic)); + if (HasVaargs) { // Number of block invoke arguments (local arguments) Tys.push_back(Int32Ty); @@ -2123,13 +2247,17 @@ Instruction *SPIRVToLLVM::transEnqueueKernelBI(SPIRVInstruction *BI, transValue(Ops[0], F, BB, false), // queue transValue(Ops[1], F, BB, false), // flags transValue(Ops[2], F, BB, false), // ndrange - transValue(Ops[3], F, BB, false), // events number - transDeviceEvent(Ops[4], F, BB), // event_wait_list - transDeviceEvent(Ops[5], F, BB), // event_ret - transBlockInvoke(Ops[6], BB), // block_invoke - transValue(Ops[7], F, BB, false) // block_literal }; + if (HasEvents) { + Args.push_back(transValue(Ops[3], F, BB, false)); // events number + Args.push_back(transDeviceEvent(Ops[4], F, BB)); // event_wait_list + Args.push_back(transDeviceEvent(Ops[5], F, BB)); // event_ret + } + + Args.push_back(transBlockInvoke(Ops[6], BB)); // block_invoke + Args.push_back(transValue(Ops[7], F, BB, false)); // block_literal + if (HasVaargs) { // Number of local arguments Args.push_back(ConstantInt::get(Int32Ty, Ops.size() - 10)); @@ -2380,6 +2508,8 @@ bool SPIRVToLLVM::translate() { if (BV->getStorageClass() != StorageClassFunction) transValue(BV, nullptr, nullptr); } + transGlobalAnnotations(); + // Compile unit might be needed during translation of debug intrinsics. for (SPIRVExtInst *EI : BM->getDebugInstVec()) { // Translate Compile Unit first. @@ -2519,57 +2649,100 @@ void generateIntelFPGAAnnotationForStructMember( } void SPIRVToLLVM::transIntelFPGADecorations(SPIRVValue *BV, Value *V) { - if (BV->isVariable()) { - if (auto AL = dyn_cast(V)) { - IRBuilder<> Builder(AL->getParent()); + if (!BV->isVariable()) + return; + + if (auto AL = dyn_cast(V)) { + IRBuilder<> Builder(AL->getParent()); - SPIRVType *ST = BV->getType()->getPointerElementType(); + SPIRVType *ST = BV->getType()->getPointerElementType(); - Type *Int8PtrTyPrivate = Type::getInt8PtrTy(*Context, SPIRAS_Private); - IntegerType *Int32Ty = IntegerType::get(*Context, 32); + Type *Int8PtrTyPrivate = Type::getInt8PtrTy(*Context, SPIRAS_Private); + IntegerType *Int32Ty = IntegerType::get(*Context, 32); - Value *UndefInt8Ptr = UndefValue::get(Int8PtrTyPrivate); - Value *UndefInt32 = UndefValue::get(Int32Ty); + Value *UndefInt8Ptr = UndefValue::get(Int8PtrTyPrivate); + Value *UndefInt32 = UndefValue::get(Int32Ty); - if (ST->isTypeStruct()) { - SPIRVTypeStruct *STS = static_cast(ST); + if (ST->isTypeStruct()) { + SPIRVTypeStruct *STS = static_cast(ST); - for (SPIRVWord I = 0; I < STS->getMemberCount(); ++I) { - SmallString<256> AnnotStr; - generateIntelFPGAAnnotationForStructMember(ST, I, AnnotStr); - if (!AnnotStr.empty()) { - auto *GS = Builder.CreateGlobalStringPtr(AnnotStr); + for (SPIRVWord I = 0; I < STS->getMemberCount(); ++I) { + SmallString<256> AnnotStr; + generateIntelFPGAAnnotationForStructMember(ST, I, AnnotStr); + if (!AnnotStr.empty()) { + auto *GS = Builder.CreateGlobalStringPtr(AnnotStr); - auto AnnotationFn = llvm::Intrinsic::getDeclaration( - M, Intrinsic::ptr_annotation, Int8PtrTyPrivate); + auto AnnotationFn = llvm::Intrinsic::getDeclaration( + M, Intrinsic::ptr_annotation, Int8PtrTyPrivate); - auto GEP = Builder.CreateConstInBoundsGEP2_32( - AL->getAllocatedType(), AL, 0, I); + auto GEP = Builder.CreateConstInBoundsGEP2_32(AL->getAllocatedType(), + AL, 0, I); - llvm::Value *Args[] = { - Builder.CreateBitCast(GEP, Int8PtrTyPrivate, GEP->getName()), - Builder.CreateBitCast(GS, Int8PtrTyPrivate), UndefInt8Ptr, - UndefInt32}; - Builder.CreateCall(AnnotationFn, Args); - } + llvm::Value *Args[] = { + Builder.CreateBitCast(GEP, Int8PtrTyPrivate, GEP->getName()), + Builder.CreateBitCast(GS, Int8PtrTyPrivate), UndefInt8Ptr, + UndefInt32}; + Builder.CreateCall(AnnotationFn, Args); } } + } - SmallString<256> AnnotStr; - generateIntelFPGAAnnotation(BV, AnnotStr); - if (!AnnotStr.empty()) { - auto *GS = Builder.CreateGlobalStringPtr(AnnotStr); + SmallString<256> AnnotStr; + generateIntelFPGAAnnotation(BV, AnnotStr); + if (!AnnotStr.empty()) { + auto *GS = Builder.CreateGlobalStringPtr(AnnotStr); - auto AnnotationFn = - llvm::Intrinsic::getDeclaration(M, Intrinsic::var_annotation); + auto AnnotationFn = + llvm::Intrinsic::getDeclaration(M, Intrinsic::var_annotation); - llvm::Value *Args[] = { - Builder.CreateBitCast(V, Int8PtrTyPrivate, V->getName()), - Builder.CreateBitCast(GS, Int8PtrTyPrivate), UndefInt8Ptr, - UndefInt32}; - Builder.CreateCall(AnnotationFn, Args); - } + llvm::Value *Args[] = { + Builder.CreateBitCast(V, Int8PtrTyPrivate, V->getName()), + Builder.CreateBitCast(GS, Int8PtrTyPrivate), UndefInt8Ptr, + UndefInt32}; + Builder.CreateCall(AnnotationFn, Args); } + } else if (auto *GV = dyn_cast(V)) { + SmallString<256> AnnotStr; + generateIntelFPGAAnnotation(BV, AnnotStr); + + if (AnnotStr.empty()) + return; + + Constant *StrConstant = + ConstantDataArray::getString(*Context, StringRef(AnnotStr)); + + auto *GS = new GlobalVariable(*GV->getParent(), StrConstant->getType(), + /*IsConstant*/ true, + GlobalValue::PrivateLinkage, StrConstant, ""); + + GS->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + GS->setSection("llvm.metadata"); + + Type *ResType = PointerType::getInt8PtrTy( + GV->getContext(), GV->getType()->getPointerAddressSpace()); + Constant *C = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, ResType); + + Type *Int8PtrTyPrivate = Type::getInt8PtrTy(*Context, SPIRAS_Private); + IntegerType *Int32Ty = Type::getInt32Ty(*Context); + + llvm::Constant *Fields[4] = { + C, ConstantExpr::getBitCast(GS, Int8PtrTyPrivate), + UndefValue::get(Int8PtrTyPrivate), UndefValue::get(Int32Ty)}; + + GlobalAnnotations.push_back(ConstantStruct::getAnon(Fields)); + } +} + +void SPIRVToLLVM::transGlobalAnnotations() { + if (!GlobalAnnotations.empty()) { + Constant *Array = + ConstantArray::get(ArrayType::get(GlobalAnnotations[0]->getType(), + GlobalAnnotations.size()), + GlobalAnnotations); + auto *GV = new GlobalVariable(*M, Array->getType(), /*IsConstant*/ false, + GlobalValue::AppendingLinkage, Array, + "llvm.global.annotations"); + GV->setSection("llvm.metadata"); } } @@ -2734,7 +2907,7 @@ bool SPIRVToLLVM::transKernelMetadata() { return transOCLKernelArgTypeName(Arg); }); // Generate metadata for kernel_arg_name - if (SPIRVGenKernelArgNameMD) { + if (BM->isGenArgNameMDEnabled()) { addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_NAME, BF, F, [=](SPIRVFunctionParameter *Arg) { return MDString::get(*Context, diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.h b/llvm-spirv/lib/SPIRV/SPIRVReader.h index 7e2d1c8eadaa9..34e471e4a9a2a 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.h +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.h @@ -187,6 +187,9 @@ class SPIRVToLLVM { // which are supposed to be replaced by the real values later. typedef std::map SPIRVToLLVMPlaceholderMap; + typedef std::map + SPIRVToLLVMLoopMetadataMap; + private: Module *M; BuiltinVarMap BuiltinGVMap; @@ -198,6 +201,12 @@ class SPIRVToLLVM { SPIRVBlockToLLVMStructMap BlockMap; SPIRVToLLVMPlaceholderMap PlaceholderMap; std::unique_ptr DbgTran; + std::vector GlobalAnnotations; + + // Loops metadata is translated in the end of a function translation. + // This storage contains pairs of translated loop header basic block and loop + // metadata SPIR-V instruction in SPIR-V representation of this basic block. + SPIRVToLLVMLoopMetadataMap FuncLoopMetadataMap; Type *mapType(SPIRVType *BT, Type *T); @@ -246,7 +255,8 @@ class SPIRVToLLVM { Value *oclTransConstantPipeStorage(SPIRV::SPIRVConstantPipeStorage *BCPS); void setName(llvm::Value *V, SPIRVValue *BV); template - void setLLVMLoopMetadata(LoopInstType *LM, BranchInst *BI); + void setLLVMLoopMetadata(const LoopInstType *LM, Instruction *BI); + void transLLVMLoopMetadata(const Function *F); inline llvm::Metadata *getMetadataFromName(std::string Name); inline std::vector getMetadataFromNameAndParameter(std::string Name, SPIRVWord Parameter); @@ -257,6 +267,7 @@ class SPIRVToLLVM { Instruction *transOCLAllAny(SPIRVInstruction *BI, BasicBlock *BB); Instruction *transOCLRelational(SPIRVInstruction *BI, BasicBlock *BB); + void transGlobalAnnotations(); void transIntelFPGADecorations(SPIRVValue *BV, Value *V); }; // class SPIRVToLLVM diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp index e3baa7b8de7ae..8bb2a5162c13b 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp @@ -56,8 +56,6 @@ static cl::opt cl::desc("Specify version of OCL builtins to translate " "to (CL1.2, CL2.0, CL2.1)")); -char SPIRVToOCL::ID = 0; - void SPIRVToOCL::visitCallInst(CallInst &CI) { LLVM_DEBUG(dbgs() << "[visistCallInst] " << CI << '\n'); auto F = CI.getCalledFunction(); diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h index adaf7df6513d0..a582de92c8cbb 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h @@ -49,7 +49,7 @@ namespace SPIRV { class SPIRVToOCL : public ModulePass, public InstVisitor { protected: - SPIRVToOCL() : ModulePass(ID), M(nullptr), Ctx(nullptr) {} + SPIRVToOCL(char &ID) : ModulePass(ID), M(nullptr), Ctx(nullptr) {} public: virtual bool runOnModule(Module &M) = 0; @@ -133,8 +133,6 @@ class SPIRVToOCL : public ModulePass, public InstVisitor { /// using separate maps for OpenCL 1.2 and OpenCL 2.0 virtual Instruction *mutateAtomicName(CallInst *CI, Op OC) = 0; - static char ID; - protected: Module *M; LLVMContext *Ctx; diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL12.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL12.cpp index ddfaaea92848a..716599660ed6d 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL12.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL12.cpp @@ -45,7 +45,7 @@ namespace SPIRV { class SPIRVToOCL12 : public SPIRVToOCL { public: - SPIRVToOCL12() { + SPIRVToOCL12() : SPIRVToOCL(ID) { initializeSPIRVToOCL12Pass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override; @@ -74,18 +74,18 @@ class SPIRVToOCL12 : public SPIRVToOCL { Instruction *visitCallSPIRVAtomicUMinUMax(CallInst *CI, Op OC); /// Transform __spirv_OpAtomicLoad to atomic_add(*ptr, 0) - Instruction *visitCallSPIRVAtomicLoad(CallInst *CI, Op OC); + Instruction *visitCallSPIRVAtomicLoad(CallInst *CI); /// Transform __spirv_OpAtomicStore to atomic_xchg(*ptr, value) - Instruction *visitCallSPIRVAtomicStore(CallInst *CI, Op OC); + Instruction *visitCallSPIRVAtomicStore(CallInst *CI); /// Transform __spirv_OpAtomicFlagClear to atomic_xchg(*ptr, 0) /// with ignoring the result - Instruction *visitCallSPIRVAtomicFlagClear(CallInst *CI, Op OC); + Instruction *visitCallSPIRVAtomicFlagClear(CallInst *CI); /// Transform __spirv_OpAtomicFlagTestAndTest to /// (bool)atomic_xchg(*ptr, 1) - Instruction *visitCallSPIRVAtomicFlagTestAndSet(CallInst *CI, Op OC); + Instruction *visitCallSPIRVAtomicFlagTestAndSet(CallInst *CI); /// Transform __spirv_OpAtomicCompareExchange and /// __spirv_OpAtomicCompareExchangeWeak into atomic_cmpxchg. There is no @@ -97,15 +97,23 @@ class SPIRVToOCL12 : public SPIRVToOCL { /// Transform atomic builtin name into correct ocl-dependent name Instruction *mutateAtomicName(CallInst *CI, Op OC) override; + + /// Transform SPIR-V atomic instruction opcode into OpenCL 1.2 builtin name. + /// Depending on the type, the return name starts with "atomic_" for 32-bit + /// types or with "atom_" for 64-bit types, as specified by + /// cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions. + std::string mapAtomicName(Op OC, Type *Ty); + + static char ID; }; +char SPIRVToOCL12::ID = 0; + bool SPIRVToOCL12::runOnModule(Module &Module) { M = &Module; Ctx = &M->getContext(); visit(*M); - translateMangledAtomicTypeName(); - eraseUselessFunctions(&Module); LLVM_DEBUG(dbgs() << "After SPIRVToOCL12:\n" << *M); @@ -186,7 +194,7 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicIncDec(CallInst *CI, Op OC) { M, CI, [=](CallInst *, std::vector &Args) { Args.resize(1); - return OCLSPIRVBuiltinMap::rmap(OC); + return mapAtomicName(OC, CI->getType()); }, &Attrs); } @@ -205,7 +213,7 @@ CallInst *SPIRVToOCL12::mutateCommonAtomicArguments(CallInst *CI, Op OC) { auto StartIdx = Ptr + 1; auto StopIdx = StartIdx + ArgsToRemove; Args.erase(Args.begin() + StartIdx, Args.begin() + StopIdx); - return OCL12SPIRVBuiltinMap::rmap(OC); + return mapAtomicName(OC, CI->getType()); }, &Attrs); } @@ -217,13 +225,13 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicUMinUMax(CallInst *CI, Op OC) { [=](CallInst *, std::vector &Args) { std::swap(Args[1], Args[3]); Args.resize(2); - return OCL12SPIRVBuiltinMap::rmap(OC == OpAtomicUMin ? OpAtomicSMin - : OpAtomicSMax); + return mapAtomicName(OC == OpAtomicUMin ? OpAtomicSMin : OpAtomicSMax, + CI->getType()); }, &Attrs); } -Instruction *SPIRVToOCL12::visitCallSPIRVAtomicLoad(CallInst *CI, Op OC) { +Instruction *SPIRVToOCL12::visitCallSPIRVAtomicLoad(CallInst *CI) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); return mutateCallInstOCL( M, CI, @@ -233,12 +241,12 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicLoad(CallInst *CI, Op OC) { // Emit this builtin via call of atomic_add(*p, 0). Type *ptrElemTy = Args[0]->getType()->getPointerElementType(); Args.push_back(Constant::getNullValue(ptrElemTy)); - return OCL12SPIRVBuiltinMap::rmap(OpAtomicIAdd); + return mapAtomicName(OpAtomicIAdd, ptrElemTy); }, &Attrs); } -Instruction *SPIRVToOCL12::visitCallSPIRVAtomicStore(CallInst *CI, Op OC) { +Instruction *SPIRVToOCL12::visitCallSPIRVAtomicStore(CallInst *CI) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); return mutateCallInstOCL( M, CI, @@ -248,12 +256,12 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicStore(CallInst *CI, Op OC) { // The type of the value pointed to by Pointer (1st argument) // must be the same as Result Type. RetTy = Args[0]->getType()->getPointerElementType(); - return OCL12SPIRVBuiltinMap::rmap(OpAtomicExchange); + return mapAtomicName(OpAtomicExchange, RetTy); }, [=](CallInst *CI) -> Instruction * { return CI; }, &Attrs); } -Instruction *SPIRVToOCL12::visitCallSPIRVAtomicFlagClear(CallInst *CI, Op OC) { +Instruction *SPIRVToOCL12::visitCallSPIRVAtomicFlagClear(CallInst *CI) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); return mutateCallInstOCL( M, CI, @@ -261,13 +269,12 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicFlagClear(CallInst *CI, Op OC) { Args.resize(1); Args.push_back(getInt32(M, 0)); RetTy = Type::getInt32Ty(M->getContext()); - return OCL12SPIRVBuiltinMap::rmap(OpAtomicExchange); + return mapAtomicName(OpAtomicExchange, RetTy); }, [=](CallInst *CI) -> Instruction * { return CI; }, &Attrs); } -Instruction *SPIRVToOCL12::visitCallSPIRVAtomicFlagTestAndSet(CallInst *CI, - Op OC) { +Instruction *SPIRVToOCL12::visitCallSPIRVAtomicFlagTestAndSet(CallInst *CI) { AttributeList Attrs = CI->getCalledFunction()->getAttributes(); return mutateCallInstOCL( M, CI, @@ -275,7 +282,7 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicFlagTestAndSet(CallInst *CI, Args.resize(1); Args.push_back(getInt32(M, 1)); RetTy = Type::getInt32Ty(M->getContext()); - return OCL12SPIRVBuiltinMap::rmap(OpAtomicExchange); + return mapAtomicName(OpAtomicExchange, RetTy); }, [=](CallInst *CI) -> Instruction * { return BitCastInst::Create(Instruction::Trunc, CI, @@ -295,7 +302,9 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicCmpExchg(CallInst *CI, Op OC) { // has Value and Comparator in different order than ocl functions // both of them are translated into atomic_cmpxchg std::swap(Args[1], Args[2]); - return OCL12SPIRVBuiltinMap::rmap(OpAtomicCompareExchange); + // Type of return value, pointee of the pointer operand, other operands, + // all match, and should be integer scalar types. + return mapAtomicName(OpAtomicCompareExchange, CI->getType()); }, &Attrs); } @@ -304,16 +313,16 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC) { Instruction *NewCI = nullptr; switch (OC) { case OpAtomicLoad: - NewCI = visitCallSPIRVAtomicLoad(CI, OC); + NewCI = visitCallSPIRVAtomicLoad(CI); break; case OpAtomicStore: - NewCI = visitCallSPIRVAtomicStore(CI, OC); + NewCI = visitCallSPIRVAtomicStore(CI); break; case OpAtomicFlagClear: - NewCI = visitCallSPIRVAtomicFlagClear(CI, OC); + NewCI = visitCallSPIRVAtomicFlagClear(CI); break; case OpAtomicFlagTestAndSet: - NewCI = visitCallSPIRVAtomicFlagTestAndSet(CI, OC); + NewCI = visitCallSPIRVAtomicFlagTestAndSet(CI); break; case OpAtomicUMin: case OpAtomicUMax: @@ -340,6 +349,12 @@ Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) { &Attrs); } +std::string SPIRVToOCL12::mapAtomicName(Op OC, Type *Ty) { + std::string Prefix = Ty->isIntegerTy(64) ? kOCLBuiltinName::AtomPrefix + : kOCLBuiltinName::AtomicPrefix; + return Prefix += OCL12SPIRVBuiltinMap::rmap(OC); +} + } // namespace SPIRV INITIALIZE_PASS(SPIRVToOCL12, "spvtoocl12", diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp index 93f4319f70f90..833313d469e28 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp @@ -44,7 +44,7 @@ namespace SPIRV { class SPIRVToOCL20 : public SPIRVToOCL { public: - SPIRVToOCL20() { + SPIRVToOCL20() : SPIRVToOCL(ID) { initializeSPIRVToOCL20Pass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override; @@ -81,8 +81,12 @@ class SPIRVToOCL20 : public SPIRVToOCL { /// Transform __spirv_OpAtomicCompareExchange/Weak into /// compare_exchange_strong/weak_explicit Instruction *visitCallSPIRVAtomicCmpExchg(CallInst *CI, Op OC) override; + + static char ID; }; +char SPIRVToOCL20::ID = 0; + bool SPIRVToOCL20::runOnModule(Module &Module) { M = &Module; Ctx = &M->getContext(); @@ -283,7 +287,8 @@ Instruction *SPIRVToOCL20::visitCallSPIRVAtomicCmpExchg(CallInst *CI, Op OC) { ->getParent() ->getEntryBlock() .getFirstInsertionPt())); - PExpected->setAlignment(MaybeAlign(CI->getType()->getScalarSizeInBits() / 8)); + PExpected->setAlignment( + MaybeAlign(CI->getType()->getScalarSizeInBits() / 8)); new StoreInst(Args[1], PExpected, PInsertBefore); unsigned AddrSpc = SPIRAS_Generic; Type *PtrTyAS = diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index cdb6c9bff93b4..a5423217f272b 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -461,13 +461,14 @@ bool getSPIRVBuiltin(const std::string &OrigName, spv::BuiltIn &B) { return getByName(R.str(), B); } -// Enqueue kernel, kernel query and pipe built-ins are not mangled +// Enqueue kernel, kernel query, pipe and address space cast built-ins +// are not mangled. bool isNonMangledOCLBuiltin(const StringRef &Name) { if (!Name.startswith("__")) return false; return isEnqueueKernelBI(Name) || isKernelQueryBI(Name) || - isPipeBI(Name.drop_front(2)); + isPipeOrAddressSpaceCastBI(Name.drop_front(2)); } bool oclIsBuiltin(const StringRef &Name, std::string *DemangledName, @@ -1391,10 +1392,12 @@ bool eraseUselessFunctions(Module *M) { } // The mangling algorithm follows OpenCL pipe built-ins clang 3.8 CodeGen rules. -static SPIR::MangleError manglePipeBuiltin(const SPIR::FunctionDescriptor &Fd, - std::string &MangledName) { - assert(OCLUtil::isPipeBI(Fd.Name) && - "Method is expected to be called only for pipe builtins!"); +static SPIR::MangleError +manglePipeOrAddressSpaceCastBuiltin(const SPIR::FunctionDescriptor &Fd, + std::string &MangledName) { + assert(OCLUtil::isPipeOrAddressSpaceCastBI(Fd.Name) && + "Method is expected to be called only for pipe and address space cast " + "builtins!"); if (Fd.isNull()) { MangledName.assign(SPIR::FunctionDescriptor::nullString()); return SPIR::MANGLE_NULL_FUNC_DESCRIPTOR; @@ -1443,8 +1446,8 @@ std::string mangleBuiltin(const std::string &UniqName, SPIR::NameMangler Mangler(SPIR::SPIR20); Mangler.mangle(FD, MangledName); #else - if (OCLUtil::isPipeBI(BtnInfo->getUnmangledName())) { - manglePipeBuiltin(FD, MangledName); + if (OCLUtil::isPipeOrAddressSpaceCastBI(BtnInfo->getUnmangledName())) { + manglePipeOrAddressSpaceCastBuiltin(FD, MangledName); } else { SPIR::NameMangler Mangler(SPIR::SPIR20); Mangler.mangle(FD, MangledName); diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index d905d0b5fc34d..ba85085e22981 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -56,6 +56,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -663,10 +664,20 @@ SPIRVInstruction *LLVMToSPIRV::transBinaryInst(BinaryOperator *B, } SPIRVInstruction *LLVMToSPIRV::transCmpInst(CmpInst *Cmp, SPIRVBasicBlock *BB) { - auto Op0 = transValue(Cmp->getOperand(0), BB); - SPIRVInstruction *BI = BM->addCmpInst( - transBoolOpCode(Op0, CmpMap::map(Cmp->getPredicate())), - transType(Cmp->getType()), Op0, transValue(Cmp->getOperand(1), BB), BB); + auto *Op0 = Cmp->getOperand(0); + SPIRVValue *TOp0 = transValue(Op0, BB); + SPIRVValue *TOp1 = transValue(Cmp->getOperand(1), BB); + // TODO: once the translator supports SPIR-V 1.4, update the condition below: + // if (/* */->isPointerTy() && /* it is not allowed to use SPIR-V 1.4 */) + if (Op0->getType()->isPointerTy()) { + unsigned AS = cast(Op0->getType())->getAddressSpace(); + SPIRVType *Ty = transType(getSizetType(AS)); + TOp0 = BM->addUnaryInst(OpConvertPtrToU, Ty, TOp0, BB); + TOp1 = BM->addUnaryInst(OpConvertPtrToU, Ty, TOp1, BB); + } + SPIRVInstruction *BI = + BM->addCmpInst(transBoolOpCode(TOp0, CmpMap::map(Cmp->getPredicate())), + transType(Cmp->getType()), TOp0, TOp1, BB); return BI; } @@ -714,6 +725,11 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V, return transFunctionDecl(F); if (auto GV = dyn_cast(V)) { + if (GV->getName() == "llvm.global.annotations") { + transGlobalAnnotation(GV); + return nullptr; + } + llvm::PointerType *Ty = GV->getType(); // Though variables with common linkage type are initialized by 0, // they can be represented in SPIR-V as uninitialized variables with @@ -1207,6 +1223,11 @@ void addIntelFPGADecorations( return; for (const auto &I : Decorations) { + // Such decoration already exists on a type, skip it + if (E->hasDecorate(I.first, /*Index=*/0, /*Result=*/nullptr)) { + continue; + } + switch (I.first) { case DecorationUserSemantic: E->addDecorate(new SPIRVDecorateUserSemanticAttr(E, I.second)); @@ -1319,6 +1340,15 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, SPIRVValue *Op = transValue(II->getArgOperand(0), BB); return BM->addUnaryInst(OpBitReverse, Ty, Op, BB); } + case Intrinsic::ctlz: + case Intrinsic::cttz: { + SPIRVWord ExtOp = II->getIntrinsicID() == Intrinsic::ctlz ? OpenCLLIB::Clz + : OpenCLLIB::Ctz; + SPIRVType *Ty = transType(II->getType()); + std::vector Ops(1, transValue(II->getArgOperand(0), BB)); + return BM->addExtInst(Ty, BM->getExtInstSetId(SPIRVEIS_OpenCL), ExtOp, Ops, + BB); + } case Intrinsic::fmuladd: { // For llvm.fmuladd.* fusion is not guaranteed. If a fused multiply-add // is required the corresponding llvm.fma.* intrinsic function should be @@ -1395,9 +1425,8 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, if (!GEP) return nullptr; Constant *C = cast(GEP->getOperand(0)); - // TODO: Refactor to use getConstantStringInfo() - StringRef AnnotationString = - cast(C->getOperand(0))->getAsCString(); + StringRef AnnotationString; + getConstantStringInfo(C, AnnotationString); if (AnnotationString == kOCLBuiltinName::FPGARegIntel) { if (BM->isAllowedToUseExtension(ExtensionID::SPV_INTEL_fpga_reg)) @@ -1419,9 +1448,8 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, GetElementPtrInst *GEP = cast(II->getArgOperand(1)); Constant *C = cast(GEP->getOperand(0)); - // TODO: Refactor to use getConstantStringInfo() - StringRef AnnotationString = - cast(C->getOperand(0))->getAsString(); + StringRef AnnotationString; + getConstantStringInfo(C, AnnotationString); std::vector> Decorations; if (BB->getModule()->isAllowedToUseExtension( @@ -1433,8 +1461,7 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, // If we didn't find any IntelFPGA-specific decorations, let's add the whole // annotation string as UserSemantic Decoration if (Decorations.empty()) { - SV->addDecorate(new SPIRVDecorateUserSemanticAttr( - SV, AnnotationString.substr(0, AnnotationString.size() - 1))); + SV->addDecorate(new SPIRVDecorateUserSemanticAttr(SV, AnnotationString)); } else { addIntelFPGADecorations(SV, Decorations); } @@ -1443,9 +1470,8 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, case Intrinsic::ptr_annotation: { GetElementPtrInst *GEP = dyn_cast(II->getArgOperand(1)); Constant *C = dyn_cast(GEP->getOperand(0)); - // TODO: Refactor to use getConstantStringInfo() - StringRef AnnotationString = - dyn_cast(C->getOperand(0))->getAsCString(); + StringRef AnnotationString; + getConstantStringInfo(C, AnnotationString); // Strip all bitcast and addrspace casts from the pointer argument: // llvm annotation intrinsic only takes i8*, so the original pointer @@ -1601,6 +1627,40 @@ SPIRVWord LLVMToSPIRV::transFunctionControlMask(Function *F) { return FCM; } +void LLVMToSPIRV::transGlobalAnnotation(GlobalVariable *V) { + SPIRVDBG(dbgs() << "[transGlobalAnnotation] " << *V << '\n'); + + // @llvm.global.annotations is an array that contains structs with 4 fields. + // Get the array of structs with metadata + ConstantArray *CA = cast(V->getOperand(0)); + for (Value *Op : CA->operands()) { + ConstantStruct *CS = cast(Op); + // The first field of the struct contains a pointer to annotated variable + Value *AnnotatedVar = CS->getOperand(0)->stripPointerCasts(); + SPIRVValue *SV = transValue(AnnotatedVar, nullptr); + + // The second field contains a pointer to a global annotation string + GlobalVariable *GV = + cast(CS->getOperand(1)->stripPointerCasts()); + + StringRef AnnotationString; + getConstantStringInfo(GV, AnnotationString); + + std::vector> Decorations; + if (BM->isAllowedToUseExtension( + ExtensionID::SPV_INTEL_fpga_memory_attributes)) + Decorations = tryParseIntelFPGAAnnotationString(AnnotationString); + + // If we didn't find any IntelFPGA-specific decorations, let's + // add the whole annotation string as UserSemantic Decoration + if (Decorations.empty()) { + SV->addDecorate(new SPIRVDecorateUserSemanticAttr(SV, AnnotationString)); + } else { + addIntelFPGADecorations(SV, Decorations); + } + } +} + bool LLVMToSPIRV::transGlobalVariables() { for (auto I = M->global_begin(), E = M->global_end(); I != E; ++I) { if (!transValue(&(*I), nullptr)) @@ -1760,9 +1820,9 @@ bool LLVMToSPIRV::translate() { return true; } -llvm::IntegerType *LLVMToSPIRV::getSizetType() { +llvm::IntegerType *LLVMToSPIRV::getSizetType(unsigned AS) { return IntegerType::getIntNTy(M->getContext(), - M->getDataLayout().getPointerSizeInBits()); + M->getDataLayout().getPointerSizeInBits(AS)); } void LLVMToSPIRV::oclGetMutatedArgumentTypesByBuiltin( diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.h b/llvm-spirv/lib/SPIRV/SPIRVWriter.h index d322605540019..d480f158d1f9d 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.h +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.h @@ -110,6 +110,7 @@ class LLVMToSPIRV : public ModulePass { SPIRVValue *transConstant(Value *V); SPIRVValue *transValue(Value *V, SPIRVBasicBlock *BB, bool CreateForward = true); + void transGlobalAnnotation(GlobalVariable *V); SPIRVValue *transValueWithoutDecoration(Value *V, SPIRVBasicBlock *BB, bool CreateForward = true); @@ -131,7 +132,7 @@ class LLVMToSPIRV : public ModulePass { SPIRVValue *mapValue(Value *V, SPIRVValue *BV); SPIRVType *getSPIRVType(Type *T) { return TypeMap[T]; } SPIRVErrorLog &getErrorLog() { return BM->getErrorLog(); } - llvm::IntegerType *getSizetType(); + llvm::IntegerType *getSizetType(unsigned AS = 0); std::vector transValue(const std::vector &Values, SPIRVBasicBlock *BB); std::vector transValue(const std::vector &Values, diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.h index fd141e865927d..ccdf2a20fcd4f 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.h @@ -767,7 +767,6 @@ template bool isa(SPIRVEntry *E) { #define _SPIRV_OP(x) typedef SPIRVEntryUnimplemented SPIRV##x; _SPIRV_OP(Nop) _SPIRV_OP(SourceContinued) -_SPIRV_OP(TypeMatrix) _SPIRV_OP(TypeRuntimeArray) _SPIRV_OP(SpecConstantTrue) _SPIRV_OP(SpecConstantFalse) @@ -787,10 +786,7 @@ _SPIRV_OP(ImageDrefGather) _SPIRV_OP(QuantizeToF16) _SPIRV_OP(Transpose) _SPIRV_OP(ArrayLength) -_SPIRV_OP(SMod) -_SPIRV_OP(MatrixTimesScalar) _SPIRV_OP(VectorTimesMatrix) -_SPIRV_OP(MatrixTimesVector) _SPIRV_OP(MatrixTimesMatrix) _SPIRV_OP(OuterProduct) _SPIRV_OP(IAddCarry) diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h index f8e428844b2e0..82325048d9467 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -1057,8 +1057,8 @@ class SPIRVLoopMerge : public SPIRVInstruction { SPIRVId getMergeBlock() { return MergeBlock; } SPIRVId getContinueTarget() { return ContinueTarget; } - SPIRVWord getLoopControl() { return LoopControl; } - std::vector getLoopControlParameters() { + SPIRVWord getLoopControl() const { return LoopControl; } + std::vector getLoopControlParameters() const { return LoopControlParameters; } @@ -1105,7 +1105,7 @@ class SPIRVSwitch : public SPIRVInstruction { setHasNoId(); setHasNoType(); } - std::vector getPairs() { return getValues(Pairs); } + std::vector getPairs() const { return getValues(Pairs); } SPIRVValue *getSelect() const { return getValue(Select); } SPIRVBasicBlock *getDefault() const { return static_cast(getValue(Default)); @@ -1153,22 +1153,21 @@ class SPIRVSwitch : public SPIRVInstruction { std::vector Pairs; }; -class SPIRVFMod : public SPIRVInstruction { +class SPIRVFSMod : public SPIRVInstruction { public: - static const Op OC = OpFMod; static const SPIRVWord FixedWordCount = 4; - // Complete constructor - SPIRVFMod(SPIRVType *TheType, SPIRVId TheId, SPIRVId TheDividend, - SPIRVId TheDivisor, SPIRVBasicBlock *BB) + SPIRVFSMod(Op OC, SPIRVType *TheType, SPIRVId TheId, SPIRVId TheDividend, + SPIRVId TheDivisor, SPIRVBasicBlock *BB) : SPIRVInstruction(5, OC, TheType, TheId, BB), Dividend(TheDividend), Divisor(TheDivisor) { validate(); assert(BB && "Invalid BB"); } // Incomplete constructor - SPIRVFMod() + SPIRVFSMod(Op OC) : SPIRVInstruction(OC), Dividend(SPIRVID_INVALID), Divisor(SPIRVID_INVALID) {} + SPIRVValue *getDividend() const { return getValue(Dividend); } SPIRVValue *getDivisor() const { return getValue(Divisor); } @@ -1195,6 +1194,28 @@ class SPIRVFMod : public SPIRVInstruction { SPIRVId Divisor; }; +class SPIRVFMod : public SPIRVFSMod { +public: + static const Op OC = OpFMod; + // Complete constructor + SPIRVFMod(SPIRVType *TheType, SPIRVId TheId, SPIRVId TheDividend, + SPIRVId TheDivisor, SPIRVBasicBlock *BB) + : SPIRVFSMod(OC, TheType, TheId, TheDividend, TheDivisor, BB) {} + // Incomplete constructor + SPIRVFMod() : SPIRVFSMod(OC) {} +}; + +class SPIRVSMod : public SPIRVFSMod { +public: + static const Op OC = OpSMod; + // Complete constructor + SPIRVSMod(SPIRVType *TheType, SPIRVId TheId, SPIRVId TheDividend, + SPIRVId TheDivisor, SPIRVBasicBlock *BB) + : SPIRVFSMod(OC, TheType, TheId, TheDividend, TheDivisor, BB) {} + // Incomplete constructor + SPIRVSMod() : SPIRVFSMod(OC) {} +}; + class SPIRVVectorTimesScalar : public SPIRVInstruction { public: static const Op OC = OpVectorTimesScalar; @@ -1248,6 +1269,120 @@ class SPIRVVectorTimesScalar : public SPIRVInstruction { SPIRVId Scalar; }; +class SPIRVMatrixTimesScalar : public SPIRVInstruction { +public: + static const Op OC = OpMatrixTimesScalar; + static const SPIRVWord FixedWordCount = 4; + // Complete constructor + SPIRVMatrixTimesScalar(SPIRVType *TheType, SPIRVId TheId, SPIRVId TheMatrix, + SPIRVId TheScalar, SPIRVBasicBlock *BB) + : SPIRVInstruction(5, OC, TheType, TheId, BB), Matrix(TheMatrix), + Scalar(TheScalar) { + validate(); + assert(BB && "Invalid BB"); + } + // Incomplete constructor + SPIRVMatrixTimesScalar() + : SPIRVInstruction(OC), Matrix(SPIRVID_INVALID), Scalar(SPIRVID_INVALID) { + } + SPIRVValue *getMatrix() const { return getValue(Matrix); } + SPIRVValue *getScalar() const { return getValue(Scalar); } + + std::vector getOperands() override { + std::vector Operands; + Operands.push_back(Matrix); + Operands.push_back(Scalar); + return getValues(Operands); + } + + void setWordCount(SPIRVWord FixedWordCount) override { + SPIRVEntry::setWordCount(FixedWordCount); + } + + _SPIRV_DEF_ENCDEC4(Type, Id, Matrix, Scalar) + + void validate() const override { + SPIRVInstruction::validate(); + if (getValue(Matrix)->isForward() || getValue(Scalar)->isForward()) + return; + + SPIRVType *Ty = getType()->getScalarType(); + SPIRVType *MTy = getValueType(Matrix)->getScalarType(); + SPIRVType *STy = getValueType(Scalar); + + (void)Ty; + (void)MTy; + (void)STy; + assert(Ty->isTypeFloat() && "Invalid result type for OpMatrixTimesScalar"); + assert(MTy->isTypeFloat() && "Invalid Matrix type for OpMatrixTimesScalar"); + assert(STy->isTypeFloat() && "Invalid Scalar type for OpMatrixTimesScalar"); + assert(Ty == MTy && Ty == STy && "Mismatch float type"); + } + +private: + SPIRVId Matrix; + SPIRVId Scalar; +}; + +class SPIRVMatrixTimesVector : public SPIRVInstruction { +public: + static const Op OC = OpMatrixTimesVector; + static const SPIRVWord FixedWordCount = 4; + + // Complete constructor + SPIRVMatrixTimesVector(SPIRVType *TheType, SPIRVId TheId, SPIRVId TheMatrix, + SPIRVId TheVector, SPIRVBasicBlock *BB) + : SPIRVInstruction(5, OC, TheType, TheId, BB), Matrix(TheMatrix), + Vector(TheVector) { + validate(); + assert(BB && "Invalid BB"); + } + + // Incomplete constructor + SPIRVMatrixTimesVector() + : SPIRVInstruction(OC), Matrix(SPIRVID_INVALID), Vector(SPIRVID_INVALID) { + } + + SPIRVValue *getMatrix() const { return getValue(Matrix); } + + SPIRVValue *getVector() const { return getValue(Vector); } + + std::vector getOperands() override { + std::vector Operands; + Operands.push_back(Matrix); + Operands.push_back(Vector); + return getValues(Operands); + } + + void setWordCount(SPIRVWord FixedWordCount) override { + SPIRVEntry::setWordCount(FixedWordCount); + } + + _SPIRV_DEF_ENCDEC4(Type, Id, Matrix, Vector) + + void validate() const override { + SPIRVInstruction::validate(); + if (getValue(Matrix)->isForward() || getValue(Vector)->isForward()) + return; + SPIRVType *Ty = getType()->getScalarType(); + SPIRVType *MTy = getValueType(Matrix)->getScalarType(); + SPIRVType *VTy = getValueType(Vector)->getScalarType(); + + (void)Ty; + (void)MTy; + (void)VTy; + assert(Ty->isTypeFloat() && "Invalid result type for OpMatrixTimesVector"); + assert(MTy->isTypeFloat() && "Invalid Matrix type for OpMatrixTimesVector"); + assert(VTy->isTypeFloat() && "Invalid Vector type for OpMatrixTimesVector"); + + assert(Ty == MTy && Ty == VTy && "Mismatch float type"); + } + +private: + SPIRVId Matrix; + SPIRVId Vector; +}; + class SPIRVUnary : public SPIRVInstTemplateBase { protected: void validate() const override { @@ -1361,9 +1496,9 @@ class SPIRVLoopControlINTEL : public SPIRVInstruction { setHasNoType(); } - SPIRVWord getLoopControl() { return LoopControl; } + SPIRVWord getLoopControl() const { return LoopControl; } - std::vector getLoopControlParameters() { + std::vector getLoopControlParameters() const { return LoopControlParameters; } @@ -2319,6 +2454,10 @@ class SPIRVSubgroupShuffleINTELInstBase : public SPIRVInstTemplateBase { SPIRVCapVec getRequiredCapability() const override { return getVec(CapabilitySubgroupShuffleINTEL); } + + SPIRVExtSet getRequiredExtensions() const override { + return getSet(ExtensionID::SPV_INTEL_subgroups); + } }; #define _SPIRV_OP(x, ...) \ @@ -2337,6 +2476,10 @@ class SPIRVSubgroupBufferBlockIOINTELInstBase : public SPIRVInstTemplateBase { SPIRVCapVec getRequiredCapability() const override { return getVec(CapabilitySubgroupBufferBlockIOINTEL); } + + SPIRVExtSet getRequiredExtensions() const override { + return getSet(ExtensionID::SPV_INTEL_subgroups); + } }; #define _SPIRV_OP(x, ...) \ @@ -2353,6 +2496,10 @@ class SPIRVSubgroupImageBlockIOINTELInstBase : public SPIRVInstTemplateBase { SPIRVCapVec getRequiredCapability() const override { return getVec(CapabilitySubgroupImageBlockIOINTEL); } + + SPIRVExtSet getRequiredExtensions() const override { + return getSet(ExtensionID::SPV_INTEL_subgroups); + } }; #define _SPIRV_OP(x, ...) \ diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.cpp b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.cpp index aa5b48c66a8c3..2cb364ea7dd0a 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.cpp +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.cpp @@ -373,6 +373,14 @@ class SPIRVModuleImpl : public SPIRVModule { SPIRVId TheVector, SPIRVId TheScalar, SPIRVBasicBlock *BB) override; + SPIRVInstruction *addMatrixTimesScalarInst(SPIRVType *TheType, + SPIRVId TheMatrix, + SPIRVId TheScalar, + SPIRVBasicBlock *BB) override; + SPIRVInstruction *addMatrixTimesVectorInst(SPIRVType *TheType, + SPIRVId TheMatrix, + SPIRVId TheVector, + SPIRVBasicBlock *BB) override; SPIRVInstruction *addUnaryInst(Op, SPIRVType *, SPIRVValue *, SPIRVBasicBlock *) override; SPIRVInstruction *addVariable(SPIRVType *, bool, SPIRVLinkageTypeKind, @@ -1063,6 +1071,22 @@ SPIRVModuleImpl::addVectorTimesScalarInst(SPIRVType *TheType, SPIRVId TheVector, new SPIRVVectorTimesScalar(TheType, getId(), TheVector, TheScalar, BB)); } +SPIRVInstruction * +SPIRVModuleImpl::addMatrixTimesScalarInst(SPIRVType *TheType, SPIRVId TheMatrix, + SPIRVId TheScalar, + SPIRVBasicBlock *BB) { + return BB->addInstruction( + new SPIRVMatrixTimesScalar(TheType, getId(), TheMatrix, TheScalar, BB)); +} + +SPIRVInstruction * +SPIRVModuleImpl::addMatrixTimesVectorInst(SPIRVType *TheType, SPIRVId TheMatrix, + SPIRVId TheVector, + SPIRVBasicBlock *BB) { + return BB->addInstruction( + new SPIRVMatrixTimesVector(TheType, getId(), TheMatrix, TheVector, BB)); +} + SPIRVInstruction * SPIRVModuleImpl::addGroupInst(Op OpCode, SPIRVType *Type, Scope Scope, const std::vector &Ops, diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h index d72676379c223..225f1f77d551b 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h @@ -374,6 +374,14 @@ class SPIRVModule { SPIRVId TheVector, SPIRVId TheScalar, SPIRVBasicBlock *BB) = 0; + virtual SPIRVInstruction *addMatrixTimesScalarInst(SPIRVType *TheType, + SPIRVId TheMatrix, + SPIRVId TheScalar, + SPIRVBasicBlock *BB) = 0; + virtual SPIRVInstruction *addMatrixTimesVectorInst(SPIRVType *TheType, + SPIRVId TheMatrix, + SPIRVId TheVector, + SPIRVBasicBlock *BB) = 0; virtual SPIRVInstruction *addUnaryInst(Op, SPIRVType *, SPIRVValue *, SPIRVBasicBlock *) = 0; virtual SPIRVInstruction *addVariable(SPIRVType *, bool, SPIRVLinkageTypeKind, @@ -427,6 +435,10 @@ class SPIRVModule { return true; } + virtual bool isGenArgNameMDEnabled() const final { + return TranslationOpts.isGenArgNameMDEnabled(); + } + // I/O functions friend spv_ostream &operator<<(spv_ostream &O, SPIRVModule &M); friend std::istream &operator>>(std::istream &I, SPIRVModule &M); diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVStream.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVStream.h index c0cc69b768df0..d7c26614aff11 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVStream.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVStream.h @@ -103,12 +103,38 @@ const SPIRVDecoder &decodeBinary(const SPIRVDecoder &I, T &V) { return I; } +#ifdef _SPIRV_SUPPORT_TEXT_FMT +/// Skip comment and whitespace. Comment starts with ';', ends with '\n'. +inline std::istream &skipcomment(std::istream &IS) { + if (IS.eof() || IS.bad()) + return IS; + + char C = IS.peek(); + + while (std::char_traits::not_eof(C) && std::isspace(C)) { + IS.get(); + C = IS.peek(); + } + + while (std::char_traits::not_eof(C) && C == ';') { + IS.ignore(std::numeric_limits::max(), '\n'); + C = IS.peek(); + while (std::char_traits::not_eof(C) && std::isspace(C)) { + IS.get(); + C = IS.peek(); + } + } + + return IS; +} +#endif + template const SPIRVDecoder &operator>>(const SPIRVDecoder &I, T &V) { #ifdef _SPIRV_SUPPORT_TEXT_FMT if (SPIRVUseTextFormat) { uint32_t W; - I.IS >> W; + I.IS >> skipcomment >> W; V = static_cast(W); SPIRVDBG(spvdbgs() << "Read word: W = " << W << " V = " << V << '\n'); return I; diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.cpp b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.cpp index 8187a7e749048..72a94cce7d026 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.cpp +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.cpp @@ -114,6 +114,36 @@ SPIRVType *SPIRVType::getVectorComponentType() const { return static_cast(this)->getComponentType(); } +SPIRVWord SPIRVType::getMatrixColumnCount() const { + assert(OpCode == OpTypeMatrix && "Not matrix type"); + return static_cast(this)->getColumnCount(); +} + +SPIRVType *SPIRVType::getMatrixColumnType() const { + assert(OpCode == OpTypeMatrix && "Not matrix type"); + return static_cast(this)->getColumnType(); +} + +SPIRVType *SPIRVType::getScalarType() const { + switch (OpCode) { + case OpTypePointer: + return getPointerElementType()->getScalarType(); + case OpTypeArray: + return getArrayElementType(); + case OpTypeVector: + return getVectorComponentType(); + case OpTypeMatrix: + return getMatrixColumnType()->getVectorComponentType(); + case OpTypeInt: + case OpTypeFloat: + case OpTypeBool: + return const_cast(this); + default: + break; + } + return nullptr; +} + bool SPIRVType::isTypeVoid() const { return OpCode == OpTypeVoid; } bool SPIRVType::isTypeArray() const { return OpCode == OpTypeArray; } diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h index a3ce1d0ab8af4..9cb0041a012c9 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h @@ -75,6 +75,9 @@ class SPIRVType : public SPIRVEntry { SPIRVWord getStructMemberCount() const; SPIRVWord getVectorComponentCount() const; SPIRVType *getVectorComponentType() const; + SPIRVWord getMatrixColumnCount() const; + SPIRVType *getMatrixColumnType() const; + SPIRVType *getScalarType() const; bool isTypeVoid() const; bool isTypeArray() const; @@ -310,6 +313,48 @@ class SPIRVTypeVector : public SPIRVType { SPIRVWord CompCount; // Component Count }; +class SPIRVTypeMatrix : public SPIRVType { +public: + // Complete constructor + SPIRVTypeMatrix(SPIRVModule *M, SPIRVId TheId, SPIRVType *TheColType, + SPIRVWord TheColCount) + : SPIRVType(M, 4, OpTypeMatrix, TheId), ColType(TheColType), + ColCount(TheColCount) { + validate(); + } + // Incomplete constructor + SPIRVTypeMatrix() : SPIRVType(OpTypeMatrix), ColType(nullptr), ColCount(0) {} + + SPIRVType *getColumnType() const { return ColType; } + SPIRVWord getColumnCount() const { return ColCount; } + + bool isValidIndex(SPIRVWord Index) const { return Index < ColCount; } + + SPIRVCapVec getRequiredCapability() const override { + SPIRVCapVec V(getColumnType()->getRequiredCapability()); + if (ColCount >= 8) + V.push_back(CapabilityVector16); + return V; + } + + virtual std::vector getNonLiteralOperands() const override { + return std::vector(1, ColType); + } + + void validate() const override { + SPIRVEntry::validate(); + ColType->validate(); + assert(ColCount >= 2); + } + +protected: + _SPIRV_DEF_ENCDEC3(Id, ColType, ColCount) + +private: + SPIRVType *ColType; // Column Type + SPIRVWord ColCount; // Column Count +}; + class SPIRVConstant; class SPIRVTypeArray : public SPIRVType { public: diff --git a/llvm-spirv/test/ComparePointers.cl b/llvm-spirv/test/ComparePointers.cl new file mode 100644 index 0000000000000..fa484fe0ee2f6 --- /dev/null +++ b/llvm-spirv/test/ComparePointers.cl @@ -0,0 +1,28 @@ +kernel void test(int global *in, int global *in2) { + if (!in) + return; + if (in == 1) + return; + if (in > in2) + return; + if (in < in2) + return; +} +// RUN: %clang_cc1 -triple spir64 -x cl -cl-std=CL2.0 -O0 -emit-llvm-bc %s -o %t.bc +// RUN: llvm-spirv %t.bc -spirv-text -o %t.spt +// RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV +// RUN: llvm-spirv %t.bc -o %t.spv +// RUN: spirv-val %t.spv + +// CHECK-SPIRV:ConvertPtrToU +// CHECK-SPIRV:ConvertPtrToU +// CHECK-SPIRV:INotEqual +// CHECK-SPIRV:ConvertPtrToU +// CHECK-SPIRV:ConvertPtrToU +// CHECK-SPIRV:IEqual +// CHECK-SPIRV:ConvertPtrToU +// CHECK-SPIRV:ConvertPtrToU +// CHECK-SPIRV:UGreaterThan +// CHECK-SPIRV:ConvertPtrToU +// CHECK-SPIRV:ConvertPtrToU +// CHECK-SPIRV:ULessThan diff --git a/llvm-spirv/test/DebugInfo/DebugControlFlow.cl b/llvm-spirv/test/DebugInfo/DebugControlFlow.cl index bb17b98a2147a..fcdaa31f74307 100644 --- a/llvm-spirv/test/DebugInfo/DebugControlFlow.cl +++ b/llvm-spirv/test/DebugInfo/DebugControlFlow.cl @@ -10,11 +10,6 @@ // between LoopMerge and Branch/BranchConditional instructions. // Otherwise, debug info interferes with SPIRVToLLVM translation // of structured flow control -// -// Currently, Line DebugInfo instructions are still present -// between LoopMerge and Branch/BranchConditional instructions. -// This does not affect SPIRVToLLVM translation, however -// should be fixed separately kernel void sample() { @@ -29,12 +24,19 @@ void sample() { } while (j++ < 10); } +// Check that all Line items are retained +// CHECK-SPIRV: Line [[File:[0-9]+]] 18 0 +// Control flow // CHECK-SPIRV: {{[0-9]+}} LoopMerge [[MergeBlock:[0-9]+]] [[ContinueTarget:[0-9]+]] 1 -// CHECK-SPIRV-NOT: ExtInst -// CHECK-SPIRV: BranchConditional +// CHECK-SPIRV-NEXT: BranchConditional + +// Check that all Line items are retained +// CHECK-SPIRV: Line [[File]] 23 0 +// CHECK-SPIRV: Line [[File]] 24 0 +// Control flow // CHECK-SPIRV: {{[0-9]+}} LoopMerge [[MergeBlock:[0-9]+]] [[ContinueTarget:[0-9]+]] 1 -// CHECK-SPIRV-NOT: ExtInst -// CHECK-SPIRV: Branch -// CHECK-LLVM: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !dbg !{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] +// CHECK-SPIRV-NEXT: Branch + +// CHECK-LLVM: br label %{{.*}}, !dbg !{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] // CHECK-LLVM: ![[MD]] = distinct !{![[MD]], ![[MD_unroll:[0-9]+]]} // CHECK-LLVM: ![[MD_unroll]] = !{!"llvm.loop.unroll.enable"} diff --git a/llvm-spirv/test/DebugInfo/DebugUnstructuredControlFlow.cl b/llvm-spirv/test/DebugInfo/DebugUnstructuredControlFlow.cl index 7863924b54691..bb938ef02cd30 100644 --- a/llvm-spirv/test/DebugInfo/DebugUnstructuredControlFlow.cl +++ b/llvm-spirv/test/DebugInfo/DebugUnstructuredControlFlow.cl @@ -7,8 +7,7 @@ // Test that no debug info instruction is inserted between LoopControlINTEL and // Branch instructions. Otherwise, debug info interferes with SPIRVToLLVM -// translation of structured flow control. Yet, Line DebugInfo instruction is -// still presenting between LoopControlINTEL and Branch instructions. +// translation of structured flow control kernel void sample() { @@ -16,10 +15,12 @@ void sample() { for(;;); } +// Check that all Line items are retained +// CHECK-SPIRV: Line [[File:[0-9]+]] 15 0 +// Loop control // CHECK-SPIRV: 2 LoopControlINTEL 1 -// CHECK-SPIRV-NOT: ExtInst -// CHECK-SPIRV: {{[0-9]+}} Line {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} -// CHECK-SPIRV: Branch +// CHECK-SPIRV-NEXT: Branch + // CHECK-LLVM: br label %{{.*}}, !dbg !{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] // CHECK-LLVM: ![[MD]] = distinct !{![[MD]], ![[MD_unroll:[0-9]+]]} // CHECK-LLVM: ![[MD_unroll]] = !{!"llvm.loop.unroll.enable"} diff --git a/llvm-spirv/test/DebugInfo/LocalAddressSpace.ll b/llvm-spirv/test/DebugInfo/LocalAddressSpace.ll index 025f7e8ea4e28..e0d470f12f572 100644 --- a/llvm-spirv/test/DebugInfo/LocalAddressSpace.ll +++ b/llvm-spirv/test/DebugInfo/LocalAddressSpace.ll @@ -22,7 +22,7 @@ ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_name {{.*}} = "a") ; CHECK-NEXT: DW_AT_type {{.*}} "int") -; CHECK-NEXT: DW_AT_decl_file {{.*}} ("/work/tmp{{[/\\]}}tmp.cl") +; CHECK-NEXT: DW_AT_decl_file {{.*}} ("/work{{[/\\]}}tmp{{[/\\]}}tmp.cl") ; CHECK-NEXT: DW_AT_decl_line {{.*}} (2) ; CHECK-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) diff --git a/llvm-spirv/test/InfiniteLoopMetadataPlacement.ll b/llvm-spirv/test/InfiniteLoopMetadataPlacement.ll new file mode 100644 index 0000000000000..b2e552af46efb --- /dev/null +++ b/llvm-spirv/test/InfiniteLoopMetadataPlacement.ll @@ -0,0 +1,105 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv --spirv-ext=+SPV_INTEL_unstructured_loop_controls %t.bc -o %t.spv +; RUN: llvm-spirv %t.spv --to-text -o - | FileCheck %s --check-prefix=CHECK-SPV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-REV-LLVM + +; ModuleID = 'llvm_loop_test.cpp' +source_filename = "llvm_loop_test.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux-sycldevice" + +$_ZTS12WhileOneTest = comdat any + +; CHECK-SPV: {{[0-9]+}} Name [[WH_COND:[0-9]+]] "while.cond" + +; Function Attrs: inlinehint nounwind +define weak_odr dso_local spir_kernel void @_ZTS12WhileOneTest() #0 comdat !kernel_arg_addr_space !4 !kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !4 { +entry: + %i = alloca i32, align 4 + %s = alloca i32, align 4 + %0 = bitcast i32* %i to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #2 + store i32 0, i32* %i, align 4, !tbaa !7 + %1 = bitcast i32* %s to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %1) #2 + store i32 0, i32* %s, align 4, !tbaa !7 + br label %while.cond + +; CHECK-SPV-NOT: {{[0-9]+}} LoopControlINTEL +; CHECK-SPV-NOT: {{[0-9]+}} LoopMerge + +while.cond: ; preds = %if.end, %entry +; CHECK-SPV: {{[0-9]+}} Label [[WH_COND]] +; CHECK-SPV-NEXT: {{[0-9]+}} LoopControlINTEL 4 +; CHECK-SPV-NEXT: {{[0-9]+}} Branch + br label %while.body + +; CHECK-SPV-NOT: {{[0-9]+}} LoopControlINTEL +; CHECK-SPV-NOT: {{[0-9]+}} LoopMerge + +while.body: ; preds = %while.cond + %2 = load i32, i32* %i, align 4, !tbaa !7 + %cmp = icmp sge i32 %2, 16 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %while.body + br label %while.end + +if.else: ; preds = %while.body + %3 = load i32, i32* %i, align 4, !tbaa !7 + %4 = load i32, i32* %s, align 4, !tbaa !7 + %add = add nsw i32 %4, %3 + store i32 %add, i32* %s, align 4, !tbaa !7 + br label %if.end + +; CHECK-REV-LLVM-NOT: br {{.*}}, !llvm.loop + +if.end: ; preds = %if.else +; CHECK-REV-LLVM: if.end: + %5 = load i32, i32* %i, align 4, !tbaa !7 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4, !tbaa !7 + br label %while.cond, !llvm.loop !9 +; CHECK-REV-LLVM: br label %while.cond, !llvm.loop ![[MD_IVDEP:[0-9]+]] + +; CHECK-REV-LLVM-NOT: br {{.*}}, !llvm.loop + +while.end: ; preds = %if.then + %6 = bitcast i32* %s to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %6) #2 + %7 = bitcast i32* %i to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %7) #2 + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 4, i32 100000} +!3 = !{!"clang version 9.0.0"} +!4 = !{} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"int", !5, i64 0} +!9 = distinct !{!9, !10} +!10 = !{!"llvm.loop.ivdep.enable"} + +; CHECK-REV-LLVM: ![[MD_IVDEP]] = distinct !{![[MD_IVDEP]], ![[MD_ivdep_enable:[0-9]+]]} +; CHECK-REV-LLVM: ![[MD_ivdep_enable]] = !{!"llvm.loop.ivdep.enable"} diff --git a/llvm-spirv/test/IntelFPGAMemoryAttributesForStaticVar.ll b/llvm-spirv/test/IntelFPGAMemoryAttributesForStaticVar.ll new file mode 100644 index 0000000000000..ec28935ed293c --- /dev/null +++ b/llvm-spirv/test/IntelFPGAMemoryAttributesForStaticVar.ll @@ -0,0 +1,156 @@ +; Source +; void foo(int a) { +; static int a_one [[intelfpga::numbanks(2)]]; +; a_one = a_one + a; +; } + +; void bar(char b) { +; static char b_one [[intelfpga::memory("MLAB")]]; +; b_one = b_one + b; +; } + +; void baz(int c) { +; static int c_one[[clang::annotate("foobarbaz")]]; +; c_one = c_one + c; +; } + +; template +; __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { +; kernelFunc(); +; } + +; int main() { +; kernel_single_task([]() { +; foo(128); +; bar(42); +; baz(16); +; }); +; return 0; +; } + +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_fpga_memory_attributes -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV: Capability FPGAMemoryAttributesINTEL +; CHECK-SPIRV: Extension "SPV_INTEL_fpga_memory_attributes" +; CHECK-SPIRV: Decorate {{[0-9]+}} UserSemantic "foobarbaz" +; CHECK-SPIRV: Decorate {{[0-9]+}} MemoryINTEL "DEFAULT" +; CHECK-SPIRV: Decorate {{[0-9]+}} MemoryINTEL "MLAB" +; CHECK-SPIRV: Decorate {{[0-9]+}} NumbanksINTEL 2 + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux-sycldevice" + +%"class._ZTSZ4mainE3$_0.anon" = type { i8 } + +; CHECK-LLVM: [[STR:@[0-9_.]+]] = {{.*}}{memory:DEFAULT}{numbanks:2} +; CHECK-LLVM: [[STR2:@[0-9_.]+]] = {{.*}}{memory:MLAB} +; CHECK-LLVM: [[STR3:@[0-9_.]+]] = {{.*}}foobarbaz +; CHECK-LLVM: @llvm.global.annotations +; CHECK-SAME: _ZZ3fooiE5a_one{{.*}}[[STR]]{{.*}}_ZZ3bariE5b_one{{.*}}[[STR2]]{{.*}}_ZZ3baziE5c_one{{.*}}[[STR3]] +@_ZZ3fooiE5a_one = internal addrspace(1) global i32 0, align 4 +@.str = private unnamed_addr constant [29 x i8] c"{memory:DEFAULT}{numbanks:2}\00", section "llvm.metadata" +@.str.1 = private unnamed_addr constant [9 x i8] c"test.cpp\00", section "llvm.metadata" +@_ZZ3barcE5b_one = internal addrspace(1) global i8 0, align 1 +@.str.2 = private unnamed_addr constant [14 x i8] c"{memory:MLAB}\00", section "llvm.metadata" +@_ZZ3baziE5c_one = internal addrspace(1) global i32 0, align 4 +@.str.3 = private unnamed_addr constant [10 x i8] c"foobarbaz\00", section "llvm.metadata" +@llvm.global.annotations = appending global [3 x { i8 addrspace(1)*, i8*, i8*, i32 }] [{ i8 addrspace(1)*, i8*, i8*, i32 } { i8 addrspace(1)* bitcast (i32 addrspace(1)* @_ZZ3fooiE5a_one to i8 addrspace(1)*), i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.1, i32 0, i32 0), i32 2 }, { i8 addrspace(1)*, i8*, i8*, i32 } { i8 addrspace(1)* @_ZZ3barcE5b_one, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.2, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.1, i32 0, i32 0), i32 7 }, { i8 addrspace(1)*, i8*, i8*, i32 } { i8 addrspace(1)* bitcast (i32 addrspace(1)* @_ZZ3baziE5c_one to i8 addrspace(1)*), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.3, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.1, i32 0, i32 0), i32 12 }], section "llvm.metadata" + +; Function Attrs: nounwind +define spir_kernel void @_ZTSZ4mainE15kernel_function() #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !4 { +entry: + %0 = alloca %"class._ZTSZ4mainE3$_0.anon", align 1 + %1 = bitcast %"class._ZTSZ4mainE3$_0.anon"* %0 to i8* + call void @llvm.lifetime.start.p0i8(i64 1, i8* %1) #4 + %2 = addrspacecast %"class._ZTSZ4mainE3$_0.anon"* %0 to %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* + call spir_func void @"_ZZ4mainENK3$_0clEv"(%"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %2) + %3 = bitcast %"class._ZTSZ4mainE3$_0.anon"* %0 to i8* + call void @llvm.lifetime.end.p0i8(i64 1, i8* %3) #4 + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: inlinehint nounwind +define internal spir_func void @"_ZZ4mainENK3$_0clEv"(%"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this) #2 align 2 { +entry: + %this.addr = alloca %"class._ZTSZ4mainE3$_0.anon" addrspace(4)*, align 8 + store %"class._ZTSZ4mainE3$_0.anon" addrspace(4)* %this, %"class._ZTSZ4mainE3$_0.anon" addrspace(4)** %this.addr, align 8, !tbaa !5 + %this1 = load %"class._ZTSZ4mainE3$_0.anon" addrspace(4)*, %"class._ZTSZ4mainE3$_0.anon" addrspace(4)** %this.addr, align 8 + call spir_func void @_Z3fooi(i32 128) + call spir_func void @_Z3barc(i8 signext 42) + call spir_func void @_Z3bazi(i32 16) + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: nounwind +define spir_func void @_Z3fooi(i32 %a) #3 { +entry: + %a.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4, !tbaa !9 + %0 = load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @_ZZ3fooiE5a_one to i32 addrspace(4)*), align 4, !tbaa !9 + %1 = load i32, i32* %a.addr, align 4, !tbaa !9 + %add = add nsw i32 %0, %1 + store i32 %add, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @_ZZ3fooiE5a_one to i32 addrspace(4)*), align 4, !tbaa !9 + ret void +} + +; Function Attrs: nounwind +define spir_func void @_Z3barc(i8 signext %b) #3 { +entry: + %b.addr = alloca i8, align 1 + store i8 %b, i8* %b.addr, align 1, !tbaa !11 + %0 = load i8, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* @_ZZ3barcE5b_one to i8 addrspace(4)*), align 1, !tbaa !11 + %conv = sext i8 %0 to i32 + %1 = load i8, i8* %b.addr, align 1, !tbaa !11 + %conv1 = sext i8 %1 to i32 + %add = add nsw i32 %conv, %conv1 + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* @_ZZ3barcE5b_one to i8 addrspace(4)*), align 1, !tbaa !11 + ret void +} + +; Function Attrs: nounwind +define spir_func void @_Z3bazi(i32 %c) #3 { +entry: + %c.addr = alloca i32, align 4 + store i32 %c, i32* %c.addr, align 4, !tbaa !9 + %0 = load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @_ZZ3baziE5c_one to i32 addrspace(4)*), align 4, !tbaa !9 + %1 = load i32, i32* %c.addr, align 4, !tbaa !9 + %add = add nsw i32 %0, %1 + store i32 %add, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @_ZZ3baziE5c_one to i32 addrspace(4)*), align 4, !tbaa !9 + ret void +} + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { inlinehint nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 4, i32 100000} +!3 = !{!"clang version 9.0.0"} +!4 = !{} +!5 = !{!6, !6, i64 0} +!6 = !{!"any pointer", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = !{!10, !10, i64 0} +!10 = !{!"int", !7, i64 0} +!11 = !{!7, !7, i64 0} diff --git a/llvm-spirv/test/OpBitReverse_i32.ll b/llvm-spirv/test/OpBitReverse_i32.ll new file mode 100644 index 0000000000000..7b29c327e98b0 --- /dev/null +++ b/llvm-spirv/test/OpBitReverse_i32.ll @@ -0,0 +1,40 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv -spirv-text %t.bc -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv +; RUN: llvm-spirv -r %t.spv -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV: 4 TypeInt [[int:[0-9]+]] 32 +; CHECK-SPIRV: 4 BitReverse [[int]] + +; CHECK-LLVM: call i32 @llvm.bitreverse.i32 + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; Function Attrs: convergent nounwind writeonly +define spir_kernel void @testBitRev(i32 %a, i32 %b, i32 %c, i32 addrspace(1)* nocapture %res) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { +entry: + %call = tail call i32 @llvm.bitreverse.i32(i32 %b) + store i32 %call, i32 addrspace(1)* %res, align 4, !tbaa !7 + ret void +} + +declare i32 @llvm.bitreverse.i32(i32) + +attributes #0 = { convergent nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 2, i32 0} +!3 = !{i32 0, i32 0, i32 0, i32 1} +!4 = !{!"none", !"none", !"none", !"none"} +!5 = !{!"int", !"int", !"int", !"int*"} +!6 = !{!"", !"", !"", !""} +!7 = !{!8, !8, i64 0} +!8 = !{!"int", !9, i64 0} +!9 = !{!"omnipotent char", !10, i64 0} +!10 = !{!"Simple C/C++ TBAA"} diff --git a/llvm-spirv/test/OpBitReverse_v2i16.ll b/llvm-spirv/test/OpBitReverse_v2i16.ll new file mode 100644 index 0000000000000..76f4bb39e7e1b --- /dev/null +++ b/llvm-spirv/test/OpBitReverse_v2i16.ll @@ -0,0 +1,41 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv -spirv-text %t.bc -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv +; RUN: llvm-spirv -r %t.spv -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV: 4 TypeInt [[short:[0-9]+]] 16 +; CHECK-SPIRV: 4 TypeVector [[short2:[0-9]+]] [[short]] 2 +; CHECK-SPIRV: 4 BitReverse [[short2]] + +; CHECK-LLVM: call <2 x i16> @llvm.bitreverse.v2i16 + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; Function Attrs: convergent nounwind writeonly +define spir_kernel void @testBitRev(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> addrspace(1)* nocapture %res) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { +entry: + %call = tail call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b) + store <2 x i16> %call, <2 x i16> addrspace(1)* %res, align 4, !tbaa !7 + ret void +} + +declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) + +attributes #0 = { convergent nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 2, i32 0} +!3 = !{i32 0, i32 0, i32 0, i32 1} +!4 = !{!"none", !"none", !"none", !"none"} +!5 = !{!"short2", !"short2", !"short2", !"short2*"} +!6 = !{!"", !"", !"", !""} +!7 = !{!8, !8, i64 0} +!8 = !{!"short2", !9, i64 0} +!9 = !{!"omnipotent char", !10, i64 0} +!10 = !{!"Simple C/C++ TBAA"} diff --git a/llvm-spirv/test/OpConvertPtrToU_narrowing.spvasm b/llvm-spirv/test/OpConvertPtrToU_narrowing.spvasm new file mode 100644 index 0000000000000..7b53ed3b22f99 --- /dev/null +++ b/llvm-spirv/test/OpConvertPtrToU_narrowing.spvasm @@ -0,0 +1,27 @@ +; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: llvm-spirv -r -o - %t.spv | llvm-dis | FileCheck %s + OpCapability Addresses + OpCapability Kernel + OpCapability Int16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "testNarrowingPtrToU" + OpName %a "a" + OpName %res "res" + OpName %entry "entry" + %uint = OpTypeInt 32 0 + %ushort = OpTypeInt 16 0 + %void = OpTypeVoid +%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint +%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort + %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_ushort + %1 = OpFunction %void None %17 + %a = OpFunctionParameter %_ptr_CrossWorkgroup_uint + %res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort + %entry = OpLabel + %18 = OpConvertPtrToU %ushort %a + OpStore %res %18 Aligned 2 + OpReturn + OpFunctionEnd + +; CHECK: ptrtoint i32 addrspace(1)* %a to i16 diff --git a/llvm-spirv/test/OpConvertPtrToU_widening.spvasm b/llvm-spirv/test/OpConvertPtrToU_widening.spvasm new file mode 100644 index 0000000000000..29151b58453f6 --- /dev/null +++ b/llvm-spirv/test/OpConvertPtrToU_widening.spvasm @@ -0,0 +1,27 @@ +; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: llvm-spirv -r -o - %t.spv | llvm-dis | FileCheck %s + OpCapability Addresses + OpCapability Kernel + OpCapability Int64 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "testWideningPtrToU" + OpName %a "a" + OpName %res "res" + OpName %entry "entry" + %uint = OpTypeInt 32 0 + %ulong = OpTypeInt 64 0 + %void = OpTypeVoid +%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint +%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong + %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_ulong + %1 = OpFunction %void None %17 + %a = OpFunctionParameter %_ptr_CrossWorkgroup_uint + %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong + %entry = OpLabel + %18 = OpConvertPtrToU %ulong %a + OpStore %res %18 Aligned 8 + OpReturn + OpFunctionEnd + +; CHECK: ptrtoint i32 addrspace(1)* %a to i64 diff --git a/llvm-spirv/test/OpConvertUToPtr_narrowing.spvasm b/llvm-spirv/test/OpConvertUToPtr_narrowing.spvasm new file mode 100644 index 0000000000000..10ac6dc719403 --- /dev/null +++ b/llvm-spirv/test/OpConvertUToPtr_narrowing.spvasm @@ -0,0 +1,25 @@ +; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: llvm-spirv -r -o - %t.spv | llvm-dis | FileCheck %s + OpCapability Addresses + OpCapability Kernel + OpCapability Int64 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "testNarrowingUToPtr" + OpName %a "a" + OpName %entry "entry" + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %ulong = OpTypeInt 64 0 + %void = OpTypeVoid +%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint + %9 = OpTypeFunction %void %ulong + %1 = OpFunction %void None %9 + %a = OpFunctionParameter %ulong + %entry = OpLabel + %10 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %a + OpStore %10 %uint_0 Aligned 4 + OpReturn + OpFunctionEnd + +; CHECK: inttoptr i64 %a to i32 addrspace(1)* diff --git a/llvm-spirv/test/OpConvertUToPtr_widening.spvasm b/llvm-spirv/test/OpConvertUToPtr_widening.spvasm new file mode 100644 index 0000000000000..93da43843e535 --- /dev/null +++ b/llvm-spirv/test/OpConvertUToPtr_widening.spvasm @@ -0,0 +1,25 @@ +; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: llvm-spirv -r -o - %t.spv | llvm-dis | FileCheck %s + OpCapability Addresses + OpCapability Kernel + OpCapability Int16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "testWideningUToPtr" + OpName %a "a" + OpName %entry "entry" + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %ushort = OpTypeInt 16 0 + %void = OpTypeVoid +%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint + %9 = OpTypeFunction %void %ushort + %1 = OpFunction %void None %9 + %a = OpFunctionParameter %ushort + %entry = OpLabel + %10 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %a + OpStore %10 %uint_0 Aligned 4 + OpReturn + OpFunctionEnd + +; CHECK: inttoptr i16 %a to i32 addrspace(1)* diff --git a/llvm-spirv/test/OpFMod.spt b/llvm-spirv/test/OpFMod.spt deleted file mode 100644 index 04f0f3f29145a..0000000000000 --- a/llvm-spirv/test/OpFMod.spt +++ /dev/null @@ -1,66 +0,0 @@ -119734787 65536 458752 27 0 -2 Capability Addresses -2 Capability Linkage -2 Capability Kernel -2 Capability Float64 -2 Capability Int64 -5 ExtInstImport 1 "OpenCL.std" -3 MemoryModel 2 2 -6 EntryPoint 6 2 "fmath_spv" -3 Source 3 102000 -3 Name 3 "res" -3 Name 4 "lhs" -3 Name 5 "rhs" -4 Name 6 "entry" -9 Name 8 "__spirv_GlobalInvocationId" -4 Decorate 7 FuncParamAttr 5 -2 DecorationGroup 7 -4 Decorate 8 BuiltIn 28 -3 Decorate 8 Constant -11 Decorate 8 LinkageAttributes "__spirv_GlobalInvocationId" Import -5 GroupDecorate 7 3 4 5 -4 TypeInt 9 64 0 -5 Constant 9 16 32 0 -4 TypeVector 10 9 3 -4 TypePointer 11 0 10 -2 TypeVoid 12 -3 TypeFloat 13 64 -4 TypePointer 14 5 13 -6 TypeFunction 15 12 14 14 14 -4 Variable 11 8 0 - -5 Function 12 2 0 15 -3 FunctionParameter 14 3 -3 FunctionParameter 14 4 -3 FunctionParameter 14 5 - -2 Label 6 -6 Load 10 17 8 2 0 -5 CompositeExtract 9 18 17 0 -5 ShiftLeftLogical 9 19 18 16 -5 ShiftRightArithmetic 9 20 19 16 -5 InBoundsPtrAccessChain 14 21 4 20 -6 Load 13 22 21 2 8 -5 InBoundsPtrAccessChain 14 23 5 20 -6 Load 13 24 23 2 8 -5 FMod 13 25 22 24 -5 InBoundsPtrAccessChain 14 26 3 20 -5 Store 26 25 2 8 -1 Return - -1 FunctionEnd - -; RUN: llvm-spirv %s -to-binary -o %t.spv -; RUN: spirv-val %t.spv -; RUN: llvm-spirv %t.spv -to-text -o %t.spt -; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV -; RUN: llvm-spirv -r %t.spv -o %t.bc -; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-SPIRV: 5 FMod {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} - -; CHECK-LLVM: %frem.res = frem double %[[dividend:[0-9]+]], %[[divisor:[0-9]+]] -; CHECK-LLVM: %copysign = call spir_func double @_Z8copysigndd(double %frem.res, double %[[divisor]]) #0 -; CHECK-LLVM: %[[ptr:[0-9]+]] = getelementptr inbounds double, double addrspace(1)* %res, i64 %{{[0-9]*}} -; CHECK-LLVM: store double %copysign, double addrspace(1)* %[[ptr]], align 8 - diff --git a/llvm-spirv/test/OpFMod_f32.spvasm b/llvm-spirv/test/OpFMod_f32.spvasm new file mode 100644 index 0000000000000..ffc0021926d5d --- /dev/null +++ b/llvm-spirv/test/OpFMod_f32.spvasm @@ -0,0 +1,25 @@ +; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: llvm-spirv -r -o - %t.spv | llvm-dis | FileCheck %s + OpCapability Addresses + OpCapability Kernel + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "testFMod_f32" + OpName %a "a" + OpName %b "b" + %void = OpTypeVoid + %float = OpTypeFloat 32 + %5 = OpTypeFunction %void %float %float + %1 = OpFunction %void None %5 + %a = OpFunctionParameter %float + %b = OpFunctionParameter %float + %6 = OpLabel + %7 = OpFMod %float %a %b + OpReturn + OpFunctionEnd + +; CHECK-DAG: %frem.res = frem float %a, %b +; CHECK-DAG: %copysign.res = call float @llvm.copysign.f32(float %frem.res, float %b) +; CHECK-DAG: %fadd.res = fadd float {{%frem\.res, %b|%b, %frem\.res}} +; CHECK-DAG: %cmp.res = fcmp one float {{%frem\.res, %copysign\.res|%copysign\.res, %frem\.res}} +; CHECK: select i1 %cmp.res, float %fadd.res, float %copysign.res diff --git a/llvm-spirv/test/OpFMod_v2f16.spvasm b/llvm-spirv/test/OpFMod_v2f16.spvasm new file mode 100644 index 0000000000000..5675a7904fc76 --- /dev/null +++ b/llvm-spirv/test/OpFMod_v2f16.spvasm @@ -0,0 +1,27 @@ +; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: llvm-spirv -r -o - %t.spv | llvm-dis | FileCheck %s + OpCapability Addresses + OpCapability Kernel + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "testFMod_v2f16" + OpName %a "a" + OpName %b "b" + %void = OpTypeVoid + %half = OpTypeFloat 16 + %half2 = OpTypeVector %half 2 + %5 = OpTypeFunction %void %half2 %half2 + %1 = OpFunction %void None %5 + %a = OpFunctionParameter %half2 + %b = OpFunctionParameter %half2 + %6 = OpLabel + %7 = OpFMod %half2 %a %b + OpReturn + OpFunctionEnd + +; CHECK-DAG: %frem.res = frem <2 x half> %a, %b +; CHECK-DAG: %copysign.res = call <2 x half> @llvm.copysign.v2f16(<2 x half> %frem.res, <2 x half> %b) +; CHECK-DAG: %fadd.res = fadd <2 x half> {{%frem\.res, %b|%b, %frem\.res}} +; CHECK-DAG: %cmp.res = fcmp one <2 x half> {{%frem\.res, %copysign\.res|%copysign\.res, %frem\.res}} +; CHECK: select <2 x i1> %cmp.res, <2 x half> %fadd.res, <2 x half> %copysign.res diff --git a/llvm-spirv/test/OpLoopMergeDontUnrollHint1.spt b/llvm-spirv/test/OpLoopMergeDontUnrollHint1.spt index 4a8217afb3d3e..59b3da40fb0f0 100644 --- a/llvm-spirv/test/OpLoopMergeDontUnrollHint1.spt +++ b/llvm-spirv/test/OpLoopMergeDontUnrollHint1.spt @@ -83,6 +83,6 @@ ; RUN: llvm-spirv -r %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM -; CHECK-LLVM: br i1 %{{[0-9]+}}, label %{{[0-9]+}}, label %{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] +; CHECK-LLVM: br label %{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] ; CHECK-LLVM: ![[MD]] = distinct !{![[MD]], ![[MD_unroll:[0-9]+]]} ; CHECK-LLVM: ![[MD_unroll]] = !{!"llvm.loop.unroll.disable"} diff --git a/llvm-spirv/test/OpLoopMergeNone.spt b/llvm-spirv/test/OpLoopMergeNone.spt index a17e4524bf5a9..734143f1abbef 100644 --- a/llvm-spirv/test/OpLoopMergeNone.spt +++ b/llvm-spirv/test/OpLoopMergeNone.spt @@ -84,5 +84,5 @@ ; RUN: llvm-spirv -r %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM -; CHECK-LLVM: br i1 %{{[0-9]+}}, label %{{[0-9]+}}, label %{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] +; CHECK-LLVM: br label %{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] ; CHECK-LLVM: ![[MD]] = distinct !{![[MD]]} diff --git a/llvm-spirv/test/OpLoopMergePartialUnroll.spt b/llvm-spirv/test/OpLoopMergePartialUnroll.spt index 5f3fab6b6025c..9118fc63324ab 100644 --- a/llvm-spirv/test/OpLoopMergePartialUnroll.spt +++ b/llvm-spirv/test/OpLoopMergePartialUnroll.spt @@ -83,6 +83,6 @@ ; RUN: llvm-spirv -r %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM -; CHECK-LLVM: br i1 %{{[0-9]+}}, label %{{[0-9]+}}, label %{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] +; CHECK-LLVM: br label %{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] ; CHECK-LLVM: ![[MD]] = distinct !{![[MD]], ![[MD_unroll:[0-9]+]]} ; CHECK-LLVM: ![[MD_unroll]] = !{!"llvm.loop.unroll.count", i32 4} diff --git a/llvm-spirv/test/OpLoopMergeUnroll.spt b/llvm-spirv/test/OpLoopMergeUnroll.spt index ad498b956b989..d976168d19fbb 100644 --- a/llvm-spirv/test/OpLoopMergeUnroll.spt +++ b/llvm-spirv/test/OpLoopMergeUnroll.spt @@ -84,6 +84,6 @@ ; RUN: llvm-spirv -r %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM -; CHECK-LLVM: br i1 %{{[0-9]+}}, label %{{[0-9]+}}, label %{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] +; CHECK-LLVM: br label %{{[0-9]+}}, !llvm.loop ![[MD:[0-9]+]] ; CHECK-LLVM: ![[MD]] = distinct !{![[MD]], ![[MD_unroll:[0-9]+]]} ; CHECK-LLVM: ![[MD_unroll]] = !{!"llvm.loop.unroll.enable"} diff --git a/llvm-spirv/test/OpSMod_i32.spvasm b/llvm-spirv/test/OpSMod_i32.spvasm new file mode 100644 index 0000000000000..2705f83183b69 --- /dev/null +++ b/llvm-spirv/test/OpSMod_i32.spvasm @@ -0,0 +1,27 @@ +; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: llvm-spirv -r -o - %t.spv | llvm-dis | FileCheck %s + OpCapability Addresses + OpCapability Kernel + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "testSMod_i32" + OpName %a "a" + OpName %b "b" + %void = OpTypeVoid + %uint = OpTypeInt 32 0 + %5 = OpTypeFunction %void %uint %uint + %1 = OpFunction %void None %5 + %a = OpFunctionParameter %uint + %b = OpFunctionParameter %uint + %6 = OpLabel + %7 = OpSMod %uint %a %b + OpReturn + OpFunctionEnd + +; CHECK-DAG: %srem.res = srem i32 %a, %b +; CHECK-DAG: %xor.res = xor i32 {{%a, %b|%b, %a}} +; CHECK-DAG: %cmpsign.res = icmp slt i32 %xor.res, 0 +; CHECK-DAG: %cmpsrem.res = icmp ne i32 {{%srem\.res, 0|0, %srem\.res}} +; CHECK-DAG: %add.res = add nsw i32 {{%srem\.res, %b|%b, %srem\.res}} +; CHECK-DAG: %cmp.res = and i1 {{%cmpsign\.res, %cmpsrem\.res|%cmpsrem\.res, %cmpsign\.res}} +; CHECK: select i1 %cmp.res, i32 %add.res, i32 %srem.res diff --git a/llvm-spirv/test/OpSMod_v2i16.spvasm b/llvm-spirv/test/OpSMod_v2i16.spvasm new file mode 100644 index 0000000000000..b64e15d73622b --- /dev/null +++ b/llvm-spirv/test/OpSMod_v2i16.spvasm @@ -0,0 +1,29 @@ +; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: llvm-spirv -r -o - %t.spv | llvm-dis | FileCheck %s + OpCapability Addresses + OpCapability Kernel + OpCapability Int16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "testSMod_v2i16" + OpName %a "a" + OpName %b "b" + %void = OpTypeVoid + %ushort = OpTypeInt 16 0 + %ushort2 = OpTypeVector %ushort 2 + %5 = OpTypeFunction %void %ushort2 %ushort2 + %1 = OpFunction %void None %5 + %a = OpFunctionParameter %ushort2 + %b = OpFunctionParameter %ushort2 + %6 = OpLabel + %7 = OpSMod %ushort2 %a %b + OpReturn + OpFunctionEnd + +; CHECK-DAG: %srem.res = srem <2 x i16> %a, %b +; CHECK-DAG: %xor.res = xor <2 x i16> {{%a, %b|%b, %a}} +; CHECK-DAG: %cmpsign.res = icmp slt <2 x i16> %xor.res, zeroinitializer +; CHECK-DAG: %cmpsrem.res = icmp ne <2 x i16> {{%srem\.res, zeroinitializer|zeroinitializer, %srem\.res}} +; CHECK-DAG: %add.res = add nsw <2 x i16> {{%srem\.res, %b|%b, %srem\.res}} +; CHECK-DAG: %cmp.res = and <2 x i1> {{%cmpsign\.res, %cmpsrem\.res|%cmpsrem\.res, %cmpsign\.res}} +; CHECK: select <2 x i1> %cmp.res, <2 x i16> %add.res, <2 x i16> %srem.res diff --git a/llvm-spirv/test/copy_object.spt b/llvm-spirv/test/copy_object.spt index 58a5126dcf58c..3587a4c966173 100644 --- a/llvm-spirv/test/copy_object.spt +++ b/llvm-spirv/test/copy_object.spt @@ -38,9 +38,6 @@ 1 FunctionEnd -; FIXME: LIT comments/commands are moved at the end because llvm-spirv stops -; reading the file after first ';' symbol - ; RUN: llvm-spirv %s -to-binary -o %t.spv ; RUN: spirv-val %t.spv ; RUN: llvm-spirv -r %t.spv -o %t.bc diff --git a/llvm-spirv/test/count-zero-bits.ll b/llvm-spirv/test/count-zero-bits.ll new file mode 100644 index 0000000000000..7ed0e58b49f46 --- /dev/null +++ b/llvm-spirv/test/count-zero-bits.ll @@ -0,0 +1,62 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-unknown" + +; CHECK: ExtInstImport [[extinst_id:[0-9]+]] "OpenCL.std" + +; CHECK: Function +; CHECK: 6 ExtInst {{[0-9]+}} {{[0-9]+}} [[extinst_id]] clz +; CHECK: FunctionEnd + +; Function Attrs: nounwind readnone +define spir_func i32 @TestClz(i32 %x) local_unnamed_addr #0 { +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + ret i32 %0 +} + +; CHECK: Function +; CHECK: 6 ExtInst {{[0-9]+}} {{[0-9]+}} [[extinst_id]] ctz +; CHECK: FunctionEnd + +; Function Attrs: nounwind readnone +define spir_func i32 @TestCtz(i32 %x) local_unnamed_addr #0 { +entry: + %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + ret i32 %0 +} + +; CHECK: Function +; CHECK: 6 ExtInst {{[0-9]+}} {{[0-9]+}} [[extinst_id]] ctz +; CHECK: FunctionEnd + +; Function Attrs: nounwind readnone +define spir_func <4 x i32> @TestCtzVec(<4 x i32> %x) local_unnamed_addr #0 { +entry: + %0 = tail call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %x, i1 true) + ret <4 x i32> %0 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare i32 @llvm.ctlz.i32(i32, i1 immarg) #1 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i32 @llvm.cttz.i32(i32, i1 immarg) #1 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1 immarg) #1 + +attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable willreturn } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 0} +!2 = !{i32 1, i32 2} diff --git a/llvm-spirv/test/linkage-name.spt b/llvm-spirv/test/linkage-name.spt index 2cae495b49197..241c600b545b0 100644 --- a/llvm-spirv/test/linkage-name.spt +++ b/llvm-spirv/test/linkage-name.spt @@ -23,9 +23,6 @@ 1 FunctionEnd -; FIXME: LIT comments/commands are moved at the end because llvm-spirv stops -; reading the file after first ';' symbol - ; RUN: llvm-spirv %s -to-binary -o %t.spv ; RUN: spirv-val %t.spv ; RUN: llvm-spirv -r %t.spv -o %t.bc diff --git a/llvm-spirv/test/matrix_times_scalar.spt b/llvm-spirv/test/matrix_times_scalar.spt new file mode 100644 index 0000000000000..fbf4ba9055359 --- /dev/null +++ b/llvm-spirv/test/matrix_times_scalar.spt @@ -0,0 +1,57 @@ +; RUN: llvm-spirv %s -to-binary -o %t.spv +; RUN: spirv-val %t.spv +; RUN: llvm-spirv -r %t.spv -o %t.bc +; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-LLVM: %1 = load [4 x <4 x float>], [4 x <4 x float>]* %lhs +; CHECK-LLVM: %2 = load float, float* %rhs +; CHECK-LLVM: %.splatinsert = insertelement <4 x float> undef, float %2, i32 0 +; CHECK-LLVM: %3 = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-LLVM: %4 = extractvalue [4 x <4 x float>] %1, 0 +; CHECK-LLVM: %5 = fmul <4 x float> %4, %3 +; CHECK-LLVM: %6 = insertvalue [4 x <4 x float>] undef, <4 x float> %5, 0 +; CHECK-LLVM: %7 = extractvalue [4 x <4 x float>] %1, 1 +; CHECK-LLVM: %8 = fmul <4 x float> %7, %3 +; CHECK-LLVM: %9 = insertvalue [4 x <4 x float>] %6, <4 x float> %8, 1 +; CHECK-LLVM: %10 = extractvalue [4 x <4 x float>] %1, 2 +; CHECK-LLVM: %11 = fmul <4 x float> %10, %3 +; CHECK-LLVM: %12 = insertvalue [4 x <4 x float>] %9, <4 x float> %11, 2 +; CHECK-LLVM: %13 = extractvalue [4 x <4 x float>] %1, 3 +; CHECK-LLVM: %14 = fmul <4 x float> %13, %3 +; CHECK-LLVM: %15 = insertvalue [4 x <4 x float>] %12, <4 x float> %14, 3 +; CHECK-LLVM: store [4 x <4 x float>] %15, [4 x <4 x float>]* %res + +119734787 65536 458752 21 0 +2 Capability Addresses +2 Capability Linkage +2 Capability Kernel +2 Capability Float64 +2 Capability Matrix +3 MemoryModel 2 2 +8 EntryPoint 6 20 "matrix_times_scalar" +3 Source 3 102000 +3 Name 12 "res" +3 Name 13 "lhs" +3 Name 14 "rhs" + +2 TypeVoid 5 +3 TypeFloat 6 32 +4 TypeVector 7 6 4 +4 TypeMatrix 8 7 4 +4 TypePointer 9 7 8 ; 9 : Pointer to Matrix +4 TypePointer 10 7 6 ; 10 : Pointer to Scalar +6 TypeFunction 11 5 9 9 10 + +5 Function 5 20 0 11 ; lhs x rhs -> res +3 FunctionParameter 9 12 ; res : Pointer to Matrix +3 FunctionParameter 9 13 ; lhs : Pointer to Matrix +3 FunctionParameter 10 14 ; rhs : Pointer to Scalar + +2 Label 15 +4 Load 8 16 13 +4 Load 6 17 14 +5 MatrixTimesScalar 8 18 16 17 +3 Store 12 18 +1 Return + +1 FunctionEnd diff --git a/llvm-spirv/test/matrix_times_vector.spt b/llvm-spirv/test/matrix_times_vector.spt new file mode 100644 index 0000000000000..0726381d440b5 --- /dev/null +++ b/llvm-spirv/test/matrix_times_vector.spt @@ -0,0 +1,68 @@ +119734787 65536 458752 21 0 +2 Capability Addresses +2 Capability Linkage +2 Capability Kernel +2 Capability Float64 +2 Capability Matrix +3 MemoryModel 2 2 +8 EntryPoint 6 20 "matrix_times_vector" +3 Source 3 102000 +3 Name 12 "res" +3 Name 13 "lhs" +3 Name 14 "rhs" + +2 TypeVoid 5 +3 TypeFloat 6 32 +4 TypeVector 7 6 4 +4 TypeMatrix 8 7 4 +4 TypePointer 9 7 8 +4 TypePointer 10 7 7 +6 TypeFunction 11 5 10 9 10 + +5 Function 5 20 0 11 +3 FunctionParameter 10 12 +3 FunctionParameter 9 13 +3 FunctionParameter 10 14 + +2 Label 15 +4 Load 8 16 13 +4 Load 7 17 14 +5 MatrixTimesVector 7 18 16 17 +3 Store 12 18 +1 Return + +1 FunctionEnd + +; RUN: llvm-spirv %s -to-binary -o %t.spv +; RUN: spirv-val %t.spv +; RUN: llvm-spirv -r %t.spv -o %t.bc +; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-LLVM: %1 = load [4 x <4 x float>], [4 x <4 x float>]* %lhs +; CHECK-LLVM: %2 = load <4 x float>, <4 x float>* %rhs +; CHECK-LLVM: %3 = extractelement <4 x float> %2, i32 0 +; CHECK-LLVM: %.splatinsert = insertelement <4 x float> undef, float %3, i32 0 +; CHECK-LLVM: %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-LLVM: %4 = extractvalue [4 x <4 x float>] %1, 0 +; CHECK-LLVM: %5 = fmul <4 x float> %.splat, %4 +; CHECK-LLVM: %6 = fadd <4 x float> zeroinitializer, %5 +; CHECK-LLVM: %7 = extractelement <4 x float> %2, i32 1 +; CHECK-LLVM: %.splatinsert1 = insertelement <4 x float> undef, float %7, i32 0 +; CHECK-LLVM: %.splat2 = shufflevector <4 x float> %.splatinsert1, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-LLVM: %8 = extractvalue [4 x <4 x float>] %1, 1 +; CHECK-LLVM: %9 = fmul <4 x float> %.splat2, %8 +; CHECK-LLVM: %10 = fadd <4 x float> %6, %9 +; CHECK-LLVM: %11 = extractelement <4 x float> %2, i32 2 +; CHECK-LLVM: %.splatinsert3 = insertelement <4 x float> undef, float %11, i32 0 +; CHECK-LLVM: %.splat4 = shufflevector <4 x float> %.splatinsert3, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-LLVM: %12 = extractvalue [4 x <4 x float>] %1, 2 +; CHECK-LLVM: %13 = fmul <4 x float> %.splat4, %12 +; CHECK-LLVM: %14 = fadd <4 x float> %10, %13 +; CHECK-LLVM: %15 = extractelement <4 x float> %2, i32 3 +; CHECK-LLVM: %.splatinsert5 = insertelement <4 x float> undef, float %15, i32 0 +; CHECK-LLVM: %.splat6 = shufflevector <4 x float> %.splatinsert5, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-LLVM: %16 = extractvalue [4 x <4 x float>] %1, 3 +; CHECK-LLVM: %17 = fmul <4 x float> %.splat6, %16 +; CHECK-LLVM: %18 = fadd <4 x float> %14, %17 +; CHECK-LLVM: store <4 x float> %18, <4 x float>* %res +; CHECK-LLVM: ret void diff --git a/llvm-spirv/test/opundef.spt b/llvm-spirv/test/opundef.spt index 3a8356b00fd18..2a8e433392d13 100644 --- a/llvm-spirv/test/opundef.spt +++ b/llvm-spirv/test/opundef.spt @@ -38,9 +38,6 @@ 1 FunctionEnd -; FIXME: LIT comments/commands are moved at the end because llvm-spirv stops -; reading the file after first ';' symbol - ; RUN: llvm-spirv %s -to-binary -o %t.spv ; RUN: spirv-val %t.spv ; RUN: llvm-spirv -r %t.spv -o %t.bc diff --git a/llvm-spirv/test/redundant_word.spt b/llvm-spirv/test/redundant_word.spt index a6f500b76db75..e851292f02bb0 100644 --- a/llvm-spirv/test/redundant_word.spt +++ b/llvm-spirv/test/redundant_word.spt @@ -28,9 +28,6 @@ 1 FunctionEnd -; FIXME: LIT comments/commands are moved at the end because llvm-spirv stops -; reading the file after first ';' symbol - ; RUN: llvm-spirv %s -to-binary -o %t.spv ; RUN: spirv-val %t.spv ; RUN: llvm-spirv -r %t.spv -o %t.bc diff --git a/llvm-spirv/test/transcoding/ConvertPtr.cl b/llvm-spirv/test/transcoding/ConvertPtr.cl index 14f48731894e3..8f4bf2562bfc4 100644 --- a/llvm-spirv/test/transcoding/ConvertPtr.cl +++ b/llvm-spirv/test/transcoding/ConvertPtr.cl @@ -5,12 +5,23 @@ // RUN: llvm-spirv -r %t.spv -o %t.rev.bc // RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM -kernel void test(global int *a, global unsigned long *res) { +kernel void testConvertPtrToU(global int *a, global unsigned long *res) { res[0] = (unsigned long)&a[0]; } -// CHECK-SPIRV: ConvertPtrToU +// CHECK-SPIRV: 4 ConvertPtrToU -// CHECK-LLVM-LABEL: @test +// CHECK-LLVM-LABEL: @testConvertPtrToU // CHECK-LLVM: %0 = ptrtoint i32 addrspace(1)* %a to i32 // CHECK-LLVM: zext i32 %0 to i64 + +kernel void testConvertUToPtr(unsigned long a) { + global unsigned int *res = (global unsigned int *)a; + res[0] = 0; +} + +// CHECK-SPIRV: 4 ConvertUToPtr + +// CHECK-LLVM-LABEL: @testConvertUToPtr +// CHECK-LLVM: %[[Conv:[a-z]+]] = trunc i64 %a to i32 +// CHECK-LLVM: inttoptr i32 %[[Conv]] to i32 addrspace(1)* diff --git a/llvm-spirv/test/transcoding/FPGALoopAttr.ll b/llvm-spirv/test/transcoding/FPGALoopAttr.ll index 0f0b1913a4b50..4337fb2f58050 100644 --- a/llvm-spirv/test/transcoding/FPGALoopAttr.ll +++ b/llvm-spirv/test/transcoding/FPGALoopAttr.ll @@ -161,11 +161,11 @@ attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide !10 = !{!"llvm.loop.max_concurrency.count", i32 2} !11 = distinct !{!11, !8, !10} -; CHECK-LLVM: br i1 %cmp, label %for.body, label %for.end, !llvm.loop ![[MD_A:[0-9]+]] -; CHECK-LLVM: br i1 %cmp{{[0-9]+}}, label %for.body{{[0-9]+}}, label %for.end{{[0-9]+}}, !llvm.loop ![[MD_B:[0-9]+]] -; CHECK-LLVM: br i1 %cmp{{[0-9]+}}, label %for.body{{[0-9]+}}, label %for.end{{[0-9]+}}, !llvm.loop ![[MD_C:[0-9]+]] -; CHECK-LLVM: br i1 %cmp{{[0-9]+}}, label %for.body{{[0-9]+}}, label %for.end{{[0-9]+}}, !llvm.loop ![[MD_D:[0-9]+]] -; CHECK-LLVM: br i1 %cmp{{[0-9]+}}, label %for.body{{[0-9]+}}, label %for.end{{[0-9]+}}, !llvm.loop ![[MD_E:[0-9]+]] +; CHECK-LLVM: br label %for.cond{{[0-9]*}}, !llvm.loop ![[MD_A:[0-9]+]] +; CHECK-LLVM: br label %for.cond{{[0-9]+}}, !llvm.loop ![[MD_B:[0-9]+]] +; CHECK-LLVM: br label %for.cond{{[0-9]+}}, !llvm.loop ![[MD_C:[0-9]+]] +; CHECK-LLVM: br label %for.cond{{[0-9]+}}, !llvm.loop ![[MD_D:[0-9]+]] +; CHECK-LLVM: br label %for.cond{{[0-9]+}}, !llvm.loop ![[MD_E:[0-9]+]] ; CHECK-LLVM: ![[MD_A]] = distinct !{![[MD_A]], ![[MD_ivdep_enable:[0-9]+]]} ; CHECK-LLVM: ![[MD_ivdep_enable]] = !{!"llvm.loop.ivdep.enable"} diff --git a/llvm-spirv/test/transcoding/GenericCastToPtr.cl b/llvm-spirv/test/transcoding/GenericCastToPtr.cl new file mode 100644 index 0000000000000..2088c67bf4c89 --- /dev/null +++ b/llvm-spirv/test/transcoding/GenericCastToPtr.cl @@ -0,0 +1,69 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -O1 -cl-std=CL2.0 -emit-llvm-bc -finclude-default-header %s -o %t.bc +// RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV +// RUN: llvm-spirv %t.bc -o %t.spv +// RUN: spirv-val %t.spv +// RUN: llvm-spirv -r %t.spv -o %t.rev.bc +// RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +// CHECK-SPIRV: 4 GenericCastToPtr + +// CHECK-LLVM-LABEL: @testGenericCastToPtrGlobal +// CHECK-LLVM: %0 = addrspacecast <2 x i16> addrspace(4)* %a to <2 x i16> addrspace(1)* + +global short2 *testGenericCastToPtrGlobal(generic short2 *a) { + return (global short2 *)a; +} + +// CHECK-SPIRV: 4 GenericCastToPtr + +// CHECK-LLVM-LABEL: @testGenericCastToPtrLocal +// CHECK-LLVM: %0 = addrspacecast <2 x i16> addrspace(4)* %a to <2 x i16> addrspace(3)* + +local short2 *testGenericCastToPtrLocal(generic short2 *a) { + return (local short2 *)a; +} + +// CHECK-SPIRV: 4 GenericCastToPtr + +// CHECK-LLVM-LABEL: @testGenericCastToPtrPrivate +// CHECK-LLVM: %0 = addrspacecast <2 x i16> addrspace(4)* %a to <2 x i16>* + +private short2 *testGenericCastToPtrPrivate(generic short2 *a) { + return (private short2 *)a; +} + +// CHECK-SPIRV: 5 GenericCastToPtrExplicit + +// CHECK-LLVM-LABEL: @testGenericCastToPtrExplicitGlobal +// CHECK-LLVM: %[[VoidPtrCast:[0-9]+]] = bitcast <2 x i16> addrspace(4)* %a to i8 addrspace(4)* +// CHECK-LLVM-NEXT: %[[AddrSpaceCast:[0-9]+]] = bitcast i8 addrspace(4)* %[[VoidPtrCast]] to i8 addrspace(4)* +// CHECK-LLVM-NEXT: %.tmp = call spir_func i8 addrspace(1)* @__to_global(i8 addrspace(4)* %[[AddrSpaceCast]]) +// CHECK-LLVM: bitcast i8 addrspace(1)* %{{[0-9]+}} to <2 x i16> addrspace(1)* + +global short2 *testGenericCastToPtrExplicitGlobal(generic short2 *a) { + return to_global(a); +} + +// CHECK-SPIRV: 5 GenericCastToPtrExplicit + +// CHECK-LLVM-LABEL: @testGenericCastToPtrExplicitLocal +// CHECK-LLVM: %[[VoidPtrCast:[0-9]+]] = bitcast <2 x i16> addrspace(4)* %a to i8 addrspace(4)* +// CHECK-LLVM-NEXT: %[[AddrSpaceCast:[0-9]+]] = bitcast i8 addrspace(4)* %[[VoidPtrCast]] to i8 addrspace(4)* +// CHECK-LLVM-NEXT: %.tmp = call spir_func i8 addrspace(3)* @__to_local(i8 addrspace(4)* %[[AddrSpaceCast]]) +// CHECK-LLVM: bitcast i8 addrspace(3)* %{{[0-9]+}} to <2 x i16> addrspace(3)* + +local short2 *testGenericCastToPtrExplicitLocal(generic short2 *a) { + return to_local(a); +} + +// CHECK-SPIRV: 5 GenericCastToPtrExplicit + +// CHECK-LLVM-LABEL: @testGenericCastToPtrExplicitPrivate +// CHECK-LLVM: %[[VoidPtrCast:[0-9]+]] = bitcast <2 x i16> addrspace(4)* %a to i8 addrspace(4)* +// CHECK-LLVM-NEXT: %[[AddrSpaceCast:[0-9]+]] = bitcast i8 addrspace(4)* %[[VoidPtrCast]] to i8 addrspace(4)* +// CHECK-LLVM-NEXT: %.tmp = call spir_func i8* @__to_private(i8 addrspace(4)* %[[AddrSpaceCast]]) +// CHECK-LLVM: bitcast i8* %{{[0-9]+}} to <2 x i16>* + +private short2 *testGenericCastToPtrExplicitPrivate(generic short2 *a) { + return to_private(a); +} diff --git a/llvm-spirv/test/transcoding/OpVectorExtractDynamic.ll b/llvm-spirv/test/transcoding/OpVectorExtractDynamic.ll new file mode 100644 index 0000000000000..b39ebad02975b --- /dev/null +++ b/llvm-spirv/test/transcoding/OpVectorExtractDynamic.ll @@ -0,0 +1,45 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o %t.txt +; RUN: FileCheck < %t.txt %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-LLVM: extractelement <2 x float> %vec, i32 %index + +; CHECK-SPIRV: 3 Name [[vec:[0-9]+]] "vec" +; CHECK-SPIRV: 4 Name [[index:[0-9]+]] "index" +; CHECK-SPIRV: 3 Name [[res:[0-9]+]] "res" + +; CHECK-SPIRV: 3 TypeFloat [[float:[0-9]+]] 32 +; CHECK-SPIRV: 4 TypeVector [[float2:[0-9]+]] [[float]] 2 + +; CHECK-SPIRV: 5 VectorExtractDynamic [[float]] [[res]] [[vec]] [[index]] + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; Function Attrs: nounwind +define spir_kernel void @test(float addrspace(1)* nocapture %out, <2 x float> %vec, i32 %index) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { +entry: + %res = extractelement <2 x float> %vec, i32 %index + store float %res, float addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind } + +!opencl.enable.FP_CONTRACT = !{} +!opencl.spir.version = !{!6} +!opencl.ocl.version = !{!7} +!opencl.used.extensions = !{!8} +!opencl.used.optional.core.features = !{!8} + +!1 = !{i32 1, i32 0} +!2 = !{!"none", !"none", !"none"} +!3 = !{!"float*", !"float2", !"int"} +!4 = !{!"float*", !"float2", !"int"} +!5 = !{!"", !"", !""} +!6 = !{i32 1, i32 2} +!7 = !{i32 2, i32 0} +!8 = !{} diff --git a/llvm-spirv/test/transcoding/atomics_int64.spt b/llvm-spirv/test/transcoding/atomics_int64.spt new file mode 100644 index 0000000000000..f6b310aeaa131 --- /dev/null +++ b/llvm-spirv/test/transcoding/atomics_int64.spt @@ -0,0 +1,83 @@ +119734787 65536 393230 34 0 +2 Capability Addresses +2 Capability Linkage +2 Capability Kernel +2 Capability Int64 +2 Capability Int64Atomics +2 Capability GenericPointer +5 ExtInstImport 1 "OpenCL.std" +3 MemoryModel 1 2 +3 Source 3 200000 +5 Decorate 8 LinkageAttributes "foo" Export +4 TypeInt 3 64 0 +4 TypeInt 5 32 0 +4 Constant 5 17 1 +4 Constant 5 18 16 +2 TypeVoid 2 +4 TypePointer 4 8 3 +4 TypePointer 6 8 5 +9 TypeFunction 7 2 4 4 6 3 3 3 +2 TypeBool 32 + + +5 Function 2 8 0 7 +3 FunctionParameter 4 9 +3 FunctionParameter 4 10 +3 FunctionParameter 6 11 +3 FunctionParameter 3 12 +3 FunctionParameter 3 13 +3 FunctionParameter 3 14 + +2 Label 15 +6 AtomicLoad 3 16 9 17 18 +5 AtomicStore 9 17 18 12 +7 AtomicExchange 3 19 9 17 18 12 +9 AtomicCompareExchange 3 20 9 17 18 18 12 14 +6 AtomicIIncrement 3 21 9 17 18 +6 AtomicIDecrement 3 22 9 17 18 +7 AtomicIAdd 3 23 9 17 18 12 +7 AtomicISub 3 24 9 17 18 12 +7 AtomicSMin 3 25 9 17 18 12 +7 AtomicUMin 3 26 10 17 18 13 +7 AtomicSMax 3 27 9 17 18 12 +7 AtomicUMax 3 28 10 17 18 13 +7 AtomicAnd 3 29 9 17 18 12 +7 AtomicOr 3 30 9 17 18 12 +7 AtomicXor 3 31 9 17 18 12 +6 AtomicFlagTestAndSet 32 33 11 17 18 +4 AtomicFlagClear 11 17 18 +1 Return + +1 FunctionEnd + +; RUN: llvm-spirv --to-binary %s -o %t.spv +; RUN: spirv-val %t.spv +; RUN: llvm-spirv --spirv-ocl-builtins-version=CL1.2 -r %t.spv -o - | llvm-dis -o %t.ll +; RUN: FileCheck %s < %t.ll + +; OpAtomicLoad is emulated via atom_add(*p, 0) +; CHECK: call spir_func i64 @_Z8atom_add + +; OpAtomicStore is emulated via atom_xchg(*p, val) +; CHECK: call spir_func i64 @_Z9atom_xchg + +; CHECK: call spir_func i64 @_Z9atom_xchg +; CHECK: call spir_func i64 @_Z12atom_cmpxchg +; CHECK: call spir_func i64 @_Z8atom_inc +; CHECK: call spir_func i64 @_Z8atom_dec +; CHECK: call spir_func i64 @_Z8atom_add +; CHECK: call spir_func i64 @_Z8atom_sub +; CHECK: call spir_func i64 @_Z8atom_min +; CHECK: call spir_func i64 @_Z8atom_min +; CHECK: call spir_func i64 @_Z8atom_max +; CHECK: call spir_func i64 @_Z8atom_max +; CHECK: call spir_func i64 @_Z8atom_and +; CHECK: call spir_func i64 @_Z7atom_or +; CHECK: call spir_func i64 @_Z8atom_xor + +; OpAtomicFlagTestAndSet is emulated via atomic_xchg(*p, 1) +; CHECK: call spir_func i32 @_Z11atomic_xchg + +; OpAtomicFlagClear is emulated via atomic_xchg(*p, 0) +; CHECK: call spir_func i32 @_Z11atomic_xchg + diff --git a/llvm-spirv/test/transcoding/cl_intel_sub_groups.ll b/llvm-spirv/test/transcoding/cl_intel_sub_groups.ll index e27faad6ddb77..ae470bdb458ee 100644 --- a/llvm-spirv/test/transcoding/cl_intel_sub_groups.ll +++ b/llvm-spirv/test/transcoding/cl_intel_sub_groups.ll @@ -20,14 +20,15 @@ ;} ; RUN: llvm-as %s -o %t.bc -; RUN: llvm-spirv %t.bc -o - -spirv-text | FileCheck %s --check-prefix=CHECK-SPIRV -; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: llvm-spirv %t.bc -o - -spirv-text --spirv-ext=+SPV_INTEL_subgroups | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.bc -o %t.spv --spirv-ext=+SPV_INTEL_subgroups ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc ; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM ; CHECK-SPIRV: Capability SubgroupShuffleINTEL ; CHECK-SPIRV: Capability SubgroupBufferBlockIOINTEL ; CHECK-SPIRV: Capability SubgroupImageBlockIOINTEL +; CHECK-SPIRV: Extension "SPV_INTEL_subgroups" ; CHECK-SPIRV: Extension "cl_intel_subgroups" ; CHECK-SPIRV: Extension "cl_intel_subgroups_short" diff --git a/llvm-spirv/test/transcoding/enqueue_kernel.cl b/llvm-spirv/test/transcoding/enqueue_kernel.cl index aaa539e5f1089..7394366302cc2 100644 --- a/llvm-spirv/test/transcoding/enqueue_kernel.cl +++ b/llvm-spirv/test/transcoding/enqueue_kernel.cl @@ -10,6 +10,7 @@ // CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel" // CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer3:[0-9]+]] "__device_side_enqueue_block_invoke_3_kernel" // CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer4:[0-9]+]] "__device_side_enqueue_block_invoke_4_kernel" +// CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer5:[0-9]+]] "__device_side_enqueue_block_invoke_5_kernel" // CHECK-SPIRV: Name [[BlockGlb1:[0-9]+]] "__block_literal_global" // CHECK-SPIRV: Name [[BlockGlb2:[0-9]+]] "__block_literal_global.1" @@ -41,6 +42,7 @@ // CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 typedef struct {int a;} ndrange_t; +#define NULL ((void*)0) kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { queue_t default_queue; @@ -60,7 +62,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic* // CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)* // CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)* - // CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) + // CHECK-LLVM: call i32 @__enqueue_kernel_basic(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) enqueue_kernel(default_queue, flags, ndrange, ^(void) { a[i] = c0; @@ -121,20 +123,40 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-LLVM: [[Block1Tmp:%[0-9]+]] = bitcast [[BlockTy1]] addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)* // CHECK-LLVM: [[Block1:%[0-9]+]] = addrspacecast i8 addrspace(1)* [[Block1Tmp]] to i8 addrspace(4)* // CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)* - // CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}}) + // CHECK-LLVM: call i32 @__enqueue_kernel_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}}) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p1, local void *p2, local void *p3) { return; }, 1, 2, 4); + + // Emits block literal on stack and block kernel. + + // CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]] + + // CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit2:[0-9]+]] + // CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} + // [[ConstInt0]] [[EventNull]] [[Event1]] + // [[BlockKer5]] [[BlockLit5]] [[ConstInt20]] [[ConstInt8]] + + // CHECK-LLVM: [[Block5:%[0-9]+]] = bitcast [[BlockTy3]]* %block14 to %struct.__opencl_block_literal_generic* + // CHECK-LLVM: [[Block5Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block5]] to i8 addrspace(4) + // CHECK-LLVM: [[BlockInv5:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_5_kernel to i8 addrspace(4)* + // CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv5]], i8 addrspace(4)* [[Block5Ptr]]) + enqueue_kernel(default_queue, flags, ndrange, 0, NULL, &clk_event, + ^(void) { + a[i] = b[i]; + }); } // CHECK-SPIRV-DAG: Function [[VoidTy]] [[BlockKer1]] 0 [[BlockTy1]] // CHECK-SPIRV-DAG: Function [[VoidTy]] [[BlockKer2]] 0 [[BlockTy1]] // CHECK-SPIRV-DAG: Function [[VoidTy]] [[BlockKer3]] 0 [[BlockTy2]] // CHECK-SPIRV-DAG: Function [[VoidTy]] [[BlockKer4]] 0 [[BlockTy3]] +// CHECK-SPIRV-DAG: Function [[VoidTy]] [[BlockKer5]] 0 [[BlockTy1]] // CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_kernel(i8 addrspace(4)*{{.*}}) // CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_2_kernel(i8 addrspace(4)*{{.*}}) // CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}, i8 addrspace(3)*{{.*}}, i8 addrspace(3)*{{.*}}) +// CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_5_kernel(i8 addrspace(4)*{{.*}}) diff --git a/llvm-spirv/test/vector_times_scalar.spt b/llvm-spirv/test/vector_times_scalar.spt index a0fcbe1f020ff..4ad2134da6dcb 100644 --- a/llvm-spirv/test/vector_times_scalar.spt +++ b/llvm-spirv/test/vector_times_scalar.spt @@ -49,9 +49,6 @@ 1 FunctionEnd -; FIXME: LIT comments/commands are moved at the end because llvm-spirv stops -; reading the file after first ';' symbol - ; RUN: llvm-spirv %s -to-binary -o %t.spv ; RUN: spirv-val %t.spv ; RUN: llvm-spirv -r %t.spv -o %t.bc diff --git a/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp b/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp index 335255b9977e6..f6a97501c7fe6 100644 --- a/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp +++ b/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp @@ -111,6 +111,11 @@ static cl::list cl::value_desc("+SPV_extenstion1_name,-SPV_extension2_name"), cl::ValueRequired); +static cl::opt SPIRVGenKernelArgNameMD( + "spirv-gen-kernel-arg-name-md", cl::init(false), + cl::desc("Enable generating OpenCL kernel argument name " + "metadata")); + using SPIRV::ExtensionID; #ifdef _SPIRV_SUPPORT_TEXT_FMT @@ -236,6 +241,7 @@ static int convertSPIRV() { } return 0; }; + if (OutputFile == "-") return Action(std::cout); @@ -360,7 +366,8 @@ int main(int Ac, char **Av) { if (0 != Ret) return Ret; - SPIRV::TranslatorOpts Opts(MaxSPIRVVersion, ExtensionsStatus); + SPIRV::TranslatorOpts Opts(MaxSPIRVVersion, ExtensionsStatus, + SPIRVGenKernelArgNameMD); #ifdef _SPIRV_SUPPORT_TEXT_FMT if (ToText && (ToBinary || IsReverse || IsRegularization)) { diff --git a/sycl/include/CL/__spirv/spirv_vars.hpp b/sycl/include/CL/__spirv/spirv_vars.hpp index a7415b3ce59d6..51ca2e8fcc37e 100644 --- a/sycl/include/CL/__spirv/spirv_vars.hpp +++ b/sycl/include/CL/__spirv/spirv_vars.hpp @@ -55,12 +55,12 @@ extern "C" const __attribute__((ocl_constant)) uint32_t __spirv_BuiltInSubgroupL }; \ \ template struct InitSizesST##POSTFIX<2, DstT> { \ - static DstT initSize() { return {get##POSTFIX<0>(), get##POSTFIX<1>()}; } \ + static DstT initSize() { return {get##POSTFIX<1>(), get##POSTFIX<0>()}; } \ }; \ \ template struct InitSizesST##POSTFIX<3, DstT> { \ static DstT initSize() { \ - return {get##POSTFIX<0>(), get##POSTFIX<1>(), get##POSTFIX<2>()}; \ + return {get##POSTFIX<2>(), get##POSTFIX<1>(), get##POSTFIX<0>()}; \ } \ }; \ \ diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index b52392ad1c119..8155af774f8c5 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -185,19 +185,12 @@ class HostKernel : public HostKernelBase { template typename std::enable_if>::value>::type runOnHost(const NDRDescT &NDRDesc) { - size_t XYZ[3] = {0}; - sycl::id ID; - for (; XYZ[2] < NDRDesc.GlobalSize[2]; ++XYZ[2]) { - XYZ[1] = 0; - for (; XYZ[1] < NDRDesc.GlobalSize[1]; ++XYZ[1]) { - XYZ[0] = 0; - for (; XYZ[0] < NDRDesc.GlobalSize[0]; ++XYZ[0]) { - for (int I = 0; I < Dims; ++I) - ID[I] = XYZ[I]; - MKernel(ID); - } - } - } + sycl::range Range(InitializedVal::template get<0>()); + for (int I = 0; I < Dims; ++I) + Range[I] = NDRDesc.GlobalSize[I]; + + detail::NDLoop::iterate( + Range, [&](const sycl::id &ID) { MKernel(ID); }); } template @@ -210,20 +203,11 @@ class HostKernel : public HostKernelBase { for (int I = 0; I < Dims; ++I) Range[I] = NDRDesc.GlobalSize[I]; - for (; XYZ[2] < NDRDesc.GlobalSize[2]; ++XYZ[2]) { - XYZ[1] = 0; - for (; XYZ[1] < NDRDesc.GlobalSize[1]; ++XYZ[1]) { - XYZ[0] = 0; - for (; XYZ[0] < NDRDesc.GlobalSize[0]; ++XYZ[0]) { - for (int I = 0; I < Dims; ++I) - ID[I] = XYZ[I]; - - sycl::item Item = - IDBuilder::createItem(Range, ID); - MKernel(Item); - } - } - } + detail::NDLoop::iterate(Range, [&](const sycl::id ID) { + sycl::item Item = + IDBuilder::createItem(Range, ID); + MKernel(Item); + }); } template @@ -236,22 +220,13 @@ class HostKernel : public HostKernelBase { Range[I] = NDRDesc.GlobalSize[I]; Offset[I] = NDRDesc.GlobalOffset[I]; } - size_t XYZ[3] = {0}; - sycl::id ID; - for (; XYZ[2] < NDRDesc.GlobalSize[2]; ++XYZ[2]) { - XYZ[1] = 0; - for (; XYZ[1] < NDRDesc.GlobalSize[1]; ++XYZ[1]) { - XYZ[0] = 0; - for (; XYZ[0] < NDRDesc.GlobalSize[0]; ++XYZ[0]) { - for (int I = 0; I < Dims; ++I) - ID[I] = XYZ[I] + Offset[I]; - - sycl::item Item = - IDBuilder::createItem(Range, ID, Offset); - MKernel(Item); - } - } - } + + detail::NDLoop::iterate(Range, [&](const sycl::id &ID) { + sycl::id OffsetID = ID + Offset; + sycl::item Item = + IDBuilder::createItem(Range, OffsetID, Offset); + MKernel(Item); + }); } template diff --git a/sycl/include/CL/sycl/detail/common.hpp b/sycl/include/CL/sycl/detail/common.hpp index 54e6f59cdf339..7db45cebb8756 100644 --- a/sycl/include/CL/sycl/detail/common.hpp +++ b/sycl/include/CL/sycl/detail/common.hpp @@ -158,9 +158,9 @@ struct NDLoopIterateImpl { const LoopBoundTy &Stride, const LoopBoundTy &UpperBound, FuncTy f, LoopIndexTy &Index) { - - for (Index[DIM] = LowerBound[DIM]; Index[DIM] < UpperBound[DIM]; - Index[DIM] += Stride[DIM]) { + constexpr size_t AdjIdx = NDIMS - 1 - DIM; + for (Index[AdjIdx] = LowerBound[AdjIdx]; Index[AdjIdx] < UpperBound[AdjIdx]; + Index[AdjIdx] += Stride[AdjIdx]) { NDLoopIterateImpl{ LowerBound, Stride, UpperBound, f, Index}; @@ -177,8 +177,9 @@ struct NDLoopIterateImpl { const LoopBoundTy &UpperBound, FuncTy f, LoopIndexTy &Index) { - for (Index[0] = LowerBound[0]; Index[0] < UpperBound[0]; - Index[0] += Stride[0]) { + constexpr size_t AdjIdx = NDIMS - 1; + for (Index[AdjIdx] = LowerBound[AdjIdx]; Index[AdjIdx] < UpperBound[AdjIdx]; + Index[AdjIdx] += Stride[AdjIdx]) { f(Index); } @@ -190,6 +191,7 @@ struct NDLoopIterateImpl { /// over a multi-dimensional space - it allows to avoid generating unnecessary /// outer loops like 'for (int z=0; z<1; z++)' in case of 1D and 2D iteration /// spaces or writing specializations of the algorithms for 1D, 2D and 3D cases. +/// Loop is unrolled in a reverse directions, i.e. ID = 0 is the inner-most one. template struct NDLoop { /// Generates ND loop nest with {0,..0} .. \c UpperBound bounds with unit /// stride. Applies \c f at each iteration, passing current index of diff --git a/sycl/include/CL/sycl/id.hpp b/sycl/include/CL/sycl/id.hpp index fa9cffc43654a..ec400e9cd046e 100644 --- a/sycl/include/CL/sycl/id.hpp +++ b/sycl/include/CL/sycl/id.hpp @@ -21,7 +21,7 @@ template class id : public detail::array { private: using base = detail::array; static_assert(dimensions >= 1 && dimensions <= 3, - "id can only be 1, 2, or 3 dimentional."); + "id can only be 1, 2, or 3 dimensional."); template using ParamTy = detail::enable_if_t<(N == val), T>; diff --git a/sycl/include/CL/sycl/range.hpp b/sycl/include/CL/sycl/range.hpp index bf00d1b0e05a2..8f59c6c94212d 100644 --- a/sycl/include/CL/sycl/range.hpp +++ b/sycl/include/CL/sycl/range.hpp @@ -17,7 +17,7 @@ namespace sycl { template class id; template class range : public detail::array { static_assert(dimensions >= 1 && dimensions <= 3, - "range can only be 1, 2, or 3 dimentional."); + "range can only be 1, 2, or 3 dimensional."); using base = detail::array; public: diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 41a924e4202fe..91d1c38d9fb6b 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -578,6 +578,23 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel, NDR.set(NDR.Dims, nd_range<3>(NDR.NumWorkGroups * WGSize, WGSize)); } +// We have the following mapping between dimensions with SPIRV builtins: +// 1D: id[0] -> x +// 2D: id[0] -> y, id[1] -> x +// 3D: id[0] -> z, id[1] -> y, id[2] -> x +// So in order to ensure the correctness we update all the kernel +// parameters accordingly. +// Initially we keep the order of NDRDescT as it provided by the user, this +// simplifies overall handling and do the reverse only when +// the kernel is enqueued. +static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) { + if (NDR.Dims > 1) { + std::swap(NDR.GlobalSize[0], NDR.GlobalSize[NDR.Dims - 1]); + std::swap(NDR.LocalSize[0], NDR.LocalSize[NDR.Dims - 1]); + std::swap(NDR.GlobalOffset[0], NDR.GlobalOffset[NDR.Dims - 1]); + } +} + // The function initialize accessors and calls lambda. // The function is used as argument to piEnqueueNativeKernel which requires // that the passed function takes one void* argument. @@ -803,10 +820,15 @@ cl_int ExecCGCommand::enqueueImp() { getSyclObjImpl(Context)->getUSMDispatch(); USMDispatch->setKernelIndirectAccess(Kernel, MQueue->getHandleRef()); + // Remember this information before the range dimensions are reversed + const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); + + ReverseRangeDimensionsForKernel(NDRDesc); + PI_CALL(RT::piEnqueueKernelLaunch( MQueue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], - NDRDesc.LocalSize[0] ? &NDRDesc.LocalSize[0] : nullptr, + HasLocalSize ? &NDRDesc.LocalSize[0] : nullptr, RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], &Event)); diff --git a/sycl/test/linear_id/linear-host-dev.cpp b/sycl/test/linear_id/linear-host-dev.cpp new file mode 100644 index 0000000000000..89437da135569 --- /dev/null +++ b/sycl/test/linear_id/linear-host-dev.cpp @@ -0,0 +1,50 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out | FileCheck %s +//==--------------- linear-host-dev.cpp - SYCL linear id test -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +// Check that linear id is monotincally increased on host device. +// Only there we can reliable check that. Since the kernel has a restriction +// regarding usage of global variables, use stream to log the linear id +// and ensure that they're monotonically increased. +// +// Note: This test heavily relies on the current implementation of +// host device(single-threaded ordered executio). So if the implementation +// is somehow changed so it's no longer possible to run this test reliable +// it can be removed. + +namespace s = cl::sycl; + +int main(int argc, char *argv[]) { + s::queue q; + + const size_t outer = 3; + const size_t inner = 2; + const s::range<2> rng = {outer, inner}; + + q.submit([&](s::handler &h) { + s::stream out(1024, 80, h); + + h.parallel_for(s::range<2>(rng), [=](s::item<2> item) { + // CHECK: 0 + // CHECK-NEXT: 1 + // CHECK-NEXT: 2 + // CHECK-NEXT: 3 + // CHECK-NEXT: 4 + // CHECK-NEXT: 5 + out << item.get_linear_id() << "\n"; + }); + }); + + return 0; +} diff --git a/sycl/test/linear_id/linear-sub_group.cpp b/sycl/test/linear_id/linear-sub_group.cpp new file mode 100644 index 0000000000000..ba50b0e592777 --- /dev/null +++ b/sycl/test/linear_id/linear-sub_group.cpp @@ -0,0 +1,54 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==--------------- linear-sub_group.cpp - SYCL linear id test -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../sub_group/helper.hpp" +#include +#include +#include +#include + +using namespace cl::sycl; + +int main(int argc, char *argv[]) { + queue q; + if (!core_sg_supported(q.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + + // Fill output array with sub-group IDs + const uint32_t outer = 2; + const uint32_t inner = 8; + std::vector output(outer * inner, 0); + { + buffer output_buf(output.data(), range<2>(outer, inner)); + q.submit([&](handler &cgh) { + auto output = output_buf.get_access(cgh); + cgh.parallel_for( + nd_range<2>(range<2>(outer, inner), range<2>(outer, inner)), + [=](nd_item<2> it) { + id<2> idx = it.get_global_id(); + intel::sub_group sg = it.get_sub_group(); + output[idx] = sg.get_group_id()[0] * sg.get_local_range()[0] + + sg.get_local_id()[0]; + }); + }); + } + + // Compare with expected result + for (int idx = 0; idx < outer * inner; ++idx) { + assert(output[idx] == idx); + } + std::cout << "Test passed.\n"; + return 0; +} diff --git a/sycl/test/linear_id/opencl-interop.cpp b/sycl/test/linear_id/opencl-interop.cpp new file mode 100644 index 0000000000000..ea9d6620f730c --- /dev/null +++ b/sycl/test/linear_id/opencl-interop.cpp @@ -0,0 +1,74 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==---------------- opencl-interop.cpp - SYCL linear id test --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +using namespace cl::sycl; + +int main(int argc, char *argv[]) { + queue q; + if (q.is_host()) { + std::cout << "Skipping test\n"; + return 0; + } + + // Compute expected answer. + const uint32_t dimA = 2; + const uint32_t dimB = 8; + std::vector input(dimA * dimB), output(dimA), expected(dimA); + std::iota(input.begin(), input.end(), 0); + for (int j = 0; j < dimA; ++j) { + int sum = 0; + for (int i = 0; i < dimB; ++i) { + sum += input[j * dimB + i]; + } + expected[j] = sum; + } + + // Compute sum using one work-group per element of dimA + program prog(q.get_context(), {q.get_device()}); + prog.build_with_source("__kernel void sum(__global const int* input, " + "__global int* output, const int dimA, const int dimB)" + "{" + " int j = get_global_id(1);" + " int i = get_global_id(0);" + " int sum = work_group_reduce_add(input[j*dimB+i]);" + " if (get_local_id(0) == 0)" + " {" + " output[j] = sum;" + " }" + "}", + "-cl-std=CL2.0"); + kernel sum = prog.get_kernel("sum"); + { + buffer input_buf(input.data(), range<2>(dimA, dimB)), + output_buf(output.data(), range<2>(dimA, dimB)); + q.submit([&](handler &cgh) { + auto input = input_buf.get_access(cgh); + auto output = output_buf.get_access(cgh); + cgh.set_args(input, output, dimA, dimB); + cgh.parallel_for(nd_range<2>(range<2>(dimA, dimB), range<2>(1, dimB)), + sum); + }); + } + + // Compare with expected result + for (int j = 0; j < dimA; ++j) { + assert(output[j] == expected[j]); + } + std::cout << "Test passed.\n"; + return 0; +}