diff --git a/CMakeLists.txt b/CMakeLists.txt
index f1024242f516..0b4867f88530 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -224,6 +224,7 @@ set(LLVM_ALL_TARGETS
   ARM
   BPF
   Hexagon
+  JSBackend # @LOCALMOD
   Mips
   MSP430
   NVPTX
diff --git a/emscripten-version.txt b/emscripten-version.txt
new file mode 100644
index 000000000000..d873f1145a20
--- /dev/null
+++ b/emscripten-version.txt
@@ -0,0 +1,2 @@
+"1.36.9"
+
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index b98f8407d075..4f0805518228 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -78,6 +78,7 @@ class Triple {
     nvptx64,    // NVPTX: 64-bit
     le32,       // le32: generic little-endian 32-bit CPU (PNaCl)
     le64,       // le64: generic little-endian 64-bit CPU (PNaCl)
+    asmjs,      // asm.js JavaScript subset @LOCALMOD Emscripten
     amdil,      // AMDIL
     amdil64,    // AMDIL with 64-bit pointers
     hsail,      // AMD HSAIL
@@ -156,6 +157,7 @@ class Triple {
     Haiku,
     Minix,
     RTEMS,
+    Emscripten, // Emscripten JavaScript runtime @LOCALMOD Emscripten
     NaCl,       // Native Client
     CNK,        // BG/P Compute-Node Kernel
     Bitrig,
@@ -531,6 +533,13 @@ class Triple {
     return getOS() == Triple::NaCl;
   }
 
+  // @LOCALMOD-START Emscripten
+  /// Tests whether the OS is Emscripten.
+  bool isOSEmscripten() const {
+    return getOS() == Triple::Emscripten;
+  }
+  // @LOCALMOD-END Emscripten
+
   /// Tests whether the OS is Linux.
   bool isOSLinux() const {
     return getOS() == Triple::Linux;
diff --git a/include/llvm/Analysis/NaCl.h b/include/llvm/Analysis/NaCl.h
new file mode 100644
index 000000000000..eb894ef9b64a
--- /dev/null
+++ b/include/llvm/Analysis/NaCl.h
@@ -0,0 +1,74 @@
+//===-- NaCl.h - NaCl Analysis ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_NACL_H
+#define LLVM_ANALYSIS_NACL_H
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+namespace llvm {
+
+class FunctionPass;
+class ModulePass;
+extern cl::opt<bool> PNaClABIAllowDebugMetadata;
+
+class PNaClABIErrorReporter {
+  PNaClABIErrorReporter(const PNaClABIErrorReporter&) = delete;
+  void operator=(const PNaClABIErrorReporter&) = delete;
+ public:
+  PNaClABIErrorReporter() : ErrorCount(0), Errors(ErrorString),
+                            UseFatalErrors(true) {}
+  ~PNaClABIErrorReporter() {}
+  // Return the number of verification errors from the last run.
+  int getErrorCount() const { return ErrorCount; }
+  // Print the error messages to O
+  void printErrors(llvm::raw_ostream &O) {
+    Errors.flush();
+    O << ErrorString;
+  }
+  // Increments the error count and returns an ostream to which the error
+  // message can be streamed.
+  raw_ostream &addError() {
+    ErrorCount++;
+    return Errors;
+  }
+  // Reset the error count and error messages.
+  void reset() {
+    ErrorCount = 0;
+    Errors.flush();
+    ErrorString.clear();
+  }
+  void setNonFatal() {
+    UseFatalErrors = false;
+  }
+  void checkForFatalErrors() {
+    if (UseFatalErrors && ErrorCount != 0) {
+      printErrors(errs());
+      report_fatal_error("PNaCl ABI verification failed");
+    }
+  }
+ private:
+  int ErrorCount;
+  std::string ErrorString;
+  raw_string_ostream Errors;
+  bool UseFatalErrors;
+};
+
+FunctionPass *createPNaClABIVerifyFunctionsPass(
+    PNaClABIErrorReporter *Reporter);
+ModulePass *createPNaClABIVerifyModulePass(PNaClABIErrorReporter *Reporter,
+                                           bool StreamingMode = false);
+
+}
+
+
+#endif
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 5ece731fa143..bbe1ee1f5410 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -651,6 +651,56 @@ def int_convertuu  : Intrinsic<[llvm_anyint_ty],
 def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
                                 [], "llvm.clear_cache">;
 
+// @LOCALMOD-BEGIN
+//===----------------------- Native Client Intrinsics ---------------------===//
+// NaCl-specific setjmp/longjmp intrinsics.
+// See https://code.google.com/p/nativeclient/issues/detail?id=3429
+def int_nacl_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
+def int_nacl_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
+                                 [IntrNoReturn]>;
+
+// Fast built-in version of NaCl's tls_get() IRT interface.
+def int_nacl_read_tp : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
+
+// Atomic intrinsics.
+//
+// Volatiles and atomics are encoded through these intrinsics to make
+// them platform-independent, remove some of LLVM's legacy, and isolate
+// PNaCl from future changes to IR. The intrinsics allow user code to
+// use `__sync_*` builtins as well as C11/C++11 atomics.
+//
+// These are further documented in docs/PNaClLangRef.rst.
+//
+// Note that IntrReadWriteArgMem is used in all cases to prevent
+// reordering.
+def int_nacl_atomic_load : Intrinsic<[llvm_anyint_ty],
+        [LLVMPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+        [IntrArgMemOnly]>;
+def int_nacl_atomic_store : Intrinsic<[],
+        [llvm_anyint_ty, LLVMPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+        [IntrArgMemOnly]>;
+def int_nacl_atomic_rmw : Intrinsic<[llvm_anyint_ty],
+        [llvm_i32_ty, LLVMPointerType<LLVMMatchType<0>>, LLVMMatchType<0>,
+         llvm_i32_ty],
+        [IntrArgMemOnly]>;
+def int_nacl_atomic_cmpxchg : Intrinsic<[llvm_anyint_ty],
+        [LLVMPointerType<LLVMMatchType<0>>, LLVMMatchType<0>, LLVMMatchType<0>,
+         llvm_i32_ty, llvm_i32_ty],
+        [IntrArgMemOnly]>;
+def int_nacl_atomic_fence : Intrinsic<[], [llvm_i32_ty],
+        [IntrArgMemOnly]>;
+def int_nacl_atomic_fence_all : Intrinsic<[], [],
+        [IntrArgMemOnly]>;
+def int_nacl_atomic_is_lock_free : Intrinsic<[llvm_i1_ty],
+        [llvm_i32_ty, llvm_ptr_ty], [IntrNoMem]>,
+        GCCBuiltin<"__nacl_atomic_is_lock_free">;
+// @LOCALMOD-END
+// Calculate the Absolute Differences of the two input vectors.
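As an aside (illustrative sketch, not part of the patch): once the nacl_atomic_* definitions above are generated into the Intrinsic namespace, a pass can materialize them through the usual Intrinsic::getDeclaration path. A minimal sketch, assuming an i32* pointer operand and the memory-order encoding introduced later in this patch (6 = sequentially consistent); the helper name is hypothetical:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Hypothetical helper: emit a seq_cst @llvm.nacl.atomic.load.i32 at the
    // builder's insertion point. Ptr must have type i32*.
    static Value *emitNaClAtomicLoadI32(IRBuilder<> &Builder, Value *Ptr) {
      Module *M = Builder.GetInsertBlock()->getModule();
      // The intrinsic is overloaded on the loaded integer type.
      Function *Decl = Intrinsic::getDeclaration(
          M, Intrinsic::nacl_atomic_load, Builder.getInt32Ty());
      // Second operand is the C11/C++11 memory order (see the MemoryOrder
      // enum added in NaClAtomicIntrinsics.h below).
      return Builder.CreateCall(Decl, {Ptr, Builder.getInt32(6)});
    }

The absolute-difference intrinsic definitions that the comment above introduces continue immediately below.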
+def int_sabsdiff : Intrinsic<[llvm_anyvector_ty],
+                        [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
+def int_uabsdiff : Intrinsic<[llvm_anyvector_ty],
+                        [ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
+
 //===-------------------------- Masked Intrinsics -------------------------===//
 //
 def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
diff --git a/include/llvm/IR/NaClAtomicIntrinsics.h b/include/llvm/IR/NaClAtomicIntrinsics.h
new file mode 100644
index 000000000000..e820b9df504d
--- /dev/null
+++ b/include/llvm/IR/NaClAtomicIntrinsics.h
@@ -0,0 +1,110 @@
+//===-- llvm/IR/NaClAtomicIntrinsics.h - NaCl Atomic Intrinsics -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes atomic intrinsic functions that are specific to NaCl.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_NACL_ATOMIC_INTRINSICS_H
+#define LLVM_IR_NACL_ATOMIC_INTRINSICS_H
+
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Compiler.h"
+#include <cstddef>
+
+namespace llvm {
+
+namespace NaCl {
+
+static const size_t NumAtomicIntrinsics = 6;
+static const size_t NumAtomicIntrinsicOverloadTypes = 4;
+static const size_t MaxAtomicIntrinsicsParameters = 5;
+
+/// Describe all the atomic intrinsics and their type signature. Most
+/// can be overloaded on a type.
+class AtomicIntrinsics {
+public:
+  enum ParamType {
+    NoP, /// No parameter.
+    Int, /// Overloaded.
+    Ptr, /// Overloaded.
+    RMW, /// Atomic RMW operation type.
+    Mem  /// Memory order.
+  };
+
+  struct AtomicIntrinsic {
+    Type *OverloadedType;
+    Intrinsic::ID ID;
+    uint8_t Overloaded : 1;
+    uint8_t NumParams : 7;
+    uint8_t ParamType[MaxAtomicIntrinsicsParameters];
+
+    Function *getDeclaration(Module *M) const {
+      // The atomic intrinsic can be overloaded on zero or one type,
+      // which is needed to create the function's declaration.
+      return Intrinsic::getDeclaration(
+          M, ID, ArrayRef<Type *>(&OverloadedType, Overloaded ? 1 : 0));
+    }
+  };
+
+  AtomicIntrinsics(LLVMContext &C);
+  ~AtomicIntrinsics() {}
+
+  typedef ArrayRef<AtomicIntrinsic> View;
+
+  /// Access all atomic intrinsics, which can then be iterated over.
+  View allIntrinsicsAndOverloads() const;
+  /// Access a particular atomic intrinsic.
+  /// \returns 0 if no intrinsic was found.
+  const AtomicIntrinsic *find(Intrinsic::ID ID, Type *OverloadedType) const;
+
+private:
+  AtomicIntrinsic I[NumAtomicIntrinsics][NumAtomicIntrinsicOverloadTypes];
+
+  AtomicIntrinsics() = delete;
+  AtomicIntrinsics(const AtomicIntrinsics &) = delete;
+  AtomicIntrinsics &operator=(const AtomicIntrinsics &) = delete;
+};
+
+/// Operations that can be represented by the @llvm.nacl.atomic.rmw
+/// intrinsic.
+///
+/// Do not reorder these values: their order offers forward
+/// compatibility of bitcode targeted to NaCl.
+enum AtomicRMWOperation {
+  AtomicInvalid = 0, // Invalid, keep first.
+  AtomicAdd,
+  AtomicSub,
+  AtomicOr,
+  AtomicAnd,
+  AtomicXor,
+  AtomicExchange,
+  AtomicNum // Invalid, keep last.
+};
+
+/// Memory orderings supported by C11/C++11.
+///
+/// Do not reorder these values: their order offers forward
+/// compatibility of bitcode targeted to NaCl.
+enum MemoryOrder {
+  MemoryOrderInvalid = 0, // Invalid, keep first.
+ MemoryOrderRelaxed, + MemoryOrderConsume, + MemoryOrderAcquire, + MemoryOrderRelease, + MemoryOrderAcquireRelease, + MemoryOrderSequentiallyConsistent, + MemoryOrderNum // Invalid, keep last. +}; + +} // End NaCl namespace + +} // End llvm namespace + +#endif diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 90ff82fe86d4..58679e848dac 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -336,6 +336,69 @@ void initializeVirtRegMapPass(PassRegistry&); void initializeVirtRegRewriterPass(PassRegistry&); void initializeWholeProgramDevirtPass(PassRegistry &); void initializeWinEHPreparePass(PassRegistry&); + +// @LOCALMOD-BEGIN +void initializeAddPNaClExternalDeclsPass(PassRegistry&); +void initializeAllocateDataSegmentPass(PassRegistry&); +void initializeBackendCanonicalizePass(PassRegistry&); +void initializeCanonicalizeMemIntrinsicsPass(PassRegistry&); +void initializeCleanupUsedGlobalsMetadataPass(PassRegistry&); +void initializeConstantInsertExtractElementIndexPass(PassRegistry&); +void initializeExpandAllocasPass(PassRegistry&); +void initializeExpandArithWithOverflowPass(PassRegistry&); +void initializeExpandByValPass(PassRegistry&); +void initializeExpandConstantExprPass(PassRegistry&); +void initializeExpandCtorsPass(PassRegistry&); +void initializeExpandGetElementPtrPass(PassRegistry&); +void initializeExpandIndirectBrPass(PassRegistry&); +void initializeExpandLargeIntegersPass(PassRegistry&); +void initializeExpandShuffleVectorPass(PassRegistry&); +void initializeExpandSmallArgumentsPass(PassRegistry&); +void initializeExpandStructRegsPass(PassRegistry&); +void initializeExpandTlsConstantExprPass(PassRegistry&); +void initializeExpandTlsPass(PassRegistry&); +void initializeExpandVarArgsPass(PassRegistry&); +void initializeFixVectorLoadStoreAlignmentPass(PassRegistry&); +void initializeFlattenGlobalsPass(PassRegistry&); +void initializeGlobalCleanupPass(PassRegistry&); +void initializeGlobalizeConstantVectorsPass(PassRegistry&); +void initializeInsertDivideCheckPass(PassRegistry&); +void initializeInternalizeUsedGlobalsPass(PassRegistry&); +void initializeNaClCcRewritePass(PassRegistry&); +void initializeNormalizeAlignmentPass(PassRegistry&); +void initializePNaClABIVerifyFunctionsPass(PassRegistry&); +void initializePNaClABIVerifyModulePass(PassRegistry&); +void initializePNaClSjLjEHPass(PassRegistry&); +void initializePromoteI1OpsPass(PassRegistry&); +void initializePromoteIntegersPass(PassRegistry&); +void initializeRemoveAsmMemoryPass(PassRegistry&); +void initializeRenameEntryPointPass(PassRegistry&); +void initializeReplacePtrsWithIntsPass(PassRegistry&); +void initializeResolveAliasesPass(PassRegistry&); +void initializeResolvePNaClIntrinsicsPass(PassRegistry&); +void initializeRewriteAtomicsPass(PassRegistry&); +void initializeRewriteLLVMIntrinsicsPass(PassRegistry&); +void initializeRewritePNaClLibraryCallsPass(PassRegistry&); +void initializeSandboxIndirectCallsPass(PassRegistry&); +void initializeSandboxMemoryAccessesPass(PassRegistry&); +void initializeSimplifyAllocasPass(PassRegistry&); +void initializeSimplifyStructRegSignaturesPass(PassRegistry&); +void initializeStripAttributesPass(PassRegistry&); +void initializeStripMetadataPass(PassRegistry&); +void initializeStripModuleFlagsPass(PassRegistry&); +void initializeStripDanglingDISubprogramsPass(PassRegistry&); +void initializeStripTlsPass(PassRegistry&); +void initializeSubstituteUndefsPass(PassRegistry&); +// Emscripten passes: +void 
initializeExpandI64Pass(PassRegistry&); +void initializeExpandInsertExtractElementPass(PassRegistry&); +void initializeLowerEmAsyncifyPass(PassRegistry&); +void initializeLowerEmExceptionsPass(PassRegistry&); +void initializeLowerEmSetjmpPass(PassRegistry&); +void initializeNoExitRuntimePass(PassRegistry&); +// Emscripten passes end. +// @LOCALMOD-END + void initializeWriteBitcodePassPass(PassRegistry &); void initializeXRayInstrumentationPass(PassRegistry &); } diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h new file mode 100644 index 000000000000..56884e16a43f --- /dev/null +++ b/include/llvm/Transforms/NaCl.h @@ -0,0 +1,109 @@ +//===-- NaCl.h - NaCl Transformations ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_NACL_H +#define LLVM_TRANSFORMS_NACL_H + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" + +namespace llvm { + +class BasicBlockPass; +class Function; +class FunctionPass; +class FunctionType; +class Instruction; +class ModulePass; +class Triple; +class Use; +class Value; + +BasicBlockPass *createConstantInsertExtractElementIndexPass(); +BasicBlockPass *createExpandGetElementPtrPass(); +BasicBlockPass *createExpandShuffleVectorPass(); +BasicBlockPass *createFixVectorLoadStoreAlignmentPass(); +BasicBlockPass *createPromoteI1OpsPass(); +BasicBlockPass *createSimplifyAllocasPass(); +FunctionPass *createBackendCanonicalizePass(); +FunctionPass *createExpandConstantExprPass(); +FunctionPass *createExpandLargeIntegersPass(); +FunctionPass *createExpandStructRegsPass(); +FunctionPass *createInsertDivideCheckPass(); +FunctionPass *createNormalizeAlignmentPass(); +FunctionPass *createRemoveAsmMemoryPass(); +FunctionPass *createResolvePNaClIntrinsicsPass(); +ModulePass *createAddPNaClExternalDeclsPass(); +ModulePass *createCanonicalizeMemIntrinsicsPass(); +ModulePass *createCleanupUsedGlobalsMetadataPass(); +ModulePass *createExpandArithWithOverflowPass(); +ModulePass *createExpandByValPass(); +ModulePass *createExpandCtorsPass(); +ModulePass *createExpandIndirectBrPass(); +ModulePass *createExpandSmallArgumentsPass(); +ModulePass *createExpandTlsConstantExprPass(); +ModulePass *createExpandTlsPass(); +ModulePass *createExpandVarArgsPass(); +ModulePass *createFlattenGlobalsPass(); +ModulePass *createGlobalCleanupPass(); +ModulePass *createGlobalizeConstantVectorsPass(); +ModulePass *createInternalizeUsedGlobalsPass(); +ModulePass *createPNaClSjLjEHPass(); +ModulePass *createPromoteIntegersPass(); +ModulePass *createReplacePtrsWithIntsPass(); +ModulePass *createResolveAliasesPass(); +ModulePass *createRewriteAtomicsPass(); +ModulePass *createRewriteLLVMIntrinsicsPass(); +ModulePass *createRewritePNaClLibraryCallsPass(); +ModulePass *createSimplifyStructRegSignaturesPass(); +ModulePass *createStripAttributesPass(); +ModulePass *createStripMetadataPass(); +ModulePass *createStripModuleFlagsPass(); +ModulePass *createStripDanglingDISubprogramsPass(); + +// Emscripten passes: +FunctionPass *createExpandInsertExtractElementPass(); +ModulePass *createExpandI64Pass(); +ModulePass *createLowerEmAsyncifyPass(); +ModulePass *createLowerEmExceptionsPass(); +ModulePass *createLowerEmSetjmpPass(); +ModulePass *createNoExitRuntimePass(); +// Emscripten 
passes end. + +//void PNaClABISimplifyAddPreOptPasses(Triple *T, PassManagerBase &PM); +//void PNaClABISimplifyAddPostOptPasses(Triple *T, PassManagerBase &PM); + +Instruction *PhiSafeInsertPt(Use *U); +void PhiSafeReplaceUses(Use *U, Value *NewVal); + +// Copy debug information from Original to New, and return New. +template T *CopyDebug(T *New, Instruction *Original) { + New->setDebugLoc(Original->getDebugLoc()); + return New; +} + +template +static void CopyLoadOrStoreAttrs(InstType *Dest, InstType *Src) { + Dest->setVolatile(Src->isVolatile()); + Dest->setAlignment(Src->getAlignment()); + Dest->setOrdering(Src->getOrdering()); + Dest->setSynchScope(Src->getSynchScope()); +} + +// In order to change a function's type, the function must be +// recreated. RecreateFunction() recreates Func with type NewType. +// It copies or moves across everything except the argument values, +// which the caller must update because the argument types might be +// different. +Function *RecreateFunction(Function *Func, FunctionType *NewType); + +} + +#endif diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index 07cec97084ee..f023a4da498c 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -37,6 +37,7 @@ add_llvm_library(LLVMCore Mangler.cpp Metadata.cpp Module.cpp + NaClAtomicIntrinsics.cpp ModuleSummaryIndex.cpp Operator.cpp OptBisect.cpp diff --git a/lib/IR/NaClAtomicIntrinsics.cpp b/lib/IR/NaClAtomicIntrinsics.cpp new file mode 100644 index 000000000000..8cd18a225b66 --- /dev/null +++ b/lib/IR/NaClAtomicIntrinsics.cpp @@ -0,0 +1,76 @@ +//=== llvm/IR/NaClAtomicIntrinsics.cpp - NaCl Atomic Intrinsics -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes atomic intrinsic functions that are specific to NaCl. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/NaClAtomicIntrinsics.h" +#include "llvm/IR/Type.h" + +namespace llvm { + +namespace NaCl { + +AtomicIntrinsics::AtomicIntrinsics(LLVMContext &C) { + Type *IT[NumAtomicIntrinsicOverloadTypes] = { Type::getInt8Ty(C), + Type::getInt16Ty(C), + Type::getInt32Ty(C), + Type::getInt64Ty(C) }; + size_t CurIntrin = 0; + + // Initialize each of the atomic intrinsics and their overloads. They + // have up to 5 parameters, the following macro will take care of + // overloading. 
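Before the INIT macro below, a brief sketch of how this table is meant to be consumed. The caller here is hypothetical and relies only on the AtomicIntrinsics API declared in NaClAtomicIntrinsics.h above:

    #include "llvm/IR/Module.h"
    #include "llvm/IR/NaClAtomicIntrinsics.h"
    #include "llvm/IR/Type.h"

    using namespace llvm;

    // Hypothetical helper: look up the i32 overload of @llvm.nacl.atomic.rmw
    // in the table and return (declaring if needed) its Function in module M.
    static Function *getNaClAtomicRMWI32(Module &M) {
      LLVMContext &C = M.getContext();
      NaCl::AtomicIntrinsics Table(C);
      const NaCl::AtomicIntrinsics::AtomicIntrinsic *Desc =
          Table.find(Intrinsic::nacl_atomic_rmw, Type::getInt32Ty(C));
      return Desc ? Desc->getDeclaration(&M) : nullptr;
    }

The INIT macro that the comment above refers to follows next; it fills in one row of the I[][] table per intrinsic, across the four integer overload types.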
+#define INIT(P0, P1, P2, P3, P4, INTRIN) \ + do { \ + for (size_t CurType = 0; CurType != NumAtomicIntrinsicOverloadTypes; \ + ++CurType) { \ + size_t Param = 0; \ + I[CurIntrin][CurType].OverloadedType = IT[CurType]; \ + I[CurIntrin][CurType].ID = Intrinsic::nacl_atomic_##INTRIN; \ + I[CurIntrin][CurType].Overloaded = \ + P0 == Int || P0 == Ptr || P1 == Int || P1 == Ptr || P2 == Int || \ + P2 == Ptr || P3 == Int || P3 == Ptr || P4 == Int || P4 == Ptr; \ + I[CurIntrin][CurType].NumParams = \ + (P0 != NoP) + (P1 != NoP) + (P2 != NoP) + (P3 != NoP) + (P4 != NoP); \ + I[CurIntrin][CurType].ParamType[Param++] = P0; \ + I[CurIntrin][CurType].ParamType[Param++] = P1; \ + I[CurIntrin][CurType].ParamType[Param++] = P2; \ + I[CurIntrin][CurType].ParamType[Param++] = P3; \ + I[CurIntrin][CurType].ParamType[Param++] = P4; \ + } \ + ++CurIntrin; \ + } while (0) + + INIT(Ptr, Mem, NoP, NoP, NoP, load); + INIT(Ptr, Int, Mem, NoP, NoP, store); + INIT(RMW, Ptr, Int, Mem, NoP, rmw); + INIT(Ptr, Int, Int, Mem, Mem, cmpxchg); + INIT(Mem, NoP, NoP, NoP, NoP, fence); + INIT(NoP, NoP, NoP, NoP, NoP, fence_all); +} + +AtomicIntrinsics::View AtomicIntrinsics::allIntrinsicsAndOverloads() const { + return View(&I[0][0], NumAtomicIntrinsics * NumAtomicIntrinsicOverloadTypes); +} + +const AtomicIntrinsics::AtomicIntrinsic * +AtomicIntrinsics::find(Intrinsic::ID ID, Type *OverloadedType) const { + View R = allIntrinsicsAndOverloads(); + for (const AtomicIntrinsic *AI = R.begin(), *E = R.end(); AI != E; ++AI) + if (AI->ID == ID && AI->OverloadedType == OverloadedType) + return AI; + return 0; +} + +} // End NaCl namespace + +} // End llvm namespace diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 2bac2a310670..35156b3df139 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -53,6 +53,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case nvptx64: return "nvptx64"; case le32: return "le32"; case le64: return "le64"; + case asmjs: return "asmjs"; // @LOCALMOD Emscripten case amdil: return "amdil"; case amdil64: return "amdil64"; case hsail: return "hsail"; @@ -121,6 +122,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case le32: return "le32"; case le64: return "le64"; + case asmjs: return "asmjs"; // @LOCALMOD Emscripten + case amdil: case amdil64: return "amdil"; @@ -180,6 +183,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case Haiku: return "haiku"; case Minix: return "minix"; case RTEMS: return "rtems"; + case Emscripten: return "emscripten"; // @LOCALMOD Emscripten case NaCl: return "nacl"; case CNK: return "cnk"; case Bitrig: return "bitrig"; @@ -273,6 +277,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("nvptx64", nvptx64) .Case("le32", le32) .Case("le64", le64) + .Case("asmjs", asmjs) // @LOCALMOD Emscripten .Case("amdil", amdil) .Case("amdil64", amdil64) .Case("hsail", hsail) @@ -384,6 +389,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("nvptx64", Triple::nvptx64) .Case("le32", Triple::le32) .Case("le64", Triple::le64) + .Case("asmjs", Triple::asmjs) // @LOCALMOD Emscripten .Case("amdil", Triple::amdil) .Case("amdil64", Triple::amdil64) .Case("hsail", Triple::hsail) @@ -450,6 +456,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("haiku", Triple::Haiku) .StartsWith("minix", Triple::Minix) .StartsWith("rtems", Triple::RTEMS) + .StartsWith("emscripten", Triple::Emscripten) // @LOCALMOD Emscripten .StartsWith("nacl", Triple::NaCl) .StartsWith("cnk", Triple::CNK) .StartsWith("bitrig", 
Triple::Bitrig) @@ -584,6 +591,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::amdil: case Triple::amdil64: case Triple::armeb: + case Triple::asmjs: // @LOCALMOD Emscripten case Triple::avr: case Triple::bpfeb: case Triple::bpfel: @@ -1127,6 +1135,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::armeb: case llvm::Triple::hexagon: case llvm::Triple::le32: + case llvm::Triple::asmjs: // @LOCALMOD Emscripten case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::nvptx: @@ -1207,6 +1216,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::hexagon: case Triple::kalimba: case Triple::le32: + case Triple::asmjs: // @LOCALMOD Emscripten case Triple::mips: case Triple::mipsel: case Triple::nvptx: @@ -1256,6 +1266,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::r600: case Triple::tce: case Triple::xcore: + case Triple::asmjs: // @LOCALMOD Emscripten case Triple::sparcel: case Triple::shave: T.setArch(UnknownArch); @@ -1313,6 +1324,7 @@ Triple Triple::getBigEndianArchVariant() const { case Triple::amdgcn: case Triple::amdil64: case Triple::amdil: + case Triple::asmjs: case Triple::avr: case Triple::hexagon: case Triple::hsail64: @@ -1393,6 +1405,7 @@ bool Triple::isLittleEndian() const { case Triple::amdil64: case Triple::amdil: case Triple::arm: + case Triple::asmjs: case Triple::avr: case Triple::bpfel: case Triple::hexagon: diff --git a/lib/Target/JSBackend/AllocaManager.cpp b/lib/Target/JSBackend/AllocaManager.cpp new file mode 100644 index 000000000000..c8efc1aa376a --- /dev/null +++ b/lib/Target/JSBackend/AllocaManager.cpp @@ -0,0 +1,588 @@ +//===-- AllocaManager.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the AllocaManager class. +// +// The AllocaManager computes a frame layout, assigning every static alloca an +// offset. It does alloca liveness analysis in order to reuse stack memory, +// using lifetime intrinsics. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "allocamanager" +#include "AllocaManager.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumAllocas, "Number of allocas eliminated"); + +// Return the size of the given alloca. +uint64_t AllocaManager::getSize(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + return DL->getTypeAllocSize(AI->getAllocatedType()) * + cast(AI->getArraySize())->getValue().getZExtValue(); +} + +// Return the alignment of the given alloca. 
+unsigned AllocaManager::getAlignment(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + unsigned Alignment = std::max(AI->getAlignment(), + DL->getABITypeAlignment(AI->getAllocatedType())); + MaxAlignment = std::max(Alignment, MaxAlignment); + return Alignment; +} + +AllocaManager::AllocaInfo AllocaManager::getInfo(const AllocaInst *AI, unsigned Index) { + assert(AI->isStaticAlloca()); + return AllocaInfo(AI, getSize(AI), getAlignment(AI), Index); +} + +// Given a lifetime_start or lifetime_end intrinsic, determine if it's +// describing a single pointer suitable for our analysis. If so, +// return the pointer, otherwise return NULL. +const Value * +AllocaManager::getPointerFromIntrinsic(const CallInst *CI) { + const IntrinsicInst *II = cast(CI); + assert(II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end); + + // Lifetime intrinsics have a size as their first argument and a pointer as + // their second argument. + const Value *Size = II->getArgOperand(0); + const Value *Ptr = II->getArgOperand(1); + + // Check to see if we can convert the size to a host integer. If we can't, + // it's probably not worth worrying about. + const ConstantInt *SizeCon = dyn_cast(Size); + if (!SizeCon) return NULL; + const APInt &SizeAP = SizeCon->getValue(); + if (SizeAP.getActiveBits() > 64) return NULL; + uint64_t MarkedSize = SizeAP.getZExtValue(); + + // Test whether the pointer operand is an alloca. This ought to be pretty + // simple, but e.g. PRE can decide to PRE bitcasts and no-op geps and + // split critical edges and insert phis for them, even though it's all + // just no-ops, so we have to dig through phis to see whether all the + // inputs are in fact the same pointer after stripping away casts. + const Value *Result = NULL; + SmallPtrSet VisitedPhis; + SmallVector Worklist; + Worklist.push_back(Ptr); + do { + const Value *P = Worklist.pop_back_val()->stripPointerCasts(); + + if (const PHINode *Phi = dyn_cast(P)) { + if (!VisitedPhis.insert(Phi).second) + continue; + for (unsigned i = 0, e = Phi->getNumOperands(); i < e; ++i) + Worklist.push_back(Phi->getOperand(i)); + continue; + } + if (const SelectInst *Select = dyn_cast(P)) { + Worklist.push_back(Select->getTrueValue()); + Worklist.push_back(Select->getFalseValue()); + continue; + } + + if (Result == NULL) + Result = P; + else if (Result != P) + return NULL; + } while (!Worklist.empty()); + + // If it's a static Alloca, make sure the size is suitable. We test this here + // because if this fails, we need to be as conservative as if we don't know + // what the pointer is. + if (const AllocaInst *AI = dyn_cast(Result)) { + if (AI->isStaticAlloca() && MarkedSize < getSize(AI)) + return NULL; + } else if (isa(Result)) { + // And if it's any other kind of non-object/argument, we have to be + // similarly conservative, because we may be dealing with an escaped alloca + // that we can't see. + return NULL; + } + + // Yay, it's all just one Value! + return Result; +} + +// Test whether the given value is an alloca which we have a hope of +const AllocaInst *AllocaManager::isFavorableAlloca(const Value *V) { + const AllocaInst *AI = dyn_cast(V); + if (!AI) return NULL; + + if (!AI->isStaticAlloca()) return NULL; + + return AI; +} + +int AllocaManager::AllocaSort(const AllocaInfo *li, const AllocaInfo *ri) { + // Sort by alignment to minimize padding. 
+ if (li->getAlignment() > ri->getAlignment()) return -1; + if (li->getAlignment() < ri->getAlignment()) return 1; + + // Ensure a stable sort by comparing an index value which we've kept for + // this purpose. + if (li->getIndex() > ri->getIndex()) return -1; + if (li->getIndex() < ri->getIndex()) return 1; + + return 0; +} + +// Collect allocas +void AllocaManager::collectMarkedAllocas() { + NamedRegionTimer Timer("Collect Marked Allocas", "AllocaManager", + TimePassesIsEnabled); + + // Weird semantics: If an alloca *ever* appears in a lifetime start or end + // within the same function, its lifetime begins only at the explicit lifetime + // starts and ends only at the explicit lifetime ends and function exit + // points. Otherwise, its lifetime begins in the entry block and it is live + // everywhere. + // + // And so, instead of just walking the entry block to find all the static + // allocas, we walk the whole body to find the intrinsics so we can find the + // set of static allocas referenced in the intrinsics. + for (Function::const_iterator FI = F->begin(), FE = F->end(); + FI != FE; ++FI) { + for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart || Callee == LifetimeEnd) { + if (const Value *Ptr = getPointerFromIntrinsic(CI)) { + if (const AllocaInst *AI = isFavorableAlloca(Ptr)) + Allocas.insert(std::make_pair(AI, 0)); + } else if (isa(CI->getArgOperand(1)->stripPointerCasts())) { + // Oh noes, There's a lifetime intrinsics with something that + // doesn't appear to resolve to an alloca. This means that it's + // possible that it may be declaring a lifetime for some escaping + // alloca. Look out! + Allocas.clear(); + assert(AllocasByIndex.empty()); + return; + } + } + } + } + + // All that said, we still want the intrinsics in the order they appear in the + // block, so that we can represent later ones with earlier ones and skip + // worrying about dominance, so run through the entry block and index those + // allocas which we identified above. + AllocasByIndex.reserve(Allocas.size()); + const BasicBlock *EntryBB = &F->getEntryBlock(); + for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end(); + BI != BE; ++BI) { + const AllocaInst *AI = dyn_cast(BI); + if (!AI || !AI->isStaticAlloca()) continue; + + AllocaMap::iterator I = Allocas.find(AI); + if (I != Allocas.end()) { + I->second = AllocasByIndex.size(); + AllocasByIndex.push_back(getInfo(AI, AllocasByIndex.size())); + } + } + assert(AllocasByIndex.size() == Allocas.size()); +} + +// Calculate the starting point from which inter-block liveness will be +// computed. +void AllocaManager::collectBlocks() { + NamedRegionTimer Timer("Collect Blocks", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Seen(AllocaCount); + + for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) { + const BasicBlock *BB = &*I; + + BlockLifetimeInfo &BLI = BlockLiveness[BB]; + BLI.Start.resize(AllocaCount); + BLI.End.resize(AllocaCount); + + // Track which allocas we've seen. This is used because if a lifetime start + // is the first lifetime marker for an alloca in a block, the alloca is + // live-in. + Seen.reset(); + + // Walk the instructions and compute the Start and End sets. 
+ for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart) { + if (const Value *Ptr = getPointerFromIntrinsic(CI)) { + if (const AllocaInst *AI = isFavorableAlloca(Ptr)) { + AllocaMap::const_iterator MI = Allocas.find(AI); + if (MI != Allocas.end()) { + size_t AllocaIndex = MI->second; + if (!Seen.test(AllocaIndex)) { + BLI.Start.set(AllocaIndex); + } + BLI.End.reset(AllocaIndex); + Seen.set(AllocaIndex); + } + } + } + } else if (Callee == LifetimeEnd) { + if (const Value *Ptr = getPointerFromIntrinsic(CI)) { + if (const AllocaInst *AI = isFavorableAlloca(Ptr)) { + AllocaMap::const_iterator MI = Allocas.find(AI); + if (MI != Allocas.end()) { + size_t AllocaIndex = MI->second; + BLI.End.set(AllocaIndex); + Seen.set(AllocaIndex); + } + } + } + } + } + + // Lifetimes that start in this block and do not end here are live-out. + BLI.LiveOut = BLI.Start; + BLI.LiveOut.reset(BLI.End); + if (BLI.LiveOut.any()) { + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + InterBlockTopDownWorklist.insert(*SI); + } + } + + // Lifetimes that end in this block and do not start here are live-in. + // TODO: Is this actually true? What are the semantics of a standalone + // lifetime end? See also the code in computeInterBlockLiveness. + BLI.LiveIn = BLI.End; + BLI.LiveIn.reset(BLI.Start); + if (BLI.LiveIn.any()) { + for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + InterBlockBottomUpWorklist.insert(*PI); + } + } + } +} + +// Compute the LiveIn and LiveOut sets for each block in F. +void AllocaManager::computeInterBlockLiveness() { + NamedRegionTimer Timer("Compute inter-block liveness", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Temp(AllocaCount); + + // Proporgate liveness backwards. + while (!InterBlockBottomUpWorklist.empty()) { + const BasicBlock *BB = InterBlockBottomUpWorklist.pop_back_val(); + BlockLifetimeInfo &BLI = BlockLiveness[BB]; + + // Compute the new live-out set. + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + Temp |= BlockLiveness[*SI].LiveIn; + } + + // If it contains new live blocks, prepare to propagate them. + // TODO: As above, what are the semantics of a standalone lifetime end? + Temp.reset(BLI.Start); + if (Temp.test(BLI.LiveIn)) { + BLI.LiveIn |= Temp; + for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + InterBlockBottomUpWorklist.insert(*PI); + } + } + Temp.reset(); + } + + // Proporgate liveness forwards. + while (!InterBlockTopDownWorklist.empty()) { + const BasicBlock *BB = InterBlockTopDownWorklist.pop_back_val(); + BlockLifetimeInfo &BLI = BlockLiveness[BB]; + + // Compute the new live-in set. + for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + Temp |= BlockLiveness[*PI].LiveOut; + } + + // Also record the live-in values. + BLI.LiveIn |= Temp; + + // If it contains new live blocks, prepare to propagate them. + Temp.reset(BLI.End); + if (Temp.test(BLI.LiveOut)) { + BLI.LiveOut |= Temp; + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + InterBlockTopDownWorklist.insert(*SI); + } + } + Temp.reset(); + } +} + +// Determine overlapping liveranges within blocks. 
+void AllocaManager::computeIntraBlockLiveness() { + NamedRegionTimer Timer("Compute intra-block liveness", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Current(AllocaCount); + + AllocaCompatibility.resize(AllocaCount, BitVector(AllocaCount, true)); + + for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) { + const BasicBlock *BB = &*I; + const BlockLifetimeInfo &BLI = BlockLiveness[BB]; + + Current = BLI.LiveIn; + + for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) { + AllocaCompatibility[i].reset(Current); + } + + for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart) { + if (const Value *Ptr = getPointerFromIntrinsic(CI)) { + if (const AllocaInst *AI = isFavorableAlloca(Ptr)) { + size_t AIndex = Allocas[AI]; + // We conflict with everything else that's currently live. + AllocaCompatibility[AIndex].reset(Current); + // Everything else that's currently live conflicts with us. + for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) { + AllocaCompatibility[i].reset(AIndex); + } + // We're now live. + Current.set(AIndex); + } + } + } else if (Callee == LifetimeEnd) { + if (const Value *Ptr = getPointerFromIntrinsic(CI)) { + if (const AllocaInst *AI = isFavorableAlloca(Ptr)) { + size_t AIndex = Allocas[AI]; + // We're no longer live. + Current.reset(AIndex); + } + } + } + } + } +} + +// Decide which allocas will represent which other allocas, and if so what their +// size and alignment will need to be. +void AllocaManager::computeRepresentatives() { + NamedRegionTimer Timer("Compute Representatives", "AllocaManager", + TimePassesIsEnabled); + + for (size_t i = 0, e = AllocasByIndex.size(); i != e; ++i) { + // If we've already represented this alloca with another, don't visit it. + if (AllocasByIndex[i].isForwarded()) continue; + if (i > size_t(INT_MAX)) continue; + + // Find compatible allocas. This is a simple greedy algorithm. + for (int j = int(i); ; ) { + assert(j >= int(i)); + j = AllocaCompatibility[i].find_next(j); + assert(j != int(i)); + if (j < 0) break; + if (!AllocaCompatibility[j][i]) continue; + + DEBUG(dbgs() << "Allocas: " + "Representing " + << AllocasByIndex[j].getInst()->getName() << " " + "with " + << AllocasByIndex[i].getInst()->getName() << "\n"); + ++NumAllocas; + + assert(!AllocasByIndex[j].isForwarded()); + + AllocasByIndex[i].mergeSize(AllocasByIndex[j].getSize()); + AllocasByIndex[i].mergeAlignment(AllocasByIndex[j].getAlignment()); + AllocasByIndex[j].forward(i); + + AllocaCompatibility[i] &= AllocaCompatibility[j]; + AllocaCompatibility[j].reset(); + } + } +} + +void AllocaManager::computeFrameOffsets() { + NamedRegionTimer Timer("Compute Frame Offsets", "AllocaManager", + TimePassesIsEnabled); + + // Walk through the entry block and collect all the allocas, including the + // ones with no lifetime markers that we haven't looked at yet. We walk in + // reverse order so that we can set the representative allocas as those that + // dominate the others as we go. 
+ const BasicBlock *EntryBB = &F->getEntryBlock(); + for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end(); + BI != BE; ++BI) { + const AllocaInst *AI = dyn_cast(BI); + if (!AI || !AI->isStaticAlloca()) continue; + + AllocaMap::const_iterator I = Allocas.find(AI); + if (I != Allocas.end()) { + // An alloca with lifetime markers. Emit the record we've crafted for it, + // if we've chosen to keep it as a representative. + const AllocaInfo &Info = AllocasByIndex[I->second]; + if (!Info.isForwarded()) { + SortedAllocas.push_back(Info); + } + } else { + // An alloca with no lifetime markers. + SortedAllocas.push_back(getInfo(AI, SortedAllocas.size())); + } + } + + // Sort the allocas to hopefully reduce padding. + array_pod_sort(SortedAllocas.begin(), SortedAllocas.end(), AllocaSort); + + // Assign stack offsets. + uint64_t CurrentOffset = 0; + for (SmallVectorImpl::const_iterator I = SortedAllocas.begin(), + E = SortedAllocas.end(); I != E; ++I) { + const AllocaInfo &Info = *I; + uint64_t NewOffset = alignTo(CurrentOffset, Info.getAlignment()); + + // For backwards compatibility, align every power-of-two multiple alloca to + // its greatest power-of-two factor, up to 8 bytes. In particular, cube2hash + // is known to depend on this. + // TODO: Consider disabling this and making people fix their code. + if (uint64_t Size = Info.getSize()) { + uint64_t P2 = uint64_t(1) << countTrailingZeros(Size); + unsigned CompatAlign = unsigned(std::min(P2, uint64_t(8))); + NewOffset = alignTo(NewOffset, CompatAlign); + } + + const AllocaInst *AI = Info.getInst(); + StaticAllocas[AI] = StaticAllocation(AI, NewOffset); + + CurrentOffset = NewOffset + Info.getSize(); + } + + // Add allocas that were represented by other allocas to the StaticAllocas map + // so that our clients can look them up. + for (unsigned i = 0, e = AllocasByIndex.size(); i != e; ++i) { + const AllocaInfo &Info = AllocasByIndex[i]; + if (!Info.isForwarded()) continue; + size_t j = Info.getForwardedID(); + assert(!AllocasByIndex[j].isForwarded()); + + StaticAllocaMap::const_iterator I = + StaticAllocas.find(AllocasByIndex[j].getInst()); + assert(I != StaticAllocas.end()); + + std::pair Pair = + StaticAllocas.insert(std::make_pair(AllocasByIndex[i].getInst(), + I->second)); + assert(Pair.second); (void)Pair; + } + + // Record the final frame size. Keep the stack pointer 16-byte aligned. + FrameSize = CurrentOffset; + FrameSize = alignTo(FrameSize, 16); + + DEBUG(dbgs() << "Allocas: " + "Statically allocated frame size is " << FrameSize << "\n"); +} + +AllocaManager::AllocaManager() : MaxAlignment(0) { +} + +void AllocaManager::analyze(const Function &Func, const DataLayout &Layout, + bool PerformColoring) { + NamedRegionTimer Timer("AllocaManager", TimePassesIsEnabled); + assert(Allocas.empty()); + assert(AllocasByIndex.empty()); + assert(AllocaCompatibility.empty()); + assert(BlockLiveness.empty()); + assert(StaticAllocas.empty()); + assert(SortedAllocas.empty()); + + DL = &Layout; + F = &Func; + + // Get the declarations for the lifetime intrinsics so we can quickly test to + // see if they are used at all, and for use later if they are. + const Module *M = F->getParent(); + LifetimeStart = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_start)); + LifetimeEnd = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_end)); + + // If we are optimizing and the module contains any lifetime intrinsics, run + // the alloca coloring algorithm. 
+ if (PerformColoring && + ((LifetimeStart && !LifetimeStart->use_empty()) || + (LifetimeEnd && !LifetimeEnd->use_empty()))) { + + collectMarkedAllocas(); + + if (!AllocasByIndex.empty()) { + DEBUG(dbgs() << "Allocas: " + << AllocasByIndex.size() << " marked allocas found\n"); + + collectBlocks(); + computeInterBlockLiveness(); + computeIntraBlockLiveness(); + BlockLiveness.clear(); + + computeRepresentatives(); + AllocaCompatibility.clear(); + } + } + + computeFrameOffsets(); + SortedAllocas.clear(); + Allocas.clear(); + AllocasByIndex.clear(); +} + +void AllocaManager::clear() { + StaticAllocas.clear(); +} + +bool +AllocaManager::getFrameOffset(const AllocaInst *AI, uint64_t *Offset) const { + assert(AI->isStaticAlloca()); + StaticAllocaMap::const_iterator I = StaticAllocas.find(AI); + assert(I != StaticAllocas.end()); + *Offset = I->second.Offset; + return AI == I->second.Representative; +} + +const AllocaInst * +AllocaManager::getRepresentative(const AllocaInst *AI) const { + assert(AI->isStaticAlloca()); + StaticAllocaMap::const_iterator I = StaticAllocas.find(AI); + assert(I != StaticAllocas.end()); + return I->second.Representative; +} diff --git a/lib/Target/JSBackend/AllocaManager.h b/lib/Target/JSBackend/AllocaManager.h new file mode 100644 index 000000000000..9aa833b71a6a --- /dev/null +++ b/lib/Target/JSBackend/AllocaManager.h @@ -0,0 +1,182 @@ +//===-- AllocaManager.h ---------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass declares the AllocaManager class. +// +//===----------------------------------------------------------------------===// + +#ifndef JSBACKEND_ALLOCAMANAGER_H +#define JSBACKEND_ALLOCAMANAGER_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SetVector.h" + +namespace llvm { + +class AllocaInst; +class BasicBlock; +class CallInst; +class DataLayout; +class Function; +class Value; + +/// Compute frame layout for allocas. +class AllocaManager { + const DataLayout *DL; + const Function *LifetimeStart; + const Function *LifetimeEnd; + const Function *F; + + // Per-block lifetime information. + struct BlockLifetimeInfo { + BitVector Start; + BitVector End; + BitVector LiveIn; + BitVector LiveOut; + }; + typedef DenseMap LivenessMap; + LivenessMap BlockLiveness; + + // Worklist for inter-block liveness analysis. + typedef SmallSetVector InterBlockWorklistVec; + InterBlockWorklistVec InterBlockTopDownWorklist; + InterBlockWorklistVec InterBlockBottomUpWorklist; + + // Map allocas to their index in AllocasByIndex. + typedef DenseMap AllocaMap; + AllocaMap Allocas; + + // Information about an alloca. Note that the size and alignment may vary + // from what's in the actual AllocaInst when an alloca is also representing + // another with perhaps greater size and/or alignment needs. + // + // When an alloca is represented by another, its AllocaInfo is marked as + // "forwarded", at which point it no longer holds a size and alignment, but + // the index of the representative AllocaInfo. 
+ class AllocaInfo { + const AllocaInst *Inst; + uint64_t Size; + unsigned Alignment; + unsigned Index; + + public: + AllocaInfo(const AllocaInst *I, uint64_t S, unsigned A, unsigned X) + : Inst(I), Size(S), Alignment(A), Index(X) { + assert(I != NULL); + assert(A != 0); + assert(!isForwarded()); + } + + bool isForwarded() const { return Alignment == 0; } + + size_t getForwardedID() const { + assert(isForwarded()); + return static_cast(Size); + } + + void forward(size_t i) { + assert(!isForwarded()); + Alignment = 0; + Size = i; + assert(isForwarded()); + assert(getForwardedID() == i); + } + + const AllocaInst *getInst() const { return Inst; } + + uint64_t getSize() const { assert(!isForwarded()); return Size; } + unsigned getAlignment() const { assert(!isForwarded()); return Alignment; } + unsigned getIndex() const { return Index; } + + void mergeSize(uint64_t S) { + assert(!isForwarded()); + Size = std::max(Size, S); + assert(!isForwarded()); + } + void mergeAlignment(unsigned A) { + assert(A != 0); + assert(!isForwarded()); + Alignment = std::max(Alignment, A); + assert(!isForwarded()); + } + }; + typedef SmallVector AllocaVec; + AllocaVec AllocasByIndex; + + // For each alloca, which allocas can it safely represent? Allocas are + // identified by AllocasByIndex index. + // TODO: Vector-of-vectors isn't the fastest data structure possible here. + typedef SmallVector AllocaCompatibilityVec; + AllocaCompatibilityVec AllocaCompatibility; + + // This is for allocas that will eventually be sorted. + SmallVector SortedAllocas; + + // Static allocation results. + struct StaticAllocation { + const AllocaInst *Representative; + uint64_t Offset; + StaticAllocation() {} + StaticAllocation(const AllocaInst *A, uint64_t O) + : Representative(A), Offset(O) {} + }; + typedef DenseMap StaticAllocaMap; + StaticAllocaMap StaticAllocas; + uint64_t FrameSize; + + uint64_t getSize(const AllocaInst *AI); + unsigned getAlignment(const AllocaInst *AI); + AllocaInfo getInfo(const AllocaInst *AI, unsigned Index); + const Value *getPointerFromIntrinsic(const CallInst *CI); + const AllocaInst *isFavorableAlloca(const Value *V); + static int AllocaSort(const AllocaInfo *l, const AllocaInfo *r); + + void collectMarkedAllocas(); + void collectBlocks(); + void computeInterBlockLiveness(); + void computeIntraBlockLiveness(); + void computeRepresentatives(); + void computeFrameOffsets(); + + unsigned MaxAlignment; + +public: + AllocaManager(); + + /// Analyze the given function and prepare for getRepresentative queries. + void analyze(const Function &Func, const DataLayout &Layout, + bool PerformColoring); + + /// Reset all stored state. + void clear(); + + /// Return the representative alloca for the given alloca. When allocas are + /// merged, one is chosen as the representative to stand for the rest. + /// References to the alloca should take the form of references to the + /// representative. + const AllocaInst *getRepresentative(const AllocaInst *AI) const; + + /// Set *offset to the frame offset for the given alloca. Return true if the + /// given alloca is representative, meaning that it needs an explicit + /// definition in the function entry. Return false if some other alloca + /// represents this one. + bool getFrameOffset(const AllocaInst *AI, uint64_t *offset) const; + + /// Return the total frame size for all static allocas and associated padding. + uint64_t getFrameSize() const { return FrameSize; } + + /// Return the largest alignment seen. 
+ unsigned getMaxAlignment() const { return MaxAlignment; } +}; + +} // namespace llvm + +#endif diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt new file mode 100644 index 000000000000..942b9fef1be1 --- /dev/null +++ b/lib/Target/JSBackend/CMakeLists.txt @@ -0,0 +1,16 @@ +add_llvm_target(JSBackendCodeGen + AllocaManager.cpp + ExpandBigSwitches.cpp + JSBackend.cpp + JSTargetMachine.cpp + JSTargetTransformInfo.cpp + Relooper.cpp + RemoveLLVMAssume.cpp + SimplifyAllocas.cpp + ) + +add_dependencies(LLVMJSBackendCodeGen intrinsics_gen) + +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) +add_subdirectory(NaCl) diff --git a/lib/Target/JSBackend/CallHandlers.h b/lib/Target/JSBackend/CallHandlers.h new file mode 100644 index 000000000000..fef94436e7c1 --- /dev/null +++ b/lib/Target/JSBackend/CallHandlers.h @@ -0,0 +1,2085 @@ +// Call handlers: flexible map of call targets to arbitrary handling code +// +// Each handler needs DEF_CALL_HANDLER and SETUP_CALL_HANDLER +// +// Call handlers emit the code that the call will be replaced by. If that +// emitted code contains calls, it must add the targets to Declares, +// which are reported as declared but not implemented symbols, so that +// JS linking brings them in. + +typedef std::string (JSWriter::*CallHandler)(const Instruction*, std::string Name, int NumArgs); +typedef std::map CallHandlerMap; +CallHandlerMap CallHandlers; + +// Definitions + +unsigned getNumArgOperands(const Instruction *I) { + return ImmutableCallSite(I).arg_size(); +} + +const Value *getActuallyCalledValue(const Instruction *I) { + const Value *CV = ImmutableCallSite(I).getCalledValue(); + + // if the called value is a bitcast of a function, then we just call it directly, properly + // for example, extern void x() in C will turn into void x(...) in LLVM IR, then the IR bitcasts + // it to the proper form right before the call. this both causes an unnecessary indirect + // call, and it is done with the wrong type. TODO: don't even put it into the function table + if (const Function *F = dyn_cast(CV->stripPointerCasts())) { + CV = F; + } + return CV; +} + +// We can't and shouldn't try to invoke an LLVM intrinsic which we overload with a call hander - +// it would end up in a function table, which makes no sense. 
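Stepping back to the AllocaManager interface declared above: a minimal usage sketch of the analyze/query/clear cycle. The caller is hypothetical (JSBackend.cpp's actual driver is not shown in this hunk), but it uses only the public API above:

    #include "AllocaManager.h"   // lib/Target/JSBackend/AllocaManager.h
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Casting.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Hypothetical: compute a frame layout for F and print the offset chosen
    // for each static alloca, noting which ones were merged into another slot.
    static void dumpFrameLayout(const Function &F, const DataLayout &DL) {
      AllocaManager AM;
      AM.analyze(F, DL, /*PerformColoring=*/true);
      for (const Instruction &I : F.getEntryBlock()) {
        const auto *AI = dyn_cast<AllocaInst>(&I);
        if (!AI || !AI->isStaticAlloca())
          continue;
        uint64_t Offset = 0;
        bool IsRepresentative = AM.getFrameOffset(AI, &Offset);
        errs() << AI->getName() << " -> offset " << Offset
               << (IsRepresentative ? "" : " (merged)") << "\n";
      }
      errs() << "frame size: " << AM.getFrameSize() << "\n";
      AM.clear();
    }

The canInvoke helper that the comment above describes follows next.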
+bool canInvoke(const Value *V) { + const Function *F = dyn_cast(V); + if (F && F->isDeclaration() && F->isIntrinsic()) { + auto Intrin = F->getIntrinsicID(); + if (Intrin == Intrinsic::memcpy || Intrin == Intrinsic::memset || Intrin == Intrinsic::memmove) { + return false; + } + } + return true; +} + +#define DEF_CALL_HANDLER(Ident, Code) \ + std::string CH_##Ident(const Instruction *CI, std::string Name, int NumArgs=-1) { Code } + +DEF_CALL_HANDLER(__default__, { + if (!CI) return ""; // we are just called from a handler that was called from getFunctionIndex, only to ensure the handler was run at least once + const Value *CV = getActuallyCalledValue(CI); + bool NeedCasts = true; + FunctionType *FT; + bool Invoke = false; + bool Emulated = false; + if (InvokeState == 1) { + InvokeState = 2; + Invoke = canInvoke(CV); + } + std::string Sig; + bool IsMath = Name.find("Math_") == 0; + bool ForcedNumArgs = NumArgs != -1; + if (!ForcedNumArgs) NumArgs = getNumArgOperands(CI); + + const Function *F = dyn_cast(CV); + if (F) { + NeedCasts = F->isDeclaration(); // if ffi call, need casts + if (IsMath && !NeedCasts) { + // this was renamed to a math function, but the actual function is implemented, presumably from libc; use that + IsMath = false; + Name = getJSName(F); + } + FT = F->getFunctionType(); + } else { + FT = dyn_cast(dyn_cast(CV->getType())->getElementType()); + if (isAbsolute(CV->stripPointerCasts())) { + Name = "abort /* segfault, call an absolute addr */ "; + } else { + // function pointer call + ensureFunctionTable(FT); + if (!Invoke) { + Sig = getFunctionSignature(FT); + if (!EmulatedFunctionPointers) { + Name = std::string("FUNCTION_TABLE_") + Sig + "[" + Name + " & #FM_" + Sig + "#]"; + NeedCasts = false; // function table call, so stays in asm module + } else { + Name = std::string(Relocatable ? 
"mftCall_" : "ftCall_") + Sig + "(" + getCast(Name, Type::getInt32Ty(CI->getContext())); + if (NumArgs > 0) Name += ','; + Emulated = true; + } + } + } + } + + if (!FT->isVarArg() && !ForcedNumArgs) { + int TypeNumArgs = FT->getNumParams(); + if (TypeNumArgs != NumArgs) { + if (EmscriptenAssertions) prettyWarning() << "unexpected number of arguments " << utostr(NumArgs) << " in call to '" << F->getName() << "', should be " << utostr(TypeNumArgs) << "\n"; + if (NumArgs > TypeNumArgs) NumArgs = TypeNumArgs; // lop off the extra params that will not be used and just break validation + } + if (EmscriptenAssertions) { + for (int i = 0; i < std::min(TypeNumArgs, NumArgs); i++) { + Type *TypeType = FT->getParamType(i); + Type *ActualType = CI->getOperand(i)->getType(); + if (getFunctionSignatureLetter(TypeType) != getFunctionSignatureLetter(ActualType)) { + prettyWarning() << "unexpected argument type " << *ActualType << " at index " << utostr(i) << " in call to '" << F->getName() << "', should be " << *TypeType << "\n"; + } + } + } + } + if (EmscriptenAssertions) { + Type *TypeType = FT->getReturnType(); + Type *ActualType = CI->getType(); + if (getFunctionSignatureLetter(TypeType) != getFunctionSignatureLetter(ActualType)) { + prettyWarning() << "unexpected return type " << *ActualType << " in call to '" << F->getName() << "', should be " << *TypeType << "\n"; + } + } + + if (Invoke) { + Sig = getFunctionSignature(FT); + Name = "invoke_" + Sig; + NeedCasts = true; + } + std::string text = Name; + if (!Emulated) text += "("; + if (Invoke) { + // add first param + if (F) { + text += relocateFunctionPointer(utostr(getFunctionIndex(F))); // convert to function pointer + } else { + text += getValueAsCastStr(CV); // already a function pointer + } + if (NumArgs > 0) text += ","; + } + // this is an ffi call if we need casts, and it is not a special Math_ builtin + bool FFI = NeedCasts; + if (FFI && IsMath) { + if (Name == "Math_ceil" || Name == "Math_floor" || Name == "Math_min" || Name == "Math_max" || Name == "Math_sqrt" || Name == "Math_abs") { + // This special Math builtin is optimizable with all types, including floats, so can treat it as non-ffi + FFI = false; + } + } + unsigned FFI_OUT = FFI ? ASM_FFI_OUT : 0; + for (int i = 0; i < NumArgs; i++) { + if (!NeedCasts) { + text += getValueAsStr(CI->getOperand(i)); + } else { + text += getValueAsCastParenStr(CI->getOperand(i), ASM_NONSPECIFIC | FFI_OUT); + } + if (i < NumArgs - 1) text += ","; + } + text += ")"; + // handle return value + Type *InstRT = CI->getType(); + Type *ActualRT = FT->getReturnType(); + if (!InstRT->isVoidTy() && ActualRT->isVoidTy()) { + // the function we are calling was cast to something returning a value, but it really + // does not return a value + getAssignIfNeeded(CI); // ensure the variable is defined, but do not emit it here + // it should have 0 uses, but just to be safe + } else if (!ActualRT->isVoidTy()) { + unsigned FFI_IN = FFI ? 
ASM_FFI_IN : 0; + text = getAssignIfNeeded(CI) + "(" + getCast(text, ActualRT, ASM_NONSPECIFIC | FFI_IN) + ")"; + } + return text; +}) + +// exceptions support +DEF_CALL_HANDLER(emscripten_preinvoke, { + // InvokeState is normally 0 here, but might be otherwise if a block was split apart TODO: add a function attribute for this + InvokeState = 1; + return "__THREW__ = 0"; +}) +DEF_CALL_HANDLER(emscripten_postinvoke, { + // InvokeState is normally 2 here, but can be 1 if the call in between was optimized out, or 0 if a block was split apart + InvokeState = 0; + return getAssign(CI) + "__THREW__; __THREW__ = 0"; +}) +DEF_CALL_HANDLER(emscripten_landingpad, { + unsigned Num = getNumArgOperands(CI); + std::string target = "__cxa_find_matching_catch_" + utostr(Num); + Declares.insert(target); + std::string Ret = getAssign(CI) + "_" + target + "("; + for (unsigned i = 1; i < Num-1; i++) { // ignore personality and cleanup XXX - we probably should not be doing that! + if (i > 1) Ret += ","; + Ret += getValueAsCastStr(CI->getOperand(i)); + } + Ret += ")|0"; + return Ret; +}) +DEF_CALL_HANDLER(emscripten_resume, { + Declares.insert("__resumeException"); + return "___resumeException(" + getValueAsCastStr(CI->getOperand(0)) + ")"; +}) + +std::string getTempRet0() { + return Relocatable ? "(getTempRet0() | 0)" : "tempRet0"; +} + +std::string setTempRet0(std::string Value) { + return Relocatable ? "setTempRet0((" + Value + ") | 0)" : "tempRet0 = (" + Value + ")"; +} + +// setjmp support + +DEF_CALL_HANDLER(emscripten_prep_setjmp, { + return getAdHocAssign("_setjmpTableSize", Type::getInt32Ty(CI->getContext())) + "4;" + + getAdHocAssign("_setjmpTable", Type::getInt32Ty(CI->getContext())) + "_malloc(40) | 0;" + + "HEAP32[_setjmpTable>>2]=0"; +}) +DEF_CALL_HANDLER(emscripten_cleanup_setjmp, { + return "_free(_setjmpTable|0)"; +}) +DEF_CALL_HANDLER(emscripten_setjmp, { + // env, label, table + Declares.insert("saveSetjmp"); + return "_setjmpTable = _saveSetjmp(" + getValueAsStr(CI->getOperand(0)) + "," + getValueAsStr(CI->getOperand(1)) + ",_setjmpTable|0,_setjmpTableSize|0)|0;_setjmpTableSize = " + getTempRet0(); +}) +DEF_CALL_HANDLER(emscripten_longjmp, { + Declares.insert("longjmp"); + return CH___default__(CI, "_longjmp"); +}) +DEF_CALL_HANDLER(emscripten_check_longjmp, { + std::string Threw = getValueAsStr(CI->getOperand(0)); + std::string Target = getJSName(CI); + std::string Assign = getAssign(CI); + return "if (((" + Threw + "|0) != 0) & ((threwValue|0) != 0)) { " + + Assign + "_testSetjmp(HEAP32[" + Threw + ">>2]|0, _setjmpTable|0, _setjmpTableSize|0)|0; " + + "if ((" + Target + "|0) == 0) { _longjmp(" + Threw + "|0, threwValue|0); } " + // rethrow + setTempRet0("threwValue") + "; " + + "} else { " + Assign + "-1; }"; +}) +DEF_CALL_HANDLER(emscripten_get_longjmp_result, { + std::string Threw = getValueAsStr(CI->getOperand(0)); + return getAssign(CI) + getTempRet0(); +}) + +// supporting async functions, see `/src/library_async.js` for detail. 
+DEF_CALL_HANDLER(emscripten_alloc_async_context, { + Declares.insert("emscripten_alloc_async_context"); + // insert sp as the 2nd parameter + return getAssign(CI) + "_emscripten_alloc_async_context(" + getValueAsStr(CI->getOperand(0)) + ",sp)|0"; +}) +DEF_CALL_HANDLER(emscripten_check_async, { + return getAssign(CI) + "___async"; +}) +// prevent unwinding the stack +// preserve the return value of the return inst +DEF_CALL_HANDLER(emscripten_do_not_unwind, { + return "sp = STACKTOP"; +}) +// prevent unwinding the async stack +DEF_CALL_HANDLER(emscripten_do_not_unwind_async, { + return "___async_unwind = 0"; +}) +DEF_CALL_HANDLER(emscripten_get_async_return_value_addr, { + return getAssign(CI) + "___async_retval"; +}) + +// emscripten instrinsics +DEF_CALL_HANDLER(emscripten_debugger, { + CantValidate = "emscripten_debugger is used"; + return "debugger"; +}) +DEF_CALL_HANDLER(llvm_debugtrap, { + CantValidate = "llvm.debugtrap is used"; + return "debugger"; +}) + +// i64 support + +DEF_CALL_HANDLER(getHigh32, { + return getAssign(CI) + getTempRet0(); +}) +DEF_CALL_HANDLER(setHigh32, { + return setTempRet0(getValueAsStr(CI->getOperand(0))); +}) +// XXX float handling here is not optimal +#define TO_I(low, high) \ +DEF_CALL_HANDLER(low, { \ + std::string Input = getValueAsStr(CI->getOperand(0)); \ + if (PreciseF32 && CI->getOperand(0)->getType()->isFloatTy()) Input = "+" + Input; \ + return getAssign(CI) + "(~~" + Input + ")>>>0"; \ +}) \ +DEF_CALL_HANDLER(high, { \ + std::string Input = getValueAsStr(CI->getOperand(0)); \ + if (PreciseF32 && CI->getOperand(0)->getType()->isFloatTy()) Input = "+" + Input; \ + return getAssign(CI) + "+Math_abs(" + Input + ") >= +1 ? " + Input + " > +0 ? (~~+Math_min(+Math_floor(" + Input + " / +4294967296), +4294967295)) >>> 0 : ~~+Math_ceil((" + Input + " - +(~~" + Input + " >>> 0)) / +4294967296) >>> 0 : 0"; \ +}) +TO_I(FtoILow, FtoIHigh); +TO_I(DtoILow, DtoIHigh); +DEF_CALL_HANDLER(BDtoILow, { + return "HEAPF64[tempDoublePtr>>3] = " + getValueAsStr(CI->getOperand(0)) + ";" + getAssign(CI) + "HEAP32[tempDoublePtr>>2]|0"; +}) +DEF_CALL_HANDLER(BDtoIHigh, { + return getAssign(CI) + "HEAP32[tempDoublePtr+4>>2]|0"; +}) +DEF_CALL_HANDLER(SItoF, { + std::string Ret = "(+" + getValueAsCastParenStr(CI->getOperand(0), ASM_UNSIGNED) + ") + " + + "(+4294967296*(+" + getValueAsCastParenStr(CI->getOperand(1), ASM_SIGNED) + "))"; + if (PreciseF32 && CI->getType()->isFloatTy()) { + Ret = "Math_fround(" + Ret + ")"; + } + return getAssign(CI) + Ret; +}) +DEF_CALL_HANDLER(UItoF, { + std::string Ret = "(+" + getValueAsCastParenStr(CI->getOperand(0), ASM_UNSIGNED) + ") + " + + "(+4294967296*(+" + getValueAsCastParenStr(CI->getOperand(1), ASM_UNSIGNED) + "))"; + if (PreciseF32 && CI->getType()->isFloatTy()) { + Ret = "Math_fround(" + Ret + ")"; + } + return getAssign(CI) + Ret; +}) +DEF_CALL_HANDLER(SItoD, { + return getAssign(CI) + "(+" + getValueAsCastParenStr(CI->getOperand(0), ASM_UNSIGNED) + ") + " + + "(+4294967296*(+" + getValueAsCastParenStr(CI->getOperand(1), ASM_SIGNED) + "))"; +}) +DEF_CALL_HANDLER(UItoD, { + return getAssign(CI) + "(+" + getValueAsCastParenStr(CI->getOperand(0), ASM_UNSIGNED) + ") + " + + "(+4294967296*(+" + getValueAsCastParenStr(CI->getOperand(1), ASM_UNSIGNED) + "))"; +}) +DEF_CALL_HANDLER(BItoD, { + return "HEAP32[tempDoublePtr>>2] = " + getValueAsStr(CI->getOperand(0)) + ";" + + "HEAP32[tempDoublePtr+4>>2] = " + getValueAsStr(CI->getOperand(1)) + ";" + + getAssign(CI) + "+HEAPF64[tempDoublePtr>>3]"; +}) + +// misc + 
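Before the miscellaneous handlers that follow, a standalone sketch of the bit-level split used by the BDtoILow/BDtoIHigh and BItoD handlers above, which go through tempDoublePtr in the generated code. splitDouble and joinDouble are illustrative names only; the sketch assumes the little-endian layout asm.js uses.

#include <cstdint>
#include <cstring>
#include <cstdio>

// Reinterpret a double as two 32-bit halves, mirroring
// "HEAPF64[tempDoublePtr>>3] = x" followed by reads of HEAP32[tempDoublePtr>>2]
// and HEAP32[tempDoublePtr+4>>2] (and the reverse for BItoD).
static void splitDouble(double d, uint32_t &lo, uint32_t &hi) {
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);
  lo = static_cast<uint32_t>(bits);        // HEAP32[tempDoublePtr>>2]
  hi = static_cast<uint32_t>(bits >> 32);  // HEAP32[tempDoublePtr+4>>2]
}

static double joinDouble(uint32_t lo, uint32_t hi) {
  uint64_t bits = (static_cast<uint64_t>(hi) << 32) | lo;
  double d;
  std::memcpy(&d, &bits, sizeof d);
  return d;                                // "+HEAPF64[tempDoublePtr>>3]"
}

int main() {
  uint32_t lo, hi;
  splitDouble(1.0, lo, hi);
  printf("lo=0x%08x hi=0x%08x back=%f\n", lo, hi, joinDouble(lo, hi));
  return 0;
}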
+DEF_CALL_HANDLER(llvm_nacl_atomic_store_i32, { + return "HEAP32[" + getValueAsStr(CI->getOperand(0)) + ">>2]=" + getValueAsStr(CI->getOperand(1)); +}) + +#define CMPXCHG_HANDLER(name, HeapName) \ +DEF_CALL_HANDLER(name, { \ + const Value *P = CI->getOperand(0); \ + if (EnablePthreads) { \ + return getAssign(CI) + "(Atomics_compareExchange(" HeapName ", " + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ", " + getValueAsStr(CI->getOperand(2)) + ")|0)"; \ + } else { \ + return getLoad(CI, P, CI->getType(), 0) + ';' + \ + "if ((" + getCast(getJSName(CI), CI->getType()) + ") == " + getValueAsCastParenStr(CI->getOperand(1)) + ") " + \ + getStore(CI, P, CI->getType(), getValueAsStr(CI->getOperand(2)), 0); \ + } \ +}) + +CMPXCHG_HANDLER(llvm_nacl_atomic_cmpxchg_i8, "HEAP8"); +CMPXCHG_HANDLER(llvm_nacl_atomic_cmpxchg_i16, "HEAP16"); +CMPXCHG_HANDLER(llvm_nacl_atomic_cmpxchg_i32, "HEAP32"); + +#define UNROLL_LOOP_MAX 8 +#define WRITE_LOOP_MAX 128 + +DEF_CALL_HANDLER(llvm_memcpy_p0i8_p0i8_i32, { + if (CI) { + ConstantInt *AlignInt = dyn_cast(CI->getOperand(3)); + if (AlignInt) { + ConstantInt *LenInt = dyn_cast(CI->getOperand(2)); + if (LenInt) { + // we can emit inline code for this + unsigned Len = LenInt->getZExtValue(); + if (Len <= WRITE_LOOP_MAX) { + unsigned Align = AlignInt->getZExtValue(); + if (Align > 4) Align = 4; + else if (Align == 0) Align = 1; // align 0 means 1 in memcpy and memset (unlike other places where it means 'default/4') + if (Align == 1 && Len > 1 && WarnOnUnaligned) { + errs() << "emcc: warning: unaligned memcpy in " << CI->getParent()->getParent()->getName() << ":" << *CI << " (compiler's fault?)\n"; + } + unsigned Pos = 0; + std::string Ret; + std::string Dest = getValueAsStr(CI->getOperand(0)); + std::string Src = getValueAsStr(CI->getOperand(1)); + while (Len > 0) { + // handle as much as we can in the current alignment + unsigned CurrLen = Align*(Len/Align); + unsigned Factor = CurrLen/Align; + if (Factor <= UNROLL_LOOP_MAX) { + // unroll + for (unsigned Offset = 0; Offset < CurrLen; Offset += Align) { + unsigned PosOffset = Pos + Offset; + std::string Add = PosOffset == 0 ? "" : ("+" + utostr(PosOffset)); + Ret += ";" + getHeapAccess(Dest + Add, Align) + "=" + getHeapAccess(Src + Add, Align) + "|0"; + } + } else { + // emit a loop + UsedVars["dest"] = UsedVars["src"] = UsedVars["stop"] = Type::getInt32Ty(TheModule->getContext()); + std::string Add = Pos == 0 ? 
"" : ("+" + utostr(Pos) + "|0"); + Ret += "dest=" + Dest + Add + "; src=" + Src + Add + "; stop=dest+" + utostr(CurrLen) + "|0; do { " + getHeapAccess("dest", Align) + "=" + getHeapAccess("src", Align) + "|0; dest=dest+" + utostr(Align) + "|0; src=src+" + utostr(Align) + "|0; } while ((dest|0) < (stop|0))"; + } + Pos += CurrLen; + Len -= CurrLen; + Align /= 2; + } + return Ret; + } + } + } + } + Declares.insert("memcpy"); + return CH___default__(CI, "_memcpy", 3) + "|0"; +}) + +DEF_CALL_HANDLER(llvm_memset_p0i8_i32, { + if (CI) { + ConstantInt *AlignInt = dyn_cast(CI->getOperand(3)); + if (AlignInt) { + ConstantInt *LenInt = dyn_cast(CI->getOperand(2)); + if (LenInt) { + ConstantInt *ValInt = dyn_cast(CI->getOperand(1)); + if (ValInt) { + // we can emit inline code for this + unsigned Len = LenInt->getZExtValue(); + if (Len <= WRITE_LOOP_MAX) { + unsigned Align = AlignInt->getZExtValue(); + unsigned Val = ValInt->getZExtValue(); + if (Align > 4) Align = 4; + else if (Align == 0) Align = 1; // align 0 means 1 in memcpy and memset (unlike other places where it means 'default/4') + if (Align == 1 && Len > 1 && WarnOnUnaligned) { + errs() << "emcc: warning: unaligned memcpy in " << CI->getParent()->getParent()->getName() << ":" << *CI << " (compiler's fault?)\n"; + } + unsigned Pos = 0; + std::string Ret; + std::string Dest = getValueAsStr(CI->getOperand(0)); + while (Len > 0) { + // handle as much as we can in the current alignment + unsigned CurrLen = Align*(Len/Align); + unsigned FullVal = 0; + for (unsigned i = 0; i < Align; i++) { + FullVal <<= 8; + FullVal |= Val; + } + unsigned Factor = CurrLen/Align; + if (Factor <= UNROLL_LOOP_MAX) { + // unroll + for (unsigned Offset = 0; Offset < CurrLen; Offset += Align) { + unsigned PosOffset = Pos + Offset; + std::string Add = PosOffset == 0 ? "" : ("+" + utostr(PosOffset)); + Ret += ";" + getHeapAccess(Dest + Add, Align) + "=" + utostr(FullVal) + "|0"; + } + } else { + // emit a loop + UsedVars["dest"] = UsedVars["stop"] = Type::getInt32Ty(TheModule->getContext()); + std::string Add = Pos == 0 ? 
"" : ("+" + utostr(Pos) + "|0"); + Ret += "dest=" + Dest + Add + "; stop=dest+" + utostr(CurrLen) + "|0; do { " + getHeapAccess("dest", Align) + "=" + utostr(FullVal) + "|0; dest=dest+" + utostr(Align) + "|0; } while ((dest|0) < (stop|0))"; + } + Pos += CurrLen; + Len -= CurrLen; + Align /= 2; + } + return Ret; + } + } + } + } + } + Declares.insert("memset"); + return CH___default__(CI, "_memset", 3) + "|0"; +}) + +DEF_CALL_HANDLER(llvm_memmove_p0i8_p0i8_i32, { + Declares.insert("memmove"); + return CH___default__(CI, "_memmove", 3) + "|0"; +}) + +DEF_CALL_HANDLER(llvm_expect_i32, { + return getAssign(CI) + getValueAsStr(CI->getOperand(0)); +}) +DEF_CALL_HANDLER(llvm_expect_i1, { + return getAssign(CI) + getValueAsStr(CI->getOperand(0)); +}) + +DEF_CALL_HANDLER(llvm_dbg_declare, { + if (!EnableCyberDWARF || !EnableCyberDWARFIntrinsics) + return ""; + + auto VariableOffset = "0"; + auto AssignedValue = cast(CI->getOperand(0))->getMetadata(); + auto const LocalVariableMD = cast(CI->getOperand(1))->getMetadata(); + auto const LocalVariableDI = cast(LocalVariableMD); + auto const LocalVariableType = LocalVariableDI->getRawType(); + auto const DwarfOp = cast(CI->getOperand(2))->getMetadata(); + std::string LocalVariableName = LocalVariableDI->getName().str(); + + auto VarMD = utostr(getIDForMetadata(LocalVariableType)) + + "," + VariableOffset + "," + utostr(getIDForMetadata(DwarfOp)) + + ",\"" + LocalVariableName + "\""; + + + if (auto const *ValAsAssign = dyn_cast(AssignedValue)) { + Declares.insert("metadata_llvm_dbg_value_local"); + auto LocalVarName = getJSName(ValAsAssign->getValue()->stripPointerCasts()); + return "_metadata_llvm_dbg_value_local(" + LocalVarName + "," + VarMD + ")"; + } else if (auto const *ValAsAssign = dyn_cast(AssignedValue)) { + Declares.insert("metadata_llvm_dbg_value_constant"); + return "_metadata_llvm_dbg_value_constant(\"" + getValueAsStr(ValAsAssign->getValue()) + + "," + VarMD + ")"; + } + + return ""; +}) + +DEF_CALL_HANDLER(llvm_dbg_value, { + if (!EnableCyberDWARF || !EnableCyberDWARFIntrinsics) + return ""; + + auto VariableOffset = getValueAsStr(CI->getOperand(1)); + auto AssignedValue = cast(CI->getOperand(0))->getMetadata(); + auto const LocalVariableMD = cast(CI->getOperand(1))->getMetadata(); + auto const LocalVariableDI = cast(LocalVariableMD); + auto const LocalVariableType = LocalVariableDI->getRawType(); + auto const DwarfOp = cast(CI->getOperand(2))->getMetadata(); + std::string LocalVariableName = LocalVariableDI->getName().str(); + + auto VarMD = utostr(getIDForMetadata(LocalVariableType)) + + "," + VariableOffset + "," + utostr(getIDForMetadata(DwarfOp)) + + ",\"" + LocalVariableName + "\""; + + if (auto const *ValAsAssign = dyn_cast(AssignedValue)) { + Declares.insert("metadata_llvm_dbg_value_local"); + auto LocalVarName = getJSName(ValAsAssign->getValue()->stripPointerCasts()); + return "_metadata_llvm_dbg_value_local(" + LocalVarName + "," + VarMD + ")"; + } else if (auto const *ValAsAssign = dyn_cast(AssignedValue)) { + Declares.insert("metadata_llvm_dbg_value_constant"); + return "_metadata_llvm_dbg_value_constant(\"" + getValueAsStr(ValAsAssign->getValue()) + + "," + VarMD + ")"; + } + + return ""; +}) + +DEF_CALL_HANDLER(llvm_lifetime_start, { + return ""; +}) + +DEF_CALL_HANDLER(llvm_lifetime_end, { + return ""; +}) + +DEF_CALL_HANDLER(llvm_invariant_start, { + return ""; +}) + +DEF_CALL_HANDLER(llvm_invariant_end, { + return ""; +}) + +DEF_CALL_HANDLER(llvm_prefetch, { + return ""; +}) + +DEF_CALL_HANDLER(llvm_objectsize_i32_p0i8, { + 
return getAssign(CI) + ((cast(CI->getOperand(1)))->getZExtValue() == 0 ? "-1" : "0"); +}) + +DEF_CALL_HANDLER(llvm_flt_rounds, { + // FLT_ROUNDS helper. We don't support setting the rounding mode dynamically, + // so it's always round-to-nearest (1). + return getAssign(CI) + "1"; +}) + +DEF_CALL_HANDLER(bitshift64Lshr, { + Declares.insert("bitshift64Lshr"); + return CH___default__(CI, "_bitshift64Lshr", 3); +}) + +DEF_CALL_HANDLER(bitshift64Ashr, { + Declares.insert("bitshift64Ashr"); + return CH___default__(CI, "_bitshift64Ashr", 3); +}) + +DEF_CALL_HANDLER(bitshift64Shl, { + Declares.insert("bitshift64Shl"); + return CH___default__(CI, "_bitshift64Shl", 3); +}) + +DEF_CALL_HANDLER(llvm_ctlz_i32, { + return CH___default__(CI, "Math_clz32", 1); +}) + +DEF_CALL_HANDLER(llvm_cttz_i32, { + Declares.insert("llvm_cttz_i32"); + return CH___default__(CI, "_llvm_cttz_i32", 1); +}) + +DEF_CALL_HANDLER(llvm_maxnum_f32, { + return CH___default__(CI, "Math_max", 2); +}) + +DEF_CALL_HANDLER(llvm_maxnum_f64, { + return CH___default__(CI, "Math_max", 2); +}) + +DEF_CALL_HANDLER(llvm_copysign_f32, { + Declares.insert("llvm_copysign_f32"); + return CH___default__(CI, "_llvm_copysign_f32", 2); +}) + +DEF_CALL_HANDLER(llvm_copysign_f64, { + Declares.insert("llvm_copysign_f64"); + return CH___default__(CI, "_llvm_copysign_f64", 2); +}) + +// EM_ASM support + +std::string handleAsmConst(const Instruction *CI) { + unsigned Num = getNumArgOperands(CI); + std::string Sig; + Sig += getFunctionSignatureLetter(CI->getType()); + for (unsigned i = 1; i < Num; i++) { + Sig += getFunctionSignatureLetter(CI->getOperand(i)->getType()); + } + std::string func = "emscripten_asm_const_" + Sig; + std::string ret = "_" + func + "(" + utostr(getAsmConstId(CI->getOperand(0), Sig)); + for (unsigned i = 1; i < Num; i++) { + ret += ", " + getValueAsCastParenStr(CI->getOperand(i), ASM_NONSPECIFIC); + } + return ret + ")"; +} + +DEF_CALL_HANDLER(emscripten_asm_const, { + Declares.insert("emscripten_asm_const"); + return handleAsmConst(CI); +}) +DEF_CALL_HANDLER(emscripten_asm_const_int, { + Declares.insert("emscripten_asm_const_int"); + return getAssign(CI) + getCast(handleAsmConst(CI), Type::getInt32Ty(CI->getContext())); +}) +DEF_CALL_HANDLER(emscripten_asm_const_double, { + Declares.insert("emscripten_asm_const_double"); + return getAssign(CI) + getCast(handleAsmConst(CI), Type::getDoubleTy(CI->getContext())); +}) + +DEF_CALL_HANDLER(emscripten_atomic_exchange_u8, { + return getAssign(CI) + "(Atomics_exchange(HEAP8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_exchange_u16, { + return getAssign(CI) + "(Atomics_exchange(HEAP16, " + getShiftedPtr(CI->getOperand(0), 2) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_exchange_u32, { + return getAssign(CI) + "(Atomics_exchange(HEAP32, " + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) + +DEF_CALL_HANDLER(emscripten_atomic_cas_u8, { + return getAssign(CI) + "(Atomics_compareExchange(HEAP8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ", " + getValueAsStr(CI->getOperand(2)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_cas_u16, { + return getAssign(CI) + "(Atomics_compareExchange(HEAP16, " + getShiftedPtr(CI->getOperand(0), 2) + ", " + getValueAsStr(CI->getOperand(1)) + ", " + getValueAsStr(CI->getOperand(2)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_cas_u32, { + return 
getAssign(CI) + "(Atomics_compareExchange(HEAP32, " + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ", " + getValueAsStr(CI->getOperand(2)) + ")|0)"; +}) + +DEF_CALL_HANDLER(emscripten_atomic_load_u8, { + return getAssign(CI) + "(Atomics_load(HEAP8, " + getValueAsStr(CI->getOperand(0)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_load_u16, { + return getAssign(CI) + "(Atomics_load(HEAP16, " + getShiftedPtr(CI->getOperand(0), 2) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_load_u32, { + return getAssign(CI) + "(Atomics_load(HEAP32, " + getShiftedPtr(CI->getOperand(0), 4) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_load_f32, { + // TODO: If https://bugzilla.mozilla.org/show_bug.cgi?id=1131613 is implemented, we could use the commented out version. Until then, + // we must emulate manually. + Declares.insert("_Atomics_load_f32_emulated"); + return getAssign(CI) + (PreciseF32 ? "Math_fround(" : "+") + "__Atomics_load_f32_emulated(" + getShiftedPtr(CI->getOperand(0), 4) + (PreciseF32 ? "))" : ")"); +// return getAssign(CI) + "Atomics_load(HEAPF32, " + getShiftedPtr(CI->getOperand(0), 4) + ")"; +}) +DEF_CALL_HANDLER(emscripten_atomic_load_f64, { + // TODO: If https://bugzilla.mozilla.org/show_bug.cgi?id=1131624 is implemented, we could use the commented out version. Until then, + // we must emulate manually. + Declares.insert("emscripten_atomic_load_f64"); + return getAssign(CI) + "+_emscripten_atomic_load_f64(" + getShiftedPtr(CI->getOperand(0), 8) + ")"; +// return getAssign(CI) + "Atomics_load(HEAPF64, " + getShiftedPtr(CI->getOperand(0), 8) + ")"; +}) + +DEF_CALL_HANDLER(emscripten_atomic_store_u8, { + return getAssign(CI) + "(Atomics_store(HEAP8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_store_u16, { + return getAssign(CI) + "(Atomics_store(HEAP16, " + getShiftedPtr(CI->getOperand(0), 2) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_store_u32, { + return getAssign(CI) + "(Atomics_store(HEAP32, " + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_store_f32, { + // TODO: If https://bugzilla.mozilla.org/show_bug.cgi?id=1131613 is implemented, we could use the commented out version. Until then, + // we must emulate manually. + Declares.insert("emscripten_atomic_store_f32"); + return getAssign(CI) + "_emscripten_atomic_store_f32(" + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +// return getAssign(CI) + "Atomics_store(HEAPF32, " + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_atomic_store_f64, { + // TODO: If https://bugzilla.mozilla.org/show_bug.cgi?id=1131624 is implemented, we could use the commented out version. Until then, + // we must emulate manually. 
+ Declares.insert("emscripten_atomic_store_f64"); + return getAssign(CI) + "+_emscripten_atomic_store_f64(" + getShiftedPtr(CI->getOperand(0), 8) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +// return getAssign(CI) + "Atomics_store(HEAPF64, " + getShiftedPtr(CI->getOperand(0), 8) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) + +DEF_CALL_HANDLER(emscripten_atomic_add_u8, { + return getAssign(CI) + "(Atomics_add(HEAP8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_add_u16, { + return getAssign(CI) + "(Atomics_add(HEAP16, " + getShiftedPtr(CI->getOperand(0), 2) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_add_u32, { + return getAssign(CI) + "(Atomics_add(HEAP32, " + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) + +DEF_CALL_HANDLER(emscripten_atomic_sub_u8, { + return getAssign(CI) + "(Atomics_sub(HEAP8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_sub_u16, { + return getAssign(CI) + "(Atomics_sub(HEAP16, " + getShiftedPtr(CI->getOperand(0), 2) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_sub_u32, { + return getAssign(CI) + "(Atomics_sub(HEAP32, " + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) + +DEF_CALL_HANDLER(emscripten_atomic_and_u8, { + return getAssign(CI) + "(Atomics_and(HEAP8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_and_u16, { + return getAssign(CI) + "(Atomics_and(HEAP16, " + getShiftedPtr(CI->getOperand(0), 2) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_and_u32, { + return getAssign(CI) + "(Atomics_and(HEAP32, " + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) + +DEF_CALL_HANDLER(emscripten_atomic_or_u8, { + return getAssign(CI) + "(Atomics_or(HEAP8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_or_u16, { + return getAssign(CI) + "(Atomics_or(HEAP16, " + getShiftedPtr(CI->getOperand(0), 2) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_or_u32, { + return getAssign(CI) + "(Atomics_or(HEAP32, " + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) + +DEF_CALL_HANDLER(emscripten_atomic_xor_u8, { + return getAssign(CI) + "(Atomics_xor(HEAP8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_xor_u16, { + return getAssign(CI) + "(Atomics_xor(HEAP16, " + getShiftedPtr(CI->getOperand(0), 2) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) +DEF_CALL_HANDLER(emscripten_atomic_xor_u32, { + return getAssign(CI) + "(Atomics_xor(HEAP32, " + getShiftedPtr(CI->getOperand(0), 4) + ", " + getValueAsStr(CI->getOperand(1)) + ")|0)"; +}) + +#define DEF_BUILTIN_HANDLER(name, to) \ +DEF_CALL_HANDLER(name, { \ + return CH___default__(CI, #to); \ +}) + +#define DEF_MAYBE_BUILTIN_HANDLER(name, to) \ +DEF_CALL_HANDLER(name, { \ + if (!WebAssembly) return CH___default__(CI, #to); \ + Declares.insert(#name); \ + return CH___default__(CI, "_" #name); \ +}) + +// Various simple redirects for our js libc, see library.js and 
LibraryManager.load +DEF_BUILTIN_HANDLER(abs, Math_abs); +DEF_BUILTIN_HANDLER(labs, Math_abs); +DEF_MAYBE_BUILTIN_HANDLER(cos, Math_cos); +DEF_MAYBE_BUILTIN_HANDLER(cosf, Math_cos); +DEF_MAYBE_BUILTIN_HANDLER(cosl, Math_cos); +DEF_MAYBE_BUILTIN_HANDLER(sin, Math_sin); +DEF_MAYBE_BUILTIN_HANDLER(sinf, Math_sin); +DEF_MAYBE_BUILTIN_HANDLER(sinl, Math_sin); +DEF_MAYBE_BUILTIN_HANDLER(tan, Math_tan); +DEF_MAYBE_BUILTIN_HANDLER(tanf, Math_tan); +DEF_MAYBE_BUILTIN_HANDLER(tanl, Math_tan); +DEF_MAYBE_BUILTIN_HANDLER(acos, Math_acos); +DEF_MAYBE_BUILTIN_HANDLER(acosf, Math_acos); +DEF_MAYBE_BUILTIN_HANDLER(acosl, Math_acos); +DEF_MAYBE_BUILTIN_HANDLER(asin, Math_asin); +DEF_MAYBE_BUILTIN_HANDLER(asinf, Math_asin); +DEF_MAYBE_BUILTIN_HANDLER(asinl, Math_asin); +DEF_MAYBE_BUILTIN_HANDLER(atan, Math_atan); +DEF_MAYBE_BUILTIN_HANDLER(atanf, Math_atan); +DEF_MAYBE_BUILTIN_HANDLER(atanl, Math_atan); +DEF_MAYBE_BUILTIN_HANDLER(atan2, Math_atan2); +DEF_MAYBE_BUILTIN_HANDLER(atan2f, Math_atan2); +DEF_MAYBE_BUILTIN_HANDLER(atan2l, Math_atan2); +DEF_MAYBE_BUILTIN_HANDLER(exp, Math_exp); +DEF_MAYBE_BUILTIN_HANDLER(expf, Math_exp); +DEF_MAYBE_BUILTIN_HANDLER(expl, Math_exp); +DEF_MAYBE_BUILTIN_HANDLER(log, Math_log); +DEF_MAYBE_BUILTIN_HANDLER(logf, Math_log); +DEF_MAYBE_BUILTIN_HANDLER(logl, Math_log); +DEF_BUILTIN_HANDLER(sqrt, Math_sqrt); +DEF_BUILTIN_HANDLER(sqrtf, Math_sqrt); +DEF_BUILTIN_HANDLER(sqrtl, Math_sqrt); +DEF_BUILTIN_HANDLER(fabs, Math_abs); +DEF_BUILTIN_HANDLER(fabsf, Math_abs); +DEF_BUILTIN_HANDLER(fabsl, Math_abs); +DEF_BUILTIN_HANDLER(llvm_fabs_f32, Math_abs); +DEF_BUILTIN_HANDLER(llvm_fabs_f64, Math_abs); +DEF_BUILTIN_HANDLER(ceil, Math_ceil); +DEF_BUILTIN_HANDLER(ceilf, Math_ceil); +DEF_BUILTIN_HANDLER(ceill, Math_ceil); +DEF_BUILTIN_HANDLER(floor, Math_floor); +DEF_BUILTIN_HANDLER(floorf, Math_floor); +DEF_BUILTIN_HANDLER(floorl, Math_floor); +DEF_MAYBE_BUILTIN_HANDLER(pow, Math_pow); +DEF_MAYBE_BUILTIN_HANDLER(powf, Math_pow); +DEF_MAYBE_BUILTIN_HANDLER(powl, Math_pow); +DEF_BUILTIN_HANDLER(llvm_sqrt_f32, Math_sqrt); +DEF_BUILTIN_HANDLER(llvm_sqrt_f64, Math_sqrt); +DEF_BUILTIN_HANDLER(llvm_pow_f32, Math_pow); // XXX these will be slow in wasm, but need to link in libc before getting here, or stop +DEF_BUILTIN_HANDLER(llvm_pow_f64, Math_pow); // LLVM from creating these intrinsics +DEF_MAYBE_BUILTIN_HANDLER(llvm_sin_f32, Math_sin); +DEF_MAYBE_BUILTIN_HANDLER(llvm_sin_f64, Math_sin); + +DEF_CALL_HANDLER(llvm_powi_f32, { + return getAssign(CI) + getParenCast("Math_pow(" + getValueAsCastStr(CI->getOperand(0)) + ", " + getCast(getValueAsCastStr(CI->getOperand(1)), CI->getOperand(0)->getType()) + ")", CI->getType()); +}) +DEF_CALL_HANDLER(llvm_powi_f64, { + return getAssign(CI) + getParenCast("Math_pow(" + getValueAsCastStr(CI->getOperand(0)) + ", " + getCast(getValueAsCastStr(CI->getOperand(1)), CI->getOperand(0)->getType()) + ")", CI->getType()); +}) + +DEF_BUILTIN_HANDLER(llvm_log_f32, Math_log); +DEF_BUILTIN_HANDLER(llvm_log_f64, Math_log); +DEF_BUILTIN_HANDLER(llvm_exp_f32, Math_exp); +DEF_BUILTIN_HANDLER(llvm_exp_f64, Math_exp); + +// SIMD.js Float64x2 +DEF_BUILTIN_HANDLER(emscripten_float64x2_set, SIMD_Float64x2); +DEF_BUILTIN_HANDLER(emscripten_float64x2_splat, SIMD_Float64x2_splat); +DEF_BUILTIN_HANDLER(emscripten_float64x2_add, SIMD_Float64x2_add); +DEF_BUILTIN_HANDLER(emscripten_float64x2_sub, SIMD_Float64x2_sub); +DEF_BUILTIN_HANDLER(emscripten_float64x2_mul, SIMD_Float64x2_mul); +DEF_BUILTIN_HANDLER(emscripten_float64x2_div, SIMD_Float64x2_div); 
+DEF_BUILTIN_HANDLER(emscripten_float64x2_max, SIMD_Float64x2_max); +DEF_BUILTIN_HANDLER(emscripten_float64x2_min, SIMD_Float64x2_min); +DEF_BUILTIN_HANDLER(emscripten_float64x2_maxNum, SIMD_Float64x2_maxNum); +DEF_BUILTIN_HANDLER(emscripten_float64x2_minNum, SIMD_Float64x2_minNum); +DEF_BUILTIN_HANDLER(emscripten_float64x2_neg, SIMD_Float64x2_neg); +DEF_BUILTIN_HANDLER(emscripten_float64x2_sqrt, SIMD_Float64x2_sqrt); +DEF_BUILTIN_HANDLER(emscripten_float64x2_reciprocalApproximation, SIMD_Float64x2_reciprocalApproximation); +DEF_BUILTIN_HANDLER(emscripten_float64x2_reciprocalSqrtApproximation, SIMD_Float64x2_reciprocalSqrtApproximation); +DEF_BUILTIN_HANDLER(emscripten_float64x2_abs, SIMD_Float64x2_abs); +// n.b. No emscripten_float64x2_and, only defined on boolean and integer SIMD types. +// n.b. No emscripten_float64x2_xor, only defined on boolean and integer SIMD types. +// n.b. No emscripten_float64x2_or, only defined on boolean and integer SIMD types. +// n.b. No emscripten_float64x2_not, only defined on boolean and integer SIMD types. +static std::string castBool64x2ToInt32x4(const std::string &valueStr) { + return std::string("SIMD_Int32x4_fromBool64x2Bits(") + valueStr + ')'; +} +DEF_CALL_HANDLER(emscripten_float64x2_lessThan, { + return getAssign(CI) + castBool64x2ToInt32x4("SIMD_Float64x2_lessThan(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"); +}) +DEF_CALL_HANDLER(emscripten_float64x2_lessThanOrEqual, { + return getAssign(CI) + castBool64x2ToInt32x4("SIMD_Float64x2_lessThanOrEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"); +}) +DEF_CALL_HANDLER(emscripten_float64x2_greaterThan, { + return getAssign(CI) + castBool64x2ToInt32x4("SIMD_Float64x2_greaterThan(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"); +}) +DEF_CALL_HANDLER(emscripten_float64x2_greaterThanOrEqual, { + return getAssign(CI) + castBool64x2ToInt32x4("SIMD_Float64x2_greaterThanOrEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"); +}) +DEF_CALL_HANDLER(emscripten_float64x2_equal, { + return getAssign(CI) + castBool64x2ToInt32x4("SIMD_Float64x2_equal(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"); +}) +DEF_CALL_HANDLER(emscripten_float64x2_notEqual, { + return getAssign(CI) + castBool64x2ToInt32x4("SIMD_Float64x2_notEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"); +}) +// n.b. No emscripten_float64x2_anyTrue, only defined on boolean SIMD types. +// n.b. No emscripten_float64x2_allTrue, only defined on boolean SIMD types. +DEF_BUILTIN_HANDLER(emscripten_float64x2_select, SIMD_Float64x2_select); +// n.b. No emscripten_float64x2_addSaturate, only defined on 8-bit and 16-bit integer SIMD types. +// n.b. No emscripten_float64x2_subSaturate, only defined on 8-bit and 16-bit integer SIMD types. +// n.b. No emscripten_float64x2_shiftLeftByScalar, only defined on integer SIMD types. +// n.b. No emscripten_float64x2_shiftRightByScalar, only defined on integer SIMD types. 
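As a side note on the libc redirects above, a small sketch of the policy behind DEF_MAYBE_BUILTIN_HANDLER: under asm.js the libc call is rewritten to a JS Math builtin, while under WebAssembly it remains a normal declared import so a real libm can be linked in. lowerLibcCall and webAssembly are illustrative names for this sketch, not the backend's API.

#include <set>
#include <string>
#include <cstdio>

// Sketch of the two lowering paths chosen by DEF_MAYBE_BUILTIN_HANDLER.
static std::string lowerLibcCall(const std::string &name,
                                 const std::string &builtin,
                                 bool webAssembly,
                                 std::set<std::string> &declares) {
  if (!webAssembly)
    return builtin;            // e.g. cos -> "Math_cos"
  declares.insert(name);       // record the import, e.g. "cos"
  return "_" + name;           // emit a call to "_cos" and let libc provide it
}

int main() {
  std::set<std::string> declares;
  printf("%s\n", lowerLibcCall("cos", "Math_cos", /*webAssembly=*/false, declares).c_str());
  printf("%s\n", lowerLibcCall("cos", "Math_cos", /*webAssembly=*/true, declares).c_str());
  return 0;
}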
+DEF_BUILTIN_HANDLER(emscripten_float64x2_extractLane, SIMD_Float64x2_extractLane); +DEF_BUILTIN_HANDLER(emscripten_float64x2_replaceLane, SIMD_Float64x2_replaceLane); +DEF_CALL_HANDLER(emscripten_float64x2_store, { + UsesSIMDFloat64x2 = true; + return "SIMD_Float64x2_store(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_float64x2_store1, { + UsesSIMDFloat64x2 = true; + return "SIMD_Float64x2_store1(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_float64x2_load, { + UsesSIMDFloat64x2 = true; + return getAssign(CI) + "SIMD_Float64x2_load(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_float64x2_load1, { + UsesSIMDFloat64x2 = true; + return getAssign(CI) + "SIMD_Float64x2_load1(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_BUILTIN_HANDLER(emscripten_float64x2_fromFloat32x4Bits, SIMD_Float64x2_fromFloat32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_float64x2_fromInt32x4Bits, SIMD_Float64x2_fromInt32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_float64x2_fromUint32x4Bits, SIMD_Float64x2_fromUint32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_float64x2_fromInt16x8Bits, SIMD_Float64x2_fromInt16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_float64x2_fromUint16x8Bits, SIMD_Float64x2_fromUint16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_float64x2_fromInt8x16Bits, SIMD_Float64x2_fromInt8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_float64x2_fromUint8x16Bits, SIMD_Float64x2_fromUint8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_float64x2_swizzle, SIMD_Float64x2_swizzle); +DEF_BUILTIN_HANDLER(emscripten_float64x2_shuffle, SIMD_Float64x2_shuffle); + +// SIMD.js Float32x4 +DEF_BUILTIN_HANDLER(emscripten_float32x4_set, SIMD_Float32x4); +DEF_BUILTIN_HANDLER(emscripten_float32x4_splat, SIMD_Float32x4_splat); +DEF_BUILTIN_HANDLER(emscripten_float32x4_add, SIMD_Float32x4_add); +DEF_BUILTIN_HANDLER(emscripten_float32x4_sub, SIMD_Float32x4_sub); +DEF_BUILTIN_HANDLER(emscripten_float32x4_mul, SIMD_Float32x4_mul); +DEF_BUILTIN_HANDLER(emscripten_float32x4_div, SIMD_Float32x4_div); +DEF_BUILTIN_HANDLER(emscripten_float32x4_max, SIMD_Float32x4_max); +DEF_BUILTIN_HANDLER(emscripten_float32x4_min, SIMD_Float32x4_min); +DEF_BUILTIN_HANDLER(emscripten_float32x4_maxNum, SIMD_Float32x4_maxNum); +DEF_BUILTIN_HANDLER(emscripten_float32x4_minNum, SIMD_Float32x4_minNum); +DEF_BUILTIN_HANDLER(emscripten_float32x4_neg, SIMD_Float32x4_neg); +DEF_BUILTIN_HANDLER(emscripten_float32x4_sqrt, SIMD_Float32x4_sqrt); +DEF_BUILTIN_HANDLER(emscripten_float32x4_reciprocalApproximation, SIMD_Float32x4_reciprocalApproximation); +DEF_BUILTIN_HANDLER(emscripten_float32x4_reciprocalSqrtApproximation, SIMD_Float32x4_reciprocalSqrtApproximation); +DEF_BUILTIN_HANDLER(emscripten_float32x4_abs, SIMD_Float32x4_abs); +// n.b. No emscripten_float32x4_and, only defined on boolean and integer SIMD types. +// n.b. No emscripten_float32x4_xor, only defined on boolean and integer SIMD types. +// n.b. No emscripten_float32x4_or, only defined on boolean and integer SIMD types. +// n.b. No emscripten_float32x4_not, only defined on boolean and integer SIMD types. +std::string castBoolVecToIntVec(int numElems, const std::string &str, bool signExtend) +{ + int elemWidth = 128 / numElems; + std::string simdType = "SIMD_Int" + std::to_string(elemWidth) + "x" + std::to_string(numElems); + return simdType + "_select(" + str + ", " + simdType + "_splat(" + (signExtend ? 
"-1" : "1") + "), " + simdType + "_splat(0))"; +} +DEF_CALL_HANDLER(emscripten_float32x4_lessThan, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Float32x4_lessThan(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_float32x4_lessThanOrEqual, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Float32x4_lessThanOrEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_float32x4_greaterThan, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Float32x4_greaterThan(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_float32x4_greaterThanOrEqual, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Float32x4_greaterThanOrEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_float32x4_equal, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Float32x4_equal(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_float32x4_notEqual, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Float32x4_notEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +// n.b. No emscripten_float32x4_anyTrue, only defined on boolean SIMD types. +// n.b. No emscripten_float32x4_allTrue, only defined on boolean SIMD types. +DEF_CALL_HANDLER(emscripten_float32x4_select, { + // FIXME: We really need a more general way of handling boolean types, + // including an optimization to allow more Int32x4 operations to be + // translated as Bool32x4 operations. + std::string Op; + if (SExtInst *SE = dyn_cast(CI->getOperand(0))) { + Op = getValueAsStr(SE->getOperand(0)); + } else { + Op = "SIMD_Int32x4_notEqual(" + getValueAsStr(CI->getOperand(0)) + ", SIMD_Int32x4_splat(0))"; + } + return getAssign(CI) + "SIMD_Float32x4_select(" + Op + "," + getValueAsStr(CI->getOperand(1)) + "," + getValueAsStr(CI->getOperand(2)) + ")"; +}) +// n.b. No emscripten_float32x4_addSaturate, only defined on 8-bit and 16-bit integer SIMD types. +// n.b. No emscripten_float32x4_subSaturate, only defined on 8-bit and 16-bit integer SIMD types. +// n.b. No emscripten_float32x4_shiftLeftByScalar, only defined on integer SIMD types. +// n.b. No emscripten_float32x4_shiftRightByScalar, only defined on integer SIMD types. 
+DEF_BUILTIN_HANDLER(emscripten_float32x4_extractLane, SIMD_Float32x4_extractLane); +DEF_BUILTIN_HANDLER(emscripten_float32x4_replaceLane, SIMD_Float32x4_replaceLane); +DEF_CALL_HANDLER(emscripten_float32x4_store, { + UsesSIMDFloat32x4 = true; + return "SIMD_Float32x4_store(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_float32x4_store1, { + UsesSIMDFloat32x4 = true; + return "SIMD_Float32x4_store1(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_float32x4_store2, { + UsesSIMDFloat32x4 = true; + return "SIMD_Float32x4_store2(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_float32x4_store3, { + UsesSIMDFloat32x4 = true; + return "SIMD_Float32x4_store3(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_float32x4_load, { + UsesSIMDFloat32x4 = true; + return getAssign(CI) + "SIMD_Float32x4_load(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_float32x4_load1, { + UsesSIMDFloat32x4 = true; + return getAssign(CI) + "SIMD_Float32x4_load1(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_float32x4_load2, { + UsesSIMDFloat32x4 = true; + return getAssign(CI) + "SIMD_Float32x4_load2(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_float32x4_load3, { + UsesSIMDFloat32x4 = true; + return getAssign(CI) + "SIMD_Float32x4_load3(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_BUILTIN_HANDLER(emscripten_float32x4_fromFloat64x2Bits, SIMD_Float32x4_fromFloat64x2Bits); +DEF_BUILTIN_HANDLER(emscripten_float32x4_fromInt32x4Bits, SIMD_Float32x4_fromInt32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_float32x4_fromUint32x4Bits, SIMD_Float32x4_fromUint32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_float32x4_fromInt16x8Bits, SIMD_Float32x4_fromInt16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_float32x4_fromUint16x8Bits, SIMD_Float32x4_fromUint16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_float32x4_fromInt8x16Bits, SIMD_Float32x4_fromInt8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_float32x4_fromUint8x16Bits, SIMD_Float32x4_fromUint8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_float32x4_fromInt32x4, SIMD_Float32x4_fromInt32x4); +DEF_BUILTIN_HANDLER(emscripten_float32x4_fromUint32x4, SIMD_Float32x4_fromUint32x4); +DEF_BUILTIN_HANDLER(emscripten_float32x4_swizzle, SIMD_Float32x4_swizzle); +DEF_BUILTIN_HANDLER(emscripten_float32x4_shuffle, SIMD_Float32x4_shuffle); + +// SIMD.js Int32x4 +DEF_BUILTIN_HANDLER(emscripten_int32x4_set, SIMD_Int32x4); +DEF_BUILTIN_HANDLER(emscripten_int32x4_splat, SIMD_Int32x4_splat); +DEF_BUILTIN_HANDLER(emscripten_int32x4_add, SIMD_Int32x4_add); +DEF_BUILTIN_HANDLER(emscripten_int32x4_sub, SIMD_Int32x4_sub); +DEF_BUILTIN_HANDLER(emscripten_int32x4_mul, SIMD_Int32x4_mul); +// n.b. No emscripten_int32x4_div, division is only defined on floating point types. +// n.b. No emscripten_int32x4_max, only defined on floating point types. +// n.b. No emscripten_int32x4_min, only defined on floating point types. +// n.b. No emscripten_int32x4_maxNum, only defined on floating point types. +// n.b. No emscripten_int32x4_minNum, only defined on floating point types. +DEF_BUILTIN_HANDLER(emscripten_int32x4_neg, SIMD_Int32x4_neg); +// n.b. No emscripten_int32x4_sqrt, only defined on floating point types. 
+// n.b. No emscripten_int32x4_reciprocalApproximation, only defined on floating point types. +// n.b. No emscripten_int32x4_reciprocalSqrtApproximation, only defined on floating point types. +// n.b. No emscripten_int32x4_abs, only defined on floating point types. +DEF_BUILTIN_HANDLER(emscripten_int32x4_and, SIMD_Int32x4_and); +DEF_BUILTIN_HANDLER(emscripten_int32x4_xor, SIMD_Int32x4_xor); +DEF_BUILTIN_HANDLER(emscripten_int32x4_or, SIMD_Int32x4_or); +DEF_BUILTIN_HANDLER(emscripten_int32x4_not, SIMD_Int32x4_not); +DEF_CALL_HANDLER(emscripten_int32x4_lessThan, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Int32x4_lessThan(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int32x4_lessThanOrEqual, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Int32x4_lessThanOrEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int32x4_greaterThan, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Int32x4_greaterThan(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int32x4_greaterThanOrEqual, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Int32x4_greaterThanOrEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int32x4_equal, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Int32x4_equal(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int32x4_notEqual, { + return getAssign(CI) + castBoolVecToIntVec(4, "SIMD_Int32x4_notEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int32x4_select, { + // FIXME: We really need a more general way of handling boolean types, + // including an optimization to allow more Int32x4 operations to be + // translated as Bool32x4 operations. + std::string Op; + if (SExtInst *SE = dyn_cast(CI->getOperand(0))) { + Op = getValueAsStr(SE->getOperand(0)); + } else { + Op = "SIMD_Int32x4_notEqual(" + getValueAsStr(CI->getOperand(0)) + ", SIMD_Int32x4_splat(0))"; + } + return getAssign(CI) + "SIMD_Int32x4_select(" + Op + "," + getValueAsStr(CI->getOperand(1)) + "," + getValueAsStr(CI->getOperand(2)) + ")"; +}) +// n.b. No emscripten_int32x4_addSaturate, only defined on 8-bit and 16-bit integer SIMD types. +// n.b. No emscripten_int32x4_subSaturate, only defined on 8-bit and 16-bit integer SIMD types. 
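The float32x4 and int32x4 select handlers above normalize their condition the same way: if the mask came from a sign-extended i1 vector, the original boolean operand is reused directly; otherwise the integer mask is compared against splat(0). A scalar sketch of the per-lane rule that comparison recovers; selectLane is an illustrative name only.

#include <cstdint>
#include <cstdio>

// Per-lane view of the condition normalization in the *_select handlers:
// a lane takes the "true" operand exactly when its mask value is non-zero,
// which is what "notEqual(mask, splat(0))" expresses when the mask is not
// already a sign-extended boolean vector.
static int32_t selectLane(int32_t mask, int32_t ifTrue, int32_t ifFalse) {
  const bool laneIsTrue = (mask != 0);
  return laneIsTrue ? ifTrue : ifFalse;
}

int main() {
  printf("%d %d\n", selectLane(-1, 10, 20), selectLane(0, 10, 20)); // 10 20
  return 0;
}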
+DEF_BUILTIN_HANDLER(emscripten_int32x4_shiftLeftByScalar, SIMD_Int32x4_shiftLeftByScalar); +DEF_BUILTIN_HANDLER(emscripten_int32x4_shiftRightByScalar, SIMD_Int32x4_shiftRightByScalar); +DEF_BUILTIN_HANDLER(emscripten_int32x4_extractLane, SIMD_Int32x4_extractLane); +DEF_BUILTIN_HANDLER(emscripten_int32x4_replaceLane, SIMD_Int32x4_replaceLane); +DEF_CALL_HANDLER(emscripten_int32x4_store, { + UsesSIMDInt32x4 = true; + return "SIMD_Int32x4_store(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_int32x4_store1, { + UsesSIMDInt32x4 = true; + return "SIMD_Int32x4_store1(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_int32x4_store2, { + UsesSIMDInt32x4 = true; + return "SIMD_Int32x4_store2(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_int32x4_store3, { + UsesSIMDInt32x4 = true; + return "SIMD_Int32x4_store3(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_int32x4_load, { + UsesSIMDInt32x4 = true; + return getAssign(CI) + "SIMD_Int32x4_load(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_int32x4_load1, { + UsesSIMDInt32x4 = true; + return getAssign(CI) + "SIMD_Int32x4_load1(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_int32x4_load2, { + UsesSIMDInt32x4 = true; + return getAssign(CI) + "SIMD_Int32x4_load2(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_int32x4_load3, { + UsesSIMDInt32x4 = true; + return getAssign(CI) + "SIMD_Int32x4_load3(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_BUILTIN_HANDLER(emscripten_int32x4_fromFloat64x2Bits, SIMD_Int32x4_fromFloat64x2Bits); +DEF_BUILTIN_HANDLER(emscripten_int32x4_fromFloat32x4Bits, SIMD_Int32x4_fromFloat32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_int32x4_fromUint32x4Bits, SIMD_Int32x4_fromUint32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_int32x4_fromInt16x8Bits, SIMD_Int32x4_fromInt16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_int32x4_fromUint16x8Bits, SIMD_Int32x4_fromUint16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_int32x4_fromInt8x16Bits, SIMD_Int32x4_fromInt8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_int32x4_fromUint8x16Bits, SIMD_Int32x4_fromUint8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_int32x4_fromFloat32x4, SIMD_Int32x4_fromFloat32x4); +DEF_BUILTIN_HANDLER(emscripten_int32x4_fromUint32x4, SIMD_Int32x4_fromUint32x4); +// TODO: emscripten_int32x4_fromFloat64x2? +DEF_BUILTIN_HANDLER(emscripten_int32x4_swizzle, SIMD_Int32x4_swizzle); +DEF_BUILTIN_HANDLER(emscripten_int32x4_shuffle, SIMD_Int32x4_shuffle); + +// SIMD.js Uint32x4 +DEF_BUILTIN_HANDLER(emscripten_uint32x4_set, SIMD_Uint32x4); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_splat, SIMD_Uint32x4_splat); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_add, SIMD_Uint32x4_add); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_sub, SIMD_Uint32x4_sub); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_mul, SIMD_Uint32x4_mul); +// n.b. No emscripten_uint32x4_div, division is only defined on floating point types. +// n.b. No emscripten_uint32x4_max, only defined on floating point types. +// n.b. No emscripten_uint32x4_min, only defined on floating point types. +// n.b. No emscripten_uint32x4_maxNum, only defined on floating point types. +// n.b. 
No emscripten_uint32x4_minNum, only defined on floating point types. +DEF_BUILTIN_HANDLER(emscripten_uint32x4_neg, SIMD_Uint32x4_neg); +// n.b. No emscripten_uint32x4_sqrt, only defined on floating point types. +// n.b. No emscripten_uint32x4_reciprocalApproximation, only defined on floating point types. +// n.b. No emscripten_uint32x4_reciprocalSqrtApproximation, only defined on floating point types. +// n.b. No emscripten_uint32x4_abs, only defined on floating point types. +DEF_BUILTIN_HANDLER(emscripten_uint32x4_and, SIMD_Uint32x4_and); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_xor, SIMD_Uint32x4_xor); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_or, SIMD_Uint32x4_or); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_not, SIMD_Uint32x4_not); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_lessThan, SIMD_Uint32x4_lessThan); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_lessThanOrEqual, SIMD_Uint32x4_lessThanOrEqual); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_greaterThan, SIMD_Uint32x4_greaterThan); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_greaterThanOrEqual, SIMD_Uint32x4_greaterThanOrEqual); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_equal, SIMD_Uint32x4_equal); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_notEqual, SIMD_Uint32x4_notEqual); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_select, SIMD_Uint32x4_select); +// n.b. No emscripten_uint32x4_addSaturate, only defined on 8-bit and 16-bit integer SIMD types. +// n.b. No emscripten_uint32x4_subSaturate, only defined on 8-bit and 16-bit integer SIMD types. +DEF_BUILTIN_HANDLER(emscripten_uint32x4_shiftLeftByScalar, SIMD_Uint32x4_shiftLeftByScalar); +DEF_CALL_HANDLER(emscripten_uint32x4_shiftRightByScalar, { + UsesSIMDUint32x4 = true; + UsesSIMDInt32x4 = true; + return getAssign(CI) + "SIMD_Int32x4_fromUint32x4Bits(SIMD_Uint32x4_shiftRightByScalar(SIMD_Uint32x4_fromInt32x4Bits(" + getValueAsStr(CI->getOperand(0)) + "), " + getValueAsStr(CI->getOperand(1)) + "))"; +}) +DEF_BUILTIN_HANDLER(emscripten_uint32x4_extractLane, SIMD_Uint32x4_extractLane); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_replaceLane, SIMD_Uint32x4_replaceLane); +DEF_CALL_HANDLER(emscripten_uint32x4_store, { + UsesSIMDUint32x4 = true; + return "SIMD_Uint32x4_store(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_uint32x4_store1, { + UsesSIMDUint32x4 = true; + return "SIMD_Uint32x4_store1(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_uint32x4_store2, { + UsesSIMDUint32x4 = true; + return "SIMD_Uint32x4_store2(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_uint32x4_store3, { + UsesSIMDUint32x4 = true; + return "SIMD_Uint32x4_store3(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ", " + ")"; +}) +DEF_CALL_HANDLER(emscripten_uint32x4_load, { + UsesSIMDUint32x4 = true; + return getAssign(CI) + "SIMD_Uint32x4_load(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_uint32x4_load1, { + UsesSIMDUint32x4 = true; + return getAssign(CI) + "SIMD_Uint32x4_load1(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_uint32x4_load2, { + UsesSIMDUint32x4 = true; + return getAssign(CI) + "SIMD_Uint32x4_load2(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_uint32x4_load3, { + UsesSIMDUint32x4 = true; + return getAssign(CI) + 
"SIMD_Uint32x4_load3(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_BUILTIN_HANDLER(emscripten_uint32x4_fromFloat64x2Bits, SIMD_Uint32x4_fromFloat64x2Bits); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_fromFloat32x4Bits, SIMD_Uint32x4_fromFloat32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_fromInt32x4Bits, SIMD_Uint32x4_fromInt32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_fromInt16x8Bits, SIMD_Uint32x4_fromInt16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_fromUint16x8Bits, SIMD_Uint32x4_fromUint16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_fromInt8x16Bits, SIMD_Uint32x4_fromInt8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_fromUint8x16Bits, SIMD_Uint32x4_fromUint8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_fromFloat32x4, SIMD_Uint32x4_fromFloat32x4); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_fromInt32x4, SIMD_Uint32x4_fromInt32x4); +// TODO: emscripten_uint32x4_fromFloat64x2? +DEF_BUILTIN_HANDLER(emscripten_uint32x4_swizzle, SIMD_Uint32x4_swizzle); +DEF_BUILTIN_HANDLER(emscripten_uint32x4_shuffle, SIMD_Uint32x4_shuffle); + +// SIMD.js Int16x8 +DEF_BUILTIN_HANDLER(emscripten_int16x8_set, SIMD_Int16x8); +DEF_BUILTIN_HANDLER(emscripten_int16x8_splat, SIMD_Int16x8_splat); +DEF_BUILTIN_HANDLER(emscripten_int16x8_add, SIMD_Int16x8_add); +DEF_BUILTIN_HANDLER(emscripten_int16x8_sub, SIMD_Int16x8_sub); +DEF_BUILTIN_HANDLER(emscripten_int16x8_mul, SIMD_Int16x8_mul); +// n.b. No emscripten_int16x8_div, division is only defined on floating point types. +// n.b. No emscripten_int16x8_max, only defined on floating point types. +// n.b. No emscripten_int16x8_min, only defined on floating point types. +// n.b. No emscripten_int16x8_maxNum, only defined on floating point types. +// n.b. No emscripten_int16x8_minNum, only defined on floating point types. +DEF_BUILTIN_HANDLER(emscripten_int16x8_neg, SIMD_Int16x8_neg); +// n.b. No emscripten_int16x8_sqrt, only defined on floating point types. +// n.b. No emscripten_int16x8_reciprocalApproximation, only defined on floating point types. +// n.b. No emscripten_int16x8_reciprocalSqrtApproximation, only defined on floating point types. +// n.b. No emscripten_int16x8_abs, only defined on floating point types. 
+DEF_BUILTIN_HANDLER(emscripten_int16x8_and, SIMD_Int16x8_and); +DEF_BUILTIN_HANDLER(emscripten_int16x8_xor, SIMD_Int16x8_xor); +DEF_BUILTIN_HANDLER(emscripten_int16x8_or, SIMD_Int16x8_or); +DEF_BUILTIN_HANDLER(emscripten_int16x8_not, SIMD_Int16x8_not); +DEF_CALL_HANDLER(emscripten_int16x8_lessThan, { + return getAssign(CI) + castBoolVecToIntVec(8, "SIMD_Int16x8_lessThan(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int16x8_lessThanOrEqual, { + return getAssign(CI) + castBoolVecToIntVec(8, "SIMD_Int16x8_lessThanOrEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int16x8_greaterThan, { + return getAssign(CI) + castBoolVecToIntVec(8, "SIMD_Int16x8_greaterThan(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int16x8_greaterThanOrEqual, { + return getAssign(CI) + castBoolVecToIntVec(8, "SIMD_Int16x8_greaterThanOrEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int16x8_equal, { + return getAssign(CI) + castBoolVecToIntVec(8, "SIMD_Int16x8_equal(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int16x8_notEqual, { + return getAssign(CI) + castBoolVecToIntVec(8, "SIMD_Int16x8_notEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int16x8_select, { + // FIXME: We really need a more general way of handling boolean types, + // including an optimization to allow more Int16x8 operations to be + // translated as Bool16x8 operations. 
+ std::string Op; + if (SExtInst *SE = dyn_cast(CI->getOperand(0))) { + Op = getValueAsStr(SE->getOperand(0)); + } else { + Op = "SIMD_Int16x8_notEqual(" + getValueAsStr(CI->getOperand(0)) + ", SIMD_Int16x8_splat(0))"; + } + return getAssign(CI) + "SIMD_Int16x8_select(" + Op + "," + getValueAsStr(CI->getOperand(1)) + "," + getValueAsStr(CI->getOperand(2)) + ")"; +}) +DEF_BUILTIN_HANDLER(emscripten_int16x8_addSaturate, SIMD_Int16x8_addSaturate); +DEF_BUILTIN_HANDLER(emscripten_int16x8_subSaturate, SIMD_Int16x8_subSaturate); +DEF_BUILTIN_HANDLER(emscripten_int16x8_shiftLeftByScalar, SIMD_Int16x8_shiftLeftByScalar); +DEF_BUILTIN_HANDLER(emscripten_int16x8_shiftRightByScalar, SIMD_Int16x8_shiftRightByScalar); +DEF_BUILTIN_HANDLER(emscripten_int16x8_extractLane, SIMD_Int16x8_extractLane); +DEF_BUILTIN_HANDLER(emscripten_int16x8_replaceLane, SIMD_Int16x8_replaceLane); +DEF_CALL_HANDLER(emscripten_int16x8_store, { + UsesSIMDInt16x8 = true; + return "SIMD_Int16x8_store(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_int16x8_load, { + UsesSIMDInt16x8 = true; + return getAssign(CI) + "SIMD_Int16x8_load(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_BUILTIN_HANDLER(emscripten_int16x8_fromFloat64x2Bits, SIMD_Int16x8_fromFloat64x2Bits); +DEF_BUILTIN_HANDLER(emscripten_int16x8_fromFloat32x4Bits, SIMD_Int16x8_fromFloat32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_int16x8_fromInt32x4Bits, SIMD_Int16x8_fromInt32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_int16x8_fromUint32x4Bits, SIMD_Int16x8_fromUint32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_int16x8_fromUint16x8Bits, SIMD_Int16x8_fromUint16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_int16x8_fromInt8x16Bits, SIMD_Int16x8_fromInt8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_int16x8_fromUint8x16Bits, SIMD_Int16x8_fromUint8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_int16x8_fromUint16x8, SIMD_Int16x8_fromUint16x8); +DEF_BUILTIN_HANDLER(emscripten_int16x8_swizzle, SIMD_Int16x8_swizzle); +DEF_BUILTIN_HANDLER(emscripten_int16x8_shuffle, SIMD_Int16x8_shuffle); + +// SIMD.js Uint16x8 +DEF_BUILTIN_HANDLER(emscripten_uint16x8_set, SIMD_Uint16x8); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_splat, SIMD_Uint16x8_splat); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_add, SIMD_Uint16x8_add); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_sub, SIMD_Uint16x8_sub); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_mul, SIMD_Uint16x8_mul); +// n.b. No emscripten_uint16x8_div, division is only defined on floating point types. +// n.b. No emscripten_uint16x8_max, only defined on floating point types. +// n.b. No emscripten_uint16x8_min, only defined on floating point types. +// n.b. No emscripten_uint16x8_maxNum, only defined on floating point types. +// n.b. No emscripten_uint16x8_minNum, only defined on floating point types. +DEF_BUILTIN_HANDLER(emscripten_uint16x8_neg, SIMD_Uint16x8_neg); +// n.b. No emscripten_uint16x8_sqrt, only defined on floating point types. +// n.b. No emscripten_uint16x8_reciprocalApproximation, only defined on floating point types. +// n.b. No emscripten_uint16x8_reciprocalSqrtApproximation, only defined on floating point types. +// n.b. No emscripten_uint16x8_abs, only defined on floating point types. 
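For the Int16x8/Uint16x8 addSaturate and subSaturate redirects above, a scalar sketch of the lane semantics they map onto: results clamp to the lane's range instead of wrapping. addSatI16 is an illustrative helper, not part of the backend.

#include <cstdint>
#include <cstdio>

// Saturating 16-bit add: compute in a wider type, then clamp to [INT16_MIN,
// INT16_MAX] rather than letting the value wrap around.
static int16_t addSatI16(int16_t a, int16_t b) {
  const int32_t sum = static_cast<int32_t>(a) + static_cast<int32_t>(b);
  if (sum > INT16_MAX) return INT16_MAX;
  if (sum < INT16_MIN) return INT16_MIN;
  return static_cast<int16_t>(sum);
}

int main() {
  printf("%d %d\n", addSatI16(30000, 10000), addSatI16(-30000, -10000));
  // 32767 -32768, rather than the wrapped values
  return 0;
}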
+DEF_BUILTIN_HANDLER(emscripten_uint16x8_and, SIMD_Uint16x8_and); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_xor, SIMD_Uint16x8_xor); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_or, SIMD_Uint16x8_or); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_not, SIMD_Uint16x8_not); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_lessThan, SIMD_Uint16x8_lessThan); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_lessThanOrEqual, SIMD_Uint16x8_lessThanOrEqual); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_greaterThan, SIMD_Uint16x8_greaterThan); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_greaterThanOrEqual, SIMD_Uint16x8_greaterThanOrEqual); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_equal, SIMD_Uint16x8_equal); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_notEqual, SIMD_Uint16x8_notEqual); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_select, SIMD_Uint16x8_select); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_addSaturate, SIMD_Uint16x8_addSaturate); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_subSaturate, SIMD_Uint16x8_subSaturate); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_shiftLeftByScalar, SIMD_Uint16x8_shiftLeftByScalar); +DEF_CALL_HANDLER(emscripten_uint16x8_shiftRightByScalar, { + UsesSIMDInt16x8 = true; + UsesSIMDUint16x8 = true; + return getAssign(CI) + "SIMD_Int16x8_fromUint16x8Bits(SIMD_Uint16x8_shiftRightByScalar(SIMD_Uint16x8_fromInt16x8Bits(" + getValueAsStr(CI->getOperand(0)) + "), " + getValueAsStr(CI->getOperand(1)) + "))"; +}) +DEF_BUILTIN_HANDLER(emscripten_uint16x8_extractLane, SIMD_Uint16x8_extractLane); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_replaceLane, SIMD_Uint16x8_replaceLane); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_store, SIMD_Uint16x8_store); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_load, SIMD_Uint16x8_load); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_fromFloat64x2Bits, SIMD_Uint16x8_fromFloat64x2Bits); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_fromFloat32x4Bits, SIMD_Uint16x8_fromFloat32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_fromInt32x4Bits, SIMD_Uint16x8_fromInt32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_fromUint32x4Bits, SIMD_Uint16x8_fromUint32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_fromInt16x8Bits, SIMD_Uint16x8_fromInt16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_fromInt8x16Bits, SIMD_Uint16x8_fromInt8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_fromUint8x16Bits, SIMD_Uint16x8_fromUint8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_fromInt16x8, SIMD_Uint16x8_fromInt16x8); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_swizzle, SIMD_Uint16x8_swizzle); +DEF_BUILTIN_HANDLER(emscripten_uint16x8_shuffle, SIMD_Uint16x8_shuffle); + +// SIMD.js Int8x16 +DEF_BUILTIN_HANDLER(emscripten_int8x16_set, SIMD_Int8x16); +DEF_BUILTIN_HANDLER(emscripten_int8x16_splat, SIMD_Int8x16_splat); +DEF_BUILTIN_HANDLER(emscripten_int8x16_add, SIMD_Int8x16_add); +DEF_BUILTIN_HANDLER(emscripten_int8x16_sub, SIMD_Int8x16_sub); +DEF_BUILTIN_HANDLER(emscripten_int8x16_mul, SIMD_Int8x16_mul); +// n.b. No emscripten_int8x16_div, division is only defined on floating point types. +// n.b. No emscripten_int8x16_max, only defined on floating point types. +// n.b. No emscripten_int8x16_min, only defined on floating point types. +// n.b. No emscripten_int8x16_maxNum, only defined on floating point types. +// n.b. No emscripten_int8x16_minNum, only defined on floating point types. +DEF_BUILTIN_HANDLER(emscripten_int8x16_neg, SIMD_Int8x16_neg); +// n.b. No emscripten_int8x16_sqrt, only defined on floating point types. +// n.b. No emscripten_int8x16_reciprocalApproximation, only defined on floating point types. +// n.b. 
No emscripten_int8x16_reciprocalSqrtApproximation, only defined on floating point types. +// n.b. No emscripten_int8x16_abs, only defined on floating point types. +DEF_BUILTIN_HANDLER(emscripten_int8x16_and, SIMD_Int8x16_and); +DEF_BUILTIN_HANDLER(emscripten_int8x16_xor, SIMD_Int8x16_xor); +DEF_BUILTIN_HANDLER(emscripten_int8x16_or, SIMD_Int8x16_or); +DEF_BUILTIN_HANDLER(emscripten_int8x16_not, SIMD_Int8x16_not); +DEF_CALL_HANDLER(emscripten_int8x16_lessThan, { + return getAssign(CI) + castBoolVecToIntVec(16, "SIMD_Int8x16_lessThan(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int8x16_lessThanOrEqual, { + return getAssign(CI) + castBoolVecToIntVec(16, "SIMD_Int8x16_lessThanOrEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int8x16_greaterThan, { + return getAssign(CI) + castBoolVecToIntVec(16, "SIMD_Int8x16_greaterThan(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int8x16_greaterThanOrEqual, { + return getAssign(CI) + castBoolVecToIntVec(16, "SIMD_Int8x16_greaterThanOrEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int8x16_equal, { + return getAssign(CI) + castBoolVecToIntVec(16, "SIMD_Int8x16_equal(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int8x16_notEqual, { + return getAssign(CI) + castBoolVecToIntVec(16, "SIMD_Int8x16_notEqual(" + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")", true); +}) +DEF_CALL_HANDLER(emscripten_int8x16_select, { + // FIXME: We really need a more general way of handling boolean types, + // including an optimization to allow more Int8x16 operations to be + // translated as Bool8x16 operations. 
+ std::string Op; + if (SExtInst *SE = dyn_cast(CI->getOperand(0))) { + Op = getValueAsStr(SE->getOperand(0)); + } else { + Op = "SIMD_Int8x16_notEqual(" + getValueAsStr(CI->getOperand(0)) + ", SIMD_Int8x16_splat(0))"; + } + return getAssign(CI) + "SIMD_Int8x16_select(" + Op + "," + getValueAsStr(CI->getOperand(1)) + "," + getValueAsStr(CI->getOperand(2)) + ")"; +}) +DEF_BUILTIN_HANDLER(emscripten_int8x16_addSaturate, SIMD_Int8x16_addSaturate); +DEF_BUILTIN_HANDLER(emscripten_int8x16_subSaturate, SIMD_Int8x16_subSaturate); +DEF_BUILTIN_HANDLER(emscripten_int8x16_shiftLeftByScalar, SIMD_Int8x16_shiftLeftByScalar); +DEF_BUILTIN_HANDLER(emscripten_int8x16_shiftRightByScalar, SIMD_Int8x16_shiftRightByScalar); +DEF_BUILTIN_HANDLER(emscripten_int8x16_extractLane, SIMD_Int8x16_extractLane); +DEF_BUILTIN_HANDLER(emscripten_int8x16_replaceLane, SIMD_Int8x16_replaceLane); +DEF_CALL_HANDLER(emscripten_int8x16_store, { + UsesSIMDInt8x16 = true; + return "SIMD_Int8x16_store(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ", " + getValueAsStr(CI->getOperand(1)) + ")"; +}) +DEF_CALL_HANDLER(emscripten_int8x16_load, { + UsesSIMDInt8x16 = true; + return getAssign(CI) + "SIMD_Int8x16_load(HEAPU8, " + getValueAsStr(CI->getOperand(0)) + ")"; +}) +DEF_BUILTIN_HANDLER(emscripten_int8x16_fromFloat64x2Bits, SIMD_Int8x16_fromFloat64x2Bits); +DEF_BUILTIN_HANDLER(emscripten_int8x16_fromFloat32x4Bits, SIMD_Int8x16_fromFloat32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_int8x16_fromInt32x4Bits, SIMD_Int8x16_fromInt32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_int8x16_fromUint32x4Bits, SIMD_Int8x16_fromUint32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_int8x16_fromInt16x8Bits, SIMD_Int8x16_fromInt16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_int8x16_fromUint16x8Bits, SIMD_Int8x16_fromUint16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_int8x16_fromUint8x16Bits, SIMD_Int8x16_fromUint8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_int8x16_fromUint8x16, SIMD_Int8x16_fromUint8x16); +DEF_BUILTIN_HANDLER(emscripten_int8x16_swizzle, SIMD_Int8x16_swizzle); +DEF_BUILTIN_HANDLER(emscripten_int8x16_shuffle, SIMD_Int8x16_shuffle); + +// SIMD.js Uint8x16 +DEF_BUILTIN_HANDLER(emscripten_uint8x16_set, SIMD_Uint8x16); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_splat, SIMD_Uint8x16_splat); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_add, SIMD_Uint8x16_add); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_sub, SIMD_Uint8x16_sub); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_mul, SIMD_Uint8x16_mul); +// n.b. No emscripten_uint8x16_div, division is only defined on floating point types. +// n.b. No emscripten_uint8x16_max, only defined on floating point types. +// n.b. No emscripten_uint8x16_min, only defined on floating point types. +// n.b. No emscripten_uint8x16_maxNum, only defined on floating point types. +// n.b. No emscripten_uint8x16_minNum, only defined on floating point types. +DEF_BUILTIN_HANDLER(emscripten_uint8x16_neg, SIMD_Uint8x16_neg); +// n.b. No emscripten_uint8x16_sqrt, only defined on floating point types. +// n.b. No emscripten_uint8x16_reciprocalApproximation, only defined on floating point types. +// n.b. No emscripten_uint8x16_reciprocalSqrtApproximation, only defined on floating point types. +// n.b. No emscripten_uint8x16_abs, only defined on floating point types. 
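// A note on the integer select handlers above (Int16x8 and Int8x16): the
// condition arrives as an integer vector, usually a boolean vector that the
// front end sign-extended. When the operand is literally a SExtInst, the
// handler peels the extension off and feeds the original boolean vector to
// SIMD_*_select; otherwise it rebuilds a selector by comparing against zero.
// Roughly, with c, t and f standing in for the three operands:
//
//   SIMD_Int8x16_select(c_bool, t, f)                                    // SExt case
//   SIMD_Int8x16_select(SIMD_Int8x16_notEqual(c, SIMD_Int8x16_splat(0)), t, f)
//
// This is the special-casing that the FIXME above, about a more general
// Bool8x16 treatment, refers to.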
+DEF_BUILTIN_HANDLER(emscripten_uint8x16_and, SIMD_Uint8x16_and); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_xor, SIMD_Uint8x16_xor); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_or, SIMD_Uint8x16_or); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_not, SIMD_Uint8x16_not); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_lessThan, SIMD_Uint8x16_lessThan); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_lessThanOrEqual, SIMD_Uint8x16_lessThanOrEqual); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_greaterThan, SIMD_Uint8x16_greaterThan); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_greaterThanOrEqual, SIMD_Uint8x16_greaterThanOrEqual); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_equal, SIMD_Uint8x16_equal); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_notEqual, SIMD_Uint8x16_notEqual); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_select, SIMD_Uint8x16_select); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_addSaturate, SIMD_Uint8x16_addSaturate); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_subSaturate, SIMD_Uint8x16_subSaturate); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_shiftLeftByScalar, SIMD_Uint8x16_shiftLeftByScalar); +DEF_CALL_HANDLER(emscripten_uint8x16_shiftRightByScalar, { + UsesSIMDInt8x16 = true; + UsesSIMDUint8x16 = true; + return getAssign(CI) + "SIMD_Int8x16_fromUint8x16Bits(SIMD_Uint8x16_shiftRightByScalar(SIMD_Uint8x16_fromInt8x16Bits(" + getValueAsStr(CI->getOperand(0)) + "), " + getValueAsStr(CI->getOperand(1)) + "))"; +}) +DEF_BUILTIN_HANDLER(emscripten_uint8x16_extractLane, SIMD_Uint8x16_extractLane); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_replaceLane, SIMD_Uint8x16_replaceLane); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_store, SIMD_Uint8x16_store); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_load, SIMD_Uint8x16_load); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_fromFloat64x2Bits, SIMD_Uint8x16_fromFloat64x2Bits); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_fromFloat32x4Bits, SIMD_Uint8x16_fromFloat32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_fromInt32x4Bits, SIMD_Uint8x16_fromInt32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_fromUint32x4Bits, SIMD_Uint8x16_fromUint32x4Bits); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_fromInt16x8Bits, SIMD_Uint8x16_fromInt16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_fromUint16x8Bits, SIMD_Uint8x16_fromUint16x8Bits); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_fromInt8x16Bits, SIMD_Uint8x16_fromInt8x16Bits); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_fromInt8x16, SIMD_Uint8x16_fromInt8x16); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_swizzle, SIMD_Uint8x16_swizzle); +DEF_BUILTIN_HANDLER(emscripten_uint8x16_shuffle, SIMD_Uint8x16_shuffle); + +// SIMD.js Bool64x2 +DEF_BUILTIN_HANDLER(emscripten_bool64x2_anyTrue, SIMD_Bool64x2_anyTrue); +DEF_BUILTIN_HANDLER(emscripten_bool64x2_allTrue, SIMD_Bool64x2_allTrue); + +// SIMD.js Bool32x4 +DEF_BUILTIN_HANDLER(emscripten_bool32x4_anyTrue, SIMD_Bool32x4_anyTrue); +DEF_BUILTIN_HANDLER(emscripten_bool32x4_allTrue, SIMD_Bool32x4_allTrue); + +// SIMD.js Bool16x8 +DEF_BUILTIN_HANDLER(emscripten_bool16x8_anyTrue, SIMD_Bool16x8_anyTrue); +DEF_BUILTIN_HANDLER(emscripten_bool16x8_allTrue, SIMD_Bool16x8_allTrue); + +// SIMD.js Bool8x16 +DEF_BUILTIN_HANDLER(emscripten_bool8x16_anyTrue, SIMD_Bool8x16_anyTrue); +DEF_BUILTIN_HANDLER(emscripten_bool8x16_allTrue, SIMD_Bool8x16_allTrue); + +DEF_CALL_HANDLER(emscripten_atomic_fence, { + if (EnablePthreads) return "(Atomics_add(HEAP32, 0, 0)|0) /* fence */"; + else return "/* fence */"; +}) + +// Setups + +void setupCallHandlers() { + assert(CallHandlers.empty()); + #define SETUP_CALL_HANDLER(Ident) \ + CallHandlers["_" 
#Ident] = &JSWriter::CH_##Ident; + + SETUP_CALL_HANDLER(__default__); + SETUP_CALL_HANDLER(emscripten_preinvoke); + SETUP_CALL_HANDLER(emscripten_postinvoke); + SETUP_CALL_HANDLER(emscripten_landingpad); + SETUP_CALL_HANDLER(emscripten_resume); + SETUP_CALL_HANDLER(emscripten_prep_setjmp); + SETUP_CALL_HANDLER(emscripten_cleanup_setjmp); + SETUP_CALL_HANDLER(emscripten_setjmp); + SETUP_CALL_HANDLER(emscripten_longjmp); + SETUP_CALL_HANDLER(emscripten_check_longjmp); + SETUP_CALL_HANDLER(emscripten_get_longjmp_result); + SETUP_CALL_HANDLER(emscripten_alloc_async_context); + SETUP_CALL_HANDLER(emscripten_check_async); + SETUP_CALL_HANDLER(emscripten_do_not_unwind); + SETUP_CALL_HANDLER(emscripten_do_not_unwind_async); + SETUP_CALL_HANDLER(emscripten_get_async_return_value_addr); + SETUP_CALL_HANDLER(emscripten_debugger); + SETUP_CALL_HANDLER(llvm_debugtrap); + SETUP_CALL_HANDLER(getHigh32); + SETUP_CALL_HANDLER(setHigh32); + SETUP_CALL_HANDLER(FtoILow); + SETUP_CALL_HANDLER(FtoIHigh); + SETUP_CALL_HANDLER(DtoILow); + SETUP_CALL_HANDLER(DtoIHigh); + SETUP_CALL_HANDLER(BDtoILow); + SETUP_CALL_HANDLER(BDtoIHigh); + SETUP_CALL_HANDLER(SItoF); + SETUP_CALL_HANDLER(UItoF); + SETUP_CALL_HANDLER(SItoD); + SETUP_CALL_HANDLER(UItoD); + SETUP_CALL_HANDLER(BItoD); + SETUP_CALL_HANDLER(llvm_nacl_atomic_store_i32); + SETUP_CALL_HANDLER(llvm_nacl_atomic_cmpxchg_i8); + SETUP_CALL_HANDLER(llvm_nacl_atomic_cmpxchg_i16); + SETUP_CALL_HANDLER(llvm_nacl_atomic_cmpxchg_i32); + SETUP_CALL_HANDLER(llvm_memcpy_p0i8_p0i8_i32); + SETUP_CALL_HANDLER(llvm_memset_p0i8_i32); + SETUP_CALL_HANDLER(llvm_memmove_p0i8_p0i8_i32); + SETUP_CALL_HANDLER(llvm_expect_i32); + SETUP_CALL_HANDLER(llvm_expect_i1); + SETUP_CALL_HANDLER(llvm_dbg_declare); + SETUP_CALL_HANDLER(llvm_dbg_value); + SETUP_CALL_HANDLER(llvm_lifetime_start); + SETUP_CALL_HANDLER(llvm_lifetime_end); + SETUP_CALL_HANDLER(llvm_invariant_start); + SETUP_CALL_HANDLER(llvm_invariant_end); + SETUP_CALL_HANDLER(llvm_prefetch); + SETUP_CALL_HANDLER(llvm_objectsize_i32_p0i8); + SETUP_CALL_HANDLER(llvm_flt_rounds); + SETUP_CALL_HANDLER(bitshift64Lshr); + SETUP_CALL_HANDLER(bitshift64Ashr); + SETUP_CALL_HANDLER(bitshift64Shl); + SETUP_CALL_HANDLER(llvm_ctlz_i32); + SETUP_CALL_HANDLER(llvm_cttz_i32); + SETUP_CALL_HANDLER(llvm_maxnum_f32); + SETUP_CALL_HANDLER(llvm_maxnum_f64); + SETUP_CALL_HANDLER(llvm_copysign_f32); + SETUP_CALL_HANDLER(llvm_copysign_f64); + + // SIMD.js Float64x2 + SETUP_CALL_HANDLER(emscripten_float64x2_set); + SETUP_CALL_HANDLER(emscripten_float64x2_splat); + SETUP_CALL_HANDLER(emscripten_float64x2_add); + SETUP_CALL_HANDLER(emscripten_float64x2_sub); + SETUP_CALL_HANDLER(emscripten_float64x2_mul); + SETUP_CALL_HANDLER(emscripten_float64x2_div); + SETUP_CALL_HANDLER(emscripten_float64x2_max); + SETUP_CALL_HANDLER(emscripten_float64x2_min); + SETUP_CALL_HANDLER(emscripten_float64x2_maxNum); + SETUP_CALL_HANDLER(emscripten_float64x2_minNum); + SETUP_CALL_HANDLER(emscripten_float64x2_neg); + SETUP_CALL_HANDLER(emscripten_float64x2_sqrt); + SETUP_CALL_HANDLER(emscripten_float64x2_reciprocalApproximation); + SETUP_CALL_HANDLER(emscripten_float64x2_reciprocalSqrtApproximation); + SETUP_CALL_HANDLER(emscripten_float64x2_abs); + // n.b. No emscripten_float64x2_and, only defined on boolean and integer SIMD types. + // n.b. No emscripten_float64x2_xor, only defined on boolean and integer SIMD types. + // n.b. No emscripten_float64x2_or, only defined on boolean and integer SIMD types. + // n.b. 
No emscripten_float64x2_not, only defined on boolean and integer SIMD types. + SETUP_CALL_HANDLER(emscripten_float64x2_lessThan); + SETUP_CALL_HANDLER(emscripten_float64x2_lessThanOrEqual); + SETUP_CALL_HANDLER(emscripten_float64x2_greaterThan); + SETUP_CALL_HANDLER(emscripten_float64x2_greaterThanOrEqual); + SETUP_CALL_HANDLER(emscripten_float64x2_equal); + SETUP_CALL_HANDLER(emscripten_float64x2_notEqual); + // n.b. No emscripten_float64x2_anyTrue, only defined on boolean SIMD types. + // n.b. No emscripten_float64x2_allTrue, only defined on boolean SIMD types. + SETUP_CALL_HANDLER(emscripten_float64x2_select); + // n.b. No emscripten_float64x2_addSaturate, only defined on 8-bit and 16-bit integer SIMD types. + // n.b. No emscripten_float64x2_subSaturate, only defined on 8-bit and 16-bit integer SIMD types. + // n.b. No emscripten_float64x2_shiftLeftByScalar, only defined on integer SIMD types. + // n.b. No emscripten_float64x2_shiftRightByScalar, only defined on integer SIMD types. + SETUP_CALL_HANDLER(emscripten_float64x2_extractLane); + SETUP_CALL_HANDLER(emscripten_float64x2_replaceLane); + SETUP_CALL_HANDLER(emscripten_float64x2_store); + SETUP_CALL_HANDLER(emscripten_float64x2_store1); + SETUP_CALL_HANDLER(emscripten_float64x2_load); + SETUP_CALL_HANDLER(emscripten_float64x2_load1); + SETUP_CALL_HANDLER(emscripten_float64x2_fromFloat32x4Bits); + SETUP_CALL_HANDLER(emscripten_float64x2_fromInt32x4Bits); + SETUP_CALL_HANDLER(emscripten_float64x2_fromUint32x4Bits); + SETUP_CALL_HANDLER(emscripten_float64x2_fromInt16x8Bits); + SETUP_CALL_HANDLER(emscripten_float64x2_fromUint16x8Bits); + SETUP_CALL_HANDLER(emscripten_float64x2_fromInt8x16Bits); + SETUP_CALL_HANDLER(emscripten_float64x2_fromUint8x16Bits); + SETUP_CALL_HANDLER(emscripten_float64x2_swizzle); + SETUP_CALL_HANDLER(emscripten_float64x2_shuffle); + + // SIMD.js Float32x4 + SETUP_CALL_HANDLER(emscripten_float32x4_set); + SETUP_CALL_HANDLER(emscripten_float32x4_splat); + SETUP_CALL_HANDLER(emscripten_float32x4_add); + SETUP_CALL_HANDLER(emscripten_float32x4_sub); + SETUP_CALL_HANDLER(emscripten_float32x4_mul); + SETUP_CALL_HANDLER(emscripten_float32x4_div); + SETUP_CALL_HANDLER(emscripten_float32x4_max); + SETUP_CALL_HANDLER(emscripten_float32x4_min); + SETUP_CALL_HANDLER(emscripten_float32x4_maxNum); + SETUP_CALL_HANDLER(emscripten_float32x4_minNum); + SETUP_CALL_HANDLER(emscripten_float32x4_neg); + SETUP_CALL_HANDLER(emscripten_float32x4_sqrt); + SETUP_CALL_HANDLER(emscripten_float32x4_reciprocalApproximation); + SETUP_CALL_HANDLER(emscripten_float32x4_reciprocalSqrtApproximation); + SETUP_CALL_HANDLER(emscripten_float32x4_abs); + // n.b. No emscripten_float32x4_and, only defined on boolean and integer SIMD types. + // n.b. No emscripten_float32x4_xor, only defined on boolean and integer SIMD types. + // n.b. No emscripten_float32x4_or, only defined on boolean and integer SIMD types. + // n.b. No emscripten_float32x4_not, only defined on boolean and integer SIMD types. + SETUP_CALL_HANDLER(emscripten_float32x4_lessThan); + SETUP_CALL_HANDLER(emscripten_float32x4_lessThanOrEqual); + SETUP_CALL_HANDLER(emscripten_float32x4_greaterThan); + SETUP_CALL_HANDLER(emscripten_float32x4_greaterThanOrEqual); + SETUP_CALL_HANDLER(emscripten_float32x4_equal); + SETUP_CALL_HANDLER(emscripten_float32x4_notEqual); + // n.b. No emscripten_float32x4_anyTrue, only defined on boolean SIMD types. + // n.b. No emscripten_float32x4_allTrue, only defined on boolean SIMD types. + SETUP_CALL_HANDLER(emscripten_float32x4_select); + // n.b. 
No emscripten_float32x4_addSaturate, only defined on 8-bit and 16-bit integer SIMD types. + // n.b. No emscripten_float32x4_subSaturate, only defined on 8-bit and 16-bit integer SIMD types. + // n.b. No emscripten_float32x4_shiftLeftByScalar, only defined on integer SIMD types. + // n.b. No emscripten_float32x4_shiftRightByScalar, only defined on integer SIMD types. + SETUP_CALL_HANDLER(emscripten_float32x4_extractLane); + SETUP_CALL_HANDLER(emscripten_float32x4_replaceLane); + SETUP_CALL_HANDLER(emscripten_float32x4_store); + SETUP_CALL_HANDLER(emscripten_float32x4_store1); + SETUP_CALL_HANDLER(emscripten_float32x4_store2); + SETUP_CALL_HANDLER(emscripten_float32x4_store3); + SETUP_CALL_HANDLER(emscripten_float32x4_load); + SETUP_CALL_HANDLER(emscripten_float32x4_load1); + SETUP_CALL_HANDLER(emscripten_float32x4_load2); + SETUP_CALL_HANDLER(emscripten_float32x4_load3); + SETUP_CALL_HANDLER(emscripten_float32x4_fromFloat64x2Bits); + SETUP_CALL_HANDLER(emscripten_float32x4_fromInt32x4Bits); + SETUP_CALL_HANDLER(emscripten_float32x4_fromUint32x4Bits); + SETUP_CALL_HANDLER(emscripten_float32x4_fromInt16x8Bits); + SETUP_CALL_HANDLER(emscripten_float32x4_fromUint16x8Bits); + SETUP_CALL_HANDLER(emscripten_float32x4_fromInt8x16Bits); + SETUP_CALL_HANDLER(emscripten_float32x4_fromUint8x16Bits); + SETUP_CALL_HANDLER(emscripten_float32x4_fromInt32x4); + SETUP_CALL_HANDLER(emscripten_float32x4_fromUint32x4); + SETUP_CALL_HANDLER(emscripten_float32x4_swizzle); + SETUP_CALL_HANDLER(emscripten_float32x4_shuffle); + + // SIMD.js Int32x4 + SETUP_CALL_HANDLER(emscripten_int32x4_set); + SETUP_CALL_HANDLER(emscripten_int32x4_splat); + SETUP_CALL_HANDLER(emscripten_int32x4_add); + SETUP_CALL_HANDLER(emscripten_int32x4_sub); + SETUP_CALL_HANDLER(emscripten_int32x4_mul); + // n.b. No emscripten_int32x4_div, division is only defined on floating point types. + // n.b. No emscripten_int32x4_max, only defined on floating point types. + // n.b. No emscripten_int32x4_min, only defined on floating point types. + // n.b. No emscripten_int32x4_maxNum, only defined on floating point types. + // n.b. No emscripten_int32x4_minNum, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_int32x4_neg); + // n.b. No emscripten_int32x4_sqrt, only defined on floating point types. + // n.b. No emscripten_int32x4_reciprocalApproximation, only defined on floating point types. + // n.b. No emscripten_int32x4_reciprocalSqrtApproximation, only defined on floating point types. + // n.b. No emscripten_int32x4_abs, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_int32x4_and); + SETUP_CALL_HANDLER(emscripten_int32x4_xor); + SETUP_CALL_HANDLER(emscripten_int32x4_or); + SETUP_CALL_HANDLER(emscripten_int32x4_not); + SETUP_CALL_HANDLER(emscripten_int32x4_lessThan); + SETUP_CALL_HANDLER(emscripten_int32x4_lessThanOrEqual); + SETUP_CALL_HANDLER(emscripten_int32x4_greaterThan); + SETUP_CALL_HANDLER(emscripten_int32x4_greaterThanOrEqual); + SETUP_CALL_HANDLER(emscripten_int32x4_equal); + SETUP_CALL_HANDLER(emscripten_int32x4_notEqual); + // n.b. No emscripten_int32x4_anyTrue, only defined on boolean SIMD types. + // n.b. No emscripten_int32x4_allTrue, only defined on boolean SIMD types. + SETUP_CALL_HANDLER(emscripten_int32x4_select); + // n.b. No emscripten_int32x4_addSaturate, only defined on 8-bit and 16-bit integer SIMD types. + // n.b. No emscripten_int32x4_subSaturate, only defined on 8-bit and 16-bit integer SIMD types. 
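// For reference, every SETUP_CALL_HANDLER(Ident) line in this function expands
// through the macro defined at the top of setupCallHandlers() into an entry in
// the CallHandlers map, e.g.
//
//   SETUP_CALL_HANDLER(emscripten_int32x4_add);
//   // ==> CallHandlers["_emscripten_int32x4_add"] = &JSWriter::CH_emscripten_int32x4_add;
//
// The leading underscore matches the mangled JS name of the callee, which is
// how handleCall() below finds a specialized handler for a direct call and
// falls back to the "___default__" handler otherwise.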
+ SETUP_CALL_HANDLER(emscripten_int32x4_shiftLeftByScalar); + SETUP_CALL_HANDLER(emscripten_int32x4_shiftRightByScalar); + SETUP_CALL_HANDLER(emscripten_int32x4_extractLane); + SETUP_CALL_HANDLER(emscripten_int32x4_replaceLane); + SETUP_CALL_HANDLER(emscripten_int32x4_store); + SETUP_CALL_HANDLER(emscripten_int32x4_store1); + SETUP_CALL_HANDLER(emscripten_int32x4_store2); + SETUP_CALL_HANDLER(emscripten_int32x4_store3); + SETUP_CALL_HANDLER(emscripten_int32x4_load); + SETUP_CALL_HANDLER(emscripten_int32x4_load1); + SETUP_CALL_HANDLER(emscripten_int32x4_load2); + SETUP_CALL_HANDLER(emscripten_int32x4_load3); + SETUP_CALL_HANDLER(emscripten_int32x4_fromFloat64x2Bits); + SETUP_CALL_HANDLER(emscripten_int32x4_fromFloat32x4Bits); + SETUP_CALL_HANDLER(emscripten_int32x4_fromUint32x4Bits); + SETUP_CALL_HANDLER(emscripten_int32x4_fromInt16x8Bits); + SETUP_CALL_HANDLER(emscripten_int32x4_fromUint16x8Bits); + SETUP_CALL_HANDLER(emscripten_int32x4_fromInt8x16Bits); + SETUP_CALL_HANDLER(emscripten_int32x4_fromUint8x16Bits); + SETUP_CALL_HANDLER(emscripten_int32x4_fromFloat32x4); + SETUP_CALL_HANDLER(emscripten_int32x4_fromUint32x4); +// SETUP_CALL_HANDLER(emscripten_int32x4_fromFloat64x2); // TODO: Unofficial extension + SETUP_CALL_HANDLER(emscripten_int32x4_swizzle); + SETUP_CALL_HANDLER(emscripten_int32x4_shuffle); + + // SIMD.js Uint32x4 + SETUP_CALL_HANDLER(emscripten_uint32x4_set); + SETUP_CALL_HANDLER(emscripten_uint32x4_splat); + SETUP_CALL_HANDLER(emscripten_uint32x4_add); + SETUP_CALL_HANDLER(emscripten_uint32x4_sub); + SETUP_CALL_HANDLER(emscripten_uint32x4_mul); + // n.b. No emscripten_uint32x4_div, division is only defined on floating point types. + // n.b. No emscripten_uint32x4_max, only defined on floating point types. + // n.b. No emscripten_uint32x4_min, only defined on floating point types. + // n.b. No emscripten_uint32x4_maxNum, only defined on floating point types. + // n.b. No emscripten_uint32x4_minNum, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_uint32x4_neg); + // n.b. No emscripten_uint32x4_sqrt, only defined on floating point types. + // n.b. No emscripten_uint32x4_reciprocalApproximation, only defined on floating point types. + // n.b. No emscripten_uint32x4_reciprocalSqrtApproximation, only defined on floating point types. + // n.b. No emscripten_uint32x4_abs, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_uint32x4_and); + SETUP_CALL_HANDLER(emscripten_uint32x4_xor); + SETUP_CALL_HANDLER(emscripten_uint32x4_or); + SETUP_CALL_HANDLER(emscripten_uint32x4_not); + SETUP_CALL_HANDLER(emscripten_uint32x4_lessThan); + SETUP_CALL_HANDLER(emscripten_uint32x4_lessThanOrEqual); + SETUP_CALL_HANDLER(emscripten_uint32x4_greaterThan); + SETUP_CALL_HANDLER(emscripten_uint32x4_greaterThanOrEqual); + SETUP_CALL_HANDLER(emscripten_uint32x4_equal); + SETUP_CALL_HANDLER(emscripten_uint32x4_notEqual); + // n.b. No emscripten_uint32x4_anyTrue, only defined on boolean SIMD types. + // n.b. No emscripten_uint32x4_allTrue, only defined on boolean SIMD types. + SETUP_CALL_HANDLER(emscripten_uint32x4_select); + // n.b. No emscripten_uint32x4_addSaturate, only defined on 8-bit and 16-bit integer SIMD types. + // n.b. No emscripten_uint32x4_subSaturate, only defined on 8-bit and 16-bit integer SIMD types. 
+ SETUP_CALL_HANDLER(emscripten_uint32x4_shiftLeftByScalar); + SETUP_CALL_HANDLER(emscripten_uint32x4_shiftRightByScalar); + SETUP_CALL_HANDLER(emscripten_uint32x4_extractLane); + SETUP_CALL_HANDLER(emscripten_uint32x4_replaceLane); + SETUP_CALL_HANDLER(emscripten_uint32x4_store); + SETUP_CALL_HANDLER(emscripten_uint32x4_store1); + SETUP_CALL_HANDLER(emscripten_uint32x4_store2); + SETUP_CALL_HANDLER(emscripten_uint32x4_store3); + SETUP_CALL_HANDLER(emscripten_uint32x4_load); + SETUP_CALL_HANDLER(emscripten_uint32x4_load1); + SETUP_CALL_HANDLER(emscripten_uint32x4_load2); + SETUP_CALL_HANDLER(emscripten_uint32x4_load3); + SETUP_CALL_HANDLER(emscripten_uint32x4_fromFloat64x2Bits); + SETUP_CALL_HANDLER(emscripten_uint32x4_fromFloat32x4Bits); + SETUP_CALL_HANDLER(emscripten_uint32x4_fromInt32x4Bits); + SETUP_CALL_HANDLER(emscripten_uint32x4_fromInt16x8Bits); + SETUP_CALL_HANDLER(emscripten_uint32x4_fromUint16x8Bits); + SETUP_CALL_HANDLER(emscripten_uint32x4_fromInt8x16Bits); + SETUP_CALL_HANDLER(emscripten_uint32x4_fromUint8x16Bits); + SETUP_CALL_HANDLER(emscripten_uint32x4_fromFloat32x4); + SETUP_CALL_HANDLER(emscripten_uint32x4_fromInt32x4); + // SETUP_CALL_HANDLER(emscripten_uint32x4_fromFloat64x2); // TODO: Unofficial extension + SETUP_CALL_HANDLER(emscripten_uint32x4_swizzle); + SETUP_CALL_HANDLER(emscripten_uint32x4_shuffle); + + // SIMD.js Int16x8 + SETUP_CALL_HANDLER(emscripten_int16x8_set); + SETUP_CALL_HANDLER(emscripten_int16x8_splat); + SETUP_CALL_HANDLER(emscripten_int16x8_add); + SETUP_CALL_HANDLER(emscripten_int16x8_sub); + SETUP_CALL_HANDLER(emscripten_int16x8_mul); + // n.b. No emscripten_int16x8_div, division is only defined on floating point types. + // n.b. No emscripten_int16x8_max, only defined on floating point types. + // n.b. No emscripten_int16x8_min, only defined on floating point types. + // n.b. No emscripten_int16x8_maxNum, only defined on floating point types. + // n.b. No emscripten_int16x8_minNum, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_int16x8_neg); + // n.b. No emscripten_int16x8_sqrt, only defined on floating point types. + // n.b. No emscripten_int16x8_reciprocalApproximation, only defined on floating point types. + // n.b. No emscripten_int16x8_reciprocalSqrtApproximation, only defined on floating point types. + // n.b. No emscripten_int16x8_abs, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_int16x8_and); + SETUP_CALL_HANDLER(emscripten_int16x8_xor); + SETUP_CALL_HANDLER(emscripten_int16x8_or); + SETUP_CALL_HANDLER(emscripten_int16x8_not); + SETUP_CALL_HANDLER(emscripten_int16x8_lessThan); + SETUP_CALL_HANDLER(emscripten_int16x8_lessThanOrEqual); + SETUP_CALL_HANDLER(emscripten_int16x8_greaterThan); + SETUP_CALL_HANDLER(emscripten_int16x8_greaterThanOrEqual); + SETUP_CALL_HANDLER(emscripten_int16x8_equal); + SETUP_CALL_HANDLER(emscripten_int16x8_notEqual); + // n.b. No emscripten_int16x8_anyTrue, only defined on boolean SIMD types. + // n.b. No emscripten_int16x8_allTrue, only defined on boolean SIMD types. 
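// Note on the two conversion families registered throughout these lists: the
// *_from<Type>Bits handlers (e.g. emscripten_int32x4_fromFloat32x4Bits above)
// map to bit-casts that reinterpret the same 128 bits with a different lane
// layout, while the plain *_from<Type> handlers (e.g.
// emscripten_int32x4_fromFloat32x4) map to lane-wise value conversions. As an
// illustration, a Float32x4 lane holding 1.5f comes out as 0x3FC00000 under
// the Bits form and as 1 under the value-converting form.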
+ SETUP_CALL_HANDLER(emscripten_int16x8_select); + SETUP_CALL_HANDLER(emscripten_int16x8_addSaturate); + SETUP_CALL_HANDLER(emscripten_int16x8_subSaturate); + SETUP_CALL_HANDLER(emscripten_int16x8_shiftLeftByScalar); + SETUP_CALL_HANDLER(emscripten_int16x8_shiftRightByScalar); + SETUP_CALL_HANDLER(emscripten_int16x8_extractLane); + SETUP_CALL_HANDLER(emscripten_int16x8_replaceLane); + SETUP_CALL_HANDLER(emscripten_int16x8_store); + SETUP_CALL_HANDLER(emscripten_int16x8_load); + SETUP_CALL_HANDLER(emscripten_int16x8_fromFloat64x2Bits); + SETUP_CALL_HANDLER(emscripten_int16x8_fromFloat32x4Bits); + SETUP_CALL_HANDLER(emscripten_int16x8_fromInt32x4Bits); + SETUP_CALL_HANDLER(emscripten_int16x8_fromUint32x4Bits); + SETUP_CALL_HANDLER(emscripten_int16x8_fromUint16x8Bits); + SETUP_CALL_HANDLER(emscripten_int16x8_fromInt8x16Bits); + SETUP_CALL_HANDLER(emscripten_int16x8_fromUint8x16Bits); + SETUP_CALL_HANDLER(emscripten_int16x8_fromUint16x8); + SETUP_CALL_HANDLER(emscripten_int16x8_swizzle); + SETUP_CALL_HANDLER(emscripten_int16x8_shuffle); + + // SIMD.js Uint16x8 + SETUP_CALL_HANDLER(emscripten_uint16x8_set); + SETUP_CALL_HANDLER(emscripten_uint16x8_splat); + SETUP_CALL_HANDLER(emscripten_uint16x8_add); + SETUP_CALL_HANDLER(emscripten_uint16x8_sub); + SETUP_CALL_HANDLER(emscripten_uint16x8_mul); + // n.b. No emscripten_uint16x8_div, division is only defined on floating point types. + // n.b. No emscripten_uint16x8_max, only defined on floating point types. + // n.b. No emscripten_uint16x8_min, only defined on floating point types. + // n.b. No emscripten_uint16x8_maxNum, only defined on floating point types. + // n.b. No emscripten_uint16x8_minNum, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_uint16x8_neg); + // n.b. No emscripten_uint16x8_sqrt, only defined on floating point types. + // n.b. No emscripten_uint16x8_reciprocalApproximation, only defined on floating point types. + // n.b. No emscripten_uint16x8_reciprocalSqrtApproximation, only defined on floating point types. + // n.b. No emscripten_uint16x8_abs, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_uint16x8_and); + SETUP_CALL_HANDLER(emscripten_uint16x8_xor); + SETUP_CALL_HANDLER(emscripten_uint16x8_or); + SETUP_CALL_HANDLER(emscripten_uint16x8_not); + SETUP_CALL_HANDLER(emscripten_uint16x8_lessThan); + SETUP_CALL_HANDLER(emscripten_uint16x8_lessThanOrEqual); + SETUP_CALL_HANDLER(emscripten_uint16x8_greaterThan); + SETUP_CALL_HANDLER(emscripten_uint16x8_greaterThanOrEqual); + SETUP_CALL_HANDLER(emscripten_uint16x8_equal); + SETUP_CALL_HANDLER(emscripten_uint16x8_notEqual); + // n.b. No emscripten_uint16x8_anyTrue, only defined on boolean SIMD types. + // n.b. No emscripten_uint16x8_allTrue, only defined on boolean SIMD types. 
+ SETUP_CALL_HANDLER(emscripten_uint16x8_select); + SETUP_CALL_HANDLER(emscripten_uint16x8_addSaturate); + SETUP_CALL_HANDLER(emscripten_uint16x8_subSaturate); + SETUP_CALL_HANDLER(emscripten_uint16x8_shiftLeftByScalar); + SETUP_CALL_HANDLER(emscripten_uint16x8_shiftRightByScalar); + SETUP_CALL_HANDLER(emscripten_uint16x8_extractLane); + SETUP_CALL_HANDLER(emscripten_uint16x8_replaceLane); + SETUP_CALL_HANDLER(emscripten_uint16x8_store); + SETUP_CALL_HANDLER(emscripten_uint16x8_load); + SETUP_CALL_HANDLER(emscripten_uint16x8_fromFloat64x2Bits); + SETUP_CALL_HANDLER(emscripten_uint16x8_fromFloat32x4Bits); + SETUP_CALL_HANDLER(emscripten_uint16x8_fromInt32x4Bits); + SETUP_CALL_HANDLER(emscripten_uint16x8_fromUint32x4Bits); + SETUP_CALL_HANDLER(emscripten_uint16x8_fromInt16x8Bits); + SETUP_CALL_HANDLER(emscripten_uint16x8_fromInt8x16Bits); + SETUP_CALL_HANDLER(emscripten_uint16x8_fromUint8x16Bits); + SETUP_CALL_HANDLER(emscripten_uint16x8_fromInt16x8); + SETUP_CALL_HANDLER(emscripten_uint16x8_swizzle); + SETUP_CALL_HANDLER(emscripten_uint16x8_shuffle); + + // SIMD.js Int8x16 + SETUP_CALL_HANDLER(emscripten_int8x16_set); + SETUP_CALL_HANDLER(emscripten_int8x16_splat); + SETUP_CALL_HANDLER(emscripten_int8x16_add); + SETUP_CALL_HANDLER(emscripten_int8x16_sub); + SETUP_CALL_HANDLER(emscripten_int8x16_mul); + // n.b. No emscripten_int8x16_div, division is only defined on floating point types. + // n.b. No emscripten_int8x16_max, only defined on floating point types. + // n.b. No emscripten_int8x16_min, only defined on floating point types. + // n.b. No emscripten_int8x16_maxNum, only defined on floating point types. + // n.b. No emscripten_int8x16_minNum, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_int8x16_neg); + // n.b. No emscripten_int8x16_sqrt, only defined on floating point types. + // n.b. No emscripten_int8x16_reciprocalApproximation, only defined on floating point types. + // n.b. No emscripten_int8x16_reciprocalSqrtApproximation, only defined on floating point types. + // n.b. No emscripten_int8x16_abs, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_int8x16_and); + SETUP_CALL_HANDLER(emscripten_int8x16_xor); + SETUP_CALL_HANDLER(emscripten_int8x16_or); + SETUP_CALL_HANDLER(emscripten_int8x16_not); + SETUP_CALL_HANDLER(emscripten_int8x16_lessThan); + SETUP_CALL_HANDLER(emscripten_int8x16_lessThanOrEqual); + SETUP_CALL_HANDLER(emscripten_int8x16_greaterThan); + SETUP_CALL_HANDLER(emscripten_int8x16_greaterThanOrEqual); + SETUP_CALL_HANDLER(emscripten_int8x16_equal); + SETUP_CALL_HANDLER(emscripten_int8x16_notEqual); + // n.b. No emscripten_int8x16_anyTrue, only defined on boolean SIMD types. + // n.b. No emscripten_int8x16_allTrue, only defined on boolean SIMD types. 
+ SETUP_CALL_HANDLER(emscripten_int8x16_select); + SETUP_CALL_HANDLER(emscripten_int8x16_addSaturate); + SETUP_CALL_HANDLER(emscripten_int8x16_subSaturate); + SETUP_CALL_HANDLER(emscripten_int8x16_shiftLeftByScalar); + SETUP_CALL_HANDLER(emscripten_int8x16_shiftRightByScalar); + SETUP_CALL_HANDLER(emscripten_int8x16_extractLane); + SETUP_CALL_HANDLER(emscripten_int8x16_replaceLane); + SETUP_CALL_HANDLER(emscripten_int8x16_store); + SETUP_CALL_HANDLER(emscripten_int8x16_load); + SETUP_CALL_HANDLER(emscripten_int8x16_fromFloat64x2Bits); + SETUP_CALL_HANDLER(emscripten_int8x16_fromFloat32x4Bits); + SETUP_CALL_HANDLER(emscripten_int8x16_fromInt32x4Bits); + SETUP_CALL_HANDLER(emscripten_int8x16_fromUint32x4Bits); + SETUP_CALL_HANDLER(emscripten_int8x16_fromInt16x8Bits); + SETUP_CALL_HANDLER(emscripten_int8x16_fromUint16x8Bits); + SETUP_CALL_HANDLER(emscripten_int8x16_fromUint8x16Bits); + SETUP_CALL_HANDLER(emscripten_int8x16_fromUint8x16); + SETUP_CALL_HANDLER(emscripten_int8x16_swizzle); + SETUP_CALL_HANDLER(emscripten_int8x16_shuffle); + + // SIMD.js Uint8x16 + SETUP_CALL_HANDLER(emscripten_uint8x16_set); + SETUP_CALL_HANDLER(emscripten_uint8x16_splat); + SETUP_CALL_HANDLER(emscripten_uint8x16_add); + SETUP_CALL_HANDLER(emscripten_uint8x16_sub); + SETUP_CALL_HANDLER(emscripten_uint8x16_mul); + // n.b. No emscripten_uint8x16_div, division is only defined on floating point types. + // n.b. No emscripten_uint8x16_max, only defined on floating point types. + // n.b. No emscripten_uint8x16_min, only defined on floating point types. + // n.b. No emscripten_uint8x16_maxNum, only defined on floating point types. + // n.b. No emscripten_uint8x16_minNum, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_uint8x16_neg); + // n.b. No emscripten_uint8x16_sqrt, only defined on floating point types. + // n.b. No emscripten_uint8x16_reciprocalApproximation, only defined on floating point types. + // n.b. No emscripten_uint8x16_reciprocalSqrtApproximation, only defined on floating point types. + // n.b. No emscripten_uint8x16_abs, only defined on floating point types. + SETUP_CALL_HANDLER(emscripten_uint8x16_and); + SETUP_CALL_HANDLER(emscripten_uint8x16_xor); + SETUP_CALL_HANDLER(emscripten_uint8x16_or); + SETUP_CALL_HANDLER(emscripten_uint8x16_not); + SETUP_CALL_HANDLER(emscripten_uint8x16_lessThan); + SETUP_CALL_HANDLER(emscripten_uint8x16_lessThanOrEqual); + SETUP_CALL_HANDLER(emscripten_uint8x16_greaterThan); + SETUP_CALL_HANDLER(emscripten_uint8x16_greaterThanOrEqual); + SETUP_CALL_HANDLER(emscripten_uint8x16_equal); + SETUP_CALL_HANDLER(emscripten_uint8x16_notEqual); + // n.b. No emscripten_uint8x16_anyTrue, only defined on boolean SIMD types. + // n.b. No emscripten_uint8x16_allTrue, only defined on boolean SIMD types. 
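// The emscripten_atomic_* handlers registered a little further down pair with
// definitions earlier in this header. For example, emscripten_atomic_fence has
// no direct asm.js counterpart: when -emscripten-enable-pthreads is set, its
// handler emits a dummy read-modify-write through the Atomics API,
//
//   (Atomics_add(HEAP32, 0, 0)|0) /* fence */
//
// which is enough to order shared-memory accesses, and without pthreads it
// collapses to a bare /* fence */ comment, since there is no shared heap to
// synchronize.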
+ SETUP_CALL_HANDLER(emscripten_uint8x16_select); + SETUP_CALL_HANDLER(emscripten_uint8x16_addSaturate); + SETUP_CALL_HANDLER(emscripten_uint8x16_subSaturate); + SETUP_CALL_HANDLER(emscripten_uint8x16_shiftLeftByScalar); + SETUP_CALL_HANDLER(emscripten_uint8x16_shiftRightByScalar); + SETUP_CALL_HANDLER(emscripten_uint8x16_extractLane); + SETUP_CALL_HANDLER(emscripten_uint8x16_replaceLane); + SETUP_CALL_HANDLER(emscripten_uint8x16_store); + SETUP_CALL_HANDLER(emscripten_uint8x16_load); + SETUP_CALL_HANDLER(emscripten_uint8x16_fromFloat64x2Bits); + SETUP_CALL_HANDLER(emscripten_uint8x16_fromFloat32x4Bits); + SETUP_CALL_HANDLER(emscripten_uint8x16_fromInt32x4Bits); + SETUP_CALL_HANDLER(emscripten_uint8x16_fromUint32x4Bits); + SETUP_CALL_HANDLER(emscripten_uint8x16_fromInt16x8Bits); + SETUP_CALL_HANDLER(emscripten_uint8x16_fromUint16x8Bits); + SETUP_CALL_HANDLER(emscripten_uint8x16_fromInt8x16Bits); + SETUP_CALL_HANDLER(emscripten_uint8x16_fromInt8x16); + SETUP_CALL_HANDLER(emscripten_uint8x16_swizzle); + SETUP_CALL_HANDLER(emscripten_uint8x16_shuffle); + + // SIMD.js Bool64x2 + SETUP_CALL_HANDLER(emscripten_bool64x2_anyTrue); + SETUP_CALL_HANDLER(emscripten_bool64x2_allTrue); + + // SIMD.js Bool32x4 + SETUP_CALL_HANDLER(emscripten_bool32x4_anyTrue); + SETUP_CALL_HANDLER(emscripten_bool32x4_allTrue); + + // SIMD.js Bool16x8 + SETUP_CALL_HANDLER(emscripten_bool16x8_anyTrue); + SETUP_CALL_HANDLER(emscripten_bool16x8_allTrue); + + // SIMD.js Bool8x16 + SETUP_CALL_HANDLER(emscripten_bool8x16_anyTrue); + SETUP_CALL_HANDLER(emscripten_bool8x16_allTrue); + + SETUP_CALL_HANDLER(emscripten_asm_const); + SETUP_CALL_HANDLER(emscripten_asm_const_int); + SETUP_CALL_HANDLER(emscripten_asm_const_double); + + SETUP_CALL_HANDLER(emscripten_atomic_exchange_u8); + SETUP_CALL_HANDLER(emscripten_atomic_exchange_u16); + SETUP_CALL_HANDLER(emscripten_atomic_exchange_u32); + + SETUP_CALL_HANDLER(emscripten_atomic_cas_u8); + SETUP_CALL_HANDLER(emscripten_atomic_cas_u16); + SETUP_CALL_HANDLER(emscripten_atomic_cas_u32); + + SETUP_CALL_HANDLER(emscripten_atomic_load_u8); + SETUP_CALL_HANDLER(emscripten_atomic_load_u16); + SETUP_CALL_HANDLER(emscripten_atomic_load_u32); + SETUP_CALL_HANDLER(emscripten_atomic_load_f32); + SETUP_CALL_HANDLER(emscripten_atomic_load_f64); + + SETUP_CALL_HANDLER(emscripten_atomic_store_u8); + SETUP_CALL_HANDLER(emscripten_atomic_store_u16); + SETUP_CALL_HANDLER(emscripten_atomic_store_u32); + SETUP_CALL_HANDLER(emscripten_atomic_store_f32); + SETUP_CALL_HANDLER(emscripten_atomic_store_f64); + + SETUP_CALL_HANDLER(emscripten_atomic_add_u8); + SETUP_CALL_HANDLER(emscripten_atomic_add_u16); + SETUP_CALL_HANDLER(emscripten_atomic_add_u32); + + SETUP_CALL_HANDLER(emscripten_atomic_sub_u8); + SETUP_CALL_HANDLER(emscripten_atomic_sub_u16); + SETUP_CALL_HANDLER(emscripten_atomic_sub_u32); + + SETUP_CALL_HANDLER(emscripten_atomic_and_u8); + SETUP_CALL_HANDLER(emscripten_atomic_and_u16); + SETUP_CALL_HANDLER(emscripten_atomic_and_u32); + + SETUP_CALL_HANDLER(emscripten_atomic_or_u8); + SETUP_CALL_HANDLER(emscripten_atomic_or_u16); + SETUP_CALL_HANDLER(emscripten_atomic_or_u32); + + SETUP_CALL_HANDLER(emscripten_atomic_xor_u8); + SETUP_CALL_HANDLER(emscripten_atomic_xor_u16); + SETUP_CALL_HANDLER(emscripten_atomic_xor_u32); + + SETUP_CALL_HANDLER(emscripten_atomic_fence); + + SETUP_CALL_HANDLER(abs); + SETUP_CALL_HANDLER(labs); + SETUP_CALL_HANDLER(cos); + SETUP_CALL_HANDLER(cosf); + SETUP_CALL_HANDLER(cosl); + SETUP_CALL_HANDLER(sin); + SETUP_CALL_HANDLER(sinf); + SETUP_CALL_HANDLER(sinl); + 
SETUP_CALL_HANDLER(tan); + SETUP_CALL_HANDLER(tanf); + SETUP_CALL_HANDLER(tanl); + SETUP_CALL_HANDLER(acos); + SETUP_CALL_HANDLER(acosf); + SETUP_CALL_HANDLER(acosl); + SETUP_CALL_HANDLER(asin); + SETUP_CALL_HANDLER(asinf); + SETUP_CALL_HANDLER(asinl); + SETUP_CALL_HANDLER(atan); + SETUP_CALL_HANDLER(atanf); + SETUP_CALL_HANDLER(atanl); + SETUP_CALL_HANDLER(atan2); + SETUP_CALL_HANDLER(atan2f); + SETUP_CALL_HANDLER(atan2l); + SETUP_CALL_HANDLER(exp); + SETUP_CALL_HANDLER(expf); + SETUP_CALL_HANDLER(expl); + SETUP_CALL_HANDLER(log); + SETUP_CALL_HANDLER(logf); + SETUP_CALL_HANDLER(logl); + SETUP_CALL_HANDLER(sqrt); + SETUP_CALL_HANDLER(sqrtf); + SETUP_CALL_HANDLER(sqrtl); + SETUP_CALL_HANDLER(fabs); + SETUP_CALL_HANDLER(fabsf); + SETUP_CALL_HANDLER(fabsl); + SETUP_CALL_HANDLER(llvm_fabs_f32); + SETUP_CALL_HANDLER(llvm_fabs_f64); + SETUP_CALL_HANDLER(ceil); + SETUP_CALL_HANDLER(ceilf); + SETUP_CALL_HANDLER(ceill); + SETUP_CALL_HANDLER(floor); + SETUP_CALL_HANDLER(floorf); + SETUP_CALL_HANDLER(floorl); + SETUP_CALL_HANDLER(pow); + SETUP_CALL_HANDLER(powf); + SETUP_CALL_HANDLER(powl); + SETUP_CALL_HANDLER(llvm_sqrt_f32); + SETUP_CALL_HANDLER(llvm_sqrt_f64); + SETUP_CALL_HANDLER(llvm_pow_f32); + SETUP_CALL_HANDLER(llvm_pow_f64); + SETUP_CALL_HANDLER(llvm_powi_f32); + SETUP_CALL_HANDLER(llvm_powi_f64); + SETUP_CALL_HANDLER(llvm_log_f32); + SETUP_CALL_HANDLER(llvm_log_f64); + SETUP_CALL_HANDLER(llvm_exp_f32); + SETUP_CALL_HANDLER(llvm_exp_f64); + SETUP_CALL_HANDLER(llvm_sin_f32); + SETUP_CALL_HANDLER(llvm_sin_f64); +} + +std::string handleCall(const Instruction *CI) { + const Value *CV = getActuallyCalledValue(CI); + if (const InlineAsm* IA = dyn_cast(CV)) { + if (IA->hasSideEffects() && IA->getAsmString() == "") { + return "/* asm() memory 'barrier' */"; + } else { + errs() << "In function " << CI->getParent()->getParent()->getName() << "()\n"; + errs() << *IA << "\n"; + report_fatal_error("asm() with non-empty content not supported, use EM_ASM() (see emscripten.h)"); + } + } + + // Get the name to call this function by. If it's a direct call, meaning + // which know which Function we're calling, avoid calling getValueAsStr, as + // we don't need to use a function index. + const std::string &Name = isa(CV) ? getJSName(CV) : getValueAsStr(CV); + + CallHandlerMap::iterator CH = CallHandlers.find("___default__"); + if (isa(CV)) { + CallHandlerMap::iterator Custom = CallHandlers.find(Name); + if (Custom != CallHandlers.end()) CH = Custom; + } + return (this->*(CH->second))(CI, Name, -1); +} diff --git a/lib/Target/JSBackend/ExpandBigSwitches.cpp b/lib/Target/JSBackend/ExpandBigSwitches.cpp new file mode 100644 index 000000000000..59301b72cde8 --- /dev/null +++ b/lib/Target/JSBackend/ExpandBigSwitches.cpp @@ -0,0 +1,163 @@ +//===-- ExpandBigSwitches.cpp - Alloca optimization ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-----------------------------------------------------------------------===// +// +// Very large switches can be a problem for JS engines. We split them up here. 
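// To make the heuristic below concrete (the thresholds come from
// ConsiderSplit; the example values are invented): a switch is left alone only
// if it has fewer than 1024 cases, its case values span at most 10*1024, and
// the average gap between case values is at most 1024. A 5000-case dense
// switch, or a sparse one such as
//
//   switch (x) { case 0: ...; case 1000000: ...; }
//
// fails one of those tests and is rewritten around the median case value as
//
//   if (x < median) { switch (x) { /* cases below the median */ } }
//   else            { switch (x) { /* remaining cases */        } }
//
// with the function re-scanned until every surviving switch is small and dense
// enough for JS engines to handle well.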
+// +//===-----------------------------------------------------------------------===// + +#include "OptPasses.h" + +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +namespace llvm { + +/* + * Find cases where an alloca is used only to load and store a single value, + * even though it is bitcast. Then replace it with a direct alloca of that + * simple type, and avoid the bitcasts. + */ + +struct ExpandBigSwitches : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + ExpandBigSwitches() : FunctionPass(ID) {} + // XXX initialize..(*PassRegistry::getPassRegistry()); } + + bool runOnFunction(Function &Func) override; + + const char *getPassName() const override { return "ExpandBigSwitches"; } +}; + +char ExpandBigSwitches::ID = 0; + +// Check if we need to split a switch. If so, return the median, on which we will do so +static bool ConsiderSplit(const SwitchInst *SI, int64_t& Median) { + int64_t Minn = INT64_MAX, Maxx = INT64_MIN; + std::vector Values; + for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { + int64_t Curr = i.getCaseValue()->getSExtValue(); + if (Curr < Minn) Minn = Curr; + if (Curr > Maxx) Maxx = Curr; + Values.push_back(Curr); + } + int64_t Range = Maxx - Minn; + int Num = SI->getNumCases(); + if (Num < 1024 && Range <= 10*1024 && (Range/Num) <= 1024) return false; + // this is either too big, or too rangey + std::sort(Values.begin(), Values.end()); + Median = Values[Values.size()/2]; + return true; +} + +static void DoSplit(SwitchInst *SI, int64_t Median) { + // switch (x) { ..very many.. } + // + // ==> + // + // if (x < median) { + // switch (x) { ..first half.. } + // } else { + // switch (x) { ..second half.. } + // } + + BasicBlock *SwitchBB = SI->getParent(); + Function *F = SwitchBB->getParent(); + Value *Condition = SI->getOperand(0); + BasicBlock *DD = SI->getDefaultDest(); + unsigned NumItems = SI->getNumCases(); + Type *T = Condition->getType(); + + Instruction *Check = new ICmpInst(SI, ICmpInst::ICMP_SLT, Condition, ConstantInt::get(T, Median), "switch-split"); + BasicBlock *LowBB = BasicBlock::Create(SI->getContext(), "switchsplit_low", F); + BasicBlock *HighBB = BasicBlock::Create(SI->getContext(), "switchsplit_high", F); + BranchInst *Br = BranchInst::Create(LowBB, HighBB, Check, SwitchBB); + + SwitchInst *LowSI = SwitchInst::Create(Condition, DD, NumItems/2, LowBB); + SwitchInst *HighSI = SwitchInst::Create(Condition, DD, NumItems/2, HighBB); + + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { + BasicBlock *BB = i.getCaseSuccessor(); + auto Value = i.getCaseValue(); + SwitchInst *NewSI = Value->getSExtValue() < Median ? 
LowSI : HighSI; + NewSI->addCase(Value, BB); + // update phis + BasicBlock *NewBB = NewSI->getParent(); + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + PHINode *Phi = dyn_cast(I); + if (!Phi) break; + int Index = Phi->getBasicBlockIndex(SwitchBB); + if (Index < 0) continue; + Phi->addIncoming(Phi->getIncomingValue(Index), NewBB); + Phi->removeIncomingValue(Index); + } + } + + // fix default dest + for (BasicBlock::iterator I = DD->begin(); I != DD->end(); ++I) { + PHINode *Phi = dyn_cast(I); + if (!Phi) break; + int Index = Phi->getBasicBlockIndex(SwitchBB); + if (Index < 0) continue; + Phi->addIncoming(Phi->getIncomingValue(Index), LowBB); + Phi->addIncoming(Phi->getIncomingValue(Index), HighBB); + Phi->removeIncomingValue(Index); + } + + // finish up + SI->eraseFromParent(); + assert(SwitchBB->getTerminator() == Br); + assert(LowSI->getNumCases() + HighSI->getNumCases() == NumItems); + assert(LowSI->getNumCases() < HighSI->getNumCases() + 2); + assert(HighSI->getNumCases() < LowSI->getNumCases() + 2); +} + +bool ExpandBigSwitches::runOnFunction(Function &Func) { + bool Changed = false; + + struct SplitInfo { + SwitchInst *SI; + int64_t Median; + }; + + while (1) { // repetively split in 2 + std::vector ToSplit; + // find switches we need to split + for (Function::iterator B = Func.begin(), E = Func.end(); B != E; ++B) { + Instruction *I = B->getTerminator(); + SwitchInst *SI = dyn_cast(I); + if (!SI) continue; + SplitInfo Curr; + if (!ConsiderSplit(SI, Curr.Median)) continue; + Curr.SI = SI; + Changed = true; + ToSplit.push_back(Curr); + } + if (ToSplit.size() == 0) break; + // split them + for (auto& Curr : ToSplit) { + DoSplit(Curr.SI, Curr.Median); + } + } + + return Changed; +} + +// + +extern FunctionPass *createEmscriptenExpandBigSwitchesPass() { + return new ExpandBigSwitches(); +} + +} // End llvm namespace diff --git a/lib/Target/JSBackend/JS.h b/lib/Target/JSBackend/JS.h new file mode 100644 index 000000000000..6fe22426b8ea --- /dev/null +++ b/lib/Target/JSBackend/JS.h @@ -0,0 +1,29 @@ +//===-- JS.h - Top-level interface for JS representation ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the JS +// target library, as used by the LLVM JIT. +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_JS_H +#define TARGET_JS_H + +namespace llvm { + +class ImmutablePass; +class JSTargetMachine; + +/// createJSISelDag - This pass converts a legalized DAG into a +/// \brief Creates an JS-specific Target Transformation Info pass. +ImmutablePass *createJSTargetTransformInfoPass(const JSTargetMachine *TM); + +} // End llvm namespace + +#endif diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp new file mode 100644 index 000000000000..50214c8edb4c --- /dev/null +++ b/lib/Target/JSBackend/JSBackend.cpp @@ -0,0 +1,4091 @@ +//===-- JSBackend.cpp - Library for converting LLVM code to JS -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements compiling of LLVM IR, which is assumed to have been +// simplified using the PNaCl passes, i64 legalization, and other necessary +// transformations, into JavaScript in asm.js format, suitable for passing +// to emscripten for final processing. +// +//===----------------------------------------------------------------------===// + +#include "JSTargetMachine.h" +#include "MCTargetDesc/JSBackendMCTargetDesc.h" +#include "AllocaManager.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Config/config.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Scalar.h" +#include +#include +#include +#include // TODO: unordered_set? +#include +using namespace llvm; + +#include +#include + +raw_ostream &prettyWarning() { + errs().changeColor(raw_ostream::YELLOW); + errs() << "warning:"; + errs().resetColor(); + errs() << " "; + return errs(); +} + +static cl::opt +PreciseF32("emscripten-precise-f32", + cl::desc("Enables Math.fround usage to implement precise float32 semantics and performance (see emscripten PRECISE_F32 option)"), + cl::init(false)); + +static cl::opt +EnablePthreads("emscripten-enable-pthreads", + cl::desc("Enables compilation targeting JavaScript Shared Array Buffer and Atomics API to implement support for pthreads-based multithreading"), + cl::init(false)); + +static cl::opt +WarnOnUnaligned("emscripten-warn-unaligned", + cl::desc("Warns about unaligned loads and stores (which can negatively affect performance)"), + cl::init(false)); + +static cl::opt +WarnOnNoncanonicalNans("emscripten-warn-noncanonical-nans", + cl::desc("Warns about detected noncanonical bit patterns in NaNs that will not be preserved in the generated output (this can cause code to run wrong if the exact bits were important)"), + cl::init(true)); + +static cl::opt +ReservedFunctionPointers("emscripten-reserved-function-pointers", + cl::desc("Number of reserved slots in function tables for functions to be added at runtime (see emscripten RESERVED_FUNCTION_POINTERS option)"), + cl::init(0)); + +static cl::opt +EmulatedFunctionPointers("emscripten-emulated-function-pointers", + cl::desc("Emulate function pointers, avoiding asm.js function tables (see emscripten EMULATED_FUNCTION_POINTERS option)"), + cl::init(false)); + +static cl::opt +EmscriptenAssertions("emscripten-assertions", + cl::desc("Additional JS-specific assertions (see emscripten ASSERTIONS)"), + cl::init(0)); + +static cl::opt +NoAliasingFunctionPointers("emscripten-no-aliasing-function-pointers", + cl::desc("Forces function pointers to not alias (this is more correct, but rarely needed, and has the cost of much larger function tables; it is useful for debugging 
though; see emscripten ALIASING_FUNCTION_POINTERS option)"), + cl::init(false)); + +static cl::opt +GlobalBase("emscripten-global-base", + cl::desc("Where global variables start out in memory (see emscripten GLOBAL_BASE option)"), + cl::init(8)); + +static cl::opt +Relocatable("emscripten-relocatable", + cl::desc("Whether to emit relocatable code (see emscripten RELOCATABLE option)"), + cl::init(false)); + +static cl::opt +EnableSjLjEH("enable-pnacl-sjlj-eh", + cl::desc("Enable use of SJLJ-based C++ exception handling " + "as part of the pnacl-abi-simplify passes"), + cl::init(false)); + +static cl::opt +EnableEmCxxExceptions("enable-emscripten-cxx-exceptions", + cl::desc("Enables C++ exceptions in emscripten"), + cl::init(false)); + +static cl::opt +EnableEmAsyncify("emscripten-asyncify", + cl::desc("Enable asyncify transformation (see emscripten ASYNCIFY option)"), + cl::init(false)); + +static cl::opt +NoExitRuntime("emscripten-no-exit-runtime", + cl::desc("Generate code which assumes the runtime is never exited (so atexit etc. is unneeded; see emscripten NO_EXIT_RUNTIME setting)"), + cl::init(false)); + +static cl::opt + +EnableCyberDWARF("enable-cyberdwarf", + cl::desc("Include CyberDWARF debug information"), + cl::init(false)); + +static cl::opt +EnableCyberDWARFIntrinsics("enable-debug-intrinsics", + cl::desc("Include debug intrinsics in generated output"), + cl::init(false)); + +static cl::opt +WebAssembly("emscripten-wasm", + cl::desc("Generate asm.js which will later be compiled to WebAssembly (see emscripten BINARYEN setting)"), + cl::init(false)); + + +extern "C" void LLVMInitializeJSBackendTarget() { + // Register the target. + RegisterTargetMachine X(TheJSBackendTarget); +} + +namespace { + #define ASM_SIGNED 0 + #define ASM_UNSIGNED 1 + #define ASM_NONSPECIFIC 2 // nonspecific means to not differentiate ints. |0 for all, regardless of size and sign + #define ASM_FFI_IN 4 // FFI return values are limited to things that work in ffis + #define ASM_FFI_OUT 8 // params to FFIs are limited to things that work in ffis + #define ASM_MUST_CAST 16 // this value must be explicitly cast (or be an integer constant) + #define ASM_FORCE_FLOAT_AS_INTBITS 32 // if the value is a float, it should be returned as an integer representing the float bits (or NaN canonicalization will eat them away). This flag cannot be used with ASM_UNSIGNED set. + typedef unsigned AsmCast; + + typedef std::map ValueMap; + typedef std::set NameSet; + typedef std::set IntSet; + typedef std::vector HeapData; + typedef std::map HeapDataMap; + typedef std::vector AlignedHeapStartMap; + struct Address { + unsigned Offset, Alignment; + bool ZeroInit; + Address() {} + Address(unsigned Offset, unsigned Alignment, bool ZeroInit) : Offset(Offset), Alignment(Alignment), ZeroInit(ZeroInit) {} + }; + typedef std::map VarMap; + typedef std::map GlobalAddressMap; + typedef std::vector FunctionTable; + typedef std::map FunctionTableMap; + typedef std::map StringMap; + typedef std::map NameIntMap; + typedef std::map IntIntSetMap; + typedef std::map BlockIndexMap; + typedef std::map BlockAddressMap; + typedef std::map LLVMToRelooperMap; + struct AsmConstInfo { + int Id; + std::set Sigs; + }; + + /// JSWriter - This class is the main chunk of code that converts an LLVM + /// module to JavaScript. 
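// A rough sketch of how this pass is driven (the actual call site, presumably
// in JSTargetMachine's addPassesToEmitFile, is not part of this excerpt, so
// treat the wiring below as an assumption): the target machine adds a single
// JSWriter to the legacy codegen pipeline, and running that pipeline prints
// the whole module as asm.js text to the requested stream.
//
//   legacy::PassManager PM;
//   PM.add(new JSWriter(Out, CodeGenOpt::Default));  // hypothetical call site
//   PM.run(M);  // runOnModule() lays out globals, then prints each function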
+ class JSWriter : public ModulePass { + raw_pwrite_stream &Out; + Module *TheModule; + unsigned UniqueNum; + unsigned NextFunctionIndex; // used with NoAliasingFunctionPointers + ValueMap ValueNames; + VarMap UsedVars; + AllocaManager Allocas; + HeapDataMap GlobalDataMap; + std::vector ZeroInitSizes; // alignment => used offset in the zeroinit zone + AlignedHeapStartMap AlignedHeapStarts, ZeroInitStarts; + GlobalAddressMap GlobalAddresses; + NameSet Externals; // vars + NameSet Declares; // funcs + StringMap Redirects; // library function redirects actually used, needed for wrapper funcs in tables + std::vector PostSets; + NameIntMap NamedGlobals; // globals that we export as metadata to JS, so it can access them by name + std::map IndexedFunctions; // name -> index + FunctionTableMap FunctionTables; // sig => list of functions + std::vector GlobalInitializers; + std::vector Exports; // additional exports + StringMap Aliases; + BlockAddressMap BlockAddresses; + std::map AsmConsts; // code => { index, list of seen sigs } + NameSet FuncRelocatableExterns; // which externals are accessed in this function; we load them once at the beginning (avoids a potential call in a heap access, and might be faster) + + struct { + // 0 is reserved for void type + unsigned MetadataNum = 1; + std::map IndexedMetadata; + std::map VtableOffsets; + std::ostringstream TypeDebugData; + std::ostringstream TypeNameMap; + std::ostringstream FunctionMembers; + } cyberDWARFData; + + std::string CantValidate; + bool UsesSIMDUint8x16; + bool UsesSIMDInt8x16; + bool UsesSIMDUint16x8; + bool UsesSIMDInt16x8; + bool UsesSIMDUint32x4; + bool UsesSIMDInt32x4; + bool UsesSIMDFloat32x4; + bool UsesSIMDFloat64x2; + bool UsesSIMDBool8x16; + bool UsesSIMDBool16x8; + bool UsesSIMDBool32x4; + bool UsesSIMDBool64x2; + int InvokeState; // cycles between 0, 1 after preInvoke, 2 after call, 0 again after postInvoke. hackish, no argument there. 
+ CodeGenOpt::Level OptLevel; + const DataLayout *DL; + bool StackBumped; + int GlobalBasePadding; + int MaxGlobalAlign; + int StaticBump; + const Instruction* CurrInstruction; + + #include "CallHandlers.h" + + public: + static char ID; + JSWriter(raw_pwrite_stream &o, CodeGenOpt::Level OptLevel) + : ModulePass(ID), Out(o), UniqueNum(0), NextFunctionIndex(0), CantValidate(""), + UsesSIMDUint8x16(false), UsesSIMDInt8x16(false), UsesSIMDUint16x8(false), + UsesSIMDInt16x8(false), UsesSIMDUint32x4(false), UsesSIMDInt32x4(false), + UsesSIMDFloat32x4(false), UsesSIMDFloat64x2(false), UsesSIMDBool8x16(false), + UsesSIMDBool16x8(false), UsesSIMDBool32x4(false), UsesSIMDBool64x2(false), InvokeState(0), + OptLevel(OptLevel), StackBumped(false), GlobalBasePadding(0), MaxGlobalAlign(0), + CurrInstruction(nullptr) {} + + const char *getPassName() const override { return "JavaScript backend"; } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + ModulePass::getAnalysisUsage(AU); + } + + void printProgram(const std::string& fname, const std::string& modName ); + void printModule(const std::string& fname, const std::string& modName ); + void printFunction(const Function *F); + + LLVM_ATTRIBUTE_NORETURN void error(const std::string& msg); + + raw_pwrite_stream& nl(raw_pwrite_stream &Out, int delta = 0); + + private: + void printCommaSeparated(const HeapData v); + + // parsing of constants has two phases: calculate, and then emit + void parseConstant(const std::string& name, const Constant* CV, int Alignment, bool calculate); + + #define DEFAULT_MEM_ALIGN 8 + + #define STACK_ALIGN 16 + #define STACK_ALIGN_BITS 128 + + unsigned stackAlign(unsigned x) { + return alignTo(x, STACK_ALIGN); + } + std::string stackAlignStr(std::string x) { + return "((" + x + "+" + utostr(STACK_ALIGN-1) + ")&-" + utostr(STACK_ALIGN) + ")"; + } + + void ensureAligned(int Alignment, HeapData* GlobalData) { + assert(isPowerOf2_32(Alignment) && Alignment > 0); + while (GlobalData->size() & (Alignment-1)) GlobalData->push_back(0); + } + void ensureAligned(int Alignment, HeapData& GlobalData) { + assert(isPowerOf2_32(Alignment) && Alignment > 0); + while (GlobalData.size() & (Alignment-1)) GlobalData.push_back(0); + } + + HeapData *allocateAddress(const std::string& Name, unsigned Alignment) { + assert(isPowerOf2_32(Alignment) && Alignment > 0); + HeapData* GlobalData = &GlobalDataMap[Alignment]; + ensureAligned(Alignment, GlobalData); + GlobalAddresses[Name] = Address(GlobalData->size(), Alignment*8, false); + return GlobalData; + } + + void allocateZeroInitAddress(const std::string& Name, unsigned Alignment, unsigned Size) { + assert(isPowerOf2_32(Alignment) && Alignment > 0); + while (ZeroInitSizes.size() <= Alignment) ZeroInitSizes.push_back(0); + GlobalAddresses[Name] = Address(ZeroInitSizes[Alignment], Alignment*8, true); + ZeroInitSizes[Alignment] += Size; + while (ZeroInitSizes[Alignment] & (Alignment-1)) ZeroInitSizes[Alignment]++; + } + + // return the absolute offset of a global + unsigned getGlobalAddress(const std::string &s) { + GlobalAddressMap::const_iterator I = GlobalAddresses.find(s); + if (I == GlobalAddresses.end()) { + report_fatal_error("cannot find global address " + Twine(s)); + } + Address a = I->second; + int Alignment = a.Alignment/8; + assert(AlignedHeapStarts.size() > (unsigned)Alignment); + int Ret = a.Offset + (a.ZeroInit ? ZeroInitStarts[Alignment] : AlignedHeapStarts[Alignment]); + assert(Alignment < (int)(a.ZeroInit ? 
ZeroInitStarts.size() : AlignedHeapStarts.size())); + assert(Ret % Alignment == 0); + return Ret; + } + // returns the internal offset inside the proper block: GlobalData8, 32, 64 + unsigned getRelativeGlobalAddress(const std::string &s) { + GlobalAddressMap::const_iterator I = GlobalAddresses.find(s); + if (I == GlobalAddresses.end()) { + report_fatal_error("cannot find global address " + Twine(s)); + } + Address a = I->second; + return a.Offset; + } + char getFunctionSignatureLetter(Type *T) { + if (T->isVoidTy()) return 'v'; + else if (T->isFloatingPointTy()) { + if (PreciseF32 && T->isFloatTy()) { + return 'f'; + } else { + return 'd'; + } + } else if (VectorType *VT = dyn_cast(T)) { + checkVectorType(VT); + if (VT->getElementType()->isIntegerTy()) { + return 'I'; + } else { + return 'F'; + } + } else { + return 'i'; + } + } + std::string getFunctionSignature(const FunctionType *F) { + std::string Ret; + Ret += getFunctionSignatureLetter(F->getReturnType()); + for (FunctionType::param_iterator AI = F->param_begin(), + AE = F->param_end(); AI != AE; ++AI) { + Ret += getFunctionSignatureLetter(*AI); + } + return Ret; + } + FunctionTable& ensureFunctionTable(const FunctionType *FT) { + FunctionTable &Table = FunctionTables[getFunctionSignature(FT)]; + unsigned MinSize = ReservedFunctionPointers ? 2*(ReservedFunctionPointers+1) : 1; // each reserved slot must be 2-aligned + while (Table.size() < MinSize) Table.push_back("0"); + return Table; + } + unsigned getFunctionIndex(const Function *F) { + const std::string &Name = getJSName(F); + if (IndexedFunctions.find(Name) != IndexedFunctions.end()) return IndexedFunctions[Name]; + std::string Sig = getFunctionSignature(F->getFunctionType()); + FunctionTable& Table = ensureFunctionTable(F->getFunctionType()); + if (NoAliasingFunctionPointers) { + while (Table.size() < NextFunctionIndex) Table.push_back("0"); + } + // XXX this is wrong, it's always 1. but, that's fine in the ARM-like ABI + // we have which allows unaligned func the one risk is if someone forces a + // function to be aligned, and relies on that. Could do F->getAlignment() + // instead. + unsigned Alignment = 1; + while (Table.size() % Alignment) Table.push_back("0"); + unsigned Index = Table.size(); + Table.push_back(Name); + IndexedFunctions[Name] = Index; + if (NoAliasingFunctionPointers) { + NextFunctionIndex = Index+1; + } + + // invoke the callHandler for this, if there is one. the function may only be indexed but never called directly, and we may need to do things in the handler + CallHandlerMap::const_iterator CH = CallHandlers.find(Name); + if (CH != CallHandlers.end()) { + (this->*(CH->second))(NULL, Name, -1); + } + + return Index; + } + + unsigned getBlockAddress(const Function *F, const BasicBlock *BB) { + BlockIndexMap& Blocks = BlockAddresses[F]; + if (Blocks.find(BB) == Blocks.end()) { + Blocks[BB] = Blocks.size(); // block addresses start from 0 + } + return Blocks[BB]; + } + + unsigned getBlockAddress(const BlockAddress *BA) { + return getBlockAddress(BA->getFunction(), BA->getBasicBlock()); + } + + const Value *resolveFully(const Value *V) { + bool More = true; + while (More) { + More = false; + if (const GlobalAlias *GA = dyn_cast(V)) { + V = GA->getAliasee(); + More = true; + } + if (const ConstantExpr *CE = dyn_cast(V)) { + V = CE->getOperand(0); // ignore bitcasts + More = true; + } + } + return V; + } + + std::string relocateFunctionPointer(std::string FP) { + return Relocatable ? 
"(fb + (" + FP + ") | 0)" : FP; + } + + std::string relocateGlobal(std::string G) { + return Relocatable ? "(gb + (" + G + ") | 0)" : G; + } + + unsigned getIDForMetadata(Metadata *MD) { + if (cyberDWARFData.IndexedMetadata.find(MD) == cyberDWARFData.IndexedMetadata.end()) { + cyberDWARFData.IndexedMetadata[MD] = cyberDWARFData.MetadataNum++; + } + return cyberDWARFData.IndexedMetadata[MD]; + } + + // Return a constant we are about to write into a global as a numeric offset. If the + // value is not known at compile time, emit a postSet to that location. + unsigned getConstAsOffset(const Value *V, unsigned AbsoluteTarget) { + V = resolveFully(V); + if (const Function *F = dyn_cast(V)) { + if (Relocatable) { + PostSets.push_back("\n HEAP32[" + relocateGlobal(utostr(AbsoluteTarget)) + " >> 2] = " + relocateFunctionPointer(utostr(getFunctionIndex(F))) + ';'); + return 0; // emit zero in there for now, until the postSet + } + return getFunctionIndex(F); + } else if (const BlockAddress *BA = dyn_cast(V)) { + return getBlockAddress(BA); + } else { + if (const GlobalVariable *GV = dyn_cast(V)) { + if (!GV->hasInitializer()) { + // We don't have a constant to emit here, so we must emit a postSet + // All postsets are of external values, so they are pointers, hence 32-bit + std::string Name = getOpName(V); + Externals.insert(Name); + if (Relocatable) { + PostSets.push_back("\n temp = g$" + Name + "() | 0;"); // we access linked externs through calls, and must do so to a temp for heap growth validation + // see later down about adding to an offset + std::string access = "HEAP32[" + relocateGlobal(utostr(AbsoluteTarget)) + " >> 2]"; + PostSets.push_back("\n " + access + " = (" + access + " | 0) + temp;"); + } else { + PostSets.push_back("\n HEAP32[" + relocateGlobal(utostr(AbsoluteTarget)) + " >> 2] = " + Name + ';'); + } + return 0; // emit zero in there for now, until the postSet + } else if (Relocatable) { + // this is one of our globals, but we must relocate it. we return zero, but the caller may store + // an added offset, which we read at postSet time; in other words, we just add to that offset + std::string access = "HEAP32[" + relocateGlobal(utostr(AbsoluteTarget)) + " >> 2]"; + PostSets.push_back("\n " + access + " = (" + access + " | 0) + " + relocateGlobal(utostr(getGlobalAddress(V->getName().str()))) + ';'); + return 0; // emit zero in there for now, until the postSet + } + } + assert(!Relocatable); + return getGlobalAddress(V->getName().str()); + } + } + + // Transform the string input into emscripten_asm_const_*(str, args1, arg2) + // into an id. We emit a map of id => string contents, and emscripten + // wraps it up so that calling that id calls that function. 
+ unsigned getAsmConstId(const Value *V, std::string Sig) { + V = resolveFully(V); + const Constant *CI = cast(V)->getInitializer(); + std::string code; + if (isa(CI)) { + code = " "; + } else { + const ConstantDataSequential *CDS = cast(CI); + code = CDS->getAsString(); + // replace newlines quotes with escaped newlines + size_t curr = 0; + while ((curr = code.find("\\n", curr)) != std::string::npos) { + code = code.replace(curr, 2, "\\\\n"); + curr += 3; // skip this one + } + // replace double quotes with escaped single quotes + curr = 0; + while ((curr = code.find('"', curr)) != std::string::npos) { + if (curr == 0 || code[curr-1] != '\\') { + code = code.replace(curr, 1, "\\" "\""); + curr += 2; // skip this one + } else { // already escaped, escape the slash as well + code = code.replace(curr, 1, "\\" "\\" "\""); + curr += 3; // skip this one + } + } + } + unsigned Id; + if (AsmConsts.count(code) > 0) { + auto& Info = AsmConsts[code]; + Id = Info.Id; + Info.Sigs.insert(Sig); + } else { + AsmConstInfo Info; + Info.Id = Id = AsmConsts.size(); + Info.Sigs.insert(Sig); + AsmConsts[code] = Info; + } + return Id; + } + + // Test whether the given value is known to be an absolute value or one we turn into an absolute value + bool isAbsolute(const Value *P) { + if (const IntToPtrInst *ITP = dyn_cast(P)) { + return isa(ITP->getOperand(0)); + } + if (isa(P) || isa(P)) { + return true; + } + return false; + } + + void checkVectorType(Type *T) { + VectorType *VT = cast(T); + // LLVM represents the results of vector comparison as vectors of i1. We + // represent them as vectors of integers the size of the vector elements + // of the compare that produced them. + assert(VT->getElementType()->getPrimitiveSizeInBits() == 8 || + VT->getElementType()->getPrimitiveSizeInBits() == 16 || + VT->getElementType()->getPrimitiveSizeInBits() == 32 || + VT->getElementType()->getPrimitiveSizeInBits() == 64 || + VT->getElementType()->getPrimitiveSizeInBits() == 128 || + VT->getElementType()->getPrimitiveSizeInBits() == 1); + assert(VT->getBitWidth() <= 128); + assert(VT->getNumElements() <= 16); + if (VT->getElementType()->isIntegerTy()) + { + if (VT->getNumElements() <= 16 && VT->getElementType()->getPrimitiveSizeInBits() == 8) UsesSIMDInt8x16 = true; + else if (VT->getNumElements() <= 8 && VT->getElementType()->getPrimitiveSizeInBits() == 16) UsesSIMDInt16x8 = true; + else if (VT->getNumElements() <= 4 && VT->getElementType()->getPrimitiveSizeInBits() == 32) UsesSIMDInt32x4 = true; + else if (VT->getElementType()->getPrimitiveSizeInBits() == 1) { + if (VT->getNumElements() == 16) UsesSIMDBool8x16 = true; + else if (VT->getNumElements() == 8) UsesSIMDBool16x8 = true; + else if (VT->getNumElements() == 4) UsesSIMDBool32x4 = true; + else if (VT->getNumElements() == 2) UsesSIMDBool64x2 = true; + else report_fatal_error("Unsupported boolean vector type with numElems: " + Twine(VT->getNumElements()) + ", primitiveSize: " + Twine(VT->getElementType()->getPrimitiveSizeInBits()) + "!"); + } else if (VT->getElementType()->getPrimitiveSizeInBits() != 1 && VT->getElementType()->getPrimitiveSizeInBits() != 128) { + report_fatal_error("Unsupported integer vector type with numElems: " + Twine(VT->getNumElements()) + ", primitiveSize: " + Twine(VT->getElementType()->getPrimitiveSizeInBits()) + "!"); + } + } + else + { + if (VT->getNumElements() <= 4 && VT->getElementType()->getPrimitiveSizeInBits() == 32) UsesSIMDFloat32x4 = true; + else if (VT->getNumElements() <= 2 && VT->getElementType()->getPrimitiveSizeInBits() == 64) 
UsesSIMDFloat64x2 = true; + else report_fatal_error("Unsupported floating point vector type numElems: " + Twine(VT->getNumElements()) + ", primitiveSize: " + Twine(VT->getElementType()->getPrimitiveSizeInBits()) + "!"); + } + } + + std::string ensureCast(std::string S, Type *T, AsmCast sign) { + if (sign & ASM_MUST_CAST) return getCast(S, T); + return S; + } + + static void emitDebugInfo(raw_ostream& Code, const Instruction *I) { + auto &Loc = I->getDebugLoc(); + if (Loc) { + unsigned Line = Loc.getLine(); + auto *Scope = cast_or_null(Loc.getScope()); + if (Scope) { + StringRef File = Scope->getFilename(); + if (Line > 0) + Code << " //@line " << utostr(Line) << " \"" << (File.size() > 0 ? File.str() : "?") << "\""; + } + } + } + + std::string ftostr(const ConstantFP *CFP, AsmCast sign) { + const APFloat &flt = CFP->getValueAPF(); + + // Emscripten has its own spellings for infinity and NaN. + if (flt.getCategory() == APFloat::fcInfinity) return ensureCast(flt.isNegative() ? "-inf" : "inf", CFP->getType(), sign); + else if (flt.getCategory() == APFloat::fcNaN) { + APInt i = flt.bitcastToAPInt(); + if ((i.getBitWidth() == 32 && i != APInt(32, 0x7FC00000)) || (i.getBitWidth() == 64 && i != APInt(64, 0x7FF8000000000000ULL))) { + // If we reach here, things have already gone bad, and JS engine NaN canonicalization will kill the bits in the float. However can't make + // this a build error in order to not break people's existing code, so issue a warning instead. + if (WarnOnNoncanonicalNans) { + errs() << "emcc: warning: cannot represent a NaN literal '" << CFP << "' with custom bit pattern in NaN-canonicalizing JS engines (e.g. Firefox and Safari) without erasing bits!\n"; + if (CurrInstruction) { + errs() << " in " << *CurrInstruction << " in " << CurrInstruction->getParent()->getParent()->getName() << "() "; + emitDebugInfo(errs(), CurrInstruction); + errs() << '\n'; + } + } + } + return ensureCast("nan", CFP->getType(), sign); + } + + // Request 9 or 17 digits, aka FLT_DECIMAL_DIG or DBL_DECIMAL_DIG (our + // long double is the the same as our double), to avoid rounding errors. + SmallString<29> Str; + flt.toString(Str, PreciseF32 && CFP->getType()->isFloatTy() ? 9 : 17); + + // asm.js considers literals to be floating-point literals when they contain a + // dot, however our output may be processed by UglifyJS, which doesn't + // currently preserve dots in all cases. Mark floating-point literals with + // unary plus to force them to floating-point. + if (APFloat(flt).roundToIntegral(APFloat::rmNearestTiesToEven) == APFloat::opOK) { + return '+' + Str.str().str(); + } + + return Str.str().str(); + } + + std::string getPtrLoad(const Value* Ptr); + + /// Given a pointer to memory, returns the HEAP object and index to that object that is used to access that memory. + /// @param Ptr [in] The heap object. + /// @param HeapName [out] Receives the name of the HEAP object used to perform the memory acess. + /// @return The index to the heap HeapName for the memory access. + std::string getHeapNameAndIndex(const Value *Ptr, const char **HeapName); + + // Like getHeapNameAndIndex(), but uses the given memory operation size and whether it is an Integer instead of the type of Ptr. + std::string getHeapNameAndIndex(const Value *Ptr, const char **HeapName, unsigned Bytes, bool Integer); + + /// Like getHeapNameAndIndex(), but for global variables only. 
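+  /// For example, a 4-byte integer global placed at static address 1024 would
+  /// yield *HeapName == "HEAP32" and, in the non-relocatable case, the index
+  /// string "256" (i.e. 1024 >> 2).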
+ std::string getHeapNameAndIndexToGlobal(const GlobalVariable *GV, unsigned Bytes, bool Integer, const char **HeapName); + + /// Like getHeapNameAndIndex(), but for pointers represented in string expression form. + static std::string getHeapNameAndIndexToPtr(const std::string& Ptr, unsigned Bytes, bool Integer, const char **HeapName); + + std::string getShiftedPtr(const Value *Ptr, unsigned Bytes); + + /// Returns a string expression for accessing the given memory address. + std::string getPtrUse(const Value* Ptr); + + /// Like getPtrUse(), but for pointers represented in string expression form. + static std::string getHeapAccess(const std::string& Name, unsigned Bytes, bool Integer=true); + + std::string getConstant(const Constant*, AsmCast sign=ASM_SIGNED); + template + std::string getConstantVector(const VectorType *C); + std::string getValueAsStr(const Value*, AsmCast sign=ASM_SIGNED); + std::string getValueAsCastStr(const Value*, AsmCast sign=ASM_SIGNED); + std::string getValueAsParenStr(const Value*); + std::string getValueAsCastParenStr(const Value*, AsmCast sign=ASM_SIGNED); + + const std::string &getJSName(const Value* val); + + std::string getPhiCode(const BasicBlock *From, const BasicBlock *To); + + void printAttributes(const AttributeSet &PAL, const std::string &name); + void printType(Type* Ty); + void printTypes(const Module* M); + + std::string getAdHocAssign(const StringRef &, Type *); + std::string getAssign(const Instruction *I); + std::string getAssignIfNeeded(const Value *V); + std::string getCast(const StringRef &, Type *, AsmCast sign=ASM_SIGNED); + std::string getParenCast(const StringRef &, Type *, AsmCast sign=ASM_SIGNED); + std::string getDoubleToInt(const StringRef &); + std::string getIMul(const Value *, const Value *); + std::string getLoad(const Instruction *I, const Value *P, Type *T, unsigned Alignment, char sep=';'); + std::string getStore(const Instruction *I, const Value *P, Type *T, const std::string& VS, unsigned Alignment, char sep=';'); + std::string getStackBump(unsigned Size); + std::string getStackBump(const std::string &Size); + + void addBlock(const BasicBlock *BB, Relooper& R, LLVMToRelooperMap& LLVMToRelooper); + void printFunctionBody(const Function *F); + void generateInsertElementExpression(const InsertElementInst *III, raw_string_ostream& Code); + void generateExtractElementExpression(const ExtractElementInst *EEI, raw_string_ostream& Code); + std::string getSIMDCast(VectorType *fromType, VectorType *toType, const std::string &valueStr, bool signExtend); + void generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw_string_ostream& Code); + void generateICmpExpression(const ICmpInst *I, raw_string_ostream& Code); + void generateFCmpExpression(const FCmpInst *I, raw_string_ostream& Code); + void generateShiftExpression(const BinaryOperator *I, raw_string_ostream& Code); + void generateUnrolledExpression(const User *I, raw_string_ostream& Code); + bool generateSIMDExpression(const User *I, raw_string_ostream& Code); + void generateExpression(const User *I, raw_string_ostream& Code); + + // debug information + std::string generateDebugRecordForVar(Metadata *MD); + void buildCyberDWARFData(); + + std::string getOpName(const Value*); + + void processConstants(); + + // nativization + + typedef std::set NativizedVarsMap; + NativizedVarsMap NativizedVars; + + void calculateNativizedVars(const Function *F); + + // special analyses + + bool canReloop(const Function *F); + + // main entry point + + void printModuleBody(); + }; +} // end 
anonymous namespace.
+
+raw_pwrite_stream &JSWriter::nl(raw_pwrite_stream &Out, int delta) {
+  Out << '\n';
+  return Out;
+}
+
+static inline char halfCharToHex(unsigned char half) {
+  assert(half <= 15);
+  if (half <= 9) {
+    return '0' + half;
+  } else {
+    return 'A' + half - 10;
+  }
+}
+
+static inline void sanitizeGlobal(std::string& str) {
+  // Global names are prefixed with "_" to prevent them from colliding with
+  // names of things in normal JS.
+  str = "_" + str;
+
+  // Function and global names should already be in C-style format, aside
+  // from '.' in llvm intrinsic names and possibly '$' and so forth.
+  // There is a risk of collisions here: we just lower all these invalid
+  // characters to '_', but collisions should not happen in practice.
+  // TODO: in debug mode, check for such collisions.
+  size_t OriginalSize = str.size();
+  for (size_t i = 1; i < OriginalSize; ++i) {
+    unsigned char c = str[i];
+    if (!isalnum(c) && c != '_') str[i] = '_';
+  }
+}
+
+static inline void sanitizeLocal(std::string& str) {
+  // Local names are prefixed with "$" to prevent them from colliding with
+  // global names.
+  str = "$" + str;
+
+  // We need to convert every string that is not a valid JS identifier into
+  // a valid one, without collisions - we cannot turn "x.a" into "x_a" while
+  // also leaving "x_a" as is, for example.
+  //
+  // We leave valid characters 0-9a-zA-Z and _ unchanged. Anything else
+  // we replace with $ and append a hex representation of that value,
+  // so for example x.a turns into x$a2e, x..a turns into x$$a2e2e.
+  //
+  // As an optimization, we replace . with $ without appending anything,
+  // unless there is another illegal character. The reason is that . is
+  // a common illegal character, and we want to avoid resizing strings
+  // for perf reasons. If we do see that we need to append something, then
+  // for . we just append Z (one character, instead of the hex code).
+ // + + size_t OriginalSize = str.size(); + int Queued = 0; + for (size_t i = 1; i < OriginalSize; ++i) { + unsigned char c = str[i]; + if (!isalnum(c) && c != '_') { + str[i] = '$'; + if (c == '.') { + Queued++; + } else { + size_t s = str.size(); + str.resize(s+2+Queued); + for (int i = 0; i < Queued; i++) { + str[s++] = 'Z'; + } + Queued = 0; + str[s] = halfCharToHex(c >> 4); + str[s+1] = halfCharToHex(c & 0xf); + } + } + } +} + +static inline std::string ensureFloat(const std::string &S, Type *T) { + if (PreciseF32 && T->isFloatTy()) { + return "Math_fround(" + S + ')'; + } + return S; +} + +static inline std::string ensureFloat(const std::string &value, bool wrap) { + if (wrap) { + return "Math_fround(" + value + ')'; + } + return value; +} + +void JSWriter::error(const std::string& msg) { + report_fatal_error(msg); +} + +std::string JSWriter::getPhiCode(const BasicBlock *From, const BasicBlock *To) { + // FIXME this is all quite inefficient, and also done once per incoming to each phi + + // Find the phis, and generate assignments and dependencies + std::set PhiVars; + for (BasicBlock::const_iterator I = To->begin(), E = To->end(); + I != E; ++I) { + const PHINode* P = dyn_cast(I); + if (!P) break; + PhiVars.insert(getJSName(P)); + } + typedef std::map StringMap; + StringMap assigns; // variable -> assign statement + std::map values; // variable -> Value + StringMap deps; // variable -> dependency + StringMap undeps; // reverse: dependency -> variable + for (BasicBlock::const_iterator I = To->begin(), E = To->end(); + I != E; ++I) { + const PHINode* P = dyn_cast(I); + if (!P) break; + int index = P->getBasicBlockIndex(From); + if (index < 0) continue; + // we found it + const std::string &name = getJSName(P); + assigns[name] = getAssign(P); + // Get the operand, and strip pointer casts, since normal expression + // translation also strips pointer casts, and we want to see the same + // thing so that we can detect any resulting dependencies. + const Value *V = P->getIncomingValue(index)->stripPointerCasts(); + values[name] = V; + std::string vname = getValueAsStr(V); + if (const Instruction *VI = dyn_cast(V)) { + if (VI->getParent() == To && PhiVars.find(vname) != PhiVars.end()) { + deps[name] = vname; + undeps[vname] = name; + } + } + } + // Emit assignments+values, taking into account dependencies, and breaking cycles + std::string pre = "", post = ""; + while (assigns.size() > 0) { + bool emitted = false; + for (StringMap::iterator I = assigns.begin(); I != assigns.end();) { + StringMap::iterator last = I; + std::string curr = last->first; + const Value *V = values[curr]; + std::string CV = getValueAsStr(V); + I++; // advance now, as we may erase + // if we have no dependencies, or we found none to emit and are at the end (so there is a cycle), emit + StringMap::const_iterator dep = deps.find(curr); + if (dep == deps.end() || (!emitted && I == assigns.end())) { + if (dep != deps.end()) { + // break a cycle + std::string depString = dep->second; + std::string temp = curr + "$phi"; + pre += getAdHocAssign(temp, V->getType()) + CV + ';'; + CV = temp; + deps.erase(curr); + undeps.erase(depString); + } + post += assigns[curr] + CV + ';'; + assigns.erase(last); + emitted = true; + } + } + } + return pre + post; +} + +const std::string &JSWriter::getJSName(const Value* val) { + ValueMap::const_iterator I = ValueNames.find(val); + if (I != ValueNames.end() && I->first == val) + return I->second; + + // If this is an alloca we've replaced with another, use the other name. 
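+  // (Static allocas may have been coalesced by the AllocaManager, so we ask
+  // for the representative alloca and reuse its name; every user of the
+  // merged slot then refers to the same JS local.)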
+ if (const AllocaInst *AI = dyn_cast(val)) { + if (AI->isStaticAlloca()) { + const AllocaInst *Rep = Allocas.getRepresentative(AI); + if (Rep != AI) { + return getJSName(Rep); + } + } + } + + std::string name; + if (val->hasName()) { + name = val->getName().str(); + } else { + name = utostr(UniqueNum++); + } + + if (isa(val)) { + sanitizeGlobal(name); + } else { + sanitizeLocal(name); + } + + return ValueNames[val] = name; +} + +std::string JSWriter::getAdHocAssign(const StringRef &s, Type *t) { + UsedVars[s] = t; + return (s + " = ").str(); +} + +std::string JSWriter::getAssign(const Instruction *I) { + return getAdHocAssign(getJSName(I), I->getType()); +} + +std::string JSWriter::getAssignIfNeeded(const Value *V) { + if (const Instruction *I = dyn_cast(V)) { + if (!I->use_empty()) return getAssign(I); + } + return std::string(); +} + +const char *SIMDType(VectorType *t) { + int primSize = t->getElementType()->getPrimitiveSizeInBits(); + assert(primSize <= 128); + + if (t->getElementType()->isIntegerTy()) { + if (t->getElementType()->getPrimitiveSizeInBits() == 1) { + if (t->getNumElements() == 2) return "Bool64x2"; + if (t->getNumElements() <= 4) return "Bool32x4"; + if (t->getNumElements() <= 8) return "Bool16x8"; + if (t->getNumElements() <= 16) return "Bool8x16"; + // fall-through to error + } else { + if (t->getElementType()->getPrimitiveSizeInBits() > 32 && t->getNumElements() <= 2) return "Int64x2"; + if (t->getElementType()->getPrimitiveSizeInBits() > 16 && t->getNumElements() <= 4) return "Int32x4"; + if (t->getElementType()->getPrimitiveSizeInBits() > 8 && t->getNumElements() <= 8) return "Int16x8"; + if (t->getElementType()->getPrimitiveSizeInBits() <= 8 && t->getNumElements() <= 16) return "Int8x16"; + // fall-through to error + } + } else { // float type + if (t->getElementType()->getPrimitiveSizeInBits() > 32 && t->getNumElements() <= 2) return "Float64x2"; + if (t->getElementType()->getPrimitiveSizeInBits() > 16 && t->getNumElements() <= 4) return "Float32x4"; + if (t->getElementType()->getPrimitiveSizeInBits() > 8 && t->getNumElements() <= 8) return "Float16x8"; + if (t->getElementType()->getPrimitiveSizeInBits() <= 8 && t->getNumElements() <= 16) return "Float8x16"; + // fall-through to error + } + errs() << *t << "\n"; + report_fatal_error("Unsupported type!"); +} + +std::string JSWriter::getCast(const StringRef &s, Type *t, AsmCast sign) { + switch (t->getTypeID()) { + default: { + errs() << *t << "\n"; + assert(false && "Unsupported type"); + } + case Type::VectorTyID: + return std::string("SIMD_") + SIMDType(cast(t)) + "_check(" + s.str() + ")"; + case Type::FloatTyID: { + if (PreciseF32 && !(sign & ASM_FFI_OUT)) { + if (sign & ASM_FFI_IN) { + return ("Math_fround(+(" + s + "))").str(); + } else { + return ("Math_fround(" + s + ")").str(); + } + } + // otherwise fall through to double + } + case Type::DoubleTyID: return ("+" + s).str(); + case Type::IntegerTyID: { + // fall through to the end for nonspecific + switch (t->getIntegerBitWidth()) { + case 1: if (!(sign & ASM_NONSPECIFIC)) return sign == ASM_UNSIGNED ? (s + "&1").str() : (s + "<<31>>31").str(); + case 8: if (!(sign & ASM_NONSPECIFIC)) return sign == ASM_UNSIGNED ? (s + "&255").str() : (s + "<<24>>24").str(); + case 16: if (!(sign & ASM_NONSPECIFIC)) return sign == ASM_UNSIGNED ? (s + "&65535").str() : (s + "<<16>>16").str(); + case 32: return (sign == ASM_SIGNED || (sign & ASM_NONSPECIFIC) ? 
s + "|0" : s + ">>>0").str(); + default: llvm_unreachable("Unsupported integer cast bitwidth"); + } + } + case Type::PointerTyID: + return (sign == ASM_SIGNED || (sign & ASM_NONSPECIFIC) ? s + "|0" : s + ">>>0").str(); + } +} + +std::string JSWriter::getParenCast(const StringRef &s, Type *t, AsmCast sign) { + return getCast(("(" + s + ")").str(), t, sign); +} + +std::string JSWriter::getDoubleToInt(const StringRef &s) { + return ("~~(" + s + ")").str(); +} + +std::string JSWriter::getIMul(const Value *V1, const Value *V2) { + const ConstantInt *CI = NULL; + const Value *Other = NULL; + if ((CI = dyn_cast(V1))) { + Other = V2; + } else if ((CI = dyn_cast(V2))) { + Other = V1; + } + // we ignore optimizing the case of multiplying two constants - optimizer would have removed those + if (CI) { + std::string OtherStr = getValueAsStr(Other); + unsigned C = CI->getZExtValue(); + if (C == 0) return "0"; + if (C == 1) return OtherStr; + unsigned Orig = C, Shifts = 0; + while (C) { + if ((C & 1) && (C != 1)) break; // not power of 2 + C >>= 1; + Shifts++; + if (C == 0) return OtherStr + "<<" + utostr(Shifts-1); // power of 2, emit shift + } + if (Orig < (1<<20)) return "(" + OtherStr + "*" + utostr(Orig) + ")|0"; // small enough, avoid imul + } + return "Math_imul(" + getValueAsStr(V1) + ", " + getValueAsStr(V2) + ")|0"; // unknown or too large, emit imul +} + +static inline const char *getHeapName(int Bytes, int Integer) +{ + switch (Bytes) { + default: llvm_unreachable("Unsupported type"); + case 8: return "HEAPF64"; + case 4: return Integer ? "HEAP32" : "HEAPF32"; + case 2: return "HEAP16"; + case 1: return "HEAP8"; + } +} + +static inline int getHeapShift(int Bytes) +{ + switch (Bytes) { + default: llvm_unreachable("Unsupported type"); + case 8: return 3; + case 4: return 2; + case 2: return 1; + case 1: return 0; + } +} + +static inline const char *getHeapShiftStr(int Bytes) +{ + switch (Bytes) { + default: llvm_unreachable("Unsupported type"); + case 8: return ">>3"; + case 4: return ">>2"; + case 2: return ">>1"; + case 1: return ">>0"; + } +} + +std::string JSWriter::getHeapNameAndIndexToGlobal(const GlobalVariable *GV, unsigned Bytes, bool Integer, const char **HeapName) +{ + unsigned Addr = getGlobalAddress(GV->getName().str()); + *HeapName = getHeapName(Bytes, Integer); + if (!Relocatable) { + return utostr(Addr >> getHeapShift(Bytes)); + } else { + return relocateGlobal(utostr(Addr)) + getHeapShiftStr(Bytes); + } +} + +std::string JSWriter::getHeapNameAndIndexToPtr(const std::string& Ptr, unsigned Bytes, bool Integer, const char **HeapName) +{ + *HeapName = getHeapName(Bytes, Integer); + return Ptr + getHeapShiftStr(Bytes); +} + +std::string JSWriter::getHeapNameAndIndex(const Value *Ptr, const char **HeapName, unsigned Bytes, bool Integer) +{ + const GlobalVariable *GV; + if ((GV = dyn_cast(Ptr->stripPointerCasts())) && GV->hasInitializer()) { + // Note that we use the type of the pointer, as it might be a bitcast of the underlying global. We need the right type. 
+ return getHeapNameAndIndexToGlobal(GV, Bytes, Integer, HeapName); + } else { + return getHeapNameAndIndexToPtr(getValueAsStr(Ptr), Bytes, Integer, HeapName); + } +} + +std::string JSWriter::getHeapNameAndIndex(const Value *Ptr, const char **HeapName) +{ + Type *t = cast(Ptr->getType())->getElementType(); + return getHeapNameAndIndex(Ptr, HeapName, DL->getTypeAllocSize(t), t->isIntegerTy() || t->isPointerTy()); +} + +static const char *heapNameToAtomicTypeName(const char *HeapName) +{ + if (!strcmp(HeapName, "HEAPF32")) return "f32"; + if (!strcmp(HeapName, "HEAPF64")) return "f64"; + return ""; +} + +std::string JSWriter::getLoad(const Instruction *I, const Value *P, Type *T, unsigned Alignment, char sep) { + std::string Assign = getAssign(I); + unsigned Bytes = DL->getTypeAllocSize(T); + std::string text; + if (Bytes <= Alignment || Alignment == 0) { + if (EnablePthreads && cast(I)->isVolatile()) { + const char *HeapName; + std::string Index = getHeapNameAndIndex(P, &HeapName); + if (!strcmp(HeapName, "HEAPF32") || !strcmp(HeapName, "HEAPF64")) { + bool fround = PreciseF32 && !strcmp(HeapName, "HEAPF32"); + // TODO: If https://bugzilla.mozilla.org/show_bug.cgi?id=1131613 and https://bugzilla.mozilla.org/show_bug.cgi?id=1131624 are + // implemented, we could remove the emulation, but until then we must emulate manually. + text = Assign + (fround ? "Math_fround(" : "+") + "_emscripten_atomic_load_" + heapNameToAtomicTypeName(HeapName) + "(" + getValueAsStr(P) + (fround ? "))" : ")"); + } else { + text = Assign + "(Atomics_load(" + HeapName + ',' + Index + ")|0)"; + } + } else { + text = Assign + getPtrLoad(P); + } + if (isAbsolute(P)) { + // loads from an absolute constants are either intentional segfaults (int x = *((int*)0)), or code problems + text += "; abort() /* segfault, load from absolute addr */"; + } + } else { + // unaligned in some manner + + if (EnablePthreads && cast(I)->isVolatile()) { + errs() << "emcc: warning: unable to implement unaligned volatile load as atomic in " << I->getParent()->getParent()->getName() << ":" << *I << " | "; + emitDebugInfo(errs(), I); + errs() << "\n"; + } + + if (WarnOnUnaligned) { + errs() << "emcc: warning: unaligned load in " << I->getParent()->getParent()->getName() << ":" << *I << " | "; + emitDebugInfo(errs(), I); + errs() << "\n"; + } + std::string PS = getValueAsStr(P); + switch (Bytes) { + case 8: { + switch (Alignment) { + case 4: { + text = "HEAP32[tempDoublePtr>>2]=HEAP32[" + PS + ">>2]" + sep + + "HEAP32[tempDoublePtr+4>>2]=HEAP32[" + PS + "+4>>2]"; + break; + } + case 2: { + text = "HEAP16[tempDoublePtr>>1]=HEAP16[" + PS + ">>1]" + sep + + "HEAP16[tempDoublePtr+2>>1]=HEAP16[" + PS + "+2>>1]" + sep + + "HEAP16[tempDoublePtr+4>>1]=HEAP16[" + PS + "+4>>1]" + sep + + "HEAP16[tempDoublePtr+6>>1]=HEAP16[" + PS + "+6>>1]"; + break; + } + case 1: { + text = "HEAP8[tempDoublePtr>>0]=HEAP8[" + PS + ">>0]" + sep + + "HEAP8[tempDoublePtr+1>>0]=HEAP8[" + PS + "+1>>0]" + sep + + "HEAP8[tempDoublePtr+2>>0]=HEAP8[" + PS + "+2>>0]" + sep + + "HEAP8[tempDoublePtr+3>>0]=HEAP8[" + PS + "+3>>0]" + sep + + "HEAP8[tempDoublePtr+4>>0]=HEAP8[" + PS + "+4>>0]" + sep + + "HEAP8[tempDoublePtr+5>>0]=HEAP8[" + PS + "+5>>0]" + sep + + "HEAP8[tempDoublePtr+6>>0]=HEAP8[" + PS + "+6>>0]" + sep + + "HEAP8[tempDoublePtr+7>>0]=HEAP8[" + PS + "+7>>0]"; + break; + } + default: assert(0 && "bad 8 store"); + } + text += sep + Assign + "+HEAPF64[tempDoublePtr>>3]"; + break; + } + case 4: { + if (T->isIntegerTy() || T->isPointerTy()) { + switch (Alignment) { + case 2: { + 
text = Assign + "HEAPU16[" + PS + ">>1]|" + + "(HEAPU16[" + PS + "+2>>1]<<16)"; + break; + } + case 1: { + text = Assign + "HEAPU8[" + PS + ">>0]|" + + "(HEAPU8[" + PS + "+1>>0]<<8)|" + + "(HEAPU8[" + PS + "+2>>0]<<16)|" + + "(HEAPU8[" + PS + "+3>>0]<<24)"; + break; + } + default: assert(0 && "bad 4i store"); + } + } else { // float + assert(T->isFloatingPointTy()); + switch (Alignment) { + case 2: { + text = "HEAP16[tempDoublePtr>>1]=HEAP16[" + PS + ">>1]" + sep + + "HEAP16[tempDoublePtr+2>>1]=HEAP16[" + PS + "+2>>1]"; + break; + } + case 1: { + text = "HEAP8[tempDoublePtr>>0]=HEAP8[" + PS + ">>0]" + sep + + "HEAP8[tempDoublePtr+1>>0]=HEAP8[" + PS + "+1>>0]" + sep + + "HEAP8[tempDoublePtr+2>>0]=HEAP8[" + PS + "+2>>0]" + sep + + "HEAP8[tempDoublePtr+3>>0]=HEAP8[" + PS + "+3>>0]"; + break; + } + default: assert(0 && "bad 4f store"); + } + text += sep + Assign + getCast("HEAPF32[tempDoublePtr>>2]", Type::getFloatTy(TheModule->getContext())); + } + break; + } + case 2: { + text = Assign + "HEAPU8[" + PS + ">>0]|" + + "(HEAPU8[" + PS + "+1>>0]<<8)"; + break; + } + default: assert(0 && "bad store"); + } + } + return text; +} + +std::string JSWriter::getStore(const Instruction *I, const Value *P, Type *T, const std::string& VS, unsigned Alignment, char sep) { + assert(sep == ';'); // FIXME when we need that + unsigned Bytes = DL->getTypeAllocSize(T); + std::string text; + if (Bytes <= Alignment || Alignment == 0) { + if (EnablePthreads && cast(I)->isVolatile()) { + const char *HeapName; + std::string Index = getHeapNameAndIndex(P, &HeapName); + if (!strcmp(HeapName, "HEAPF32") || !strcmp(HeapName, "HEAPF64")) { + // TODO: If https://bugzilla.mozilla.org/show_bug.cgi?id=1131613 and https://bugzilla.mozilla.org/show_bug.cgi?id=1131624 are + // implemented, we could remove the emulation, but until then we must emulate manually. 
+ text = std::string("_emscripten_atomic_store_") + heapNameToAtomicTypeName(HeapName) + "(" + getValueAsStr(P) + ',' + VS + ')'; + if (PreciseF32 && !strcmp(HeapName, "HEAPF32")) + text = "Math_fround(" + text + ")"; + else + text = "+" + text; + } else { + text = std::string("Atomics_store(") + HeapName + ',' + Index + ',' + VS + ")|0"; + } + } else { + text = getPtrUse(P) + " = " + VS; + } + if (Alignment == 536870912) text += "; abort() /* segfault */"; + } else { + // unaligned in some manner + + if (EnablePthreads && cast(I)->isVolatile()) { + errs() << "emcc: warning: unable to implement unaligned volatile store as atomic in " << I->getParent()->getParent()->getName() << ":" << *I << " | "; + emitDebugInfo(errs(), I); + errs() << "\n"; + } + + if (WarnOnUnaligned) { + errs() << "emcc: warning: unaligned store in " << I->getParent()->getParent()->getName() << ":" << *I << " | "; + emitDebugInfo(errs(), I); + errs() << "\n"; + } + std::string PS = getValueAsStr(P); + switch (Bytes) { + case 8: { + text = "HEAPF64[tempDoublePtr>>3]=" + VS + ';'; + switch (Alignment) { + case 4: { + text += "HEAP32[" + PS + ">>2]=HEAP32[tempDoublePtr>>2];" + + "HEAP32[" + PS + "+4>>2]=HEAP32[tempDoublePtr+4>>2]"; + break; + } + case 2: { + text += "HEAP16[" + PS + ">>1]=HEAP16[tempDoublePtr>>1];" + + "HEAP16[" + PS + "+2>>1]=HEAP16[tempDoublePtr+2>>1];" + + "HEAP16[" + PS + "+4>>1]=HEAP16[tempDoublePtr+4>>1];" + + "HEAP16[" + PS + "+6>>1]=HEAP16[tempDoublePtr+6>>1]"; + break; + } + case 1: { + text += "HEAP8[" + PS + ">>0]=HEAP8[tempDoublePtr>>0];" + + "HEAP8[" + PS + "+1>>0]=HEAP8[tempDoublePtr+1>>0];" + + "HEAP8[" + PS + "+2>>0]=HEAP8[tempDoublePtr+2>>0];" + + "HEAP8[" + PS + "+3>>0]=HEAP8[tempDoublePtr+3>>0];" + + "HEAP8[" + PS + "+4>>0]=HEAP8[tempDoublePtr+4>>0];" + + "HEAP8[" + PS + "+5>>0]=HEAP8[tempDoublePtr+5>>0];" + + "HEAP8[" + PS + "+6>>0]=HEAP8[tempDoublePtr+6>>0];" + + "HEAP8[" + PS + "+7>>0]=HEAP8[tempDoublePtr+7>>0]"; + break; + } + default: assert(0 && "bad 8 store"); + } + break; + } + case 4: { + if (T->isIntegerTy() || T->isPointerTy()) { + switch (Alignment) { + case 2: { + text = "HEAP16[" + PS + ">>1]=" + VS + "&65535;" + + "HEAP16[" + PS + "+2>>1]=" + VS + ">>>16"; + break; + } + case 1: { + text = "HEAP8[" + PS + ">>0]=" + VS + "&255;" + + "HEAP8[" + PS + "+1>>0]=(" + VS + ">>8)&255;" + + "HEAP8[" + PS + "+2>>0]=(" + VS + ">>16)&255;" + + "HEAP8[" + PS + "+3>>0]=" + VS + ">>24"; + break; + } + default: assert(0 && "bad 4i store"); + } + } else { // float + assert(T->isFloatingPointTy()); + text = "HEAPF32[tempDoublePtr>>2]=" + VS + ';'; + switch (Alignment) { + case 2: { + text += "HEAP16[" + PS + ">>1]=HEAP16[tempDoublePtr>>1];" + + "HEAP16[" + PS + "+2>>1]=HEAP16[tempDoublePtr+2>>1]"; + break; + } + case 1: { + text += "HEAP8[" + PS + ">>0]=HEAP8[tempDoublePtr>>0];" + + "HEAP8[" + PS + "+1>>0]=HEAP8[tempDoublePtr+1>>0];" + + "HEAP8[" + PS + "+2>>0]=HEAP8[tempDoublePtr+2>>0];" + + "HEAP8[" + PS + "+3>>0]=HEAP8[tempDoublePtr+3>>0]"; + break; + } + default: assert(0 && "bad 4f store"); + } + } + break; + } + case 2: { + text = "HEAP8[" + PS + ">>0]=" + VS + "&255;" + + "HEAP8[" + PS + "+1>>0]=" + VS + ">>8"; + break; + } + default: assert(0 && "bad store"); + } + } + return text; +} + +std::string JSWriter::getStackBump(unsigned Size) { + return getStackBump(utostr(Size)); +} + +std::string JSWriter::getStackBump(const std::string &Size) { + std::string ret = "STACKTOP = STACKTOP + " + Size + "|0;"; + if (EmscriptenAssertions) { + ret += " if ((STACKTOP|0) >= (STACK_MAX|0)) 
abortStackOverflow(" + Size + "|0);"; + } + return ret; +} + +std::string JSWriter::getOpName(const Value* V) { // TODO: remove this + return getJSName(V); +} + +std::string JSWriter::getPtrLoad(const Value* Ptr) { + Type *t = cast(Ptr->getType())->getElementType(); + return getCast(getPtrUse(Ptr), t, ASM_NONSPECIFIC); +} + +std::string JSWriter::getHeapAccess(const std::string& Name, unsigned Bytes, bool Integer) { + const char *HeapName = 0; + std::string Index = getHeapNameAndIndexToPtr(Name, Bytes, Integer, &HeapName); + return std::string(HeapName) + '[' + Index + ']'; +} + +std::string JSWriter::getShiftedPtr(const Value *Ptr, unsigned Bytes) { + const char *HeapName = 0; // unused + return getHeapNameAndIndex(Ptr, &HeapName, Bytes, true /* Integer; doesn't matter */); +} + +std::string JSWriter::getPtrUse(const Value* Ptr) { + const char *HeapName = 0; + std::string Index = getHeapNameAndIndex(Ptr, &HeapName); + return std::string(HeapName) + '[' + Index + ']'; +} + +std::string JSWriter::getConstant(const Constant* CV, AsmCast sign) { + if (isa(CV)) return "0"; + + if (const Function *F = dyn_cast(CV)) { + return relocateFunctionPointer(utostr(getFunctionIndex(F))); + } + + if (const GlobalValue *GV = dyn_cast(CV)) { + if (GV->isDeclaration()) { + std::string Name = getOpName(GV); + Externals.insert(Name); + if (Relocatable) { + // we access linked externs through calls, which we load at the beginning of basic blocks + FuncRelocatableExterns.insert(Name); + Name = "t$" + Name; + UsedVars[Name] = Type::getInt32Ty(CV->getContext()); + } + return Name; + } + if (const GlobalAlias *GA = dyn_cast(CV)) { + // Since we don't currently support linking of our output, we don't need + // to worry about weak or other kinds of aliases. + return getConstant(GA->getAliasee()->stripPointerCasts(), sign); + } + return relocateGlobal(utostr(getGlobalAddress(GV->getName().str()))); + } + + if (const ConstantFP *CFP = dyn_cast(CV)) { + if (!(sign & ASM_FORCE_FLOAT_AS_INTBITS)) { + std::string S = ftostr(CFP, sign); + if (PreciseF32 && CV->getType()->isFloatTy() && !(sign & ASM_FFI_OUT)) { + S = "Math_fround(" + S + ")"; + } + return S; + } else { + const APFloat &flt = CFP->getValueAPF(); + APInt i = flt.bitcastToAPInt(); + assert(!(sign & ASM_UNSIGNED)); + if (i.getBitWidth() == 32) return itostr((int)(uint32_t)*i.getRawData()); + else return itostr(*i.getRawData()); + } + } else if (const ConstantInt *CI = dyn_cast(CV)) { + if (sign != ASM_UNSIGNED && CI->getValue().getBitWidth() == 1) { + sign = ASM_UNSIGNED; // bools must always be unsigned: either 0 or 1 + } + return CI->getValue().toString(10, sign != ASM_UNSIGNED); + } else if (isa(CV)) { + std::string S; + if (VectorType *VT = dyn_cast(CV->getType())) { + checkVectorType(VT); + S = std::string("SIMD_") + SIMDType(VT) + "_splat(" + ensureFloat("0", !VT->getElementType()->isIntegerTy()) + ')'; + } else { + S = CV->getType()->isFloatingPointTy() ? 
"+0" : "0"; // XXX refactor this + if (PreciseF32 && CV->getType()->isFloatTy() && !(sign & ASM_FFI_OUT)) { + S = "Math_fround(" + S + ")"; + } + } + return S; + } else if (isa(CV)) { + if (VectorType *VT = dyn_cast(CV->getType())) { + checkVectorType(VT); + return std::string("SIMD_") + SIMDType(VT) + "_splat(" + ensureFloat("0", !VT->getElementType()->isIntegerTy()) + ')'; + } else { + // something like [0 x i8*] zeroinitializer, which clang can emit for landingpads + return "0"; + } + } else if (const ConstantDataVector *DV = dyn_cast(CV)) { + return getConstantVector(DV); + } else if (const ConstantVector *V = dyn_cast(CV)) { + return getConstantVector(V); + } else if (const ConstantArray *CA = dyn_cast(CV)) { + // handle things like [i8* bitcast (<{ i32, i32, i32 }>* @_ZTISt9bad_alloc to i8*)] which clang can emit for landingpads + assert(CA->getNumOperands() == 1); + CV = CA->getOperand(0); + const ConstantExpr *CE = cast(CV); + CV = CE->getOperand(0); // ignore bitcast + return getConstant(CV); + } else if (const BlockAddress *BA = dyn_cast(CV)) { + return utostr(getBlockAddress(BA)); + } else if (const ConstantExpr *CE = dyn_cast(CV)) { + std::string Code; + raw_string_ostream CodeStream(Code); + CodeStream << '('; + generateExpression(CE, CodeStream); + CodeStream << ')'; + return CodeStream.str(); + } else { + CV->dump(); + llvm_unreachable("Unsupported constant kind"); + } +} + +template +class VectorOperandAccessor +{ +public: + static Constant *getOperand(const VectorType *C, unsigned index); +}; +template<> Constant *VectorOperandAccessor::getOperand(const ConstantVector *C, unsigned index) { return C->getOperand(index); } +template<> Constant *VectorOperandAccessor::getOperand(const ConstantDataVector *C, unsigned index) { return C->getElementAsConstant(index); } + +template +std::string JSWriter::getConstantVector(const ConstantVectorType *C) { + checkVectorType(C->getType()); + unsigned NumElts = cast(C->getType())->getNumElements(); + + bool isInt = C->getType()->getElementType()->isIntegerTy(); + + // Test if this is a float vector, but it contains NaNs that have non-canonical bits that can't be represented as nans. + // These must be casted via an integer vector. + bool hasSpecialNaNs = false; + + if (!isInt) { + const APInt nan32(32, 0x7FC00000); + const APInt nan64(64, 0x7FF8000000000000ULL); + + for (unsigned i = 0; i < NumElts; ++i) { + Constant *CV = VectorOperandAccessor::getOperand(C, i); + const ConstantFP *CFP = dyn_cast(CV); + if (CFP) { + const APFloat &flt = CFP->getValueAPF(); + if (flt.getCategory() == APFloat::fcNaN) { + APInt i = flt.bitcastToAPInt(); + if ((i.getBitWidth() == 32 && i != nan32) || (i.getBitWidth() == 64 && i != nan64)) { + hasSpecialNaNs = true; + break; + } + } + } + } + } + + AsmCast cast = hasSpecialNaNs ? ASM_FORCE_FLOAT_AS_INTBITS : 0; + + // Check for a splat. 
+ bool allEqual = true; + std::string op0 = getConstant(VectorOperandAccessor::getOperand(C, 0), cast); + for (unsigned i = 1; i < NumElts; ++i) { + if (getConstant(VectorOperandAccessor::getOperand(C, i), cast) != op0) { + allEqual = false; + break; + } + } + if (allEqual) { + if (!hasSpecialNaNs) { + return std::string("SIMD_") + SIMDType(C->getType()) + "_splat(" + ensureFloat(op0, !isInt) + ')'; + } else { + VectorType *IntTy = VectorType::getInteger(C->getType()); + checkVectorType(IntTy); + return getSIMDCast(IntTy, C->getType(), std::string("SIMD_") + SIMDType(IntTy) + "_splat(" + op0 + ')', true); + } + } + + int primSize = C->getType()->getElementType()->getPrimitiveSizeInBits(); + const int SIMDJsRetNumElements = 128 / primSize; + + std::string c; + if (!hasSpecialNaNs) { + c = std::string("SIMD_") + SIMDType(C->getType()) + '(' + ensureFloat(op0, !isInt); + for (unsigned i = 1; i < NumElts; ++i) { + c += ',' + ensureFloat(getConstant(VectorOperandAccessor::getOperand(C, i)), !isInt); + } + // Promote smaller than 128-bit vector types to 128-bit since smaller ones do not exist in SIMD.js. (pad with zero lanes) + for (int i = NumElts; i < SIMDJsRetNumElements; ++i) { + c += ',' + ensureFloat(isInt ? "0" : "+0", !isInt); + } + + return c + ')'; + } else { + VectorType *IntTy = VectorType::getInteger(C->getType()); + checkVectorType(IntTy); + c = std::string("SIMD_") + SIMDType(IntTy) + '(' + op0; + for (unsigned i = 1; i < NumElts; ++i) { + c += ',' + getConstant(VectorOperandAccessor::getOperand(C, i), ASM_FORCE_FLOAT_AS_INTBITS); + } + + // Promote smaller than 128-bit vector types to 128-bit since smaller ones do not exist in SIMD.js. (pad with zero lanes) + for (int i = NumElts; i < SIMDJsRetNumElements; ++i) { + c += ',' + ensureFloat(isInt ? "0" : "+0", !isInt); + } + + return getSIMDCast(IntTy, C->getType(), c + ")", true); + } +} + +std::string JSWriter::getValueAsStr(const Value* V, AsmCast sign) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + + if (const Constant *CV = dyn_cast(V)) { + return getConstant(CV, sign); + } else { + return getJSName(V); + } +} + +std::string JSWriter::getValueAsCastStr(const Value* V, AsmCast sign) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + + if (isa(V) || isa(V)) { + return getConstant(cast(V), sign); + } else { + return getCast(getValueAsStr(V), V->getType(), sign); + } +} + +std::string JSWriter::getValueAsParenStr(const Value* V) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + + if (const Constant *CV = dyn_cast(V)) { + return getConstant(CV); + } else { + return "(" + getValueAsStr(V) + ")"; + } +} + +std::string JSWriter::getValueAsCastParenStr(const Value* V, AsmCast sign) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + + if (isa(V) || isa(V) || isa(V)) { + return getConstant(cast(V), sign); + } else { + return "(" + getCast(getValueAsStr(V), V->getType(), sign) + ")"; + } +} + +void JSWriter::generateInsertElementExpression(const InsertElementInst *III, raw_string_ostream& Code) { + // LLVM has no vector type constructor operator; it uses chains of + // insertelement instructions instead. It also has no splat operator; it + // uses an insertelement followed by a shuffle instead. If this insertelement + // is part of either such sequence, skip it for now; we'll process it when we + // reach the end. 
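+  // Roughly, the splat idiom looks like
+  //   %t = insertelement <4 x float> undef, float %x, i32 0
+  //   %s = shufflevector <4 x float> %t, <4 x float> undef, <4 x i32> zeroinitializer
+  // and we want to emit nothing here and a single _splat() when we reach the
+  // shuffle.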
+ if (III->hasOneUse()) { + const User *U = *III->user_begin(); + if (isa(U)) + return; + if (isa(U) && + isa(cast(U)->getMask()) && + !isa(III->getOperand(0)) && + isa(III->getOperand(2)) && + cast(III->getOperand(2))->isZero()) + { + return; + } + } + + // This insertelement is at the base of a chain of single-user insertelement + // instructions. Collect all the inserted elements so that we can categorize + // the chain as either a splat, a constructor, or an actual series of inserts. + VectorType *VT = III->getType(); + checkVectorType(VT); + unsigned NumElems = VT->getNumElements(); + unsigned NumInserted = 0; + SmallVector Operands(NumElems, NULL); + const Value *Splat = III->getOperand(1); + const Value *Base = III; + do { + const InsertElementInst *BaseIII = cast(Base); + const ConstantInt *IndexInt = cast(BaseIII->getOperand(2)); + unsigned Index = IndexInt->getZExtValue(); + if (Operands[Index] == NULL) + ++NumInserted; + Value *Op = BaseIII->getOperand(1); + if (Operands[Index] == NULL) { + Operands[Index] = Op; + if (Op != Splat) + Splat = NULL; + } + Base = BaseIII->getOperand(0); + } while (Base->hasOneUse() && isa(Base)); + + // Emit code for the chain. + Code << getAssignIfNeeded(III); + if (NumInserted == NumElems) { + if (Splat) { + // Emit splat code. + if (VT->getElementType()->isIntegerTy()) { + Code << std::string("SIMD_") + SIMDType(VT) + "_splat(" << getValueAsStr(Splat) << ")"; + } else { + std::string operand = getValueAsStr(Splat); + if (!PreciseF32) { + // SIMD_Float32x4_splat requires an actual float32 even if we're + // otherwise not being precise about it. + operand = "Math_fround(" + operand + ")"; + } + Code << std::string("SIMD_") + SIMDType(VT) + "_splat(" << operand << ")"; + } + } else { + // Emit constructor code. + Code << std::string("SIMD_") + SIMDType(VT) + '('; + for (unsigned Index = 0; Index < NumElems; ++Index) { + if (Index != 0) + Code << ", "; + std::string operand = getValueAsStr(Operands[Index]); + if (!PreciseF32 && VT->getElementType()->isFloatTy()) { + // SIMD_Float32x4_splat requires an actual float32 even if we're + // otherwise not being precise about it. + operand = "Math_fround(" + operand + ")"; + } + Code << operand; + } + Code << ")"; + } + } else { + // Emit a series of inserts. 
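+    // e.g. if only lane 2 of a Float32x4 value was written, this emits
+    //   SIMD_Float32x4_replaceLane(<base>, 2, <value>)
+    // with one nested _replaceLane() call per lane that was inserted.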
+ std::string Result = getValueAsStr(Base); + for (unsigned Index = 0; Index < NumElems; ++Index) { + if (!Operands[Index]) + continue; + std::string operand = getValueAsStr(Operands[Index]); + if (!PreciseF32 && VT->getElementType()->isFloatTy()) { + operand = "Math_fround(" + operand + ")"; + } + Result = std::string("SIMD_") + SIMDType(VT) + "_replaceLane(" + Result + ',' + utostr(Index) + ',' + operand + ')'; + } + Code << Result; + } +} + +void JSWriter::generateExtractElementExpression(const ExtractElementInst *EEI, raw_string_ostream& Code) { + VectorType *VT = cast(EEI->getVectorOperand()->getType()); + checkVectorType(VT); + const ConstantInt *IndexInt = dyn_cast(EEI->getIndexOperand()); + if (IndexInt) { + unsigned Index = IndexInt->getZExtValue(); + Code << getAssignIfNeeded(EEI); + std::string OperandCode; + raw_string_ostream CodeStream(OperandCode); + CodeStream << std::string("SIMD_") << SIMDType(VT) << "_extractLane(" << getValueAsStr(EEI->getVectorOperand()) << ',' << std::to_string(Index) << ')'; + Code << getCast(CodeStream.str(), EEI->getType()); + return; + } + + error("SIMD extract element with non-constant index not implemented yet"); +} + + +std::string castIntVecToBoolVec(int numElems, const std::string &str) +{ + int elemWidth = 128 / numElems; + std::string simdType = "SIMD_Int" + std::to_string(elemWidth) + "x" + std::to_string(numElems); + return simdType + "_notEqual(" + str + ", " + simdType + "_splat(0))"; +} + +std::string JSWriter::getSIMDCast(VectorType *fromType, VectorType *toType, const std::string &valueStr, bool signExtend) +{ + bool toInt = toType->getElementType()->isIntegerTy(); + bool fromInt = fromType->getElementType()->isIntegerTy(); + int fromPrimSize = fromType->getElementType()->getPrimitiveSizeInBits(); + int toPrimSize = toType->getElementType()->getPrimitiveSizeInBits(); + + if (fromInt == toInt && fromPrimSize == toPrimSize) { + // To and from are the same types, no cast needed. + return valueStr; + } + + // Promote smaller than 128-bit vector types to 128-bit since smaller ones do not exist in SIMD.js. (pad with zero lanes) + int toNumElems = 128 / toPrimSize; + + bool fromIsBool = (fromInt && fromPrimSize == 1); + bool toIsBool = (toInt && toPrimSize == 1); + if (fromIsBool && !toIsBool) { // Casting from bool vector to a bit vector looks more complicated (e.g. Bool32x4 to Int32x4) + return castBoolVecToIntVec(toNumElems, valueStr, signExtend); + } + + if (fromType->getBitWidth() != toType->getBitWidth() && !fromIsBool && !toIsBool) { + error("Invalid SIMD cast between items of different bit sizes!"); + } + + return std::string("SIMD_") + SIMDType(toType) + "_from" + SIMDType(fromType) + "Bits(" + valueStr + ")"; +} + +void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw_string_ostream& Code) { + Code << getAssignIfNeeded(SVI); + + // LLVM has no splat operator, so it makes do by using an insert and a + // shuffle. If that's what this shuffle is doing, the code in + // generateInsertElementExpression will have also detected it and skipped + // emitting the insert, so we can just emit a splat here. 
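+  // i.e. for the insert+shuffle splat idiom, the whole pair collapses to a
+  // single SIMD_<type>_splat(<scalar>) call emitted here.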
+ if (isa(SVI->getMask()) && + isa(SVI->getOperand(0))) + { + InsertElementInst *IEI = cast(SVI->getOperand(0)); + if (ConstantInt *CI = dyn_cast(IEI->getOperand(2))) { + if (CI->isZero()) { + std::string operand = getValueAsStr(IEI->getOperand(1)); + if (!PreciseF32 && SVI->getType()->getElementType()->isFloatTy()) { + // SIMD_Float32x4_splat requires an actual float32 even if we're + // otherwise not being precise about it. + operand = "Math_fround(" + operand + ")"; + } + Code << "SIMD_" << SIMDType(SVI->getType()) << "_splat(" << operand << ')'; + return; + } + } + } + + // Check whether can generate SIMD.js swizzle or shuffle. + std::string A = getValueAsStr(SVI->getOperand(0)); + std::string B = getValueAsStr(SVI->getOperand(1)); + VectorType *op0 = cast(SVI->getOperand(0)->getType()); + int OpNumElements = op0->getNumElements(); + int ResultNumElements = SVI->getType()->getNumElements(); + // Promote smaller than 128-bit vector types to 128-bit since smaller ones do not exist in SIMD.js. (pad with zero lanes) + int SIMDJsRetNumElements = 128 / cast(SVI->getType())->getElementType()->getPrimitiveSizeInBits(); + int SIMDJsOp0NumElements = 128 / op0->getElementType()->getPrimitiveSizeInBits(); + bool swizzleA = true; + bool swizzleB = true; + for(int i = 0; i < ResultNumElements; ++i) { + if (SVI->getMaskValue(i) >= OpNumElements) swizzleA = false; + if (SVI->getMaskValue(i) < OpNumElements) swizzleB = false; + } + assert(!(swizzleA && swizzleB)); + if (swizzleA || swizzleB) { + std::string T = (swizzleA ? A : B); + Code << "SIMD_" << SIMDType(SVI->getType()) << "_swizzle(" << T; + int i = 0; + for (; i < ResultNumElements; ++i) { + Code << ", "; + int Mask = SVI->getMaskValue(i); + if (Mask < 0) { + Code << 0; + } else if (Mask < OpNumElements) { + Code << Mask; + } else { + assert(Mask < OpNumElements * 2); + Code << (Mask-OpNumElements); + } + } + // Promote smaller than 128-bit vector types to 128-bit since smaller ones do not exist in SIMD.js. (pad with zero lanes) + for(int i = ResultNumElements; i < SIMDJsRetNumElements; ++i) { + Code << ", 0"; + } + Code << ")"; + return; + } + + // Emit a fully-general shuffle. + Code << "SIMD_" << SIMDType(SVI->getType()) << "_shuffle("; + + Code << getSIMDCast(cast(SVI->getOperand(0)->getType()), SVI->getType(), A, true) << ", " + << getSIMDCast(cast(SVI->getOperand(1)->getType()), SVI->getType(), B, true) << ", "; + + SmallVector Indices; + SVI->getShuffleMask(Indices); + for (unsigned int i = 0; i < Indices.size(); ++i) { + if (i != 0) + Code << ", "; + int Mask = Indices[i]; + if (Mask < 0) + Code << 0; + else if (Mask < OpNumElements) + Code << Mask; + else + Code << (Mask + SIMDJsOp0NumElements - OpNumElements); // Fix up indices to second operand, since the first operand has potentially different number of lanes in SIMD.js compared to LLVM. + } + + // Promote smaller than 128-bit vector types to 128-bit since smaller ones do not exist in SIMD.js. 
(pad with zero lanes) + for(int i = Indices.size(); i < SIMDJsRetNumElements; ++i) { + Code << ", 0"; + } + + Code << ')'; +} + +void JSWriter::generateICmpExpression(const ICmpInst *I, raw_string_ostream& Code) { + bool Invert = false; + const char *Name; + switch (cast(I)->getPredicate()) { + case ICmpInst::ICMP_EQ: Name = "equal"; break; + case ICmpInst::ICMP_NE: Name = "equal"; Invert = true; break; + case ICmpInst::ICMP_SLE: Name = "greaterThan"; Invert = true; break; + case ICmpInst::ICMP_SGE: Name = "lessThan"; Invert = true; break; + case ICmpInst::ICMP_ULE: Name = "unsignedLessThanOrEqual"; break; + case ICmpInst::ICMP_UGE: Name = "unsignedGreaterThanOrEqual"; break; + case ICmpInst::ICMP_ULT: Name = "unsignedLessThan"; break; + case ICmpInst::ICMP_SLT: Name = "lessThan"; break; + case ICmpInst::ICMP_UGT: Name = "unsignedGreaterThan"; break; + case ICmpInst::ICMP_SGT: Name = "greaterThan"; break; + default: I->dump(); error("invalid vector icmp"); break; + } + + checkVectorType(I->getOperand(0)->getType()); + checkVectorType(I->getOperand(1)->getType()); + + Code << getAssignIfNeeded(I); + + if (Invert) + Code << "SIMD_" << SIMDType(cast(I->getType())) << "_not("; + + Code << "SIMD_" << SIMDType(cast(I->getOperand(0)->getType())) << '_' << Name << '(' + << getValueAsStr(I->getOperand(0)) << ',' << getValueAsStr(I->getOperand(1)) << ')'; + + if (Invert) + Code << ')'; +} + +void JSWriter::generateFCmpExpression(const FCmpInst *I, raw_string_ostream& Code) { + const char *Name; + bool Invert = false; + VectorType *VT = cast(I->getType()); + checkVectorType(VT); + switch (cast(I)->getPredicate()) { + case ICmpInst::FCMP_FALSE: + Code << getAssignIfNeeded(I) << "SIMD_" << SIMDType(cast(I->getType())) << "_splat(" << ensureFloat("0", true) << ')'; + return; + case ICmpInst::FCMP_TRUE: + Code << getAssignIfNeeded(I) << "SIMD_" << SIMDType(cast(I->getType())) << "_splat(" << ensureFloat("-1", true) << ')'; + return; + case ICmpInst::FCMP_ONE: + checkVectorType(I->getOperand(0)->getType()); + checkVectorType(I->getOperand(1)->getType()); + Code << getAssignIfNeeded(I) + << castIntVecToBoolVec(VT->getNumElements(), std::string("SIMD_") + SIMDType(cast(I->getType())) + "_and(SIMD_" + SIMDType(cast(I->getType())) + "_and(" + + castBoolVecToIntVec(VT->getNumElements(), std::string("SIMD_") + SIMDType(cast(I->getOperand(0)->getType())) + "_equal(" + getValueAsStr(I->getOperand(0)) + ',' + getValueAsStr(I->getOperand(0)) + ')', true) + ',' + + castBoolVecToIntVec(VT->getNumElements(), std::string("SIMD_") + SIMDType(cast(I->getOperand(1)->getType())) + "_equal(" + getValueAsStr(I->getOperand(1)) + ',' + getValueAsStr(I->getOperand(1)) + ')', true) + ',' + + castBoolVecToIntVec(VT->getNumElements(), std::string("SIMD_") + SIMDType(cast(I->getOperand(0)->getType())) + "_notEqual(" + getValueAsStr(I->getOperand(0)) + ',' + getValueAsStr(I->getOperand(1)) + ')', true) + ')'); + return; + case ICmpInst::FCMP_UEQ: + checkVectorType(I->getOperand(0)->getType()); + checkVectorType(I->getOperand(1)->getType()); + Code << getAssignIfNeeded(I) + << castIntVecToBoolVec(VT->getNumElements(), std::string("SIMD_") + SIMDType(cast(I->getType())) + "_or(SIMD_" + SIMDType(cast(I->getType())) + "_or(" + + castBoolVecToIntVec(VT->getNumElements(), std::string("SIMD_") + SIMDType(cast(I->getOperand(0)->getType())) + "_notEqual(" + getValueAsStr(I->getOperand(0)) + ',' + getValueAsStr(I->getOperand(0)) + ')', true) + ',' + + castBoolVecToIntVec(VT->getNumElements(), std::string("SIMD_") + 
SIMDType(cast(I->getOperand(1)->getType())) + "_notEqual(" + getValueAsStr(I->getOperand(1)) + ',' + getValueAsStr(I->getOperand(1)) + ')', true) + ',' + + castBoolVecToIntVec(VT->getNumElements(), std::string("SIMD_") + SIMDType(cast(I->getOperand(0)->getType())) + "_equal(" + getValueAsStr(I->getOperand(0)) + ',' + getValueAsStr(I->getOperand(1)) + ')', true) + ')'); + return; + case FCmpInst::FCMP_ORD: + checkVectorType(I->getOperand(0)->getType()); + checkVectorType(I->getOperand(1)->getType()); + Code << getAssignIfNeeded(I) + << "SIMD_" << SIMDType(cast(I->getType())) << "_and(" + << "SIMD_" << SIMDType(cast(I->getOperand(0)->getType())) << "_equal(" << getValueAsStr(I->getOperand(0)) << ',' << getValueAsStr(I->getOperand(0)) << ")," + << "SIMD_" << SIMDType(cast(I->getOperand(1)->getType())) << "_equal(" << getValueAsStr(I->getOperand(1)) << ',' << getValueAsStr(I->getOperand(1)) << "))"; + return; + + case FCmpInst::FCMP_UNO: + checkVectorType(I->getOperand(0)->getType()); + checkVectorType(I->getOperand(1)->getType()); + Code << getAssignIfNeeded(I) + << "SIMD_" << SIMDType(cast(I->getType())) << "_or(" + << "SIMD_" << SIMDType(cast(I->getOperand(0)->getType())) << "_notEqual(" << getValueAsStr(I->getOperand(0)) << ',' << getValueAsStr(I->getOperand(0)) << ")," + << "SIMD_" << SIMDType(cast(I->getOperand(1)->getType())) << "_notEqual(" << getValueAsStr(I->getOperand(1)) << ',' << getValueAsStr(I->getOperand(1)) << "))"; + return; + + case ICmpInst::FCMP_OEQ: Name = "equal"; break; + case ICmpInst::FCMP_OGT: Name = "greaterThan"; break; + case ICmpInst::FCMP_OGE: Name = "greaterThanOrEqual"; break; + case ICmpInst::FCMP_OLT: Name = "lessThan"; break; + case ICmpInst::FCMP_OLE: Name = "lessThanOrEqual"; break; + case ICmpInst::FCMP_UGT: Name = "lessThanOrEqual"; Invert = true; break; + case ICmpInst::FCMP_UGE: Name = "lessThan"; Invert = true; break; + case ICmpInst::FCMP_ULT: Name = "greaterThanOrEqual"; Invert = true; break; + case ICmpInst::FCMP_ULE: Name = "greaterThan"; Invert = true; break; + case ICmpInst::FCMP_UNE: Name = "notEqual"; break; + default: I->dump(); error("invalid vector fcmp"); break; + } + + checkVectorType(I->getOperand(0)->getType()); + checkVectorType(I->getOperand(1)->getType()); + + Code << getAssignIfNeeded(I); + + if (Invert) + Code << "SIMD_" << SIMDType(cast(I->getType())) << "_not("; + + Code << "SIMD_" << SIMDType(cast(I->getOperand(0)->getType())) << "_" << Name << "(" + << getValueAsStr(I->getOperand(0)) << ", " << getValueAsStr(I->getOperand(1)) << ")"; + + if (Invert) + Code << ")"; +} + +static const Value *getElement(const Value *V, unsigned i) { + if (const InsertElementInst *II = dyn_cast(V)) { + if (ConstantInt *CI = dyn_cast(II->getOperand(2))) { + if (CI->equalsInt(i)) + return II->getOperand(1); + } + return getElement(II->getOperand(0), i); + } + return NULL; +} + +static const Value *getSplatValue(const Value *V) { + if (const Constant *C = dyn_cast(V)) + return C->getSplatValue(); + + VectorType *VTy = cast(V->getType()); + const Value *Result = NULL; + for (unsigned i = 0; i < VTy->getNumElements(); ++i) { + const Value *E = getElement(V, i); + if (!E) + return NULL; + if (!Result) + Result = E; + else if (Result != E) + return NULL; + } + return Result; + +} + +void JSWriter::generateShiftExpression(const BinaryOperator *I, raw_string_ostream& Code) { + // If we're shifting every lane by the same amount (shifting by a splat value + // then we can use a ByScalar shift. 
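An aside before the shift emitter continues below: the splat detection just above (getElement walking the insertelement chain, getSplatValue requiring every lane to agree) is what decides between a single ByScalar SIMD.js shift and a per-lane unroll. A minimal standalone sketch of that decision, restated over plain integers rather than LLVM IR — splatShiftCount is a hypothetical name and is not part of the patch:

#include <cassert>
#include <optional>
#include <vector>

// Hypothetical helper: returns the common shift count if all lanes agree,
// mirroring the "is the shift amount a splat?" test made by the emitter.
std::optional<int> splatShiftCount(const std::vector<int> &laneCounts) {
  if (laneCounts.empty()) return std::nullopt;
  for (int c : laneCounts)
    if (c != laneCounts.front()) return std::nullopt;  // lanes disagree -> unroll per lane
  return laneCounts.front();
}

int main() {
  assert(splatShiftCount({3, 3, 3, 3}).value() == 3);  // splat -> one shiftLeftByScalar(v, 3)
  assert(!splatShiftCount({1, 2, 3, 4}).has_value());  // no splat -> unrolled scalar shifts
}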
+ const Value *Count = I->getOperand(1); + if (const Value *Splat = getSplatValue(Count)) { + Code << getAssignIfNeeded(I) << "SIMD_" << SIMDType(cast(I->getType())) << '_'; + if (I->getOpcode() == Instruction::AShr) + Code << "shiftRightArithmeticByScalar"; + else if (I->getOpcode() == Instruction::LShr) + Code << "shiftRightLogicalByScalar"; + else + Code << "shiftLeftByScalar"; + Code << "(" << getValueAsStr(I->getOperand(0)) << ", " << getValueAsStr(Splat) << ")"; + return; + } + + // SIMD.js does not currently have vector-vector shifts. + generateUnrolledExpression(I, Code); +} + +void JSWriter::generateUnrolledExpression(const User *I, raw_string_ostream& Code) { + VectorType *VT = cast(I->getType()); + + Code << getAssignIfNeeded(I); + + Code << "SIMD_" << SIMDType(VT) << '('; + + int primSize = VT->getElementType()->getPrimitiveSizeInBits(); + int numElems = VT->getNumElements(); + if (primSize == 32 && numElems < 4) { + report_fatal_error("generateUnrolledExpression not expected to handle less than four-wide 32-bit vector types!"); + } + + for (unsigned Index = 0; Index < VT->getNumElements(); ++Index) { + if (Index != 0) + Code << ", "; + if (!PreciseF32 && VT->getElementType()->isFloatTy()) { + Code << "Math_fround("; + } + std::string Extract; + if (VT->getElementType()->isIntegerTy()) { + Extract = "SIMD_Int32x4_extractLane("; + UsesSIMDInt32x4 = true; + } else { + Extract = "SIMD_Float32x4_extractLane("; + UsesSIMDFloat32x4 = true; + } + switch (Operator::getOpcode(I)) { + case Instruction::SDiv: + Code << "(" << Extract << getValueAsStr(I->getOperand(0)) << "," << Index << ")|0)" + " / " + "(" << Extract << getValueAsStr(I->getOperand(1)) << "," << Index << ")|0)" + "|0"; + break; + case Instruction::UDiv: + Code << "(" << Extract << getValueAsStr(I->getOperand(0)) << "," << Index << ")>>>0)" + " / " + "(" << Extract << getValueAsStr(I->getOperand(1)) << "," << Index << ")>>>0)" + ">>>0"; + break; + case Instruction::SRem: + Code << "(" << Extract << getValueAsStr(I->getOperand(0)) << "," << Index << ")|0)" + " % " + "(" << Extract << getValueAsStr(I->getOperand(1)) << "," << Index << ")|0)" + "|0"; + break; + case Instruction::URem: + Code << "(" << Extract << getValueAsStr(I->getOperand(0)) << "," << Index << ")>>>0)" + " % " + "(" << Extract << getValueAsStr(I->getOperand(1)) << "," << Index << ")>>>0)" + ">>>0"; + break; + case Instruction::AShr: + Code << "(" << Extract << getValueAsStr(I->getOperand(0)) << "," << Index << ")|0)" + " >> " + "(" << Extract << getValueAsStr(I->getOperand(1)) << "," << Index << ")|0)" + "|0"; + break; + case Instruction::LShr: + Code << "(" << Extract << getValueAsStr(I->getOperand(0)) << "," << Index << ")|0)" + " >>> " + "(" << Extract << getValueAsStr(I->getOperand(1)) << "," << Index << ")|0)" + "|0"; + break; + case Instruction::Shl: + Code << "(" << Extract << getValueAsStr(I->getOperand(0)) << "," << Index << ")|0)" + " << " + "(" << Extract << getValueAsStr(I->getOperand(1)) << "," << Index << ")|0)" + "|0"; + break; + default: I->dump(); error("invalid unrolled vector instr"); break; + } + if (!PreciseF32 && VT->getElementType()->isFloatTy()) { + Code << ")"; + } + } + + Code << ")"; +} + +bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) { + VectorType *VT; + if ((VT = dyn_cast(I->getType()))) { + // vector-producing instructions + checkVectorType(VT); + std::string simdType = SIMDType(VT); + + switch (Operator::getOpcode(I)) { + default: I->dump(); error("invalid vector instr"); break; + case 
Instruction::Call: // return value is just a SIMD value, no special handling + return false; + case Instruction::PHI: // handled separately - we push them back into the relooper branchings + break; + case Instruction::ICmp: + generateICmpExpression(cast(I), Code); + break; + case Instruction::FCmp: + generateFCmpExpression(cast(I), Code); + break; + case Instruction::SExt: + assert(cast(I->getOperand(0)->getType())->getElementType()->isIntegerTy(1) && + "sign-extension from vector of other than i1 not yet supported"); + Code << getAssignIfNeeded(I) << getSIMDCast(cast(I->getOperand(0)->getType()), VT, getValueAsStr(I->getOperand(0)), true /* signExtend */); + break; + case Instruction::ZExt: + assert(cast(I->getOperand(0)->getType())->getElementType()->isIntegerTy(1) && + "sign-extension from vector of other than i1 not yet supported"); + Code << getAssignIfNeeded(I) << getSIMDCast(cast(I->getOperand(0)->getType()), VT, getValueAsStr(I->getOperand(0)), false /* signExtend */); + break; + case Instruction::Select: + // Since we represent vectors of i1 as vectors of sign extended wider integers, + // selecting on them is just an elementwise select. + if (isa(I->getOperand(0)->getType())) { + if (cast(I->getType())->getElementType()->isIntegerTy()) { + Code << getAssignIfNeeded(I) << "SIMD_" << simdType << "_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break; + } else { + Code << getAssignIfNeeded(I) << "SIMD_" << simdType << "_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break; + } + return true; + } + // Otherwise we have a scalar condition, so it's a ?: operator. + return false; + case Instruction::FAdd: Code << getAssignIfNeeded(I) << "SIMD_" << simdType << "_add(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break; + case Instruction::FMul: Code << getAssignIfNeeded(I) << "SIMD_" << simdType << "_mul(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break; + case Instruction::FDiv: Code << getAssignIfNeeded(I) << "SIMD_" << simdType << "_div(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break; + case Instruction::Add: Code << getAssignIfNeeded(I) << "SIMD_" << simdType << "_add(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break; + case Instruction::Sub: Code << getAssignIfNeeded(I) << "SIMD_" << simdType << "_sub(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break; + case Instruction::Mul: Code << getAssignIfNeeded(I) << "SIMD_" << simdType << "_mul(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break; + case Instruction::And: Code << getAssignIfNeeded(I) << "SIMD_" << simdType << "_and(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break; + case Instruction::Or: Code << getAssignIfNeeded(I) << "SIMD_" << simdType << "_or(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break; + case Instruction::Xor: + // LLVM represents a not(x) as -1 ^ x + Code << getAssignIfNeeded(I); + if (BinaryOperator::isNot(I)) { + Code << "SIMD_" << simdType << "_not(" << getValueAsStr(BinaryOperator::getNotArgument(I)) << ")"; break; + } else { + Code << "SIMD_" << simdType << "_xor(" << getValueAsStr(I->getOperand(0)) 
<< "," << getValueAsStr(I->getOperand(1)) << ")"; break; + } + break; + case Instruction::FSub: + // LLVM represents an fneg(x) as -0.0 - x. + Code << getAssignIfNeeded(I); + if (BinaryOperator::isFNeg(I)) { + Code << "SIMD_" << simdType << "_neg(" << getValueAsStr(BinaryOperator::getFNegArgument(I)) << ")"; + } else { + Code << "SIMD_" << simdType << "_sub(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; + } + break; + case Instruction::BitCast: { + case Instruction::SIToFP: + Code << getAssignIfNeeded(I); + Code << getSIMDCast(cast(I->getOperand(0)->getType()), cast(I->getType()), getValueAsStr(I->getOperand(0)), true); + break; + } + case Instruction::Load: { + const LoadInst *LI = cast(I); + const Value *P = LI->getPointerOperand(); + std::string PS = getValueAsStr(P); + const char *load = "_load"; + if (VT->getElementType()->getPrimitiveSizeInBits() == 32) { + switch (VT->getNumElements()) { + case 1: load = "_load1"; break; + case 2: load = "_load2"; break; + case 3: load = "_load3"; break; + default: break; + } + } + Code << getAssignIfNeeded(I) << "SIMD_" << simdType << load << "(HEAPU8, " << PS << ")"; + break; + } + case Instruction::InsertElement: + generateInsertElementExpression(cast(I), Code); + break; + case Instruction::ShuffleVector: + generateShuffleVectorExpression(cast(I), Code); + break; + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + // The SIMD API does not currently support these operations directly. + // Emulate them using scalar operations (which is essentially the same + // as what would happen if the API did support them, since hardware + // doesn't support them). + generateUnrolledExpression(I, Code); + break; + case Instruction::AShr: + case Instruction::LShr: + case Instruction::Shl: + generateShiftExpression(cast(I), Code); + break; + } + return true; + } else { + // vector-consuming instructions + if (Operator::getOpcode(I) == Instruction::Store && (VT = dyn_cast(I->getOperand(0)->getType())) && VT->isVectorTy()) { + checkVectorType(VT); + std::string simdType = SIMDType(VT); + const StoreInst *SI = cast(I); + const Value *P = SI->getPointerOperand(); + std::string PS = "temp_" + simdType + "_ptr"; + std::string VS = getValueAsStr(SI->getValueOperand()); + Code << getAdHocAssign(PS, P->getType()) << getValueAsStr(P) << ';'; + const char *store = "_store"; + if (VT->getElementType()->getPrimitiveSizeInBits() == 32) { + switch (VT->getNumElements()) { + case 1: store = "_store1"; break; + case 2: store = "_store2"; break; + case 3: store = "_store3"; break; + default: break; + } + } + Code << "SIMD_" << simdType << store << "(HEAPU8, " << PS << ", " << VS << ")"; + return true; + } else if (Operator::getOpcode(I) == Instruction::ExtractElement) { + generateExtractElementExpression(cast(I), Code); + return true; + } + } + return false; +} + +static uint64_t LSBMask(unsigned numBits) { + return numBits >= 64 ? 0xFFFFFFFFFFFFFFFFULL : (1ULL << numBits) - 1; +} + +// Given a string which contains a printed base address, print a new string +// which contains that address plus the given offset. +static std::string AddOffset(const std::string &base, int32_t Offset) { + if (base.empty()) + return itostr(Offset); + + if (Offset == 0) + return base; + + return "((" + base + ") + " + itostr(Offset) + "|0)"; +} + +// Generate code for and operator, either an Instruction or a ConstantExpr. 
+void JSWriter::generateExpression(const User *I, raw_string_ostream& Code) { + // To avoid emiting code and variables for the no-op pointer bitcasts + // and all-zero-index geps that LLVM needs to satisfy its type system, we + // call stripPointerCasts() on all values before translating them. This + // includes bitcasts whose only use is lifetime marker intrinsics. + assert(I == I->stripPointerCasts()); + + Type *T = I->getType(); + if (T->isIntegerTy() && T->getIntegerBitWidth() > 32) { + errs() << *I << "\n"; + report_fatal_error("legalization problem"); + } + + if (!generateSIMDExpression(I, Code)) switch (Operator::getOpcode(I)) { + default: { + I->dump(); + error("Invalid instruction in JSWriter::generateExpression"); + break; + } + case Instruction::Ret: { + const ReturnInst* ret = cast(I); + const Value *RV = ret->getReturnValue(); + if (StackBumped) { + Code << "STACKTOP = sp;"; + } + Code << "return"; + if (RV != NULL) { + Code << " " << getValueAsCastParenStr(RV, ASM_NONSPECIFIC | ASM_MUST_CAST); + } + break; + } + case Instruction::Br: + case Instruction::IndirectBr: + case Instruction::Switch: return; // handled while relooping + case Instruction::Unreachable: { + // Typically there should be an abort right before these, so we don't emit any code // TODO: when ASSERTIONS are on, emit abort(0) + Code << "// unreachable"; + break; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr:{ + Code << getAssignIfNeeded(I); + unsigned opcode = Operator::getOpcode(I); + switch (opcode) { + case Instruction::Add: Code << getParenCast( + getValueAsParenStr(I->getOperand(0)) + + " + " + + getValueAsParenStr(I->getOperand(1)), + I->getType() + ); break; + case Instruction::Sub: Code << getParenCast( + getValueAsParenStr(I->getOperand(0)) + + " - " + + getValueAsParenStr(I->getOperand(1)), + I->getType() + ); break; + case Instruction::Mul: Code << getIMul(I->getOperand(0), I->getOperand(1)); break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: Code << "(" << + getValueAsCastParenStr(I->getOperand(0), (opcode == Instruction::SDiv || opcode == Instruction::SRem) ? ASM_SIGNED : ASM_UNSIGNED) << + ((opcode == Instruction::UDiv || opcode == Instruction::SDiv) ? " / " : " % ") << + getValueAsCastParenStr(I->getOperand(1), (opcode == Instruction::SDiv || opcode == Instruction::SRem) ? 
ASM_SIGNED : ASM_UNSIGNED) << + ")&-1"; break; + case Instruction::And: Code << getValueAsStr(I->getOperand(0)) << " & " << getValueAsStr(I->getOperand(1)); break; + case Instruction::Or: Code << getValueAsStr(I->getOperand(0)) << " | " << getValueAsStr(I->getOperand(1)); break; + case Instruction::Xor: Code << getValueAsStr(I->getOperand(0)) << " ^ " << getValueAsStr(I->getOperand(1)); break; + case Instruction::Shl: { + std::string Shifted = getValueAsStr(I->getOperand(0)) + " << " + getValueAsStr(I->getOperand(1)); + if (I->getType()->getIntegerBitWidth() < 32) { + Shifted = getParenCast(Shifted, I->getType(), ASM_UNSIGNED); // remove bits that are shifted beyond the size of this value + } + Code << Shifted; + break; + } + case Instruction::AShr: + case Instruction::LShr: { + std::string Input = getValueAsStr(I->getOperand(0)); + if (I->getType()->getIntegerBitWidth() < 32) { + Input = '(' + getCast(Input, I->getType(), opcode == Instruction::AShr ? ASM_SIGNED : ASM_UNSIGNED) + ')'; // fill in high bits, as shift needs those and is done in 32-bit + } + Code << Input << (opcode == Instruction::AShr ? " >> " : " >>> ") << getValueAsStr(I->getOperand(1)); + break; + } + + case Instruction::FAdd: Code << ensureFloat(getValueAsStr(I->getOperand(0)) + " + " + getValueAsStr(I->getOperand(1)), I->getType()); break; + case Instruction::FMul: Code << ensureFloat(getValueAsStr(I->getOperand(0)) + " * " + getValueAsStr(I->getOperand(1)), I->getType()); break; + case Instruction::FDiv: Code << ensureFloat(getValueAsStr(I->getOperand(0)) + " / " + getValueAsStr(I->getOperand(1)), I->getType()); break; + case Instruction::FRem: Code << ensureFloat(getValueAsStr(I->getOperand(0)) + " % " + getValueAsStr(I->getOperand(1)), I->getType()); break; + case Instruction::FSub: + // LLVM represents an fneg(x) as -0.0 - x. + if (BinaryOperator::isFNeg(I)) { + Code << ensureFloat("-" + getValueAsStr(BinaryOperator::getFNegArgument(I)), I->getType()); + } else { + Code << ensureFloat(getValueAsStr(I->getOperand(0)) + " - " + getValueAsStr(I->getOperand(1)), I->getType()); + } + break; + default: error("bad binary opcode"); break; + } + break; + } + case Instruction::FCmp: { + unsigned predicate = isa(I) ? + (unsigned)cast(I)->getPredicate() : + (unsigned)cast(I)->getPredicate(); + Code << getAssignIfNeeded(I); + switch (predicate) { + // Comparisons which are simple JS operators. + case FCmpInst::FCMP_OEQ: Code << getValueAsStr(I->getOperand(0)) << " == " << getValueAsStr(I->getOperand(1)); break; + case FCmpInst::FCMP_UNE: Code << getValueAsStr(I->getOperand(0)) << " != " << getValueAsStr(I->getOperand(1)); break; + case FCmpInst::FCMP_OGT: Code << getValueAsStr(I->getOperand(0)) << " > " << getValueAsStr(I->getOperand(1)); break; + case FCmpInst::FCMP_OGE: Code << getValueAsStr(I->getOperand(0)) << " >= " << getValueAsStr(I->getOperand(1)); break; + case FCmpInst::FCMP_OLT: Code << getValueAsStr(I->getOperand(0)) << " < " << getValueAsStr(I->getOperand(1)); break; + case FCmpInst::FCMP_OLE: Code << getValueAsStr(I->getOperand(0)) << " <= " << getValueAsStr(I->getOperand(1)); break; + + // Comparisons which are inverses of JS operators. 
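An aside on the coercions used throughout this switch: the emitter tags operands ASM_SIGNED or ASM_UNSIGNED (which in asm.js terms correspond to |0 and >>>0 views of the same 32-bit value), because signed and unsigned interpretations of one bit pattern give different results for division, remainder, and ordered comparisons. A small check of that premise in plain C++, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t bits = 0xFFFFFFFFu;     // one 32-bit pattern
  int32_t  s = (int32_t)bits;      // signed view:   -1
  uint32_t u = bits;               // unsigned view: 4294967295

  assert(s / 2 == 0);              // SDiv: -1 / 2 == 0
  assert(u / 2 == 2147483647u);    // UDiv: 4294967295 / 2
  assert((s < 1) == true);         // signed compare (ICMP_SLT)
  assert((u < 1u) == false);       // unsigned compare (ICMP_ULT)
}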
+ case FCmpInst::FCMP_UGT: + Code << "!(" << getValueAsStr(I->getOperand(0)) << " <= " << getValueAsStr(I->getOperand(1)) << ")"; + break; + case FCmpInst::FCMP_UGE: + Code << "!(" << getValueAsStr(I->getOperand(0)) << " < " << getValueAsStr(I->getOperand(1)) << ")"; + break; + case FCmpInst::FCMP_ULT: + Code << "!(" << getValueAsStr(I->getOperand(0)) << " >= " << getValueAsStr(I->getOperand(1)) << ")"; + break; + case FCmpInst::FCMP_ULE: + Code << "!(" << getValueAsStr(I->getOperand(0)) << " > " << getValueAsStr(I->getOperand(1)) << ")"; + break; + + // Comparisons which require explicit NaN checks. + case FCmpInst::FCMP_UEQ: + Code << "(" << getValueAsStr(I->getOperand(0)) << " != " << getValueAsStr(I->getOperand(0)) << ") | " << + "(" << getValueAsStr(I->getOperand(1)) << " != " << getValueAsStr(I->getOperand(1)) << ") |" << + "(" << getValueAsStr(I->getOperand(0)) << " == " << getValueAsStr(I->getOperand(1)) << ")"; + break; + case FCmpInst::FCMP_ONE: + Code << "(" << getValueAsStr(I->getOperand(0)) << " == " << getValueAsStr(I->getOperand(0)) << ") & " << + "(" << getValueAsStr(I->getOperand(1)) << " == " << getValueAsStr(I->getOperand(1)) << ") &" << + "(" << getValueAsStr(I->getOperand(0)) << " != " << getValueAsStr(I->getOperand(1)) << ")"; + break; + + // Simple NaN checks. + case FCmpInst::FCMP_ORD: Code << "(" << getValueAsStr(I->getOperand(0)) << " == " << getValueAsStr(I->getOperand(0)) << ") & " << + "(" << getValueAsStr(I->getOperand(1)) << " == " << getValueAsStr(I->getOperand(1)) << ")"; break; + case FCmpInst::FCMP_UNO: Code << "(" << getValueAsStr(I->getOperand(0)) << " != " << getValueAsStr(I->getOperand(0)) << ") | " << + "(" << getValueAsStr(I->getOperand(1)) << " != " << getValueAsStr(I->getOperand(1)) << ")"; break; + + // Simple constants. + case FCmpInst::FCMP_FALSE: Code << "0"; break; + case FCmpInst::FCMP_TRUE : Code << "1"; break; + + default: error("bad fcmp"); break; + } + break; + } + case Instruction::ICmp: { + auto predicate = isa(I) ? + (CmpInst::Predicate)cast(I)->getPredicate() : + cast(I)->getPredicate(); + AsmCast sign = CmpInst::isUnsigned(predicate) ? ASM_UNSIGNED : ASM_SIGNED; + Code << getAssignIfNeeded(I) << "(" << + getValueAsCastStr(I->getOperand(0), sign) << + ")"; + switch (predicate) { + case ICmpInst::ICMP_EQ: Code << "=="; break; + case ICmpInst::ICMP_NE: Code << "!="; break; + case ICmpInst::ICMP_ULE: Code << "<="; break; + case ICmpInst::ICMP_SLE: Code << "<="; break; + case ICmpInst::ICMP_UGE: Code << ">="; break; + case ICmpInst::ICMP_SGE: Code << ">="; break; + case ICmpInst::ICMP_ULT: Code << "<"; break; + case ICmpInst::ICMP_SLT: Code << "<"; break; + case ICmpInst::ICMP_UGT: Code << ">"; break; + case ICmpInst::ICMP_SGT: Code << ">"; break; + default: llvm_unreachable("Invalid ICmp predicate"); + } + Code << "(" << + getValueAsCastStr(I->getOperand(1), sign) << + ")"; + break; + } + case Instruction::Alloca: { + const AllocaInst* AI = cast(I); + + // We've done an alloca, so we'll have bumped the stack and will + // need to restore it. + // Yes, we shouldn't have to bump it for nativized vars, however + // they are included in the frame offset, so the restore is still + // needed until that is fixed. + StackBumped = true; + + if (NativizedVars.count(AI)) { + // nativized stack variable, we just need a 'var' definition + UsedVars[getJSName(AI)] = AI->getType()->getElementType(); + return; + } + + // Fixed-size entry-block allocations are allocated all at once in the + // function prologue. 
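Stepping back to the scalar fcmp cases earlier in this hunk: the unordered predicates are emitted as negations of the opposite ordered comparison, and ORD/UNO as self-equality tests. That is sound because IEEE comparisons involving NaN are always false, so x == x is exactly a "not NaN" test. A standalone check of that reasoning, not part of the patch:

#include <cassert>
#include <cmath>

int main() {
  double qnan = std::nan(""), a = 1.0, b = 2.0;

  // FCMP_UGT lowered as !(x <= y): true when x > y OR either operand is NaN.
  assert((!(a <= b)) == false);    // ordinary greater-than preserved
  assert((!(qnan <= b)) == true);  // unordered operands make it true

  // FCMP_ORD / FCMP_UNO lowered with self-comparisons.
  assert((a == a) && (b == b));    // ordered: neither operand is NaN
  assert(qnan != qnan);            // unordered: NaN is unequal to itself
}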
+ if (AI->isStaticAlloca()) { + uint64_t Offset; + if (Allocas.getFrameOffset(AI, &Offset)) { + Code << getAssign(AI); + if (Allocas.getMaxAlignment() <= STACK_ALIGN) { + Code << "sp"; + } else { + Code << "sp_a"; // aligned base of stack is different, use that + } + if (Offset != 0) { + Code << " + " << Offset << "|0"; + } + break; + } + // Otherwise, this alloca is being represented by another alloca, so + // there's nothing to print. + return; + } + + assert(AI->getAlignment() <= STACK_ALIGN); // TODO + + Type *T = AI->getAllocatedType(); + std::string Size; + uint64_t BaseSize = DL->getTypeAllocSize(T); + const Value *AS = AI->getArraySize(); + if (const ConstantInt *CI = dyn_cast(AS)) { + Size = Twine(stackAlign(BaseSize * CI->getZExtValue())).str(); + } else { + Size = stackAlignStr("((" + utostr(BaseSize) + '*' + getValueAsStr(AS) + ")|0)"); + } + Code << getAssign(AI) << "STACKTOP; " << getStackBump(Size); + break; + } + case Instruction::Load: { + const LoadInst *LI = cast(I); + const Value *P = LI->getPointerOperand(); + unsigned Alignment = LI->getAlignment(); + if (NativizedVars.count(P)) { + Code << getAssign(LI) << getValueAsStr(P); + } else { + Code << getLoad(LI, P, LI->getType(), Alignment); + } + break; + } + case Instruction::Store: { + const StoreInst *SI = cast(I); + const Value *P = SI->getPointerOperand(); + const Value *V = SI->getValueOperand(); + unsigned Alignment = SI->getAlignment(); + std::string VS = getValueAsStr(V); + if (NativizedVars.count(P)) { + Code << getValueAsStr(P) << " = " << VS; + } else { + Code << getStore(SI, P, V->getType(), VS, Alignment); + } + + Type *T = V->getType(); + if (T->isIntegerTy() && T->getIntegerBitWidth() > 32) { + errs() << *I << "\n"; + report_fatal_error("legalization problem"); + } + break; + } + case Instruction::GetElementPtr: { + Code << getAssignIfNeeded(I); + const GEPOperator *GEP = cast(I); + gep_type_iterator GTI = gep_type_begin(GEP); + int32_t ConstantOffset = 0; + std::string text; + + // If the base is an initialized global variable, the address is just an + // integer constant, so we can fold it into the ConstantOffset directly. + const Value *Ptr = GEP->getPointerOperand()->stripPointerCasts(); + if (isa(Ptr) && cast(Ptr)->hasInitializer() && !Relocatable) { + ConstantOffset = getGlobalAddress(Ptr->getName().str()); + } else { + text = getValueAsParenStr(Ptr); + } + + GetElementPtrInst::const_op_iterator I = GEP->op_begin(); + I++; + for (GetElementPtrInst::const_op_iterator E = GEP->op_end(); + I != E; ++I) { + const Value *Index = *I; + if (StructType *STy = dyn_cast(*GTI++)) { + // For a struct, add the member offset. + unsigned FieldNo = cast(Index)->getZExtValue(); + uint32_t Offset = DL->getStructLayout(STy)->getElementOffset(FieldNo); + ConstantOffset = (uint32_t)ConstantOffset + Offset; + } else { + // For an array, add the element offset, explicitly scaled. + uint32_t ElementSize = DL->getTypeAllocSize(*GTI); + if (const ConstantInt *CI = dyn_cast(Index)) { + // The index is constant. Add it to the accumulating offset. + ConstantOffset = (uint32_t)ConstantOffset + (uint32_t)CI->getSExtValue() * ElementSize; + } else { + // The index is non-constant. To avoid reassociating, which increases + // the risk of slow wraparounds, add the accumulated offset first. + text = AddOffset(text, ConstantOffset); + ConstantOffset = 0; + + // Now add the scaled dynamic index. + std::string Mul = getIMul(Index, ConstantInt::get(Type::getInt32Ty(GEP->getContext()), ElementSize)); + text = text.empty() ? 
Mul : ("(" + text + " + (" + Mul + ")|0)"); + } + } + } + // Add in the final accumulated offset. + Code << AddOffset(text, ConstantOffset); + break; + } + case Instruction::PHI: { + // handled separately - we push them back into the relooper branchings + return; + } + case Instruction::PtrToInt: + case Instruction::IntToPtr: + Code << getAssignIfNeeded(I) << getValueAsStr(I->getOperand(0)); + break; + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: { + Code << getAssignIfNeeded(I); + switch (Operator::getOpcode(I)) { + case Instruction::Trunc: { + //unsigned inBits = V->getType()->getIntegerBitWidth(); + unsigned outBits = I->getType()->getIntegerBitWidth(); + Code << getValueAsStr(I->getOperand(0)) << "&" << utostr(LSBMask(outBits)); + break; + } + case Instruction::SExt: { + std::string bits = utostr(32 - I->getOperand(0)->getType()->getIntegerBitWidth()); + Code << getValueAsStr(I->getOperand(0)) << " << " << bits << " >> " << bits; + break; + } + case Instruction::ZExt: { + Code << getValueAsCastStr(I->getOperand(0), ASM_UNSIGNED); + break; + } + case Instruction::FPExt: { + if (PreciseF32) { + Code << "+" << getValueAsStr(I->getOperand(0)); break; + } else { + Code << getValueAsStr(I->getOperand(0)); break; + } + break; + } + case Instruction::FPTrunc: { + Code << ensureFloat(getValueAsStr(I->getOperand(0)), I->getType()); + break; + } + case Instruction::SIToFP: Code << '(' << getCast(getValueAsCastParenStr(I->getOperand(0), ASM_SIGNED), I->getType()) << ')'; break; + case Instruction::UIToFP: Code << '(' << getCast(getValueAsCastParenStr(I->getOperand(0), ASM_UNSIGNED), I->getType()) << ')'; break; + case Instruction::FPToSI: Code << '(' << getDoubleToInt(getValueAsParenStr(I->getOperand(0))) << ')'; break; + case Instruction::FPToUI: Code << '(' << getCast(getDoubleToInt(getValueAsParenStr(I->getOperand(0))), I->getType(), ASM_UNSIGNED) << ')'; break; + case Instruction::PtrToInt: Code << '(' << getValueAsStr(I->getOperand(0)) << ')'; break; + case Instruction::IntToPtr: Code << '(' << getValueAsStr(I->getOperand(0)) << ')'; break; + default: llvm_unreachable("Unreachable"); + } + break; + } + case Instruction::BitCast: { + Code << getAssignIfNeeded(I); + // Most bitcasts are no-ops for us. However, the exception is int to float and float to int + Type *InType = I->getOperand(0)->getType(); + Type *OutType = I->getType(); + std::string V = getValueAsStr(I->getOperand(0)); + if (InType->isIntegerTy() && OutType->isFloatingPointTy()) { + assert(InType->getIntegerBitWidth() == 32); + Code << "(HEAP32[tempDoublePtr>>2]=" << V << "," << getCast("HEAPF32[tempDoublePtr>>2]", Type::getFloatTy(TheModule->getContext())) << ")"; + } else if (OutType->isIntegerTy() && InType->isFloatingPointTy()) { + assert(OutType->getIntegerBitWidth() == 32); + Code << "(HEAPF32[tempDoublePtr>>2]=" << V << "," "HEAP32[tempDoublePtr>>2]|0)"; + } else { + Code << V; + } + break; + } + case Instruction::Call: { + const CallInst *CI = cast(I); + std::string Call = handleCall(CI); + if (Call.empty()) return; + Code << Call; + break; + } + case Instruction::Select: { + Code << getAssignIfNeeded(I) << getValueAsStr(I->getOperand(0)) << " ? 
" << + getValueAsStr(I->getOperand(1)) << " : " << + getValueAsStr(I->getOperand(2)); + break; + } + case Instruction::AtomicRMW: { + const AtomicRMWInst *rmwi = cast(I); + const Value *P = rmwi->getOperand(0); + const Value *V = rmwi->getOperand(1); + std::string VS = getValueAsStr(V); + + if (EnablePthreads) { + std::string Assign = getAssign(rmwi); + std::string text; + const char *HeapName; + std::string Index = getHeapNameAndIndex(P, &HeapName); + const char *atomicFunc = 0; + switch (rmwi->getOperation()) { + case AtomicRMWInst::Xchg: atomicFunc = "exchange"; break; + case AtomicRMWInst::Add: atomicFunc = "add"; break; + case AtomicRMWInst::Sub: atomicFunc = "sub"; break; + case AtomicRMWInst::And: atomicFunc = "and"; break; + case AtomicRMWInst::Or: atomicFunc = "or"; break; + case AtomicRMWInst::Xor: atomicFunc = "xor"; break; + case AtomicRMWInst::Nand: // TODO + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + case AtomicRMWInst::BAD_BINOP: llvm_unreachable("Bad atomic operation"); + } + if (!strcmp(HeapName, "HEAPF32") || !strcmp(HeapName, "HEAPF64")) { + // TODO: If https://bugzilla.mozilla.org/show_bug.cgi?id=1131613 and https://bugzilla.mozilla.org/show_bug.cgi?id=1131624 are + // implemented, we could remove the emulation, but until then we must emulate manually. + bool fround = PreciseF32 && !strcmp(HeapName, "HEAPF32"); + Code << Assign << (fround ? "Math_fround(" : "+") << "_emscripten_atomic_" << atomicFunc << "_" << heapNameToAtomicTypeName(HeapName) << "(" << getValueAsStr(P) << ", " << VS << (fround ? "))" : ")"); break; + + // TODO: Remove the following two lines once https://bugzilla.mozilla.org/show_bug.cgi?id=1141986 is implemented! + } else if (rmwi->getOperation() == AtomicRMWInst::Xchg && !strcmp(HeapName, "HEAP32")) { + Code << Assign << "_emscripten_atomic_exchange_u32(" << getValueAsStr(P) << ", " << VS << ")|0"; break; + + } else { + Code << Assign << "(Atomics_" << atomicFunc << "(" << HeapName << ", " << Index << ", " << VS << ")|0)"; break; + } + } else { + Code << getLoad(rmwi, P, I->getType(), 0) << ';'; + // Most bitcasts are no-ops for us. 
However, the exception is int to float and float to int + switch (rmwi->getOperation()) { + case AtomicRMWInst::Xchg: Code << getStore(rmwi, P, I->getType(), VS, 0); break; + case AtomicRMWInst::Add: Code << getStore(rmwi, P, I->getType(), "((" + getJSName(I) + '+' + VS + ")|0)", 0); break; + case AtomicRMWInst::Sub: Code << getStore(rmwi, P, I->getType(), "((" + getJSName(I) + '-' + VS + ")|0)", 0); break; + case AtomicRMWInst::And: Code << getStore(rmwi, P, I->getType(), "(" + getJSName(I) + '&' + VS + ")", 0); break; + case AtomicRMWInst::Nand: Code << getStore(rmwi, P, I->getType(), "(~(" + getJSName(I) + '&' + VS + "))", 0); break; + case AtomicRMWInst::Or: Code << getStore(rmwi, P, I->getType(), "(" + getJSName(I) + '|' + VS + ")", 0); break; + case AtomicRMWInst::Xor: Code << getStore(rmwi, P, I->getType(), "(" + getJSName(I) + '^' + VS + ")", 0); break; + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + case AtomicRMWInst::BAD_BINOP: llvm_unreachable("Bad atomic operation"); + } + } + break; + } + case Instruction::Fence: + if (EnablePthreads) Code << "(Atomics_add(HEAP32, 0, 0)|0) /* fence */"; + else Code << "/* fence */"; + break; + } + + if (const Instruction *Inst = dyn_cast(I)) { + Code << ';'; + // append debug info + emitDebugInfo(Code, Inst); + Code << '\n'; + } +} + +// Checks whether to use a condition variable. We do so for switches and for indirectbrs +static const Value *considerConditionVar(const Instruction *I) { + if (const IndirectBrInst *IB = dyn_cast(I)) { + return IB->getAddress(); + } + const SwitchInst *SI = dyn_cast(I); + if (!SI) return NULL; + // otherwise, we trust LLVM switches. if they were too big or sparse, the switch expansion pass should have fixed that + return SI->getCondition(); +} + +void JSWriter::addBlock(const BasicBlock *BB, Relooper& R, LLVMToRelooperMap& LLVMToRelooper) { + std::string Code; + raw_string_ostream CodeStream(Code); + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + auto I = &*II; + if (I->stripPointerCasts() == I) { + CurrInstruction = I; + generateExpression(I, CodeStream); + } + } + CurrInstruction = nullptr; + CodeStream.flush(); + const Value* Condition = considerConditionVar(BB->getTerminator()); + Block *Curr = new Block(Code.c_str(), Condition ? getValueAsCastStr(Condition).c_str() : NULL); + LLVMToRelooper[BB] = Curr; + R.AddBlock(Curr); +} + +void JSWriter::printFunctionBody(const Function *F) { + assert(!F->isDeclaration()); + + // Prepare relooper + Relooper::MakeOutputBuffer(1024*1024); + Relooper R; + //if (!canReloop(F)) R.SetEmulate(true); + if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) { + R.SetMinSize(true); + } + R.SetAsmJSMode(1); + Block *Entry = NULL; + LLVMToRelooperMap LLVMToRelooper; + + // Create relooper blocks with their contents. TODO: We could optimize + // indirectbr by emitting indexed blocks first, so their indexes + // match up with the label index. + for (Function::const_iterator I = F->begin(), BE = F->end(); + I != BE; ++I) { + auto BI = &*I; + InvokeState = 0; // each basic block begins in state 0; the previous may not have cleared it, if e.g. 
it had a throw in the middle and the rest of it was decapitated + addBlock(BI, R, LLVMToRelooper); + if (!Entry) Entry = LLVMToRelooper[BI]; + } + assert(Entry); + + // Create branchings + for (Function::const_iterator I = F->begin(), BE = F->end(); + I != BE; ++I) { + auto BI = &*I; + const TerminatorInst *TI = BI->getTerminator(); + switch (TI->getOpcode()) { + default: { + report_fatal_error("invalid branch instr " + Twine(TI->getOpcodeName())); + break; + } + case Instruction::Br: { + const BranchInst* br = cast(TI); + if (br->getNumOperands() == 3) { + BasicBlock *S0 = br->getSuccessor(0); + BasicBlock *S1 = br->getSuccessor(1); + std::string P0 = getPhiCode(&*BI, S0); + std::string P1 = getPhiCode(&*BI, S1); + LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*S0], getValueAsStr(TI->getOperand(0)).c_str(), P0.size() > 0 ? P0.c_str() : NULL); + LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*S1], NULL, P1.size() > 0 ? P1.c_str() : NULL); + } else if (br->getNumOperands() == 1) { + BasicBlock *S = br->getSuccessor(0); + std::string P = getPhiCode(&*BI, S); + LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*S], NULL, P.size() > 0 ? P.c_str() : NULL); + } else { + error("Branch with 2 operands?"); + } + break; + } + case Instruction::IndirectBr: { + const IndirectBrInst* br = cast(TI); + unsigned Num = br->getNumDestinations(); + std::set Seen; // sadly llvm allows the same block to appear multiple times + bool SetDefault = false; // pick the first and make it the default, llvm gives no reasonable default here + for (unsigned i = 0; i < Num; i++) { + const BasicBlock *S = br->getDestination(i); + if (Seen.find(S) != Seen.end()) continue; + Seen.insert(S); + std::string P = getPhiCode(&*BI, S); + std::string Target; + if (!SetDefault) { + SetDefault = true; + } else { + Target = "case " + utostr(getBlockAddress(F, S)) + ": "; + } + LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*S], Target.size() > 0 ? Target.c_str() : NULL, P.size() > 0 ? P.c_str() : NULL); + } + break; + } + case Instruction::Switch: { + const SwitchInst* SI = cast(TI); + bool UseSwitch = !!considerConditionVar(SI); + BasicBlock *DD = SI->getDefaultDest(); + std::string P = getPhiCode(&*BI, DD); + LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*DD], NULL, P.size() > 0 ? P.c_str() : NULL); + typedef std::map BlockCondMap; + BlockCondMap BlocksToConditions; + for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { + const BasicBlock *BB = i.getCaseSuccessor(); + std::string Curr = i.getCaseValue()->getValue().toString(10, true); + std::string Condition; + if (UseSwitch) { + Condition = "case " + Curr + ": "; + } else { + Condition = "(" + getValueAsCastParenStr(SI->getCondition()) + " == " + Curr + ")"; + } + BlocksToConditions[BB] = Condition + (!UseSwitch && BlocksToConditions[BB].size() > 0 ? " | " : "") + BlocksToConditions[BB]; + } + std::set alreadyProcessed; + for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { + const BasicBlock *BB = i.getCaseSuccessor(); + if (!alreadyProcessed.insert(BB).second) continue; + if (BB == DD) continue; // ok to eliminate this, default dest will get there anyhow + std::string P = getPhiCode(&*BI, BB); + LLVMToRelooper[&*BI]->AddBranchTo(LLVMToRelooper[&*BB], BlocksToConditions[BB].c_str(), P.size() > 0 ? 
P.c_str() : NULL); + } + break; + } + case Instruction::Ret: + case Instruction::Unreachable: break; + } + } + + // Calculate relooping and print + R.Calculate(Entry); + R.Render(); + + // Emit local variables + UsedVars["sp"] = Type::getInt32Ty(F->getContext()); + unsigned MaxAlignment = Allocas.getMaxAlignment(); + if (MaxAlignment > STACK_ALIGN) { + UsedVars["sp_a"] = Type::getInt32Ty(F->getContext()); + } + UsedVars["label"] = Type::getInt32Ty(F->getContext()); + if (!UsedVars.empty()) { + unsigned Count = 0; + for (VarMap::const_iterator VI = UsedVars.begin(); VI != UsedVars.end(); ++VI) { + if (Count == 20) { + Out << ";\n"; + Count = 0; + } + if (Count == 0) Out << " var "; + if (Count > 0) { + Out << ", "; + } + Count++; + Out << VI->first << " = "; + switch (VI->second->getTypeID()) { + default: + llvm_unreachable("unsupported variable initializer type"); + case Type::PointerTyID: + case Type::IntegerTyID: + Out << "0"; + break; + case Type::FloatTyID: + if (PreciseF32) { + Out << "Math_fround(0)"; + break; + } + // otherwise fall through to double + case Type::DoubleTyID: + Out << "+0"; + break; + case Type::VectorTyID: { + VectorType *VT = cast(VI->second); + Out << "SIMD_" << SIMDType(VT) << "(0"; + + // SIMD.js has only a fixed set of SIMD types, and no arbitrary vector sizes like or , so + // codegen rounds up to the smallest appropriate size where the LLVM vector fits. + unsigned simdJsNumElements = VT->getNumElements(); + if (simdJsNumElements <= 2 && VT->getElementType()->getPrimitiveSizeInBits() > 32) simdJsNumElements = 2; + else if (simdJsNumElements <= 4 && VT->getElementType()->getPrimitiveSizeInBits() <= 32) simdJsNumElements = 4; + else if (simdJsNumElements <= 8 && VT->getElementType()->getPrimitiveSizeInBits() <= 16) simdJsNumElements = 8; + else if (simdJsNumElements <= 16 && VT->getElementType()->getPrimitiveSizeInBits() <= 8) simdJsNumElements = 16; + + for (unsigned i = 1; i < simdJsNumElements; ++i) { + Out << ",0"; + } + Out << ')'; + break; + } + } + } + Out << ";"; + nl(Out); + } + + { + static bool Warned = false; + if (!Warned && OptLevel < 2 && UsedVars.size() > 2000) { + prettyWarning() << "emitted code will contain very large numbers of local variables, which is bad for performance (build to JS with -O2 or above to avoid this - make sure to do so both on source files, and during 'linking')\n"; + Warned = true; + } + } + + // Emit stack entry + Out << " " << getAdHocAssign("sp", Type::getInt32Ty(F->getContext())) << "STACKTOP;"; + if (uint64_t FrameSize = Allocas.getFrameSize()) { + if (MaxAlignment > STACK_ALIGN) { + // We must align this entire stack frame to something higher than the default + Out << "\n "; + Out << "sp_a = STACKTOP = (STACKTOP + " << utostr(MaxAlignment-1) << ")&-" << utostr(MaxAlignment) << ";"; + } + Out << "\n "; + Out << getStackBump(FrameSize); + } + + // Emit extern loads, if we have any + if (Relocatable) { + if (FuncRelocatableExterns.size() > 0) { + for (auto& RE : FuncRelocatableExterns) { + std::string Temp = "t$" + RE; + std::string Call = "g$" + RE; + Out << Temp + " = " + Call + "() | 0;\n"; + } + FuncRelocatableExterns.clear(); + } + } + + // Emit (relooped) code + char *buffer = Relooper::GetOutputBuffer(); + nl(Out) << buffer; + + // Ensure a final return if necessary + Type *RT = F->getFunctionType()->getReturnType(); + if (!RT->isVoidTy()) { + char *LastCurly = strrchr(buffer, '}'); + if (!LastCurly) LastCurly = buffer; + char *FinalReturn = strstr(LastCurly, "return "); + if (!FinalReturn) { + Out << " return " 
<< getParenCast(getConstant(UndefValue::get(RT)), RT, ASM_NONSPECIFIC) << ";\n"; + } + } + + if (Relocatable) { + if (!F->hasInternalLinkage()) { + Exports.push_back(getJSName(F)); + } + } +} + +void JSWriter::processConstants() { + // Ensure a name for each global + for (Module::global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) { + if (I->hasInitializer()) { + if (!I->hasName()) { + // ensure a unique name + static int id = 1; + std::string newName; + while (1) { + newName = std::string("glb_") + utostr(id); + if (!TheModule->getGlobalVariable("glb_" + utostr(id))) break; + id++; + assert(id != 0); + } + I->setName(Twine(newName)); + } + } + } + // First, calculate the address of each constant + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) { + if (I->hasInitializer()) { + parseConstant(I->getName().str(), I->getInitializer(), I->getAlignment(), true); + } + } + // Calculate MaxGlobalAlign, adjust final paddings, and adjust GlobalBasePadding + assert(MaxGlobalAlign == 0); + for (auto& GI : GlobalDataMap) { + int Alignment = GI.first; + if (Alignment > MaxGlobalAlign) MaxGlobalAlign = Alignment; + ensureAligned(Alignment, &GlobalDataMap[Alignment]); + } + if (int(ZeroInitSizes.size()-1) > MaxGlobalAlign) MaxGlobalAlign = ZeroInitSizes.size()-1; // highest index in ZeroInitSizes is the largest zero-init alignment + if (!Relocatable && MaxGlobalAlign > 0) { + while ((GlobalBase+GlobalBasePadding) % MaxGlobalAlign != 0) GlobalBasePadding++; + } + while (AlignedHeapStarts.size() <= (unsigned)MaxGlobalAlign) AlignedHeapStarts.push_back(0); + while (ZeroInitStarts.size() <= (unsigned)MaxGlobalAlign) ZeroInitStarts.push_back(0); + for (auto& GI : GlobalDataMap) { + int Alignment = GI.first; + int Curr = GlobalBase + GlobalBasePadding; + for (auto& GI : GlobalDataMap) { // bigger alignments show up first, smaller later + if (GI.first > Alignment) { + Curr += GI.second.size(); + } + } + AlignedHeapStarts[Alignment] = Curr; + } + + unsigned ZeroInitStart = GlobalBase + GlobalBasePadding; + for (auto& GI : GlobalDataMap) { + ZeroInitStart += GI.second.size(); + } + if (!ZeroInitSizes.empty()) { + while (ZeroInitStart & (MaxGlobalAlign-1)) ZeroInitStart++; // fully align zero init area + for (int Alignment = ZeroInitSizes.size() - 1; Alignment > 0; Alignment--) { + if (ZeroInitSizes[Alignment] == 0) continue; + assert((ZeroInitStart & (Alignment-1)) == 0); + ZeroInitStarts[Alignment] = ZeroInitStart; + ZeroInitStart += ZeroInitSizes[Alignment]; + } + } + StaticBump = ZeroInitStart; // total size of all the data section + + // Second, allocate their contents + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) { + if (I->hasInitializer()) { + parseConstant(I->getName().str(), I->getInitializer(), I->getAlignment(), false); + } + } + if (Relocatable) { + for (Module::const_global_iterator II = TheModule->global_begin(), + E = TheModule->global_end(); II != E; ++II) { + auto I = &*II; + if (I->hasInitializer() && !I->hasInternalLinkage()) { + std::string Name = I->getName().str(); + if (GlobalAddresses.find(Name) != GlobalAddresses.end()) { + std::string JSName = getJSName(I).substr(1); + if (Name == JSName) { // don't export things that have weird internal names, that C can't dlsym anyhow + NamedGlobals[Name] = getGlobalAddress(Name); + } + } + } + } + } +} + +void JSWriter::printFunction(const Function *F) { + ValueNames.clear(); + + // 
Prepare and analyze function + + UsedVars.clear(); + UniqueNum = 0; + + // When optimizing, the regular optimizer (mem2reg, SROA, GVN, and others) + // will have already taken all the opportunities for nativization. + if (OptLevel == CodeGenOpt::None) + calculateNativizedVars(F); + + // Do alloca coloring at -O1 and higher. + Allocas.analyze(*F, *DL, OptLevel != CodeGenOpt::None); + + // Emit the function + + std::string Name = F->getName(); + sanitizeGlobal(Name); + Out << "function " << Name << "("; + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + if (AI != F->arg_begin()) Out << ","; + Out << getJSName(&*AI); + } + Out << ") {"; + nl(Out); + for (Function::const_arg_iterator II = F->arg_begin(), AE = F->arg_end(); + II != AE; ++II) { + auto AI = &*II; + std::string name = getJSName(AI); + Out << " " << name << " = " << getCast(name, AI->getType(), ASM_NONSPECIFIC) << ";"; + nl(Out); + } + printFunctionBody(F); + Out << "}"; + nl(Out); + + Allocas.clear(); + StackBumped = false; +} + +void JSWriter::printModuleBody() { + processConstants(); + + if (Relocatable) { + for (Module::const_alias_iterator I = TheModule->alias_begin(), E = TheModule->alias_end(); + I != E; ++I) { + if (const GlobalAlias *GA = dyn_cast(I)) { + const Value* Target = resolveFully(GA); + Aliases[getJSName(GA)] = getJSName(Target); + } + } + } + + // Emit function bodies. + nl(Out) << "// EMSCRIPTEN_START_FUNCTIONS"; nl(Out); + for (Module::const_iterator II = TheModule->begin(), E = TheModule->end(); + II != E; ++II) { + auto I = &*II; + if (!I->isDeclaration()) printFunction(I); + } + // Emit postSets, split up into smaller functions to avoid one massive one that is slow to compile (more likely to occur in dynamic linking, as more postsets) + { + const int CHUNK = 100; + int i = 0; + int chunk = 0; + int num = PostSets.size(); + do { + if (chunk == 0) { + Out << "function runPostSets() {\n"; + } else { + Out << "function runPostSets" << chunk << "() {\n"; + } + if (Relocatable) Out << " var temp = 0;\n"; // need a temp var for relocation calls, for proper validation in heap growth + int j = i + CHUNK; + if (j > num) j = num; + while (i < j) { + Out << PostSets[i] << "\n"; + i++; + } + // call the next chunk, if there is one + chunk++; + if (i < num) { + Out << " runPostSets" << chunk << "();\n"; + } + Out << "}\n"; + } while (i < num); + PostSets.clear(); + } + Out << "// EMSCRIPTEN_END_FUNCTIONS\n\n"; + + if (EnablePthreads) { + Out << "if (!ENVIRONMENT_IS_PTHREAD) {\n"; + } + Out << "/* memory initializer */ allocate(["; + if (MaxGlobalAlign > 0) { + bool First = true; + for (int i = 0; i < GlobalBasePadding; i++) { + if (First) { + First = false; + } else { + Out << ","; + } + Out << "0"; + } + int Curr = MaxGlobalAlign; + while (Curr > 0) { + if (GlobalDataMap.find(Curr) == GlobalDataMap.end()) { + Curr = Curr/2; + continue; + } + HeapData* GlobalData = &GlobalDataMap[Curr]; + if (GlobalData->size() > 0) { + if (First) { + First = false; + } else { + Out << ","; + } + printCommaSeparated(*GlobalData); + } + Curr = Curr/2; + } + } + Out << "], \"i8\", ALLOC_NONE, Runtime.GLOBAL_BASE);\n"; + if (EnablePthreads) { + Out << "}\n"; + } + // Emit metadata for emcc driver + Out << "\n\n// EMSCRIPTEN_METADATA\n"; + Out << "{\n"; + + Out << "\"staticBump\": " << StaticBump << ",\n"; + + Out << "\"declares\": ["; + bool first = true; + for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); + I != E; ++I) { + if (I->isDeclaration() && !I->use_empty()) { + 
// Ignore intrinsics that are always no-ops or expanded into other code + // which doesn't require the intrinsic function itself to be declared. + if (I->isIntrinsic()) { + switch (I->getIntrinsicID()) { + default: break; + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::prefetch: + case Intrinsic::memcpy: + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::expect: + case Intrinsic::flt_rounds: + continue; + } + } + // Do not report methods implemented in a call handler, unless + // they are accessed by a function pointer (in which case, we + // need the expected name to be available TODO: optimize + // that out, call handlers can declare their "function table + // name"). + std::string fullName = std::string("_") + I->getName().str(); + if (CallHandlers.count(fullName) > 0) { + if (IndexedFunctions.find(fullName) == IndexedFunctions.end()) { + continue; + } + } + + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"" << I->getName() << "\""; + } + } + for (NameSet::const_iterator I = Declares.begin(), E = Declares.end(); + I != E; ++I) { + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"" << *I << "\""; + } + Out << "],"; + + Out << "\"redirects\": {"; + first = true; + for (StringMap::const_iterator I = Redirects.begin(), E = Redirects.end(); + I != E; ++I) { + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"_" << I->first << "\": \"" << I->second << "\""; + } + Out << "},"; + + Out << "\"externs\": ["; + first = true; + for (NameSet::const_iterator I = Externals.begin(), E = Externals.end(); + I != E; ++I) { + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"" << *I << "\""; + } + Out << "],"; + + Out << "\"implementedFunctions\": ["; + first = true; + for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); + I != E; ++I) { + if (!I->isDeclaration()) { + if (first) { + first = false; + } else { + Out << ", "; + } + std::string name = I->getName(); + sanitizeGlobal(name); + Out << "\"" << name << '"'; + } + } + Out << "],"; + + Out << "\"tables\": {"; + unsigned Num = FunctionTables.size(); + for (FunctionTableMap::iterator I = FunctionTables.begin(), E = FunctionTables.end(); I != E; ++I) { + Out << " \"" << I->first << "\": \"var FUNCTION_TABLE_" << I->first << " = ["; + FunctionTable &Table = I->second; + // ensure power of two + unsigned Size = 1; + while (Size < Table.size()) Size <<= 1; + while (Table.size() < Size) Table.push_back("0"); + for (unsigned i = 0; i < Table.size(); i++) { + Out << Table[i]; + if (i < Table.size()-1) Out << ","; + } + Out << "];\""; + if (--Num > 0) Out << ","; + Out << "\n"; + } + Out << "},"; + + Out << "\"initializers\": ["; + first = true; + for (unsigned i = 0; i < GlobalInitializers.size(); i++) { + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"" << GlobalInitializers[i] << "\""; + } + Out << "],"; + + Out << "\"exports\": ["; + first = true; + for (unsigned i = 0; i < Exports.size(); i++) { + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"" << Exports[i] << "\""; + } + Out << "],"; + + Out << "\"aliases\": {"; + first = true; + for (StringMap::const_iterator I = Aliases.begin(), E = Aliases.end(); + I != E; ++I) { + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"" << I->first << "\": 
\"" << I->second << "\""; + } + Out << "},"; + + Out << "\"cantValidate\": \"" << CantValidate << "\","; + + Out << "\"simd\": " << (UsesSIMDUint8x16 || UsesSIMDInt8x16 || UsesSIMDUint16x8 || UsesSIMDInt16x8 || UsesSIMDUint32x4 || UsesSIMDInt32x4 || UsesSIMDFloat32x4 || UsesSIMDFloat64x2 ? "1" : "0") << ","; + Out << "\"simdUint8x16\": " << (UsesSIMDUint8x16 ? "1" : "0") << ","; + Out << "\"simdInt8x16\": " << (UsesSIMDInt8x16 ? "1" : "0") << ","; + Out << "\"simdUint16x8\": " << (UsesSIMDUint16x8 ? "1" : "0") << ","; + Out << "\"simdInt16x8\": " << (UsesSIMDInt16x8 ? "1" : "0") << ","; + Out << "\"simdUint32x4\": " << (UsesSIMDUint32x4 ? "1" : "0") << ","; + Out << "\"simdInt32x4\": " << (UsesSIMDInt32x4 ? "1" : "0") << ","; + Out << "\"simdFloat32x4\": " << (UsesSIMDFloat32x4 ? "1" : "0") << ","; + Out << "\"simdFloat64x2\": " << (UsesSIMDFloat64x2 ? "1" : "0") << ","; + Out << "\"simdBool8x16\": " << (UsesSIMDBool8x16 ? "1" : "0") << ","; + Out << "\"simdBool16x8\": " << (UsesSIMDBool16x8 ? "1" : "0") << ","; + Out << "\"simdBool32x4\": " << (UsesSIMDBool32x4 ? "1" : "0") << ","; + Out << "\"simdBool64x2\": " << (UsesSIMDBool64x2 ? "1" : "0") << ","; + + Out << "\"maxGlobalAlign\": " << utostr(MaxGlobalAlign) << ","; + + Out << "\"namedGlobals\": {"; + first = true; + for (NameIntMap::const_iterator I = NamedGlobals.begin(), E = NamedGlobals.end(); I != E; ++I) { + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"" << I->first << "\": \"" << utostr(I->second) << "\""; + } + Out << "},"; + + Out << "\"asmConsts\": {"; + first = true; + for (auto& I : AsmConsts) { + if (first) { + first = false; + } else { + Out << ", "; + } + Out << "\"" << utostr(I.second.Id) << "\": [\"" << I.first.c_str() << "\", ["; + auto& Sigs = I.second.Sigs; + bool innerFirst = true; + for (auto& Sig : Sigs) { + if (innerFirst) { + innerFirst = false; + } else { + Out << ", "; + } + Out << "\"" << Sig << "\""; + } + Out << "]]"; + } + Out << "}"; + + if (EnableCyberDWARF) { + Out << ",\"cyberdwarf_data\": {\n"; + Out << "\"types\": {"; + + // Remove trailing comma + std::string TDD = cyberDWARFData.TypeDebugData.str().substr(0, cyberDWARFData.TypeDebugData.str().length() - 1); + // One Windows, paths can have \ separators + std::replace(TDD.begin(), TDD.end(), '\\', '/'); + Out << TDD << "}, \"type_name_map\": {"; + + std::string TNM = cyberDWARFData.TypeNameMap.str().substr(0, cyberDWARFData.TypeNameMap.str().length() - 1); + std::replace(TNM.begin(), TNM.end(), '\\', '/'); + Out << TNM << "}, \"functions\": {"; + + std::string FM = cyberDWARFData.FunctionMembers.str().substr(0, cyberDWARFData.FunctionMembers.str().length() - 1); + std::replace(FM.begin(), FM.end(), '\\', '/'); + Out << FM << "}, \"vtable_offsets\": {"; + bool first_elem = true; + for (auto VTO: cyberDWARFData.VtableOffsets) { + if (!first_elem) { + Out << ","; + } + Out << "\"" << VTO.first << "\":\"" << VTO.second << "\""; + first_elem = false; + } + Out << "}\n}"; + } + + Out << "\n}\n"; +} + +void JSWriter::parseConstant(const std::string& name, const Constant* CV, int Alignment, bool calculate) { + if (isa(CV)) + return; + if (Alignment == 0) Alignment = DEFAULT_MEM_ALIGN; + //errs() << "parsing constant " << name << " : " << Alignment << "\n"; + // TODO: we repeat some work in both calculate and emit phases here + // FIXME: use the proper optimal alignments + if (const ConstantDataSequential *CDS = + dyn_cast(CV)) { + assert(CDS->isString()); + if (calculate) { + HeapData *GlobalData = allocateAddress(name, 
Alignment); + StringRef Str = CDS->getAsString(); + ensureAligned(Alignment, GlobalData); + for (unsigned int i = 0; i < Str.size(); i++) { + GlobalData->push_back(Str.data()[i]); + } + } + } else if (const ConstantFP *CFP = dyn_cast(CV)) { + APFloat APF = CFP->getValueAPF(); + if (CFP->getType() == Type::getFloatTy(CFP->getContext())) { + if (calculate) { + HeapData *GlobalData = allocateAddress(name, Alignment); + union flt { float f; unsigned char b[sizeof(float)]; } flt; + flt.f = APF.convertToFloat(); + ensureAligned(Alignment, GlobalData); + for (unsigned i = 0; i < sizeof(float); ++i) { + GlobalData->push_back(flt.b[i]); + } + } + } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) { + if (calculate) { + HeapData *GlobalData = allocateAddress(name, Alignment); + union dbl { double d; unsigned char b[sizeof(double)]; } dbl; + dbl.d = APF.convertToDouble(); + ensureAligned(Alignment, GlobalData); + for (unsigned i = 0; i < sizeof(double); ++i) { + GlobalData->push_back(dbl.b[i]); + } + } + } else { + assert(false && "Unsupported floating-point type"); + } + } else if (const ConstantInt *CI = dyn_cast(CV)) { + if (calculate) { + union { uint64_t i; unsigned char b[sizeof(uint64_t)]; } integer; + integer.i = *CI->getValue().getRawData(); + unsigned BitWidth = 64; // CI->getValue().getBitWidth(); + assert(BitWidth == 32 || BitWidth == 64); + HeapData *GlobalData = allocateAddress(name, Alignment); + // assuming compiler is little endian + ensureAligned(Alignment, GlobalData); + for (unsigned i = 0; i < BitWidth / 8; ++i) { + GlobalData->push_back(integer.b[i]); + } + } + } else if (isa(CV)) { + assert(false && "Unlowered ConstantPointerNull"); + } else if (isa(CV)) { + if (calculate) { + unsigned Bytes = DL->getTypeStoreSize(CV->getType()); + allocateZeroInitAddress(name, Alignment, Bytes); + } + } else if (const ConstantArray *CA = dyn_cast(CV)) { + if (calculate) { + for (Constant::const_user_iterator UI = CV->user_begin(), UE = CV->user_end(); UI != UE; ++UI) { + if ((*UI)->getName() == "llvm.used") { + // export the kept-alives + for (unsigned i = 0; i < CA->getNumOperands(); i++) { + const Constant *C = CA->getOperand(i); + if (const ConstantExpr *CE = dyn_cast(C)) { + C = CE->getOperand(0); // ignore bitcasts + } + if (isa(C)) Exports.push_back(getJSName(C)); + } + } else if ((*UI)->getName() == "llvm.global.annotations") { + // llvm.global.annotations can be ignored. 
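+          // (Hedged note: this named global only carries source-level
+          //  __attribute__((annotate(...))) strings, which have no effect
+          //  on the emitted JS, so it is safe to skip.)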
+ } else { + llvm_unreachable("Unexpected constant array"); + } + break; // we assume one use here + } + } + } else if (const ConstantStruct *CS = dyn_cast(CV)) { + if (name == "__init_array_start") { + // this is the global static initializer + if (calculate) { + unsigned Num = CS->getNumOperands(); + for (unsigned i = 0; i < Num; i++) { + const Value* C = CS->getOperand(i); + if (const ConstantExpr *CE = dyn_cast(C)) { + C = CE->getOperand(0); // ignore bitcasts + } + GlobalInitializers.push_back(getJSName(C)); + } + } + } else if (calculate) { + HeapData *GlobalData = allocateAddress(name, Alignment); + unsigned Bytes = DL->getTypeStoreSize(CV->getType()); + ensureAligned(Alignment, GlobalData); + for (unsigned i = 0; i < Bytes; ++i) { + GlobalData->push_back(0); + } + } else { + // Per the PNaCl abi, this must be a packed struct of a very specific type + // https://chromium.googlesource.com/native_client/pnacl-llvm/+/7287c45c13dc887cebe3db6abfa2f1080186bb97/lib/Transforms/NaCl/FlattenGlobals.cpp + assert(CS->getType()->isPacked()); + // This is the only constant where we cannot just emit everything during the first phase, 'calculate', as we may refer to other globals + unsigned Num = CS->getNumOperands(); + unsigned Offset = getRelativeGlobalAddress(name); + unsigned OffsetStart = Offset; + unsigned Absolute = getGlobalAddress(name); + + // VTable for the object + if (name.compare(0, 4, "_ZTV") == 0) { + cyberDWARFData.VtableOffsets[Absolute] = name; + } + + for (unsigned i = 0; i < Num; i++) { + const Constant* C = CS->getOperand(i); + if (isa(C)) { + unsigned Bytes = DL->getTypeStoreSize(C->getType()); + Offset += Bytes; // zeros, so just skip + } else if (const ConstantExpr *CE = dyn_cast(C)) { + const Value *V = CE->getOperand(0); + unsigned Data = 0; + if (CE->getOpcode() == Instruction::PtrToInt) { + Data = getConstAsOffset(V, Absolute + Offset - OffsetStart); + } else if (CE->getOpcode() == Instruction::Add) { + V = cast(V)->getOperand(0); + Data = getConstAsOffset(V, Absolute + Offset - OffsetStart); + ConstantInt *CI = cast(CE->getOperand(1)); + Data += *CI->getValue().getRawData(); + } else { + CE->dump(); + llvm_unreachable("Unexpected constant expr kind"); + } + union { unsigned i; unsigned char b[sizeof(unsigned)]; } integer; + integer.i = Data; + HeapData& GlobalData = GlobalDataMap[Alignment]; + assert(Offset+4 <= GlobalData.size()); + ensureAligned(Alignment, GlobalData); + for (unsigned i = 0; i < 4; ++i) { + GlobalData[Offset++] = integer.b[i]; + } + } else if (const ConstantDataSequential *CDS = dyn_cast(C)) { + assert(CDS->isString()); + StringRef Str = CDS->getAsString(); + HeapData& GlobalData = GlobalDataMap[Alignment]; + assert(Offset+Str.size() <= GlobalData.size()); + ensureAligned(Alignment, GlobalData); + for (unsigned int i = 0; i < Str.size(); i++) { + GlobalData[Offset++] = Str.data()[i]; + } + } else { + C->dump(); + llvm_unreachable("Unexpected constant kind"); + } + } + } + } else if (isa(CV)) { + assert(false && "Unlowered ConstantVector"); + } else if (isa(CV)) { + assert(false && "Unlowered BlockAddress"); + } else if (const ConstantExpr *CE = dyn_cast(CV)) { + if (name == "__init_array_start") { + // this is the global static initializer + if (calculate) { + const Value *V = CE->getOperand(0); + GlobalInitializers.push_back(getJSName(V)); + // is the func + } + } else if (name == "__fini_array_start") { + // nothing to do + } else { + // a global equal to a ptrtoint of some function, so a 32-bit integer for us + if (calculate) { + HeapData 
*GlobalData = allocateAddress(name, Alignment); + ensureAligned(Alignment, GlobalData); + for (unsigned i = 0; i < 4; ++i) { + GlobalData->push_back(0); + } + } else { + unsigned Data = 0; + + // Deconstruct lowered getelementptrs. + if (CE->getOpcode() == Instruction::Add) { + Data = cast(CE->getOperand(1))->getZExtValue(); + CE = cast(CE->getOperand(0)); + } + const Value *V = CE; + if (CE->getOpcode() == Instruction::PtrToInt) { + V = CE->getOperand(0); + } + + // Deconstruct getelementptrs. + int64_t BaseOffset; + V = GetPointerBaseWithConstantOffset(V, BaseOffset, *DL); + Data += (uint64_t)BaseOffset; + + Data += getConstAsOffset(V, getGlobalAddress(name)); + union { unsigned i; unsigned char b[sizeof(unsigned)]; } integer; + integer.i = Data; + unsigned Offset = getRelativeGlobalAddress(name); + HeapData& GlobalData = GlobalDataMap[Alignment]; + assert(Offset+4 <= GlobalData.size()); + ensureAligned(Alignment, GlobalData); + for (unsigned i = 0; i < 4; ++i) { + GlobalData[Offset++] = integer.b[i]; + } + } + } + } else if (isa(CV)) { + assert(false && "Unlowered UndefValue"); + } else { + CV->dump(); + assert(false && "Unsupported constant kind"); + } +} + +std::string JSWriter::generateDebugRecordForVar(Metadata *MD) { + // void shows up as nullptr for Metadata + if (!MD) { + cyberDWARFData.IndexedMetadata[0] = 0; + return "\"0\""; + } + if (cyberDWARFData.IndexedMetadata.find(MD) == cyberDWARFData.IndexedMetadata.end()) { + cyberDWARFData.IndexedMetadata[MD] = cyberDWARFData.MetadataNum++; + } + else { + return "\"" + utostr(cyberDWARFData.IndexedMetadata[MD]) + "\""; + } + + std::string VarIDForJSON = "\"" + utostr(cyberDWARFData.IndexedMetadata[MD]) + "\""; + + if (DIBasicType *BT = dyn_cast(MD)) { + cyberDWARFData.TypeDebugData << VarIDForJSON << ":" + << "[0,\"" + << BT->getName().str() + << "\"," + << BT->getEncoding() + << "," + << BT->getOffsetInBits() + << "," + << BT->getSizeInBits() + << "],"; + } + else if (MDString *MDS = dyn_cast(MD)) { + cyberDWARFData.TypeDebugData << VarIDForJSON << ":" + << "[10,\"" << MDS->getString().str() << "\"],"; + } + else if (DIDerivedType *DT = dyn_cast(MD)) { + if (DT->getRawBaseType() && isa(DT->getRawBaseType())) { + auto MDS = cast(DT->getRawBaseType()); + cyberDWARFData.TypeDebugData << VarIDForJSON << ":" + << "[1, \"" + << DT->getName().str() + << "\"," + << DT->getTag() + << ",\"" + << MDS->getString().str() + << "\"," + << DT->getOffsetInBits() + << "," + << DT->getSizeInBits() << "],"; + } + else { + if (cyberDWARFData.IndexedMetadata.find(DT->getRawBaseType()) == cyberDWARFData.IndexedMetadata.end()) { + generateDebugRecordForVar(DT->getRawBaseType()); + } + + cyberDWARFData.TypeDebugData << VarIDForJSON << ":" + << "[1, \"" + << DT->getName().str() + << "\"," + << DT->getTag() + << "," + << cyberDWARFData.IndexedMetadata[DT->getRawBaseType()] + << "," + << DT->getOffsetInBits() + << "," + << DT->getSizeInBits() << "],"; + } + } + else if (DICompositeType *CT = dyn_cast(MD)) { + + if (CT->getIdentifier().str() != "") { + if (CT->isForwardDecl()) { + cyberDWARFData.TypeNameMap << "\"" << "fd_" << CT->getIdentifier().str() << "\":" << VarIDForJSON << ","; + } else { + cyberDWARFData.TypeNameMap << "\"" << CT->getIdentifier().str() << "\":" << VarIDForJSON << ","; + } + } + + // Pull in debug info for any used elements before emitting ours + for (auto e : CT->getElements()) { + generateDebugRecordForVar(e); + } + + // Build our base type, if we have one (arrays) + if (cyberDWARFData.IndexedMetadata.find(CT->getRawBaseType()) == 
cyberDWARFData.IndexedMetadata.end()) { + generateDebugRecordForVar(CT->getRawBaseType()); + } + + cyberDWARFData.TypeDebugData << VarIDForJSON << ":" + << "[2, \"" + << CT->getName().str() + << "\"," + << CT->getTag() + << "," + << cyberDWARFData.IndexedMetadata[CT->getRawBaseType()] + << "," + << CT->getOffsetInBits() + << "," + << CT->getSizeInBits() + << ",\"" + << CT->getIdentifier().str() + << "\",["; + + bool first_elem = true; + for (auto e : CT->getElements()) { + auto *vx = dyn_cast(e); + if ((vx && vx->isStaticMember()) || isa(e)) + continue; + if (!first_elem) { + cyberDWARFData.TypeDebugData << ","; + } + first_elem = false; + cyberDWARFData.TypeDebugData << generateDebugRecordForVar(e); + } + + cyberDWARFData.TypeDebugData << "]],"; + + } + else if (DISubroutineType *ST = dyn_cast(MD)) { + cyberDWARFData.TypeDebugData << VarIDForJSON << ":" + << "[3," << ST->getTag() << "],"; + } + else if (DISubrange *SR = dyn_cast(MD)) { + cyberDWARFData.TypeDebugData << VarIDForJSON << ":" + << "[4," << SR->getCount() << "],"; + } + else if (DISubprogram *SP = dyn_cast(MD)) { + cyberDWARFData.TypeDebugData << VarIDForJSON << ":" + << "[5,\"" << SP->getName().str() << "\"],"; + } + else if (DIEnumerator *E = dyn_cast(MD)) { + cyberDWARFData.TypeDebugData << VarIDForJSON << ":" + << "[6,\"" << E->getName().str() << "\"," << E->getValue() << "],"; + } + else { + //MD->dump(); + } + + return VarIDForJSON; +} + +void JSWriter::buildCyberDWARFData() { + for (auto &F : TheModule->functions()) { + auto MD = F.getMetadata("dbg"); + if (MD) { + auto *SP = cast(MD); + + if (SP->getLinkageName() != "") { + cyberDWARFData.FunctionMembers << "\"" << SP->getLinkageName().str() << "\":{"; + } + else { + cyberDWARFData.FunctionMembers << "\"" << SP->getName().str() << "\":{"; + } + bool first_elem = true; + for (auto V : SP->getVariables()) { + auto RT = V->getRawType(); + if (!first_elem) { + cyberDWARFData.FunctionMembers << ","; + } + first_elem = false; + cyberDWARFData.FunctionMembers << "\"" << V->getName().str() << "\":" << generateDebugRecordForVar(RT); + } + cyberDWARFData.FunctionMembers << "},"; + } + } + + // Need to dump any types under each compilation unit's retained types + auto CUs = TheModule->getNamedMetadata("llvm.dbg.cu"); + + for (auto CUi : CUs->operands()) { + auto CU = cast(CUi); + auto RT = CU->getRetainedTypes(); + for (auto RTi : RT) { + generateDebugRecordForVar(RTi); + } + } +} + +// nativization + +void JSWriter::calculateNativizedVars(const Function *F) { + NativizedVars.clear(); + + for (Function::const_iterator I = F->begin(), BE = F->end(); I != BE; ++I) { + auto BI = &*I; + for (BasicBlock::const_iterator II = BI->begin(), E = BI->end(); II != E; ++II) { + const Instruction *I = &*II; + if (const AllocaInst *AI = dyn_cast(I)) { + if (AI->getAllocatedType()->isVectorTy()) continue; // we do not nativize vectors, we rely on the LLVM optimizer to avoid load/stores on them + if (AI->getAllocatedType()->isAggregateType()) continue; // we do not nativize aggregates either + // this is on the stack. 
if its address is never used nor escaped, we can nativize it + bool Fail = false; + for (Instruction::const_user_iterator UI = I->user_begin(), UE = I->user_end(); UI != UE && !Fail; ++UI) { + const Instruction *U = dyn_cast(*UI); + if (!U) { Fail = true; break; } // not an instruction, not cool + switch (U->getOpcode()) { + case Instruction::Load: break; // load is cool + case Instruction::Store: { + if (U->getOperand(0) == I) Fail = true; // store *of* it is not cool; store *to* it is fine + break; + } + default: { Fail = true; break; } // anything that is "not" "cool", is "not cool" + } + } + if (!Fail) NativizedVars.insert(I); + } + } + } +} + +// special analyses + +bool JSWriter::canReloop(const Function *F) { + return true; +} + +// main entry + +void JSWriter::printCommaSeparated(const HeapData data) { + for (HeapData::const_iterator I = data.begin(); + I != data.end(); ++I) { + if (I != data.begin()) { + Out << ","; + } + Out << (int)*I; + } +} + +void JSWriter::printProgram(const std::string& fname, + const std::string& mName) { + printModule(fname,mName); +} + +void JSWriter::printModule(const std::string& fname, + const std::string& mName) { + printModuleBody(); +} + +bool JSWriter::runOnModule(Module &M) { + TheModule = &M; + DL = &M.getDataLayout(); + + // sanity checks on options + assert(Relocatable ? GlobalBase == 0 : true); + assert(Relocatable ? EmulatedFunctionPointers : true); + + // Build debug data first, so that inline metadata can reuse the indicies + if (EnableCyberDWARF) + buildCyberDWARFData(); + + setupCallHandlers(); + + printProgram("", ""); + + return false; +} + +char JSWriter::ID = 0; + +class CheckTriple : public ModulePass { +public: + static char ID; + CheckTriple() : ModulePass(ID) {} + bool runOnModule(Module &M) override { + if (M.getTargetTriple() != "asmjs-unknown-emscripten") { + prettyWarning() << "incorrect target triple '" << M.getTargetTriple() << "' (did you use emcc/em++ on all source files and not clang directly?)\n"; + } + return false; + } +}; + +char CheckTriple::ID; + +Pass *createCheckTriplePass() { + return new CheckTriple(); +} + +//===----------------------------------------------------------------------===// +// External Interface declaration +//===----------------------------------------------------------------------===// + +bool JSTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartBefore, + AnalysisID StartAfter, AnalysisID StopAfter, + MachineFunctionInitializer *MFInitializer) { + assert(FileType == TargetMachine::CGFT_AssemblyFile); + + PM.add(createCheckTriplePass()); + + if (NoExitRuntime) { + PM.add(createNoExitRuntimePass()); + // removing atexits opens up globalopt/globaldce opportunities + PM.add(createGlobalOptimizerPass()); + PM.add(createGlobalDCEPass()); + } + + // PNaCl legalization + { + PM.add(createStripDanglingDISubprogramsPass()); + if (EnableSjLjEH) { + // This comes before ExpandTls because it introduces references to + // a TLS variable, __pnacl_eh_stack. This comes before + // InternalizePass because it assumes various variables (including + // __pnacl_eh_stack) have not been internalized yet. + PM.add(createPNaClSjLjEHPass()); + } else if (EnableEmCxxExceptions) { + PM.add(createLowerEmExceptionsPass()); + } else { + // LowerInvoke prevents use of C++ exception handling by removing + // references to BasicBlocks which handle exceptions. 
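+      // (Concretely, each `invoke` is rewritten into a plain `call` plus a
+      //  branch to its normal destination, so the unwind edge and its
+      //  landingpad become unreachable.)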
+ PM.add(createLowerInvokePass()); + } + // Run CFG simplification passes for a few reasons: + // (1) Landingpad blocks can be made unreachable by LowerInvoke + // when EnableSjLjEH is not enabled, so clean those up to ensure + // there are no landingpad instructions in the stable ABI. + // (2) Unreachable blocks can have strange properties like self-referencing + // instructions, so remove them. + PM.add(createCFGSimplificationPass()); + + PM.add(createLowerEmSetjmpPass()); + + // Expand out computed gotos (indirectbr and blockaddresses) into switches. + PM.add(createExpandIndirectBrPass()); + + // ExpandStructRegs must be run after ExpandVarArgs so that struct-typed + // "va_arg" instructions have been removed. + PM.add(createExpandVarArgsPass()); + + // Convert struct reg function params to struct* byval. This needs to be + // before ExpandStructRegs so it has a chance to rewrite aggregates from + // function arguments and returns into something ExpandStructRegs can expand. + PM.add(createSimplifyStructRegSignaturesPass()); + + // TODO(mtrofin) Remove the following and only run it as a post-opt pass once + // the following bug is fixed. + // https://code.google.com/p/nativeclient/issues/detail?id=3857 + PM.add(createExpandStructRegsPass()); + + PM.add(createExpandCtorsPass()); + + if (EnableEmAsyncify) + PM.add(createLowerEmAsyncifyPass()); + + // ExpandStructRegs must be run after ExpandArithWithOverflow to expand out + // the insertvalue instructions that ExpandArithWithOverflow introduces. + PM.add(createExpandArithWithOverflowPass()); + + // We place ExpandByVal after optimization passes because some byval + // arguments can be expanded away by the ArgPromotion pass. Leaving + // in "byval" during optimization also allows some dead stores to be + // eliminated, because "byval" is a stronger constraint than what + // ExpandByVal expands it to. + PM.add(createExpandByValPass()); + + PM.add(createPromoteI1OpsPass()); + + // We should not place arbitrary passes after ExpandConstantExpr + // because they might reintroduce ConstantExprs. + PM.add(createExpandConstantExprPass()); + // The following pass inserts GEPs, it must precede ExpandGetElementPtr. It + // also creates vector loads and stores, the subsequent pass cleans them up to + // fix their alignment. + PM.add(createConstantInsertExtractElementIndexPass()); + + // Optimization passes and ExpandByVal introduce + // memset/memcpy/memmove intrinsics with a 64-bit size argument. + // This pass converts those arguments to 32-bit. + PM.add(createCanonicalizeMemIntrinsicsPass()); + + // ConstantMerge cleans up after passes such as GlobalizeConstantVectors. It + // must run before the FlattenGlobals pass because FlattenGlobals loses + // information that otherwise helps ConstantMerge do a good job. + PM.add(createConstantMergePass()); + // FlattenGlobals introduces ConstantExpr bitcasts of globals which + // are expanded out later. ReplacePtrsWithInts also creates some + // ConstantExprs, and it locally creates an ExpandConstantExprPass + // to clean both of these up. + PM.add(createFlattenGlobalsPass()); + + // The type legalization passes (ExpandLargeIntegers and PromoteIntegers) do + // not handle constexprs and create GEPs, so they go between those passes. + PM.add(createExpandLargeIntegersPass()); + PM.add(createPromoteIntegersPass()); + // Rewrite atomic and volatile instructions with intrinsic calls. 
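+    // (Illustrative sketch: a seq_cst `atomicrmw add i32` becomes a call to
+    //  the matching llvm.nacl.atomic.rmw intrinsic; see the pass itself for
+    //  the exact mapping.)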
+ PM.add(createRewriteAtomicsPass()); + + PM.add(createSimplifyAllocasPass()); + + // The atomic cmpxchg instruction returns a struct, and is rewritten to an + // intrinsic as a post-opt pass, we therefore need to expand struct regs. + PM.add(createExpandStructRegsPass()); + + // Eliminate simple dead code that the post-opt passes could have created. + PM.add(createDeadCodeEliminationPass()); + } + // end PNaCl legalization + + PM.add(createExpandInsertExtractElementPass()); + PM.add(createExpandI64Pass()); + + CodeGenOpt::Level OptLevel = getOptLevel(); + + // When optimizing, there shouldn't be any opportunities for SimplifyAllocas + // because the regular optimizer should have taken them all (GVN, and possibly + // also SROA). + if (OptLevel == CodeGenOpt::None) + PM.add(createEmscriptenSimplifyAllocasPass()); + + PM.add(createEmscriptenRemoveLLVMAssumePass()); + PM.add(createEmscriptenExpandBigSwitchesPass()); + + PM.add(new JSWriter(Out, OptLevel)); + + return false; +} diff --git a/lib/Target/JSBackend/JSTargetMachine.cpp b/lib/Target/JSBackend/JSTargetMachine.cpp new file mode 100644 index 000000000000..2ae3dd6f6a92 --- /dev/null +++ b/lib/Target/JSBackend/JSTargetMachine.cpp @@ -0,0 +1,48 @@ +//===-- JSTargetMachine.cpp - Define TargetMachine for the JS -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the JS specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#include "JSTargetMachine.h" +#include "JSTargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +extern const llvm::SubtargetFeatureKV JSSubTypeKV[] = { + { "asmjs", "Select the asmjs processor", { }, { } } +}; + +static const llvm::SubtargetInfoKV JSProcSchedModels[] = { + { "asmjs", &MCSchedModel::GetDefaultSchedModel() } +}; + +JSSubtarget::JSSubtarget(const TargetMachine& TM, const Triple &TT) : + TargetSubtargetInfo(TT, "asmjs", "asmjs", None, makeArrayRef(JSSubTypeKV, 1), JSProcSchedModels, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr), + TL(TM) + {} + + +JSTargetMachine::JSTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, const TargetOptions &Options, + Optional& RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, "e-p:32:32-i64:64-v128:32:128-n32-S128", TT, + CPU, FS, Options, Reloc::Static, CM, OL), + ST(*this, TT) { +} + +TargetIRAnalysis JSTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(JSTTIImpl(this, F)); + }); +} + diff --git a/lib/Target/JSBackend/JSTargetMachine.h b/lib/Target/JSBackend/JSTargetMachine.h new file mode 100644 index 000000000000..71b79fe3b01b --- /dev/null +++ b/lib/Target/JSBackend/JSTargetMachine.h @@ -0,0 +1,71 @@ +//===-- JSTargetMachine.h - TargetMachine for the JS Backend ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file declares the TargetMachine that is used by the JS/asm.js/ +// emscripten backend. 
+// +//===---------------------------------------------------------------------===// + +#ifndef JSTARGETMACHINE_H +#define JSTARGETMACHINE_H + +#include "JS.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class formatted_raw_ostream; + +class JSTargetLowering : public TargetLowering { +public: + explicit JSTargetLowering(const TargetMachine& TM) : TargetLowering(TM) {} +}; + +class JSSubtarget : public TargetSubtargetInfo { + JSTargetLowering TL; + +public: + JSSubtarget(const TargetMachine& TM, const Triple &TT); + + const TargetLowering *getTargetLowering() const override { + return &TL; + } +}; + +class JSTargetMachine : public LLVMTargetMachine { + const JSSubtarget ST; + +public: + JSTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, const TargetOptions &Options, + Optional& RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + bool addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, + bool DisableVerify = true, AnalysisID StartBefore = nullptr, + AnalysisID StartAfter = nullptr, AnalysisID StopAfter = nullptr, + MachineFunctionInitializer *MFInitializer = nullptr) override; + + TargetIRAnalysis getTargetIRAnalysis() override; + + const TargetSubtargetInfo *getJSSubtargetImpl() const { + return &ST; + } + + const JSSubtarget *getSubtargetImpl(const Function &F) const override { + return &ST; + } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/JSBackend/JSTargetTransformInfo.cpp b/lib/Target/JSBackend/JSTargetTransformInfo.cpp new file mode 100644 index 000000000000..c1e29fc3d8b2 --- /dev/null +++ b/lib/Target/JSBackend/JSTargetTransformInfo.cpp @@ -0,0 +1,118 @@ +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// \file +// This file implements a TargetTransformInfo analysis pass specific to the +// JS target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. +// +//===----------------------------------------------------------------------===// + +#include "JSTargetTransformInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +#define DEBUG_TYPE "JStti" + +void JSTTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + // We generally don't want a lot of unrolling. + UP.Partial = false; + UP.Runtime = false; +} + +unsigned JSTTIImpl::getNumberOfRegisters(bool Vector) { + if (Vector) return 16; // like NEON, x86_64, etc. + + return 8; // like x86, thumb, etc. +} + +unsigned JSTTIImpl::getRegisterBitWidth(bool Vector) { + if (Vector) { + return 128; + } + + return 32; +} + +static const unsigned Nope = 65536; + +// Certain types are fine, but some vector types must be avoided at all Costs. 
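+// Only 4-element vectors of i1, i32 or float map onto the SIMD.js types this
+// backend handles; every other vector shape is priced at `Nope` below so the
+// vectorizers steer away from it.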
+static bool isOkType(Type *Ty) { + if (VectorType *VTy = dyn_cast(Ty)) { + if (VTy->getNumElements() != 4 || !(VTy->getElementType()->isIntegerTy(1) || + VTy->getElementType()->isIntegerTy(32) || + VTy->getElementType()->isFloatTy())) { + return false; + } + } + return true; +} + +unsigned JSTTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo) { + + unsigned Cost = BasicTTIImplBase::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info); + + if (!isOkType(Ty)) + return Nope; + + if (VectorType *VTy = dyn_cast(Ty)) { + switch (Opcode) { + case Instruction::LShr: + case Instruction::AShr: + case Instruction::Shl: + // SIMD.js' shifts are currently only ByScalar. + if (Opd2Info != TTI::OK_UniformValue && Opd2Info != TTI::OK_UniformConstantValue) + Cost = Cost * VTy->getNumElements() + 100; + break; + } + } + return Cost; +} + +unsigned JSTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { + if (!isOkType(Val)) + return Nope; + + unsigned Cost = BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index); + + // SIMD.js' insert/extract currently only take constant indices. + if (Index == -1u) + return Cost + 100; + + return Cost; +} + + +unsigned JSTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) { + if (!isOkType(Src)) + return Nope; + + return BasicTTIImplBase::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +} + +unsigned JSTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { + if (!isOkType(Src) || !isOkType(Dst)) + return Nope; + + return BasicTTIImplBase::getCastInstrCost(Opcode, Dst, Src); +} + diff --git a/lib/Target/JSBackend/JSTargetTransformInfo.h b/lib/Target/JSBackend/JSTargetTransformInfo.h new file mode 100644 index 000000000000..cf69ce0eb8c4 --- /dev/null +++ b/lib/Target/JSBackend/JSTargetTransformInfo.h @@ -0,0 +1,96 @@ +//===-- JSTargetTransformInfo.h - JS specific TTI -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file a TargetTransformInfo::Concept conforming object specific to the +/// JS target machine. It uses the target's detailed information to +/// provide more precise answers to certain TTI queries, while letting the +/// target independent and default TTI implementations handle the rest. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_JS_JSTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_JS_JSTARGETTRANSFORMINFO_H + +#include "JSTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class JSTTIImpl : public BasicTTIImplBase { + typedef BasicTTIImplBase BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const TargetSubtargetInfo *ST; + const TargetLoweringBase *TLI; + + const TargetSubtargetInfo *getST() const { return ST; } + const TargetLoweringBase *getTLI() const { return TLI; } + +public: + explicit JSTTIImpl(const JSTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + JSTTIImpl(const JSTTIImpl &Arg) + : BaseT(static_cast(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + JSTTIImpl(JSTTIImpl &&Arg) + : BaseT(std::move(static_cast(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} +/* + JSTTIImpl &operator=(const JSTTIImpl &RHS) { + BaseT::operator=(static_cast(RHS)); + ST = RHS.ST; + TLI = RHS.TLI; + return *this; + } + JSTTIImpl &operator=(JSTTIImpl &&RHS) { + BaseT::operator=(std::move(static_cast(RHS))); + ST = std::move(RHS.ST); + TLI = std::move(RHS.TLI); + return *this; + } +*/ + + bool hasBranchDivergence() { return true; } + + void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + + TTI::PopcntSupportKind getPopcntSupport( + unsigned TyWidth) { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + // Hopefully we'll get popcnt in ES7, but for now, we just have software. + return TargetTransformInfo::PSK_Software; + } + + unsigned getNumberOfRegisters(bool Vector); + + unsigned getRegisterBitWidth(bool Vector); + + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); + + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); + + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/JSBackend/LLVMBuild.txt b/lib/Target/JSBackend/LLVMBuild.txt new file mode 100644 index 000000000000..2a5ed7ed5452 --- /dev/null +++ b/lib/Target/JSBackend/LLVMBuild.txt @@ -0,0 +1,31 @@ +;===- ./lib/Target/JSBackend/LLVMBuild.txt --------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = MCTargetDesc NaCl TargetInfo + +[component_0] +type = TargetGroup +name = JSBackend +parent = Target + +[component_1] +type = Library +name = JSBackendCodeGen +parent = JSBackend +required_libraries = Analysis CodeGen Core JSBackendInfo JSBackendDesc PNaClTransforms Support Target +add_to_library_groups = JSBackend diff --git a/lib/Target/JSBackend/MCTargetDesc/CMakeLists.txt b/lib/Target/JSBackend/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..81c5eadef6a7 --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMJSBackendDesc + JSBackendMCTargetDesc.cpp + ) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.cpp b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.cpp new file mode 100644 index 000000000000..01b225ee4e3c --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.cpp @@ -0,0 +1,22 @@ +//===-- JSBackendMCTargetDesc.cpp - JS Backend Target Descriptions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides asm.js specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "JSBackendMCTargetDesc.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +// Force static initialization. +extern "C" void LLVMInitializeJSBackendTargetMC() { + // nothing to register +} + diff --git a/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.h b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.h new file mode 100644 index 000000000000..c98a55df83ba --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.h @@ -0,0 +1,25 @@ +//===- JSBackendMCTargetDesc.h - JS Backend Target Descriptions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides asm.js specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef JSBACKENDMCTARGETDESC_H +#define JSBACKENDMCTARGETDESC_H + +#include "llvm/Support/TargetRegistry.h" + +namespace llvm { + +extern Target TheJSBackendTarget; + +} // End llvm namespace + +#endif diff --git a/lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt b/lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 000000000000..b7f3e6d89a00 --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt --------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = JSBackendDesc +parent = JSBackend +required_libraries = MC Support JSBackendInfo +add_to_library_groups = JSBackend + diff --git a/lib/Target/JSBackend/MCTargetDesc/Makefile b/lib/Target/JSBackend/MCTargetDesc/Makefile new file mode 100644 index 000000000000..9bf7e902aff0 --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/JSBackend/TargetDesc/Makefile ------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMJSBackendDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/JSBackend/Makefile b/lib/Target/JSBackend/Makefile new file mode 100644 index 000000000000..d9bc395758d2 --- /dev/null +++ b/lib/Target/JSBackend/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/JSBackend/Makefile --- ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===---------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMJSBackendCodeGen +DIRS = MCTargetDesc NaCl TargetInfo + +include $(LEVEL)/Makefile.common + +CompileCommonOpts += -Wno-format diff --git a/lib/Target/JSBackend/NaCl/AddPNaClExternalDecls.cpp b/lib/Target/JSBackend/NaCl/AddPNaClExternalDecls.cpp new file mode 100644 index 000000000000..871a834b79b6 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/AddPNaClExternalDecls.cpp @@ -0,0 +1,85 @@ +//===- AddPNaClExternalDecls.cpp - Add decls for PNaCl external functions -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass adds function declarations for external functions used by PNaCl. +// These externals are implemented in native libraries and calls to them are +// created as part of the translation process. +// +// Running this pass is a precondition for running ResolvePNaClIntrinsics. They +// are separate because one is a ModulePass and the other is a FunctionPass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/NaClAtomicIntrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a module pass because it adds declarations to the module. 
+ class AddPNaClExternalDecls : public ModulePass { + public: + static char ID; + AddPNaClExternalDecls() : ModulePass(ID) { + initializeAddPNaClExternalDeclsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +bool AddPNaClExternalDecls::runOnModule(Module &M) { + // Add declarations for a pre-defined set of external functions to the module. + // The function names must match the functions implemented in native code (in + // pnacl/support). The function types must match the types of the LLVM + // intrinsics. + // We expect these declarations not to exist in the module before this pass + // runs, but don't assert it; it will be handled by the ABI verifier. + LLVMContext &C = M.getContext(); + M.getOrInsertFunction("setjmp", + // return type + Type::getInt32Ty(C), + // arguments + Type::getInt8Ty(C)->getPointerTo(), + NULL); + M.getOrInsertFunction("longjmp", + // return type + Type::getVoidTy(C), + // arguments + Type::getInt8Ty(C)->getPointerTo(), + Type::getInt32Ty(C), + NULL); + + // Add Intrinsic declarations needed by ResolvePNaClIntrinsics up front. + Intrinsic::getDeclaration(&M, Intrinsic::nacl_setjmp); + Intrinsic::getDeclaration(&M, Intrinsic::nacl_longjmp); + NaCl::AtomicIntrinsics AI(C); + NaCl::AtomicIntrinsics::View V = AI.allIntrinsicsAndOverloads(); + for (NaCl::AtomicIntrinsics::View::iterator I = V.begin(), E = V.end(); + I != E; ++I) { + I->getDeclaration(&M); + } + Intrinsic::getDeclaration(&M, Intrinsic::nacl_atomic_is_lock_free); + + return true; +} + +char AddPNaClExternalDecls::ID = 0; +INITIALIZE_PASS(AddPNaClExternalDecls, "add-pnacl-external-decls", + "Add declarations of external functions used by PNaCl", + false, false) + +ModulePass *llvm::createAddPNaClExternalDeclsPass() { + return new AddPNaClExternalDecls(); +} diff --git a/lib/Target/JSBackend/NaCl/BackendCanonicalize.cpp b/lib/Target/JSBackend/NaCl/BackendCanonicalize.cpp new file mode 100644 index 000000000000..de9852336539 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/BackendCanonicalize.cpp @@ -0,0 +1,360 @@ +//===- BackendCanonicalize.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Clean up some toolchain-side PNaCl ABI simplification passes. These passes +// allow PNaCl to have a simple and stable ABI, but they sometimes lead to +// harder-to-optimize code. This is desirable because LLVM's definition of +// "canonical" evolves over time, meaning that PNaCl's simple ABI can stay +// simple yet still take full advantage of LLVM's backend by having this pass +// massage the code into something that the backend prefers handling. +// +// It currently: +// - Re-generates shufflevector (not part of the PNaCl ABI) from insertelement / +// extractelement combinations. This is done by duplicating some of +// instcombine's implementation, and ignoring optimizations that should +// already have taken place. +// - Re-materializes constant loads, especially of vectors. This requires doing +// constant folding through bitcasts. +// +// The pass also performs limited DCE on instructions it knows to be dead, +// instead of performing a full global DCE. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; + +// ============================================================================= +// TODO(jfb) The following functions are as-is from instcombine. Make them +// reusable instead. + +/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns +/// elements from either LHS or RHS, return the shuffle mask and true. +/// Otherwise, return false. +static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, + SmallVectorImpl &Mask) { + assert(LHS->getType() == RHS->getType() && + "Invalid CollectSingleShuffleElements"); + unsigned NumElts = V->getType()->getVectorNumElements(); + + if (isa(V)) { + Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); + return true; + } + + if (V == LHS) { + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); + return true; + } + + if (V == RHS) { + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), + i+NumElts)); + return true; + } + + if (InsertElementInst *IEI = dyn_cast(V)) { + // If this is an insert of an extract from some other vector, include it. + Value *VecOp = IEI->getOperand(0); + Value *ScalarOp = IEI->getOperand(1); + Value *IdxOp = IEI->getOperand(2); + + if (!isa(IdxOp)) + return false; + unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); + + if (isa(ScalarOp)) { // inserting undef into vector. + // We can handle this if the vector we are inserting into is + // transitively ok. + if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + // If so, update the mask to reflect the inserted undef. + Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); + return true; + } + } else if (ExtractElementInst *EI = dyn_cast(ScalarOp)){ + if (isa(EI->getOperand(1))) { + unsigned ExtractedIdx = + cast(EI->getOperand(1))->getZExtValue(); + unsigned NumLHSElts = LHS->getType()->getVectorNumElements(); + + // This must be extracting from either LHS or RHS. + if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { + // We can handle this if the vector we are inserting into is + // transitively ok. + if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + // If so, update the mask to reflect the inserted value. + if (EI->getOperand(0) == LHS) { + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::getInt32Ty(V->getContext()), + ExtractedIdx); + } else { + assert(EI->getOperand(0) == RHS); + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::getInt32Ty(V->getContext()), + ExtractedIdx + NumLHSElts); + } + return true; + } + } + } + } + } + + return false; +} + +/// We are building a shuffle to create V, which is a sequence of insertelement, +/// extractelement pairs. If PermittedRHS is set, then we must either use it or +/// not rely on the second vector source. Return a std::pair containing the +/// left and right vectors of the proposed shuffle (or 0), and set the Mask +/// parameter as required. 
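+/// (Illustrative example: an insertelement chain that copies all four lanes of
+/// some vector %A into an undef vector comes back as the pair {undef, %A} with
+/// mask <4,5,6,7>; visitInsertElementInst below then swaps the undef onto the
+/// right-hand side, yielding shufflevector %A, undef, <0,1,2,3>.)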
+/// +/// Note: we intentionally don't try to fold earlier shuffles since they have +/// often been chosen carefully to be efficiently implementable on the target. +typedef std::pair ShuffleOps; + +static ShuffleOps CollectShuffleElements(Value *V, + SmallVectorImpl &Mask, + Value *PermittedRHS) { + assert(V->getType()->isVectorTy() && "Invalid shuffle!"); + unsigned NumElts = cast(V->getType())->getNumElements(); + + if (isa(V)) { + Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); + return std::make_pair( + PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); + } + + if (isa(V)) { + Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); + return std::make_pair(V, nullptr); + } + + if (InsertElementInst *IEI = dyn_cast(V)) { + // If this is an insert of an extract from some other vector, include it. + Value *VecOp = IEI->getOperand(0); + Value *ScalarOp = IEI->getOperand(1); + Value *IdxOp = IEI->getOperand(2); + + if (ExtractElementInst *EI = dyn_cast(ScalarOp)) { + if (isa(EI->getOperand(1)) && isa(IdxOp)) { + unsigned ExtractedIdx = + cast(EI->getOperand(1))->getZExtValue(); + unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); + + // Either the extracted from or inserted into vector must be RHSVec, + // otherwise we'd end up with a shuffle of three inputs. + if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { + Value *RHS = EI->getOperand(0); + ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS); + assert(LR.second == nullptr || LR.second == RHS); + + if (LR.first->getType() != RHS->getType()) { + // We tried our best, but we can't find anything compatible with RHS + // further up the chain. Return a trivial shuffle. + for (unsigned i = 0; i < NumElts; ++i) + Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i); + return std::make_pair(V, nullptr); + } + + unsigned NumLHSElts = RHS->getType()->getVectorNumElements(); + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::getInt32Ty(V->getContext()), + NumLHSElts+ExtractedIdx); + return std::make_pair(LR.first, RHS); + } + + if (VecOp == PermittedRHS) { + // We've gone as far as we can: anything on the other side of the + // extractelement will already have been converted into a shuffle. + unsigned NumLHSElts = + EI->getOperand(0)->getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get( + Type::getInt32Ty(V->getContext()), + i == InsertedIdx ? ExtractedIdx : NumLHSElts + i)); + return std::make_pair(EI->getOperand(0), PermittedRHS); + } + + // If this insertelement is a chain that comes from exactly these two + // vectors, return the vector and the effective shuffle. + if (EI->getOperand(0)->getType() == PermittedRHS->getType() && + CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, + Mask)) + return std::make_pair(EI->getOperand(0), PermittedRHS); + } + } + } + + // Otherwise, can't do anything fancy. Return an identity vector. 
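+  // (That is, mask <0, 1, ..., NumElts-1>, which makes the resulting shuffle a
+  //  plain copy of V.)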
+ for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); + return std::make_pair(V, nullptr); +} + +// ============================================================================= + + +namespace { + +class BackendCanonicalize : public FunctionPass, + public InstVisitor { +public: + static char ID; // Pass identification, replacement for typeid + BackendCanonicalize() : FunctionPass(ID), DL(0), TLI(0) { + initializeBackendCanonicalizePass(*PassRegistry::getPassRegistry()); + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + FunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnFunction(Function &F); + + // InstVisitor implementation. Unhandled instructions stay as-is. + bool visitInstruction(Instruction &I) { return false; } + bool visitInsertElementInst(InsertElementInst &IE); + bool visitBitCastInst(BitCastInst &C); + bool visitLoadInst(LoadInst &L); + +private: + const DataLayout *DL; + const TargetLibraryInfo *TLI; + + // List of instructions that are now obsolete, and should be DCE'd. + typedef SmallVector KillList; + KillList Kill; + + /// Helper that constant folds an instruction. + bool visitConstantFoldableInstruction(Instruction *I); + + /// Empty the kill list, making sure that all other dead instructions + /// up the chain (but in the current basic block) also get killed. + static void emptyKillList(KillList &Kill); +}; + +} // anonymous namespace + +char BackendCanonicalize::ID = 0; +INITIALIZE_PASS(BackendCanonicalize, "backend-canonicalize", + "Canonicalize PNaCl bitcode for LLVM backends", false, false) + +bool BackendCanonicalize::runOnFunction(Function &F) { + bool Modified = false; + DL = &F.getParent()->getDataLayout(); + TLI = &getAnalysis().getTLI(); + + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) + Modified |= visit(&*BI); + emptyKillList(Kill); + return Modified; +} + +// This function is *almost* as-is from instcombine, avoiding silly +// cases that should already have been optimized. +bool BackendCanonicalize::visitInsertElementInst(InsertElementInst &IE) { + Value *ScalarOp = IE.getOperand(1); + Value *IdxOp = IE.getOperand(2); + + // If the inserted element was extracted from some other vector, and if the + // indexes are constant, try to turn this into a shufflevector operation. + if (ExtractElementInst *EI = dyn_cast(ScalarOp)) { + if (isa(EI->getOperand(1)) && isa(IdxOp)) { + unsigned NumInsertVectorElts = IE.getType()->getNumElements(); + unsigned NumExtractVectorElts = + EI->getOperand(0)->getType()->getVectorNumElements(); + unsigned ExtractedIdx = + cast(EI->getOperand(1))->getZExtValue(); + unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); + + if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract. + return false; + + if (InsertedIdx >= NumInsertVectorElts) // Out of range insert. + return false; + + // If this insertelement isn't used by some other insertelement, turn it + // (and any insertelements it points to), into one big shuffle. + if (!IE.hasOneUse() || !isa(IE.user_back())) { + typedef SmallVector MaskT; + MaskT Mask; + Value *LHS, *RHS; + std::tie(LHS, RHS) = CollectShuffleElements(&IE, Mask, nullptr); + if (!RHS) + RHS = UndefValue::get(LHS->getType()); + // We now have a shuffle of LHS, RHS, Mask. + + if (isa(LHS) && !isa(RHS)) { + // Canonicalize shufflevector to always have undef on the RHS, + // and adjust the mask. 
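+          // (E.g. shufflevector undef, %X, <4,5,6,7> becomes
+          //  shufflevector %X, undef, <0,1,2,3>; illustrative only.)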
+ std::swap(LHS, RHS); + for (MaskT::iterator I = Mask.begin(), E = Mask.end(); I != E; ++I) { + unsigned Idx = cast(*I)->getZExtValue(); + unsigned NewIdx = Idx >= NumInsertVectorElts + ? Idx - NumInsertVectorElts + : Idx + NumInsertVectorElts; + *I = ConstantInt::get(Type::getInt32Ty(RHS->getContext()), NewIdx); + } + } + + IRBuilder<> IRB(&IE); + IE.replaceAllUsesWith( + IRB.CreateShuffleVector(LHS, RHS, ConstantVector::get(Mask))); + // The chain of now-dead insertelement / extractelement + // instructions can be deleted. + Kill.push_back(&IE); + + return true; + } + } + } + + return false; +} + +bool BackendCanonicalize::visitBitCastInst(BitCastInst &B) { + return visitConstantFoldableInstruction(&B); +} + +bool BackendCanonicalize::visitLoadInst(LoadInst &L) { + return visitConstantFoldableInstruction(&L); +} + +bool BackendCanonicalize::visitConstantFoldableInstruction(Instruction *I) { + if (Constant *Folded = ConstantFoldInstruction(I, *DL, TLI)) { + I->replaceAllUsesWith(Folded); + Kill.push_back(I); + return true; + } + return false; +} + +void BackendCanonicalize::emptyKillList(KillList &Kill) { + while (!Kill.empty()) + RecursivelyDeleteTriviallyDeadInstructions(Kill.pop_back_val()); +} + +FunctionPass *llvm::createBackendCanonicalizePass() { + return new BackendCanonicalize(); +} diff --git a/lib/Target/JSBackend/NaCl/CMakeLists.txt b/lib/Target/JSBackend/NaCl/CMakeLists.txt new file mode 100644 index 000000000000..53dad6fcd13b --- /dev/null +++ b/lib/Target/JSBackend/NaCl/CMakeLists.txt @@ -0,0 +1,55 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMPNaClTransforms + AddPNaClExternalDecls.cpp + BackendCanonicalize.cpp + CanonicalizeMemIntrinsics.cpp + CleanupUsedGlobalsMetadata.cpp + ConstantInsertExtractElementIndex.cpp + ExceptionInfoWriter.cpp + ExpandArithWithOverflow.cpp + ExpandByVal.cpp + ExpandConstantExpr.cpp + ExpandCtors.cpp + ExpandGetElementPtr.cpp + ExpandIndirectBr.cpp + ExpandLargeIntegers.cpp + ExpandShuffleVector.cpp + ExpandSmallArguments.cpp + ExpandStructRegs.cpp + ExpandTls.cpp + ExpandTlsConstantExpr.cpp + ExpandUtils.cpp + ExpandVarArgs.cpp + FixVectorLoadStoreAlignment.cpp + FlattenGlobals.cpp + SimplifiedFuncTypeMap.cpp + GlobalCleanup.cpp + GlobalizeConstantVectors.cpp + InsertDivideCheck.cpp + InternalizeUsedGlobals.cpp + NormalizeAlignment.cpp + PNaClSjLjEH.cpp + PromoteI1Ops.cpp + PromoteIntegers.cpp + RemoveAsmMemory.cpp + ReplacePtrsWithInts.cpp + ResolvePNaClIntrinsics.cpp + RewriteAtomics.cpp + RewriteLLVMIntrinsics.cpp + RewritePNaClLibraryCalls.cpp + SimplifyAllocas.cpp + SimplifyStructRegSignatures.cpp + StripAttributes.cpp + StripMetadata.cpp + # Emscripten files: + ExpandI64.cpp + ExpandInsertExtractElement.cpp + LowerEmAsyncify.cpp + LowerEmExceptionsPass.cpp + LowerEmSetjmp.cpp + NoExitRuntime.cpp + # Emscripten files end. + ) + +add_dependencies(LLVMPNaClTransforms intrinsics_gen) diff --git a/lib/Target/JSBackend/NaCl/CanonicalizeMemIntrinsics.cpp b/lib/Target/JSBackend/NaCl/CanonicalizeMemIntrinsics.cpp new file mode 100644 index 000000000000..1acde98d322a --- /dev/null +++ b/lib/Target/JSBackend/NaCl/CanonicalizeMemIntrinsics.cpp @@ -0,0 +1,100 @@ +//===- CanonicalizeMemIntrinsics.cpp - Make memcpy's "len" arg consistent--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass canonicalizes uses of the llvm.memset, llvm.memcpy and +// llvm.memmove intrinsics so that the variants with 64-bit "len" +// arguments aren't used, and the 32-bit variants are used instead. +// +// This means the PNaCl translator won't need to handle two versions +// of each of these intrinsics, and it won't need to do any implicit +// truncations from 64-bit to 32-bit. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because that makes it easier to find all + // uses of intrinsics efficiently. + class CanonicalizeMemIntrinsics : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + CanonicalizeMemIntrinsics() : ModulePass(ID) { + initializeCanonicalizeMemIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char CanonicalizeMemIntrinsics::ID = 0; +INITIALIZE_PASS(CanonicalizeMemIntrinsics, "canonicalize-mem-intrinsics", + "Make memcpy() et al's \"len\" argument consistent", + false, false) + +static bool expandIntrinsic(Module *M, Intrinsic::ID ID) { + SmallVector Types; + Types.push_back(Type::getInt8PtrTy(M->getContext())); + if (ID != Intrinsic::memset) + Types.push_back(Type::getInt8PtrTy(M->getContext())); + unsigned LengthTypePos = Types.size(); + Types.push_back(Type::getInt64Ty(M->getContext())); + + std::string OldName = Intrinsic::getName(ID, Types); + Function *OldIntrinsic = M->getFunction(OldName); + if (!OldIntrinsic) + return false; + + Types[LengthTypePos] = Type::getInt32Ty(M->getContext()); + Function *NewIntrinsic = Intrinsic::getDeclaration(M, ID, Types); + + SmallVector Calls; + for (User *U : OldIntrinsic->users()) { + if (CallInst *Call = dyn_cast(U)) + Calls.push_back(Call); + else + report_fatal_error("CanonicalizeMemIntrinsics: Taking the address of an " + "intrinsic is not allowed: " + + OldName); + } + + for (CallInst *Call : Calls) { + // This temporarily leaves Call non-well-typed. + Call->setCalledFunction(NewIntrinsic); + // Truncate the "len" argument. No overflow check. 
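+    // (Assumption worth noting: lengths that do not fit in i32 cannot be
+    //  addressed in the 32-bit PNaCl/asm.js memory model, so plain truncation
+    //  is considered sufficient here.)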
+ IRBuilder<> Builder(Call); + Value *Length = Builder.CreateTrunc(Call->getArgOperand(2), + Type::getInt32Ty(M->getContext()), + "mem_len_truncate"); + Call->setArgOperand(2, Length); + } + + OldIntrinsic->eraseFromParent(); + return true; +} + +bool CanonicalizeMemIntrinsics::runOnModule(Module &M) { + bool Changed = false; + Changed |= expandIntrinsic(&M, Intrinsic::memset); + Changed |= expandIntrinsic(&M, Intrinsic::memcpy); + Changed |= expandIntrinsic(&M, Intrinsic::memmove); + return Changed; +} + +ModulePass *llvm::createCanonicalizeMemIntrinsicsPass() { + return new CanonicalizeMemIntrinsics(); +} diff --git a/lib/Target/JSBackend/NaCl/CleanupUsedGlobalsMetadata.cpp b/lib/Target/JSBackend/NaCl/CleanupUsedGlobalsMetadata.cpp new file mode 100644 index 000000000000..a0e88effddfc --- /dev/null +++ b/lib/Target/JSBackend/NaCl/CleanupUsedGlobalsMetadata.cpp @@ -0,0 +1,48 @@ +//===- CleanupUsedGlobalsMetadata.cpp - Cleanup llvm.used -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// ===---------------------------------------------------------------------===// +// +// Remove llvm.used metadata. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { +class CleanupUsedGlobalsMetadata : public ModulePass { +public: + static char ID; + CleanupUsedGlobalsMetadata() : ModulePass(ID) { + initializeCleanupUsedGlobalsMetadataPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override; +}; +} + +char CleanupUsedGlobalsMetadata::ID = 0; +INITIALIZE_PASS(CleanupUsedGlobalsMetadata, "cleanup-used-globals-metadata", + "Removes llvm.used metadata.", false, false) + +bool CleanupUsedGlobalsMetadata::runOnModule(Module &M) { + bool Modified = false; + + if (auto *GV = M.getNamedGlobal("llvm.used")) { + GV->eraseFromParent(); + Modified = true; + } + + return Modified; +} + +ModulePass *llvm::createCleanupUsedGlobalsMetadataPass() { + return new CleanupUsedGlobalsMetadata(); +} \ No newline at end of file diff --git a/lib/Target/JSBackend/NaCl/ConstantInsertExtractElementIndex.cpp b/lib/Target/JSBackend/NaCl/ConstantInsertExtractElementIndex.cpp new file mode 100644 index 000000000000..743cada62233 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ConstantInsertExtractElementIndex.cpp @@ -0,0 +1,180 @@ +//===- ConstantInsertExtractElementIndex.cpp - Insert/Extract element -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Transform all InsertElement and ExtractElement with non-constant or +// out-of-bounds indices into either in-bounds constant accesses or +// stack accesses. This moves all undefined behavior to the stack, +// making InsertElement and ExtractElement well-defined. 
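+//
+// Rough sketch of the non-constant-index case (names are illustrative; an
+// out-of-range *constant* index is instead reduced modulo the element count):
+//   %elt = extractelement <4 x i32> %vec, i32 %idx
+// becomes a spill of the vector to an aligned stack slot followed by an
+// in-bounds element access:
+//   %alloca = alloca i32, i32 4
+//   store <4 x i32> %vec, <4 x i32>* %alloca.asvec
+//   %gep = getelementptr i32, i32* %alloca, i32 %idx
+//   %elt = load i32, i32* %gep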
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +#include + +using namespace llvm; + +namespace { +class ConstantInsertExtractElementIndex : public BasicBlockPass { +public: + static char ID; // Pass identification, replacement for typeid + ConstantInsertExtractElementIndex() : BasicBlockPass(ID), M(0), DL(0) { + initializeConstantInsertExtractElementIndexPass( + *PassRegistry::getPassRegistry()); + } + using BasicBlockPass::doInitialization; + bool doInitialization(Module &Mod) override { + M = &Mod; + return false; // Unchanged. + } + bool runOnBasicBlock(BasicBlock &BB) override; + +private: + typedef SmallVector Instructions; + const Module *M; + const DataLayout *DL; + + void findNonConstantInsertExtractElements( + const BasicBlock &BB, Instructions &OutOfRangeConstantIndices, + Instructions &NonConstantVectorIndices) const; + void fixOutOfRangeConstantIndices(BasicBlock &BB, + const Instructions &Instrs) const; + void fixNonConstantVectorIndices(BasicBlock &BB, + const Instructions &Instrs) const; +}; + +/// Number of elements in a vector instruction. +unsigned vectorNumElements(const Instruction *I) { + return cast(I->getOperand(0)->getType())->getNumElements(); +} + +/// Get the index of an InsertElement or ExtractElement instruction, or null. +Value *getInsertExtractElementIdx(const Instruction *I) { + switch (I->getOpcode()) { + default: return NULL; + case Instruction::InsertElement: return I->getOperand(2); + case Instruction::ExtractElement: return I->getOperand(1); + } +} + +/// Set the index of an InsertElement or ExtractElement instruction. 
+void setInsertExtractElementIdx(Instruction *I, Value *NewIdx) { + switch (I->getOpcode()) { + default: + llvm_unreachable( + "expected instruction to be InsertElement or ExtractElement"); + case Instruction::InsertElement: I->setOperand(2, NewIdx); break; + case Instruction::ExtractElement: I->setOperand(1, NewIdx); break; + } +} +} // anonymous namespace + +char ConstantInsertExtractElementIndex::ID = 0; +INITIALIZE_PASS( + ConstantInsertExtractElementIndex, "constant-insert-extract-element-index", + "Force insert and extract vector element to always be in bounds", false, + false) + +void ConstantInsertExtractElementIndex::findNonConstantInsertExtractElements( + const BasicBlock &BB, Instructions &OutOfRangeConstantIndices, + Instructions &NonConstantVectorIndices) const { + for (BasicBlock::const_iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; + ++BBI) { + const Instruction *I = &*BBI; + if (Value *Idx = getInsertExtractElementIdx(I)) { + if (ConstantInt *CI = dyn_cast(Idx)) { + if (!CI->getValue().ult(vectorNumElements(I))) + OutOfRangeConstantIndices.push_back(const_cast(I)); + } else + NonConstantVectorIndices.push_back(const_cast(I)); + } + } +} + +void ConstantInsertExtractElementIndex::fixOutOfRangeConstantIndices( + BasicBlock &BB, const Instructions &Instrs) const { + for (Instructions::const_iterator IB = Instrs.begin(), IE = Instrs.end(); + IB != IE; ++IB) { + Instruction *I = *IB; + const APInt &Idx = + cast(getInsertExtractElementIdx(I))->getValue(); + APInt NumElements = APInt(Idx.getBitWidth(), vectorNumElements(I)); + APInt NewIdx = Idx.urem(NumElements); + setInsertExtractElementIdx(I, ConstantInt::get(M->getContext(), NewIdx)); + } +} + +void ConstantInsertExtractElementIndex::fixNonConstantVectorIndices( + BasicBlock &BB, const Instructions &Instrs) const { + for (Instructions::const_iterator IB = Instrs.begin(), IE = Instrs.end(); + IB != IE; ++IB) { + Instruction *I = *IB; + Value *Vec = I->getOperand(0); + Value *Idx = getInsertExtractElementIdx(I); + VectorType *VecTy = cast(Vec->getType()); + Type *ElemTy = VecTy->getElementType(); + unsigned ElemAlign = DL->getPrefTypeAlignment(ElemTy); + unsigned VecAlign = std::max(ElemAlign, DL->getPrefTypeAlignment(VecTy)); + + IRBuilder<> IRB(I); + AllocaInst *Alloca = IRB.CreateAlloca( + ElemTy, ConstantInt::get(Type::getInt32Ty(M->getContext()), + vectorNumElements(I))); + Alloca->setAlignment(VecAlign); + Value *AllocaAsVec = IRB.CreateBitCast(Alloca, VecTy->getPointerTo()); + IRB.CreateAlignedStore(Vec, AllocaAsVec, Alloca->getAlignment()); + Value *GEP = IRB.CreateGEP(Alloca, Idx); + + Value *Res; + switch (I->getOpcode()) { + default: + llvm_unreachable("expected InsertElement or ExtractElement"); + case Instruction::InsertElement: + IRB.CreateAlignedStore(I->getOperand(1), GEP, ElemAlign); + Res = IRB.CreateAlignedLoad(AllocaAsVec, Alloca->getAlignment()); + break; + case Instruction::ExtractElement: + Res = IRB.CreateAlignedLoad(GEP, ElemAlign); + break; + } + + I->replaceAllUsesWith(Res); + I->eraseFromParent(); + } +} + +bool ConstantInsertExtractElementIndex::runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + if (!DL) + DL = &BB.getParent()->getParent()->getDataLayout(); + Instructions OutOfRangeConstantIndices; + Instructions NonConstantVectorIndices; + + findNonConstantInsertExtractElements(BB, OutOfRangeConstantIndices, + NonConstantVectorIndices); + if (!OutOfRangeConstantIndices.empty()) { + Changed = true; + fixOutOfRangeConstantIndices(BB, OutOfRangeConstantIndices); + } + if 
(!NonConstantVectorIndices.empty()) { + Changed = true; + fixNonConstantVectorIndices(BB, NonConstantVectorIndices); + } + return Changed; +} + +BasicBlockPass *llvm::createConstantInsertExtractElementIndexPass() { + return new ConstantInsertExtractElementIndex(); +} diff --git a/lib/Target/JSBackend/NaCl/ExceptionInfoWriter.cpp b/lib/Target/JSBackend/NaCl/ExceptionInfoWriter.cpp new file mode 100644 index 000000000000..0596d92c29df --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExceptionInfoWriter.cpp @@ -0,0 +1,291 @@ +//===- ExceptionInfoWriter.cpp - Generate C++ exception info for PNaCl-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ExceptionInfoWriter class converts the clauses of a +// "landingpad" instruction into data tables stored in global +// variables. These tables are interpreted by PNaCl's C++ runtime +// library (either libsupc++ or libcxxabi), which is linked into a +// pexe. +// +// This is similar to the lowering that the LLVM backend does to +// convert landingpad clauses into ".gcc_except_table" sections. The +// difference is that ExceptionInfoWriter is an IR-to-IR +// transformation that runs on the PNaCl user toolchain side. The +// format it produces is not part of PNaCl's stable ABI; the PNaCl +// translator and LLVM backend do not know about this format. +// +// Encoding: +// +// A landingpad instruction contains a list of clauses. +// ExceptionInfoWriter encodes each clause as a 32-bit "clause ID". A +// clause is one of the following forms: +// +// 1) "catch i8* @ExcType" +// * This clause means that the landingpad should be entered if +// the C++ exception being thrown has type @ExcType (or a +// subtype of @ExcType). @ExcType is a pointer to the +// std::type_info object (an RTTI object) for the C++ exception +// type. +// * Clang generates this for a "catch" block in the C++ source. +// * @ExcType is NULL for "catch (...)" (catch-all) blocks. +// * This is encoded as the "type ID" for @ExcType, defined below, +// which is a positive integer. +// +// 2) "filter [i8* @ExcType1, ..., i8* @ExcTypeN]" +// * This clause means that the landingpad should be entered if +// the C++ exception being thrown *doesn't* match any of the +// types in the list (which are again specified as +// std::type_info pointers). +// * Clang uses this to implement C++ exception specifications, e.g. +// void foo() throw(ExcType1, ..., ExcTypeN) { ... } +// * This is encoded as the filter ID, X, where X < 0, and +// &__pnacl_eh_filter_table[-X-1] points to a 0-terminated +// array of integer "type IDs". +// +// 3) "cleanup" +// * This means that the landingpad should always be entered. +// * Clang uses this for calling objects' destructors. +// * This is encoded as 0. +// * The runtime may treat "cleanup" differently from "catch i8* +// null" (a catch-all). In C++, if an unhandled exception +// occurs, the language runtime may abort execution without +// running any destructors. The runtime may implement this by +// searching for a matching non-"cleanup" clause, and aborting +// if it does not find one, before entering any landingpad +// blocks. +// +// The "type ID" for a type @ExcType is a 1-based index into the array +// __pnacl_eh_type_table[]. That is, the type ID is a value X such +// that __pnacl_eh_type_table[X-1] == @ExcType, and X >= 1. 
+// +// ExceptionInfoWriter generates the following data structures: +// +// struct action_table_entry { +// int32_t clause_id; +// uint32_t next_clause_list_id; +// }; +// +// // Represents singly linked lists of clauses. +// extern const struct action_table_entry __pnacl_eh_action_table[]; +// +// // Allows std::type_infos to be represented using small integer IDs. +// extern std::type_info *const __pnacl_eh_type_table[]; +// +// // Used to represent type arrays for "filter" clauses. +// extern const uint32_t __pnacl_eh_filter_table[]; +// +// A "clause list ID" is either: +// * 0, representing the empty list; or +// * an index into __pnacl_eh_action_table[] with 1 added, which +// specifies a node in the clause list. +// +// Example: +// +// std::type_info *const __pnacl_eh_type_table[] = { +// // defines type ID 1 == ExcA and clause ID 1 == "catch ExcA" +// &typeinfo(ExcA), +// // defines type ID 2 == ExcB and clause ID 2 == "catch ExcB" +// &typeinfo(ExcB), +// // defines type ID 3 == ExcC and clause ID 3 == "catch ExcC" +// &typeinfo(ExcC), +// }; +// +// const uint32_t __pnacl_eh_filter_table[] = { +// 1, // refers to ExcA; defines clause ID -1 as "filter [ExcA, ExcB]" +// 2, // refers to ExcB; defines clause ID -2 as "filter [ExcB]" +// 0, // list terminator; defines clause ID -3 as "filter []" +// 3, // refers to ExcC; defines clause ID -4 as "filter [ExcC]" +// 0, // list terminator; defines clause ID -5 as "filter []" +// }; +// +// const struct action_table_entry __pnacl_eh_action_table[] = { +// // defines clause list ID 1: +// { +// -4, // "filter [ExcC]" +// 0, // end of list (no more actions) +// }, +// // defines clause list ID 2: +// { +// -1, // "filter [ExcA, ExcB]" +// 1, // else go to clause list ID 1 +// }, +// // defines clause list ID 3: +// { +// 2, // "catch ExcB" +// 2, // else go to clause list ID 2 +// }, +// // defines clause list ID 4: +// { +// 1, // "catch ExcA" +// 3, // else go to clause list ID 3 +// }, +// }; +// +// So if a landingpad contains the clause list: +// [catch ExcA, +// catch ExcB, +// filter [ExcA, ExcB], +// filter [ExcC]] +// then this can be represented as clause list ID 4 using the tables above. +// +// The C++ runtime library checks the clauses in order to decide +// whether to enter the landingpad. If a clause matches, the +// landingpad BasicBlock is passed the clause ID. The landingpad code +// can use the clause ID to decide which C++ catch() block (if any) to +// execute. +// +// The purpose of these exception tables is to keep code sizes +// relatively small. The landingpad code only needs to check a small +// integer clause ID, rather than having to call a function to check +// whether the C++ exception matches a type. +// +// ExceptionInfoWriter's encoding corresponds loosely to the format of +// GCC's .gcc_except_table sections. One difference is that +// ExceptionInfoWriter writes fixed-width 32-bit integers, whereas +// .gcc_except_table uses variable-length LEB128 encodings. We could +// switch to LEB128 to save space in the future. 
+// +//===----------------------------------------------------------------------===// + +#include "ExceptionInfoWriter.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +ExceptionInfoWriter::ExceptionInfoWriter(LLVMContext *Context): + Context(Context) { + Type *I32 = Type::getInt32Ty(*Context); + Type *Fields[] = { I32, I32 }; + ActionTableEntryTy = StructType::create(Fields, "action_table_entry"); +} + +unsigned ExceptionInfoWriter::getIDForExceptionType(Value *ExcTy) { + Constant *ExcTyConst = dyn_cast(ExcTy); + if (!ExcTyConst) + report_fatal_error("Exception type not a constant"); + + // Reuse existing ID if one has already been assigned. + TypeTableIDMapType::iterator Iter = TypeTableIDMap.find(ExcTyConst); + if (Iter != TypeTableIDMap.end()) + return Iter->second; + + unsigned Index = TypeTableData.size() + 1; + TypeTableIDMap[ExcTyConst] = Index; + TypeTableData.push_back(ExcTyConst); + return Index; +} + +unsigned ExceptionInfoWriter::getIDForClauseListNode( + unsigned ClauseID, unsigned NextClauseListID) { + // Reuse existing ID if one has already been assigned. + ActionTableEntry Key(ClauseID, NextClauseListID); + ActionTableIDMapType::iterator Iter = ActionTableIDMap.find(Key); + if (Iter != ActionTableIDMap.end()) + return Iter->second; + + Type *I32 = Type::getInt32Ty(*Context); + Constant *Fields[] = { ConstantInt::get(I32, ClauseID), + ConstantInt::get(I32, NextClauseListID) }; + Constant *Entry = ConstantStruct::get(ActionTableEntryTy, Fields); + + // Add 1 so that the empty list can be represented as 0. + unsigned ClauseListID = ActionTableData.size() + 1; + ActionTableIDMap[Key] = ClauseListID; + ActionTableData.push_back(Entry); + return ClauseListID; +} + +unsigned ExceptionInfoWriter::getIDForFilterClause(Value *Filter) { + unsigned FilterClauseID = -(FilterTableData.size() + 1); + Type *I32 = Type::getInt32Ty(*Context); + ArrayType *ArrayTy = dyn_cast(Filter->getType()); + if (!ArrayTy) + report_fatal_error("Landingpad filter clause is not of array type"); + unsigned FilterLength = ArrayTy->getNumElements(); + // Don't try the dyn_cast if the FilterLength is zero, because Array + // could be a zeroinitializer. + if (FilterLength > 0) { + ConstantArray *Array = dyn_cast(Filter); + if (!Array) + report_fatal_error("Landingpad filter clause is not a ConstantArray"); + for (unsigned I = 0; I < FilterLength; ++I) { + unsigned TypeID = getIDForExceptionType(Array->getOperand(I)); + assert(TypeID > 0); + FilterTableData.push_back(ConstantInt::get(I32, TypeID)); + } + } + // Add array terminator. + FilterTableData.push_back(ConstantInt::get(I32, 0)); + return FilterClauseID; +} + +unsigned ExceptionInfoWriter::getIDForLandingPadClauseList(LandingPadInst *LP) { + unsigned NextClauseListID = 0; // ID for empty list. + + if (LP->isCleanup()) { + // Add cleanup clause at the end of the list. 
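+    // ("cleanup" is encoded as clause ID 0; see the encoding description at
+    // the top of this file.)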
+ NextClauseListID = getIDForClauseListNode(0, NextClauseListID); + } + + for (int I = (int) LP->getNumClauses() - 1; I >= 0; --I) { + unsigned ClauseID; + if (LP->isCatch(I)) { + ClauseID = getIDForExceptionType(LP->getClause(I)); + } else if (LP->isFilter(I)) { + ClauseID = getIDForFilterClause(LP->getClause(I)); + } else { + report_fatal_error("Unknown kind of landingpad clause"); + } + assert(ClauseID > 0); + NextClauseListID = getIDForClauseListNode(ClauseID, NextClauseListID); + } + + return NextClauseListID; +} + +static void defineArray(Module *M, const char *Name, + const SmallVectorImpl &Elements, + Type *ElementType) { + ArrayType *ArrayTy = ArrayType::get(ElementType, Elements.size()); + Constant *ArrayData = ConstantArray::get(ArrayTy, Elements); + GlobalVariable *OldGlobal = M->getGlobalVariable(Name); + if (OldGlobal) { + if (OldGlobal->hasInitializer()) { + report_fatal_error(std::string("Variable ") + Name + + " already has an initializer"); + } + Constant *NewGlobal = new GlobalVariable( + *M, ArrayTy, /* isConstant= */ true, + GlobalValue::InternalLinkage, ArrayData); + NewGlobal->takeName(OldGlobal); + OldGlobal->replaceAllUsesWith(ConstantExpr::getBitCast( + NewGlobal, OldGlobal->getType())); + OldGlobal->eraseFromParent(); + } else { + if (Elements.size() > 0) { + // This warning could happen for a program that does not link + // against the C++ runtime libraries. Such a program might + // contain "invoke" instructions but never throw any C++ + // exceptions. + errs() << "Warning: Variable " << Name << " not referenced\n"; + } + } +} + +void ExceptionInfoWriter::defineGlobalVariables(Module *M) { + defineArray(M, "__pnacl_eh_type_table", TypeTableData, + Type::getInt8PtrTy(M->getContext())); + + defineArray(M, "__pnacl_eh_action_table", ActionTableData, + ActionTableEntryTy); + + defineArray(M, "__pnacl_eh_filter_table", FilterTableData, + Type::getInt32Ty(M->getContext())); +} diff --git a/lib/Target/JSBackend/NaCl/ExceptionInfoWriter.h b/lib/Target/JSBackend/NaCl/ExceptionInfoWriter.h new file mode 100644 index 000000000000..dadaaf76158c --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExceptionInfoWriter.h @@ -0,0 +1,71 @@ +//===-- ExceptionInfoWriter.h - Generate C++ exception info------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef TRANSFORMS_NACL_EXCEPTIONINFOWRITER_H +#define TRANSFORMS_NACL_EXCEPTIONINFOWRITER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +namespace llvm { + +// The ExceptionInfoWriter class converts the clauses of a +// "landingpad" instruction into data tables stored in global +// variables, which are interpreted by PNaCl's C++ runtime library. +// See ExceptionInfoWriter.cpp for a full description. +class ExceptionInfoWriter { + LLVMContext *Context; + StructType *ActionTableEntryTy; + + // Data for populating __pnacl_eh_type_table[], which is an array of + // std::type_info* pointers. Each of these pointers represents a + // C++ exception type. + SmallVector TypeTableData; + // Mapping from std::type_info* pointer to type ID (index in + // TypeTableData). + typedef DenseMap TypeTableIDMapType; + TypeTableIDMapType TypeTableIDMap; + + // Data for populating __pnacl_eh_action_table[], which is an array + // of pairs. 
+ SmallVector ActionTableData; + // Pair of (clause_id, clause_list_id). + typedef std::pair ActionTableEntry; + // Mapping from (clause_id, clause_list_id) to clause_id (index in + // ActionTableData). + typedef DenseMap ActionTableIDMapType; + ActionTableIDMapType ActionTableIDMap; + + // Data for populating __pnacl_eh_filter_table[], which is an array + // of integers. + SmallVector FilterTableData; + + // Get the interned ID for an action. + unsigned getIDForClauseListNode(unsigned ClauseID, unsigned NextClauseListID); + + // Get the clause ID for a "filter" clause. + unsigned getIDForFilterClause(Value *Filter); + +public: + explicit ExceptionInfoWriter(LLVMContext *Context); + + // Get the interned type ID (a small integer) for a C++ exception type. + unsigned getIDForExceptionType(Value *Ty); + + // Get the clause list ID for a landingpad's clause list. + unsigned getIDForLandingPadClauseList(LandingPadInst *LP); + + // Add the exception info tables to the module. + void defineGlobalVariables(Module *M); +}; + +} + +#endif diff --git a/lib/Target/JSBackend/NaCl/ExpandArithWithOverflow.cpp b/lib/Target/JSBackend/NaCl/ExpandArithWithOverflow.cpp new file mode 100644 index 000000000000..4adcd74b7a02 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandArithWithOverflow.cpp @@ -0,0 +1,234 @@ +//===- ExpandArithWithOverflow.cpp - Expand out uses of *.with.overflow----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The llvm.*.with.overflow.*() intrinsics are awkward for PNaCl support because +// they return structs, and we want to omit struct types from IR in PNaCl's +// stable ABI. +// +// However, llvm.{umul,uadd}.with.overflow.*() are used by Clang to implement an +// overflow check for C++'s new[] operator, and {sadd,ssub} are used by +// ubsan. This pass expands out these uses so that PNaCl does not have to +// support *.with.overflow as part of PNaCl's stable ABI. 
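+//
+// As a rough sketch (unsigned add case; the other cases use the checks
+// described in the code below):
+//   %r = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+// becomes plain arithmetic plus an explicit comparison, repackaged with
+// insertvalue so existing extractvalue users keep working:
+//   %r.arith = add i32 %a, %b
+//   %r.overflow = icmp ult i32 %r.arith, %a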
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APInt.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +#define DEBUG_TYPE "expand-arith-with-overflow" + +using namespace llvm; + +namespace { +class ExpandArithWithOverflow : public ModulePass { +public: + static char ID; + ExpandArithWithOverflow() : ModulePass(ID) { + initializeExpandArithWithOverflowPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); +}; +} + +char ExpandArithWithOverflow::ID = 0; +INITIALIZE_PASS(ExpandArithWithOverflow, "expand-arith-with-overflow", + "Expand out some uses of *.with.overflow intrinsics", false, + false) + +enum class ExpandArith { Add, Sub, Mul }; +static const ExpandArith ExpandArithOps[] = {ExpandArith::Add, ExpandArith::Sub, + ExpandArith::Mul}; + +static Intrinsic::ID getID(ExpandArith Op, bool Signed) { + static const Intrinsic::ID IDs[][2] = { + // Unsigned Signed + /* Add */ {Intrinsic::uadd_with_overflow, Intrinsic::sadd_with_overflow}, + /* Sub */ {Intrinsic::usub_with_overflow, Intrinsic::ssub_with_overflow}, + /* Mul */ {Intrinsic::umul_with_overflow, Intrinsic::smul_with_overflow}, + }; + return IDs[(size_t)Op][Signed]; +} + +static Instruction::BinaryOps getOpcode(ExpandArith Op) { + static const Instruction::BinaryOps Opcodes[] = { + Instruction::Add, Instruction::Sub, Instruction::Mul, + }; + return Opcodes[(size_t)Op]; +} + +static Value *CreateInsertValue(IRBuilder<> *IRB, Value *StructVal, + unsigned Index, Value *Field, + Instruction *BasedOn) { + SmallVector EVIndexes(1, Index); + return IRB->CreateInsertValue(StructVal, Field, EVIndexes, + BasedOn->getName() + ".insert"); +} + +static bool Expand(Module *M, unsigned Bits, ExpandArith Op, bool Signed) { + IntegerType *IntTy = IntegerType::get(M->getContext(), Bits); + SmallVector Types(1, IntTy); + Function *Intrinsic = + M->getFunction(Intrinsic::getName(getID(Op, Signed), Types)); + if (!Intrinsic) + return false; + + SmallVector Calls; + for (User *U : Intrinsic->users()) + if (CallInst *Call = dyn_cast(U)) { + Calls.push_back(Call); + } else { + errs() << "User: " << *U << "\n"; + report_fatal_error("ExpandArithWithOverflow: Taking the address of a " + "*.with.overflow intrinsic is not allowed"); + } + + for (CallInst *Call : Calls) { + DEBUG(dbgs() << "Expanding " << *Call << "\n"); + + StringRef Name = Call->getName(); + Value *LHS; + Value *RHS; + Value *NonConstOperand; + ConstantInt *ConstOperand; + bool hasConstOperand; + + if (ConstantInt *C = dyn_cast(Call->getArgOperand(0))) { + LHS = ConstOperand = C; + RHS = NonConstOperand = Call->getArgOperand(1); + hasConstOperand = true; + } else if (ConstantInt *C = dyn_cast(Call->getArgOperand(1))) { + LHS = NonConstOperand = Call->getArgOperand(0); + RHS = ConstOperand = C; + hasConstOperand = true; + } else { + LHS = Call->getArgOperand(0); + RHS = Call->getArgOperand(1); + hasConstOperand = false; + } + + IRBuilder<> IRB(Call); + Value *ArithResult = + IRB.CreateBinOp(getOpcode(Op), LHS, RHS, Name + ".arith"); + Value *OverflowResult; + + if (ExpandArith::Mul == Op && hasConstOperand && + ConstOperand->getValue() == 0) { + // Mul by zero never overflows but can divide by zero. 
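+      // (That is: the generic multiply check further down divides the result
+      // by an operand, guarding against a zero divisor with a select; a known
+      // zero operand lets us answer "no overflow" directly and skip that
+      // machinery.)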
+ OverflowResult = ConstantInt::getFalse(M->getContext()); + } else if (hasConstOperand && !Signed && ExpandArith::Sub != Op) { + // Unsigned add & mul with a constant operand can be optimized. + uint64_t ArgMax = + (ExpandArith::Mul == Op + ? APInt::getMaxValue(Bits).udiv(ConstOperand->getValue()) + : APInt::getMaxValue(Bits) - ConstOperand->getValue()) + .getLimitedValue(); + OverflowResult = + IRB.CreateICmp(CmpInst::ICMP_UGT, NonConstOperand, + ConstantInt::get(IntTy, ArgMax), Name + ".overflow"); + } else if (ExpandArith::Mul == Op) { + // Dividing the result by one of the operands should yield the other + // operand if there was no overflow. Note that this division can't + // overflow (signed division of INT_MIN / -1 overflows but can't occur + // here), but it could divide by 0 in which case we instead divide by 1 + // (this case didn't overflow). + // + // FIXME: This approach isn't optimal because it's better to perform a + // wider multiplication and mask off the result, or perform arithmetic on + // the component pieces. + auto DivOp = Signed ? Instruction::SDiv : Instruction::UDiv; + auto DenomIsZero = + IRB.CreateICmp(CmpInst::ICMP_EQ, RHS, + ConstantInt::get(RHS->getType(), 0), Name + ".iszero"); + auto Denom = + IRB.CreateSelect(DenomIsZero, ConstantInt::get(RHS->getType(), 1), + RHS, Name + ".denom"); + auto Div = IRB.CreateBinOp(DivOp, ArithResult, Denom, Name + ".div"); + OverflowResult = IRB.CreateSelect( + DenomIsZero, ConstantInt::getFalse(M->getContext()), + IRB.CreateICmp(CmpInst::ICMP_NE, Div, LHS, Name + ".same"), + Name + ".overflow"); + } else { + if (!Signed) { + switch (Op) { + case ExpandArith::Add: + // Overflow occurs if unsigned x+y < x (or y). We only need to compare + // with one of them because this is unsigned arithmetic: on overflow + // the result is smaller than both inputs, and when there's no + // overflow the result is greater than both inputs. + OverflowResult = IRB.CreateICmp(CmpInst::ICMP_ULT, ArithResult, LHS, + Name + ".overflow"); + break; + case ExpandArith::Sub: + // Overflow occurs if x < y. + OverflowResult = + IRB.CreateICmp(CmpInst::ICMP_ULT, LHS, RHS, Name + ".overflow"); + break; + case ExpandArith::Mul: // This is handled above. + llvm_unreachable("Unsigned variable saturating multiplication"); + } + } else { + // In the signed case, we care if the sum is >127 or <-128. When looked + // at as an unsigned number, that is precisely when the sum is >= 128. + Value *PositiveTemp = IRB.CreateBinOp( + Instruction::Add, LHS, + ConstantInt::get(IntTy, APInt::getSignedMinValue(Bits) + + (ExpandArith::Sub == Op ? 1 : 0)), + Name + ".postemp"); + Value *NegativeTemp = IRB.CreateBinOp( + Instruction::Add, LHS, + ConstantInt::get(IntTy, APInt::getSignedMaxValue(Bits) + + (ExpandArith::Sub == Op ? 1 : 0)), + Name + ".negtemp"); + Value *PositiveCheck = IRB.CreateICmp(CmpInst::ICMP_SLT, ArithResult, + PositiveTemp, Name + ".poscheck"); + Value *NegativeCheck = IRB.CreateICmp(CmpInst::ICMP_SGT, ArithResult, + NegativeTemp, Name + ".negcheck"); + Value *IsPositive = + IRB.CreateICmp(CmpInst::ICMP_SGE, LHS, ConstantInt::get(IntTy, 0), + Name + ".ispos"); + OverflowResult = IRB.CreateSelect(IsPositive, PositiveCheck, + NegativeCheck, Name + ".select"); + } + } + + // Construct the struct result. 
+ Value *NewStruct = UndefValue::get(Call->getType()); + NewStruct = CreateInsertValue(&IRB, NewStruct, 0, ArithResult, Call); + NewStruct = CreateInsertValue(&IRB, NewStruct, 1, OverflowResult, Call); + Call->replaceAllUsesWith(NewStruct); + Call->eraseFromParent(); + } + + Intrinsic->eraseFromParent(); + return true; +} + +static const unsigned MaxBits = 64; + +bool ExpandArithWithOverflow::runOnModule(Module &M) { + bool Modified = false; + for (ExpandArith Op : ExpandArithOps) + for (int Signed = false; Signed <= true; ++Signed) + for (unsigned Bits = 8; Bits <= MaxBits; Bits <<= 1) + Modified |= Expand(&M, Bits, Op, Signed); + return Modified; +} + +ModulePass *llvm::createExpandArithWithOverflowPass() { + return new ExpandArithWithOverflow(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandByVal.cpp b/lib/Target/JSBackend/NaCl/ExpandByVal.cpp new file mode 100644 index 000000000000..7022f8e6e355 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandByVal.cpp @@ -0,0 +1,206 @@ +//===- ExpandByVal.cpp - Expand out use of "byval" and "sret" attributes---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out by-value passing of structs as arguments and +// return values. In LLVM IR terms, it expands out the "byval" and +// "sret" function argument attributes. +// +// The semantics of the "byval" attribute are that the callee function +// gets a private copy of the pointed-to argument that it is allowed +// to modify. In implementing this, we have a choice between making +// the caller responsible for making the copy or making the callee +// responsible for making the copy. We choose the former, because +// this matches how the normal native calling conventions work, and +// because it often allows the caller to write struct contents +// directly into the stack slot that it passes the callee, without an +// additional copy. +// +// Note that this pass does not attempt to modify functions that pass +// structs by value without using "byval" or "sret", such as: +// +// define %struct.X @func() ; struct return +// define void @func(%struct.X %arg) ; struct arg +// +// The pass only handles functions such as: +// +// define void @func(%struct.X* sret %result_buffer) ; struct return +// define void @func(%struct.X* byval %ptr_to_arg) ; struct arg +// +// This is because PNaCl Clang generates the latter and not the former. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that it can strip attributes from + // declared functions as well as defined functions. 
+ class ExpandByVal : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandByVal() : ModulePass(ID) { + initializeExpandByValPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandByVal::ID = 0; +INITIALIZE_PASS(ExpandByVal, "expand-byval", + "Expand out by-value passing of structs", + false, false) + +// removeAttribute() currently does not work on Attribute::Alignment +// (it fails with an assertion error), so we have to take a more +// convoluted route to removing this attribute by recreating the +// AttributeSet. +AttributeSet RemoveAttrs(LLVMContext &Context, AttributeSet Attrs) { + SmallVector AttrList; + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + unsigned Index = Attrs.getSlotIndex(Slot); + AttrBuilder AB; + for (AttributeSet::iterator Attr = Attrs.begin(Slot), E = Attrs.end(Slot); + Attr != E; ++Attr) { + if (Attr->isEnumAttribute() && + Attr->getKindAsEnum() != Attribute::ByVal && + Attr->getKindAsEnum() != Attribute::StructRet) { + AB.addAttribute(*Attr); + } + // IR semantics require that ByVal implies NoAlias. However, IR + // semantics do not require StructRet to imply NoAlias. For + // example, a global variable address can be passed as a + // StructRet argument, although Clang does not do so and Clang + // explicitly adds NoAlias to StructRet arguments. + if (Attr->isEnumAttribute() && + Attr->getKindAsEnum() == Attribute::ByVal) { + AB.addAttribute(Attribute::get(Context, Attribute::NoAlias)); + } + } + AttrList.push_back(AttributeSet::get(Context, Index, AB)); + } + return AttributeSet::get(Context, AttrList); +} + +// ExpandCall() can take a CallInst or an InvokeInst. It returns +// whether the instruction was modified. +template +static bool ExpandCall(DataLayout *DL, InstType *Call) { + bool Modify = false; + AttributeSet Attrs = Call->getAttributes(); + for (unsigned ArgIdx = 0; ArgIdx < Call->getNumArgOperands(); ++ArgIdx) { + unsigned AttrIdx = ArgIdx + 1; + + if (Attrs.hasAttribute(AttrIdx, Attribute::StructRet)) + Modify = true; + + if (Attrs.hasAttribute(AttrIdx, Attribute::ByVal)) { + Modify = true; + + Value *ArgPtr = Call->getArgOperand(ArgIdx); + Type *ArgType = ArgPtr->getType()->getPointerElementType(); + ConstantInt *ArgSize = ConstantInt::get( + Call->getContext(), APInt(64, DL->getTypeStoreSize(ArgType))); + // In principle, using the alignment from the argument attribute + // should be enough. However, Clang is not emitting this + // attribute for PNaCl. LLVM alloca instructions do not use the + // ABI alignment of the type, so this must be specified + // explicitly. + // See https://code.google.com/p/nativeclient/issues/detail?id=3403 + // + // Note that the parameter may have no alignment, but we have + // more useful information from the type which we can use here + // -- 0 in the parameter means no alignment is specified there, + // so it has default alignment, but in memcpy 0 means + // pessimistic alignment, the same as 1. + unsigned Alignment = + std::max(Attrs.getParamAlignment(AttrIdx), + DL->getABITypeAlignment(ArgType)); + + // Make a copy of the byval argument. 
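+      // Rough sketch of the rewrite (names and sizes are illustrative):
+      //   call void @f(%struct.X* byval align 4 %p)
+      // becomes
+      //   %p.byval_copy = alloca %struct.X, align 4   ; placed in the entry block
+      //   call void @llvm.lifetime.start(i64 <size>, i8* <copy>)
+      //   call void @llvm.memcpy.*(<copy>, %p, <size>, ...)
+      //   call void @f(%struct.X* %p.byval_copy)      ; byval/sret attributes dropped,
+      //                                               ; tail-call flag cleared
+      //   call void @llvm.lifetime.end(i64 <size>, i8* <copy>)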
+ Instruction *CopyBuf = new AllocaInst(ArgType, 0, Alignment, + ArgPtr->getName() + ".byval_copy"); + Function *Func = Call->getParent()->getParent(); + Func->getEntryBlock().getInstList().push_front(CopyBuf); + IRBuilder<> Builder(Call); + Builder.CreateLifetimeStart(CopyBuf, ArgSize); + // Using the argument's alignment attribute for the memcpy + // should be OK because the LLVM Language Reference says that + // the alignment attribute specifies "the alignment of the stack + // slot to form and the known alignment of the pointer specified + // to the call site". + Instruction *MemCpy = Builder.CreateMemCpy(CopyBuf, ArgPtr, ArgSize, + Alignment); + MemCpy->setDebugLoc(Call->getDebugLoc()); + + Call->setArgOperand(ArgIdx, CopyBuf); + + // Mark the argument copy as unused using llvm.lifetime.end. + if (isa(Call)) { + BasicBlock::iterator It = BasicBlock::iterator(Call); + Builder.SetInsertPoint(&*(++It)); + Builder.CreateLifetimeEnd(CopyBuf, ArgSize); + } else if (InvokeInst *Invoke = dyn_cast(Call)) { + Builder.SetInsertPoint(&*Invoke->getNormalDest()->getFirstInsertionPt()); + Builder.CreateLifetimeEnd(CopyBuf, ArgSize); + Builder.SetInsertPoint(&*Invoke->getUnwindDest()->getFirstInsertionPt()); + Builder.CreateLifetimeEnd(CopyBuf, ArgSize); + } + } + } + if (Modify) { + Call->setAttributes(RemoveAttrs(Call->getContext(), Attrs)); + + if (CallInst *CI = dyn_cast(Call)) { + // This is no longer a tail call because the callee references + // memory alloca'd by the caller. + CI->setTailCall(false); + } + } + return Modify; +} + +bool ExpandByVal::runOnModule(Module &M) { + bool Modified = false; + DataLayout DL(&M); + + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + AttributeSet NewAttrs = RemoveAttrs(Func->getContext(), + Func->getAttributes()); + Modified |= (NewAttrs != Func->getAttributes()); + Func->setAttributes(NewAttrs); + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Inst = BB->begin(), E = BB->end(); + Inst != E; ++Inst) { + if (CallInst *Call = dyn_cast(Inst)) { + Modified |= ExpandCall(&DL, Call); + } else if (InvokeInst *Call = dyn_cast(Inst)) { + Modified |= ExpandCall(&DL, Call); + } + } + } + } + + return Modified; +} + +ModulePass *llvm::createExpandByValPass() { + return new ExpandByVal(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandConstantExpr.cpp b/lib/Target/JSBackend/NaCl/ExpandConstantExpr.cpp new file mode 100644 index 000000000000..82287ef90c06 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandConstantExpr.cpp @@ -0,0 +1,121 @@ +//===- ExpandConstantExpr.cpp - Convert ConstantExprs to Instructions------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out ConstantExprs into Instructions. +// +// Note that this only converts ConstantExprs that are referenced by +// Instructions. It does not convert ConstantExprs that are used as +// initializers for global variables. +// +// This simplifies the language so that the PNaCl translator does not +// need to handle ConstantExprs as part of a stable wire format for +// PNaCl. 
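+//
+// Minimal sketch (this Emscripten variant only expands expressions that
+// involve illegal integer types, as noted in the code below):
+//   %x = add i64 %y, ptrtoint (i32* @g to i64)
+// becomes
+//   %expanded = ptrtoint i32* @g to i64
+//   %x = add i64 %y, %expanded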
+// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +static bool expandInstruction(Instruction *Inst); + +namespace { + // This is a FunctionPass because our handling of PHI nodes means + // that our modifications may cross BasicBlocks. + struct ExpandConstantExpr : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + ExpandConstantExpr() : FunctionPass(ID) { + initializeExpandConstantExprPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &Func); + }; +} + +char ExpandConstantExpr::ID = 0; +INITIALIZE_PASS(ExpandConstantExpr, "expand-constant-expr", + "Expand out ConstantExprs into Instructions", + false, false) + +static Value *expandConstantExpr(Instruction *InsertPt, ConstantExpr *Expr) { + Instruction *NewInst = Expr->getAsInstruction(); + NewInst->insertBefore(InsertPt); + NewInst->setName("expanded"); + expandInstruction(NewInst); + return NewInst; +} + +// XXX Emscripten: Utilities for illegal expressions. +static bool isIllegal(Type *T) { + if (!T->isIntegerTy()) return false; + unsigned Bits = T->getIntegerBitWidth(); + // we need to expand out not just 64-bit and larger values, but also i24s, so PromoteIntegers can process them + return Bits != 1 && Bits != 8 && Bits != 16 && Bits != 32; +} +static bool ContainsIllegalTypes(const Value *Expr) { + if (isIllegal(Expr->getType())) + return true; + if (const User *U = dyn_cast(Expr)) { + for (User::const_op_iterator I = U->op_begin(), E = U->op_end(); I != E; ++I) { + if (Constant *C = dyn_cast(*I)) { + if (!isa(C) && ContainsIllegalTypes(C)) { + return true; + } + } + } + } + return false; +} + +static bool expandInstruction(Instruction *Inst) { + // A landingpad can only accept ConstantExprs, so it should remain + // unmodified. + if (isa(Inst)) + return false; + + bool Modified = false; + for (unsigned OpNum = 0; OpNum < Inst->getNumOperands(); OpNum++) { + if (ConstantExpr *Expr = + dyn_cast(Inst->getOperand(OpNum))) { + // XXX Emscripten: Only do the expansion of the expression contains + // illegal types, for now, since we can handle legal ConstantExprs + // in the backend directly. + if (ContainsIllegalTypes(Expr)) { + Modified = true; + Use *U = &Inst->getOperandUse(OpNum); + PhiSafeReplaceUses(U, expandConstantExpr(PhiSafeInsertPt(U), Expr)); + } + } + } + return Modified; +} + +bool ExpandConstantExpr::runOnFunction(Function &Func) { + bool Modified = false; + for (llvm::Function::iterator BB = Func.begin(), E = Func.end(); + BB != E; + ++BB) { + for (BasicBlock::InstListType::iterator Inst = BB->begin(), E = BB->end(); + Inst != E; + ++Inst) { + Modified |= expandInstruction(&*Inst); + } + } + return Modified; +} + +FunctionPass *llvm::createExpandConstantExprPass() { + return new ExpandConstantExpr(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandCtors.cpp b/lib/Target/JSBackend/NaCl/ExpandCtors.cpp new file mode 100644 index 000000000000..97398870b400 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandCtors.cpp @@ -0,0 +1,154 @@ +//===- ExpandCtors.cpp - Convert ctors/dtors to concrete arrays -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass converts LLVM's special symbols llvm.global_ctors and +// llvm.global_dtors to concrete arrays, __init_array_start/end and +// __fini_array_start/end, that are usable by a C library. +// +// This pass sorts the contents of global_ctors/dtors according to the +// priority values they contain and removes the priority values. +// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/TypeBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + struct ExpandCtors : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ExpandCtors() : ModulePass(ID) { + initializeExpandCtorsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandCtors::ID = 0; +INITIALIZE_PASS(ExpandCtors, "nacl-expand-ctors", + "Hook up constructor and destructor arrays to libc", + false, false) + +static void setGlobalVariableValue(Module &M, const char *Name, + Constant *Value) { + if (GlobalVariable *Var = M.getNamedGlobal(Name)) { + if (Var->hasInitializer()) { + report_fatal_error(std::string("Variable ") + Name + + " already has an initializer"); + } + Var->replaceAllUsesWith(ConstantExpr::getBitCast(Value, Var->getType())); + Var->eraseFromParent(); + } +} + +struct FuncArrayEntry { + uint64_t priority; + Constant *func; +}; + +static bool compareEntries(FuncArrayEntry Entry1, FuncArrayEntry Entry2) { + return Entry1.priority < Entry2.priority; +} + +static void readFuncList(GlobalVariable *Array, std::vector *Funcs) { + if (!Array->hasInitializer()) + return; + Constant *Init = Array->getInitializer(); + ArrayType *Ty = dyn_cast(Init->getType()); + if (!Ty) { + errs() << "Initializer: " << *Array->getInitializer() << "\n"; + report_fatal_error("ExpandCtors: Initializer is not of array type"); + } + if (Ty->getNumElements() == 0) + return; + ConstantArray *InitList = dyn_cast(Init); + if (!InitList) { + errs() << "Initializer: " << *Array->getInitializer() << "\n"; + report_fatal_error("ExpandCtors: Unexpected initializer ConstantExpr"); + } + std::vector FuncsToSort; + for (unsigned Index = 0; Index < InitList->getNumOperands(); ++Index) { + ConstantStruct *CS = cast(InitList->getOperand(Index)); + FuncArrayEntry Entry; + Entry.priority = cast(CS->getOperand(0))->getZExtValue(); + Entry.func = CS->getOperand(1); + FuncsToSort.push_back(Entry); + } + + std::sort(FuncsToSort.begin(), FuncsToSort.end(), compareEntries); + for (std::vector::iterator Iter = FuncsToSort.begin(); + Iter != FuncsToSort.end(); + ++Iter) { + Funcs->push_back(Iter->func); + } +} + +static void defineFuncArray(Module &M, const char *LlvmArrayName, + const char *StartSymbol, + const char *EndSymbol) { + std::vector Funcs; + + GlobalVariable *Array = M.getNamedGlobal(LlvmArrayName); + if (Array) { + readFuncList(Array, &Funcs); + // No code should be referencing global_ctors/global_dtors, + // because this symbol is internal to LLVM. 
+ Array->eraseFromParent(); + } + + Type *FuncTy = FunctionType::get(Type::getVoidTy(M.getContext()), false); + Type *FuncPtrTy = FuncTy->getPointerTo(); + ArrayType *ArrayTy = ArrayType::get(FuncPtrTy, Funcs.size()); + GlobalVariable *NewArray = + new GlobalVariable(M, ArrayTy, /* isConstant= */ true, + GlobalValue::InternalLinkage, + ConstantArray::get(ArrayTy, Funcs)); + setGlobalVariableValue(M, StartSymbol, NewArray); + // We do this last so that LLVM gives NewArray the name + // "__{init,fini}_array_start" without adding any suffixes to + // disambiguate from the original GlobalVariable's name. This is + // not essential -- it just makes the output easier to understand + // when looking at symbols for debugging. + NewArray->setName(StartSymbol); + + // We replace "__{init,fini}_array_end" with the address of the end + // of NewArray. This removes the name "__{init,fini}_array_end" + // from the output, which is not ideal for debugging. Ideally we + // would convert "__{init,fini}_array_end" to being a GlobalAlias + // that points to the end of the array. However, unfortunately LLVM + // does not generate correct code when a GlobalAlias contains a + // GetElementPtr ConstantExpr. + Constant *NewArrayEnd = + ConstantExpr::getGetElementPtr(ArrayTy, NewArray, + ConstantInt::get(M.getContext(), + APInt(32, 1))); + setGlobalVariableValue(M, EndSymbol, NewArrayEnd); +} + +bool ExpandCtors::runOnModule(Module &M) { + defineFuncArray(M, "llvm.global_ctors", + "__init_array_start", "__init_array_end"); + defineFuncArray(M, "llvm.global_dtors", + "__fini_array_start", "__fini_array_end"); + return true; +} + +ModulePass *llvm::createExpandCtorsPass() { + return new ExpandCtors(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandGetElementPtr.cpp b/lib/Target/JSBackend/NaCl/ExpandGetElementPtr.cpp new file mode 100644 index 000000000000..771b5531909f --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandGetElementPtr.cpp @@ -0,0 +1,151 @@ +//===- ExpandGetElementPtr.cpp - Expand GetElementPtr into arithmetic------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out GetElementPtr instructions into ptrtoint, +// inttoptr and arithmetic instructions. +// +// This simplifies the language so that the PNaCl translator does not +// need to handle GetElementPtr and struct types as part of a stable +// wire format for PNaCl. +// +// Note that we drop the "inbounds" attribute of GetElementPtr. 
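+//
+// Rough sketch, assuming 32-bit pointers and %struct.S = type { i32, i32 }:
+//   %p = getelementptr inbounds %struct.S, %struct.S* %s, i32 0, i32 1
+// becomes
+//   %gep_int = ptrtoint %struct.S* %s to i32
+//   %gep = add i32 %gep_int, 4
+//   %p = inttoptr i32 %gep to i32*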
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class ExpandGetElementPtr : public BasicBlockPass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandGetElementPtr() : BasicBlockPass(ID) { + initializeExpandGetElementPtrPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnBasicBlock(BasicBlock &BB); + }; +} + +char ExpandGetElementPtr::ID = 0; +INITIALIZE_PASS(ExpandGetElementPtr, "expand-getelementptr", + "Expand out GetElementPtr instructions into arithmetic", + false, false) + +static Value *CastToPtrSize(Value *Val, Instruction *InsertPt, + const DebugLoc &Debug, Type *PtrType) { + unsigned ValSize = Val->getType()->getIntegerBitWidth(); + unsigned PtrSize = PtrType->getIntegerBitWidth(); + if (ValSize == PtrSize) + return Val; + Instruction *Inst; + if (ValSize > PtrSize) { + Inst = new TruncInst(Val, PtrType, "gep_trunc", InsertPt); + } else { + // GEP indexes must be sign-extended. + Inst = new SExtInst(Val, PtrType, "gep_sext", InsertPt); + } + Inst->setDebugLoc(Debug); + return Inst; +} + +static void FlushOffset(Instruction **Ptr, uint64_t *CurrentOffset, + Instruction *InsertPt, const DebugLoc &Debug, + Type *PtrType) { + if (*CurrentOffset) { + *Ptr = BinaryOperator::Create(Instruction::Add, *Ptr, + ConstantInt::get(PtrType, *CurrentOffset), + "gep", InsertPt); + (*Ptr)->setDebugLoc(Debug); + *CurrentOffset = 0; + } +} + +static void ExpandGEP(GetElementPtrInst *GEP, DataLayout *DL, Type *PtrType) { + const DebugLoc &Debug = GEP->getDebugLoc(); + Instruction *Ptr = new PtrToIntInst(GEP->getPointerOperand(), PtrType, + "gep_int", GEP); + Ptr->setDebugLoc(Debug); + + Type *CurrentTy = GEP->getPointerOperand()->getType(); + // We do some limited constant folding ourselves. An alternative + // would be to generate verbose, unfolded output (e.g. multiple + // adds; adds of zero constants) and use a later pass such as + // "-instcombine" to clean that up. However, "-instcombine" can + // reintroduce GetElementPtr instructions. 
+ uint64_t CurrentOffset = 0; + + for (GetElementPtrInst::op_iterator Op = GEP->op_begin() + 1; + Op != GEP->op_end(); + ++Op) { + Value *Index = *Op; + if (StructType *StTy = dyn_cast(CurrentTy)) { + uint64_t Field = cast(Op)->getZExtValue(); + CurrentTy = StTy->getElementType(Field); + CurrentOffset += DL->getStructLayout(StTy)->getElementOffset(Field); + } else { + CurrentTy = cast(CurrentTy)->getElementType(); + uint64_t ElementSize = DL->getTypeAllocSize(CurrentTy); + if (ConstantInt *C = dyn_cast(Index)) { + CurrentOffset += C->getSExtValue() * ElementSize; + } else { + FlushOffset(&Ptr, &CurrentOffset, GEP, Debug, PtrType); + Index = CastToPtrSize(Index, GEP, Debug, PtrType); + if (ElementSize != 1) { + Index = CopyDebug( + BinaryOperator::Create(Instruction::Mul, Index, + ConstantInt::get(PtrType, ElementSize), + "gep_array", GEP), + GEP); + } + Ptr = BinaryOperator::Create(Instruction::Add, Ptr, + Index, "gep", GEP); + Ptr->setDebugLoc(Debug); + } + } + } + FlushOffset(&Ptr, &CurrentOffset, GEP, Debug, PtrType); + + assert(CurrentTy == GEP->getType()->getElementType()); + Instruction *Result = new IntToPtrInst(Ptr, GEP->getType(), "", GEP); + Result->setDebugLoc(Debug); + Result->takeName(GEP); + GEP->replaceAllUsesWith(Result); + GEP->eraseFromParent(); +} + +bool ExpandGetElementPtr::runOnBasicBlock(BasicBlock &BB) { + bool Modified = false; + DataLayout DL(BB.getParent()->getParent()); + Type *PtrType = DL.getIntPtrType(BB.getContext()); + + for (BasicBlock::InstListType::iterator Iter = BB.begin(); + Iter != BB.end(); ) { + Instruction *Inst = &*Iter++; + if (GetElementPtrInst *GEP = dyn_cast(Inst)) { + Modified = true; + ExpandGEP(GEP, &DL, PtrType); + } + } + return Modified; +} + +BasicBlockPass *llvm::createExpandGetElementPtrPass() { + return new ExpandGetElementPtr(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandI64.cpp b/lib/Target/JSBackend/NaCl/ExpandI64.cpp new file mode 100644 index 000000000000..b403bc9b875d --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandI64.cpp @@ -0,0 +1,1231 @@ +//===- ExpandI64.cpp - Expand i64 and wider integer types -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===------------------------------------------------------------------===// +// +// This pass expands and lowers all operations on integers i64 and wider +// into 32-bit operations that can be handled by JS in a natural way. +// +// 64-bit variables become pairs of 2 32-bit variables, for the low and +// high 32 bit chunks. This happens for both registers and function +// arguments. Function return values become a return of the low 32 bits +// and a store of the high 32-bits in tempRet0, a global helper variable. +// Larger values become more chunks of 32 bits. Currently we require that +// types be a multiple of 32 bits. +// +// Many operations then become simple pairs of operations, for example +// bitwise AND becomes and AND of each 32-bit chunk. More complex operations +// like addition are lowered into calls into library support code in +// Emscripten (i64Add for example). 
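+//
+// As an illustrative sketch (i64Add is the Emscripten runtime helper named
+// above; the high-half helper is shown here as getHigh32 for illustration):
+//   %r = add i64 %a, %b
+// conceptually becomes
+//   %r_low = call i32 @i64Add(i32 %a_low, i32 %a_high, i32 %b_low, i32 %b_high)
+//   %r_high = call i32 @getHigh32()
+// with the high 32 bits passed back through tempRet0.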
+// +//===------------------------------------------------------------------===// + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/Local.h" +#include +#include + +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + + struct PhiBlockChange { + BasicBlock *DD, *SwitchBB, *NewBB; + }; + + typedef SmallVector ChunksVec; + typedef std::map SplitsMap; + + typedef SmallVector PHIVec; + typedef SmallVector DeadVec; + + // This is a ModulePass because the pass recreates functions in + // order to expand i64 arguments to pairs of i32s. + class ExpandI64 : public ModulePass { + bool Changed; + const DataLayout *DL; + Module *TheModule; + + SplitsMap Splits; // old illegal value to new insts + PHIVec Phis; + std::vector PhiBlockChanges; + + // If the function has an illegal return or argument, create a legal version + void ensureLegalFunc(Function *F); + + // If a function is illegal, remove it + void removeIllegalFunc(Function *F); + + // splits an illegal instruction into 32-bit chunks. We do + // not yet have the values yet, as they depend on other + // splits, so store the parts in Splits, for FinalizeInst. + bool splitInst(Instruction *I); + + // For an illegal value, returns the split out chunks + // representing the low and high parts, that splitInst + // generated. 
+ // The value can also be a constant, in which case we just + // split it, or a function argument, in which case we + // map to the proper legalized new arguments + // + // @param AllowUnreachable It is possible for phi nodes + // to refer to unreachable blocks, + // which our traversal never + // reaches; this flag lets us + // ignore those - otherwise, + // not finding chunks is fatal + ChunksVec getChunks(Value *V, bool AllowUnreachable=false); + + Function *Add, *Sub, *Mul, *SDiv, *UDiv, *SRem, *URem, *LShr, *AShr, *Shl, *GetHigh, *SetHigh, *FtoILow, *FtoIHigh, *DtoILow, *DtoIHigh, *SItoF, *UItoF, *SItoD, *UItoD, *BItoD, *BDtoILow, *BDtoIHigh; + + Function *AtomicAdd, *AtomicSub, *AtomicAnd, *AtomicOr, *AtomicXor; + + void ensureFuncs(); + unsigned getNumChunks(Type *T); + + public: + static char ID; + ExpandI64() : ModulePass(ID) { + initializeExpandI64Pass(*PassRegistry::getPassRegistry()); + + Add = Sub = Mul = SDiv = UDiv = SRem = URem = LShr = AShr = Shl = GetHigh = SetHigh = AtomicAdd = AtomicSub = AtomicAnd = AtomicOr = AtomicXor = NULL; + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandI64::ID = 0; +INITIALIZE_PASS(ExpandI64, "expand-illegal-ints", + "Expand and lower illegal >i32 operations into 32-bit chunks", + false, false) + +// Utilities + +static Instruction *CopyDebug(Instruction *NewInst, Instruction *Original) { + NewInst->setDebugLoc(Original->getDebugLoc()); + return NewInst; +} + +static bool isIllegal(Type *T) { + return T->isIntegerTy() && T->getIntegerBitWidth() > 32; +} + +static FunctionType *getLegalizedFunctionType(FunctionType *FT) { + SmallVector ArgTypes; // XXX + int Num = FT->getNumParams(); + for (int i = 0; i < Num; i++) { + Type *T = FT->getParamType(i); + if (!isIllegal(T)) { + ArgTypes.push_back(T); + } else { + Type *i32 = Type::getInt32Ty(FT->getContext()); + ArgTypes.push_back(i32); + ArgTypes.push_back(i32); + } + } + Type *RT = FT->getReturnType(); + Type *NewRT; + if (!isIllegal(RT)) { + NewRT = RT; + } else { + NewRT = Type::getInt32Ty(FT->getContext()); + } + return FunctionType::get(NewRT, ArgTypes, false); +} + +// Implementation of ExpandI64 + +static bool okToRemainIllegal(Function *F) { + StringRef Name = F->getName(); + if (Name == "llvm.dbg.value") return true; + + // XXX EMSCRIPTEN: These take an i64 immediate argument; since they're not + // real instructions, we don't need to legalize them. + if (Name == "llvm.lifetime.start") return true; + if (Name == "llvm.lifetime.end") return true; + if (Name == "llvm.invariant.start") return true; + if (Name == "llvm.invariant.end") return true; + + return false; +} + +unsigned ExpandI64::getNumChunks(Type *T) { + unsigned Num = DL->getTypeSizeInBits(T); + return (Num + 31) / 32; +} + +static bool isLegalFunctionType(FunctionType *FT) { + if (isIllegal(FT->getReturnType())) { + return false; + } + + int Num = FT->getNumParams(); + for (int i = 0; i < Num; i++) { + if (isIllegal(FT->getParamType(i))) { + return false; + } + } + + return true; +} + +static bool isLegalInstruction(const Instruction *I) { + if (isIllegal(I->getType())) { + return false; + } + + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (isIllegal(I->getOperand(i)->getType())) { + return false; + } + } + + return true; +} + +// We can't use RecreateFunction because we need to handle +// function and argument attributes specially. 
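+//
+// For example (a sketch, with a hypothetical function name), a declaration
+// such as
+//   declare i64 @f(i64, double)
+// is recreated with the legalized type
+//   declare i32 @f(i32, i32, double)
+// where the i64 parameter becomes a lo/hi pair of i32s and the high 32 bits
+// of the return value travel out of band via setHigh32/getHigh32.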
+static Function *RecreateFunctionLegalized(Function *F, FunctionType *NewType) { + Function *NewFunc = Function::Create(NewType, F->getLinkage()); + + AttributeSet Attrs = F->getAttributes(); + AttributeSet FnAttrs = Attrs.getFnAttributes(); + + // Legalizing the return value is done by storing part of the value into + // static storage. Subsequent analysis will see this as a memory access, + // so we can no longer claim to be readonly or readnone. + if (isIllegal(F->getReturnType())) { + FnAttrs = FnAttrs.removeAttribute(F->getContext(), + AttributeSet::FunctionIndex, + Attribute::ReadOnly); + FnAttrs = FnAttrs.removeAttribute(F->getContext(), + AttributeSet::FunctionIndex, + Attribute::ReadNone); + } + + NewFunc->addAttributes(AttributeSet::FunctionIndex, FnAttrs); + NewFunc->addAttributes(AttributeSet::ReturnIndex, Attrs.getRetAttributes()); + Function::arg_iterator AI = F->arg_begin(); + + // We need to recreate the attribute set, with the right indexes + AttributeSet NewAttrs; + unsigned NumArgs = F->arg_size(); + for (unsigned i = 1, j = 1; i < NumArgs+1; i++, j++, AI++) { + if (isIllegal(AI->getType())) { + j++; + continue; + } + if (!Attrs.hasAttributes(i)) continue; + AttributeSet ParamAttrs = Attrs.getParamAttributes(i); + AttrBuilder AB; + unsigned NumSlots = ParamAttrs.getNumSlots(); + for (unsigned k = 0; k < NumSlots; k++) { + for (AttributeSet::iterator I = ParamAttrs.begin(k), E = ParamAttrs.end(k); I != E; I++) { + AB.addAttribute(*I); + } + } + NewFunc->addAttributes(j, AttributeSet::get(F->getContext(), j, AB)); + } + + F->getParent()->getFunctionList().insert(F->getIterator(), NewFunc); + NewFunc->takeName(F); + NewFunc->getBasicBlockList().splice(NewFunc->begin(), + F->getBasicBlockList()); + F->replaceAllUsesWith( + ConstantExpr::getBitCast(NewFunc, + F->getFunctionType()->getPointerTo())); + return NewFunc; +} + +void ExpandI64::ensureLegalFunc(Function *F) { + if (okToRemainIllegal(F)) return; + + FunctionType *FT = F->getFunctionType(); + if (isLegalFunctionType(FT)) return; + + Changed = true; + Function *NF = RecreateFunctionLegalized(F, getLegalizedFunctionType(FT)); + std::string Name = NF->getName(); + if (strncmp(Name.c_str(), "llvm.", 5) == 0) { + // this is an intrinsic, and we are changing its signature, which will annoy LLVM, so rename + const size_t len = Name.size(); + SmallString<256> NewName; + NewName.resize(len); + for (unsigned i = 0; i < len; i++) { + NewName[i] = Name[i] != '.' ? 
Name[i] : '_'; + } + NF->setName(Twine(NewName)); + } + + // Move and update arguments + for (Function::arg_iterator Arg = F->arg_begin(), E = F->arg_end(), NewArg = NF->arg_begin(); + Arg != E; ++Arg) { + if (Arg->getType() == NewArg->getType()) { + NewArg->takeName(&*Arg); + Arg->replaceAllUsesWith(&*NewArg); + NewArg++; + } else { + // This was legalized + ChunksVec &Chunks = Splits[&*Arg]; + int Num = getNumChunks(Arg->getType()); + assert(Num == 2); + for (int i = 0; i < Num; i++) { + Chunks.push_back(&*NewArg); + if (NewArg->hasName()) Chunks[i]->setName(NewArg->getName() + "$" + utostr(i)); + NewArg++; + } + } + } +} + +void ExpandI64::removeIllegalFunc(Function *F) { + if (okToRemainIllegal(F)) return; + + FunctionType *FT = F->getFunctionType(); + if (!isLegalFunctionType(FT)) { + F->eraseFromParent(); + } +} + +bool ExpandI64::splitInst(Instruction *I) { + Type *i32 = Type::getInt32Ty(I->getContext()); + Type *i32P = i32->getPointerTo(); + Type *i64 = Type::getInt64Ty(I->getContext()); + Value *Zero = Constant::getNullValue(i32); + + ChunksVec &Chunks = Splits[I]; + + switch (I->getOpcode()) { + case Instruction::GetElementPtr: { + GetElementPtrInst *GEP = cast(I); + SmallVector NewOps; + for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) { + Value *Op = I->getOperand(i); + if (isIllegal(Op->getType())) { + // Truncate the operand down to one chunk. + NewOps.push_back(getChunks(Op)[0]); + } else { + NewOps.push_back(Op); + } + } + Value *NewGEP = CopyDebug(GetElementPtrInst::Create(GEP->getSourceElementType(), GEP->getPointerOperand(), NewOps, "", GEP), GEP); + Chunks.push_back(NewGEP); + I->replaceAllUsesWith(NewGEP); + break; + } + case Instruction::SExt: { + ChunksVec InputChunks; + Value *Op = I->getOperand(0); + if (isIllegal(Op->getType())) { + InputChunks = getChunks(Op); + } else { + InputChunks.push_back(Op); + } + + for (unsigned i = 0, e = InputChunks.size(); i != e; ++i) { + Value *Input = InputChunks[i]; + + Type *T = Input->getType(); + Value *Chunk; + if (T->getIntegerBitWidth() < 32) { + Chunk = CopyDebug(new SExtInst(Input, i32, "", I), I); + } else { + assert(T->getIntegerBitWidth() == 32); + Chunk = Input; + } + Chunks.push_back(Chunk); + } + + Instruction *Check = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_SLT, Chunks.back(), Zero), I); + int Num = getNumChunks(I->getType()); + for (int i = Chunks.size(); i < Num; i++) { + Instruction *High = CopyDebug(new SExtInst(Check, i32, "", I), I); + Chunks.push_back(High); + } + break; + } + case Instruction::PtrToInt: + case Instruction::ZExt: { + Value *Op = I->getOperand(0); + ChunksVec InputChunks; + if (I->getOpcode() == Instruction::PtrToInt) { + InputChunks.push_back(CopyDebug(new PtrToIntInst(Op, i32, "", I), I)); + } else if (isIllegal(Op->getType())) { + InputChunks = getChunks(Op); + } else { + InputChunks.push_back(Op); + } + + for (unsigned i = 0, e = InputChunks.size(); i != e; ++i) { + Value *Input = InputChunks[i]; + Type *T = Input->getType(); + + Value *Chunk; + if (T->getIntegerBitWidth() < 32) { + Chunk = CopyDebug(new ZExtInst(Input, i32, "", I), I); + } else { + assert(T->getIntegerBitWidth() == 32); + Chunk = Input; + } + Chunks.push_back(Chunk); + } + + int Num = getNumChunks(I->getType()); + for (int i = Chunks.size(); i < Num; i++) { + Chunks.push_back(Zero); + } + break; + } + case Instruction::IntToPtr: + case Instruction::Trunc: { + unsigned Num = getNumChunks(I->getType()); + unsigned NumBits = DL->getTypeSizeInBits(I->getType()); + ChunksVec InputChunks = 
getChunks(I->getOperand(0)); + for (unsigned i = 0; i < Num; i++) { + Value *Input = InputChunks[i]; + + Value *Chunk; + if (NumBits < 32) { + Chunk = CopyDebug(new TruncInst(Input, IntegerType::get(I->getContext(), NumBits), "", I), I); + NumBits = 0; + } else { + Chunk = Input; + NumBits -= 32; + } + if (I->getOpcode() == Instruction::IntToPtr) { + assert(i == 0); + Chunk = CopyDebug(new IntToPtrInst(Chunk, I->getType(), "", I), I); + } + Chunks.push_back(Chunk); + } + if (!isIllegal(I->getType())) { + assert(Chunks.size() == 1); + I->replaceAllUsesWith(Chunks[0]); + } + break; + } + case Instruction::Load: { + LoadInst *LI = cast(I); + Instruction *AI = CopyDebug(new PtrToIntInst(LI->getPointerOperand(), i32, "", I), I); + int Num = getNumChunks(I->getType()); + for (int i = 0; i < Num; i++) { + Instruction *Add = i == 0 ? AI : CopyDebug(BinaryOperator::Create(Instruction::Add, AI, ConstantInt::get(i32, 4*i), "", I), I); + Instruction *Ptr = CopyDebug(new IntToPtrInst(Add, i32P, "", I), I); + LoadInst *Chunk = new LoadInst(Ptr, "", I); CopyDebug(Chunk, I); + Chunk->setAlignment(MinAlign(LI->getAlignment() == 0 ? + DL->getABITypeAlignment(LI->getType()) : + LI->getAlignment(), + 4*i)); + Chunk->setVolatile(LI->isVolatile()); + Chunk->setOrdering(LI->getOrdering()); + Chunk->setSynchScope(LI->getSynchScope()); + Chunks.push_back(Chunk); + } + break; + } + case Instruction::Store: { + StoreInst *SI = cast(I); + Instruction *AI = CopyDebug(new PtrToIntInst(SI->getPointerOperand(), i32, "", I), I); + ChunksVec InputChunks = getChunks(SI->getValueOperand()); + int Num = InputChunks.size(); + for (int i = 0; i < Num; i++) { + Instruction *Add = i == 0 ? AI : CopyDebug(BinaryOperator::Create(Instruction::Add, AI, ConstantInt::get(i32, 4*i), "", I), I); + Instruction *Ptr = CopyDebug(new IntToPtrInst(Add, i32P, "", I), I); + StoreInst *Chunk = new StoreInst(InputChunks[i], Ptr, I); + Chunk->setAlignment(MinAlign(SI->getAlignment() == 0 ? 
+ DL->getABITypeAlignment(SI->getValueOperand()->getType()) : + SI->getAlignment(), + 4*i)); + Chunk->setVolatile(SI->isVolatile()); + Chunk->setOrdering(SI->getOrdering()); + Chunk->setSynchScope(SI->getSynchScope()); + CopyDebug(Chunk, I); + } + break; + } + case Instruction::Ret: { + assert(I->getOperand(0)->getType() == i64); + ChunksVec InputChunks = getChunks(I->getOperand(0)); + ensureFuncs(); + SmallVector Args; + Args.push_back(InputChunks[1]); + CopyDebug(CallInst::Create(SetHigh, Args, "", I), I); + CopyDebug(ReturnInst::Create(I->getContext(), InputChunks[0], I), I); + break; + } + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::Shl: { + ChunksVec LeftChunks = getChunks(I->getOperand(0)); + ChunksVec RightChunks = getChunks(I->getOperand(1)); + unsigned Num = getNumChunks(I->getType()); + if (Num == 2) { + ensureFuncs(); + Value *Low = NULL, *High = NULL; + Function *F = NULL; + switch (I->getOpcode()) { + case Instruction::Add: F = Add; break; + case Instruction::Sub: F = Sub; break; + case Instruction::Mul: F = Mul; break; + case Instruction::SDiv: F = SDiv; break; + case Instruction::UDiv: F = UDiv; break; + case Instruction::SRem: F = SRem; break; + case Instruction::URem: F = URem; break; + case Instruction::AShr: F = AShr; break; + case Instruction::LShr: { + if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { + unsigned Shifts = CI->getZExtValue(); + if (Shifts == 32) { + Low = LeftChunks[1]; + High = Zero; + break; + } + } + F = LShr; + break; + } + case Instruction::Shl: { + if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { + const APInt &Shifts = CI->getValue(); + if (Shifts == 32) { + Low = Zero; + High = LeftChunks[0]; + break; + } + } + F = Shl; + break; + } + default: assert(0); + } + if (F) { + // use a library call, no special optimization was found + SmallVector Args; + Args.push_back(LeftChunks[0]); + Args.push_back(LeftChunks[1]); + Args.push_back(RightChunks[0]); + Args.push_back(RightChunks[1]); + Low = CopyDebug(CallInst::Create(F, Args, "", I), I); + High = CopyDebug(CallInst::Create(GetHigh, "", I), I); + } + Chunks.push_back(Low); + Chunks.push_back(High); + } else { + // more than 64 bits. 
handle simple shifts for lshr and shl + assert(I->getOpcode() == Instruction::LShr || I->getOpcode() == Instruction::AShr || I->getOpcode() == Instruction::Shl); + ConstantInt *CI = cast(I->getOperand(1)); + unsigned Shifts = CI->getZExtValue(); + unsigned Fraction = Shifts % 32; + Constant *Frac = ConstantInt::get(i32, Fraction); + Constant *Comp = ConstantInt::get(i32, 32 - Fraction); + Instruction::BinaryOps Opcode, Reverse; + unsigned ShiftChunks, Dir; + Value *TopFiller = Zero; + if (I->getOpcode() == Instruction::Shl) { + Opcode = Instruction::Shl; + Reverse = Instruction::LShr; + ShiftChunks = -(Shifts/32); + Dir = -1; + } else { + Opcode = Instruction::LShr; + Reverse = Instruction::Shl; + ShiftChunks = Shifts/32; + Dir = 1; + if (I->getOpcode() == Instruction::AShr) { + Value *Cond = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_SLT, LeftChunks[LeftChunks.size()-1], Zero), I); + TopFiller = CopyDebug(SelectInst::Create(Cond, ConstantInt::get(i32, -1), Zero, "", I), I); + } + } + for (unsigned i = 0; i < Num; i++) { + Value *L; + if (i + ShiftChunks < LeftChunks.size()) { + L = LeftChunks[i + ShiftChunks]; + } else { + L = Zero; + } + + Value *H; + if (i + ShiftChunks + Dir < LeftChunks.size()) { + H = LeftChunks[i + ShiftChunks + Dir]; + } else { + H = TopFiller; + } + + // shifted the fractional amount + if (Frac != Zero && L != Zero) { + if (Fraction == 32) { + L = Zero; + } else { + L = CopyDebug(BinaryOperator::Create(Opcode, L, Frac, "", I), I); + } + } + // shifted the complement-fractional amount to the other side + if (Comp != Zero && H != Zero) { + if (Fraction == 0) { + H = TopFiller; + } else { + H = CopyDebug(BinaryOperator::Create(Reverse, H, Comp, "", I), I); + } + } + + // Or the parts together. Since we may have zero, try to fold it away. + if (Value *V = SimplifyBinOp(Instruction::Or, L, H, *DL)) { + Chunks.push_back(V); + } else { + Chunks.push_back(CopyDebug(BinaryOperator::Create(Instruction::Or, L, H, "", I), I)); + } + } + } + break; + } + case Instruction::ICmp: { + ICmpInst *CE = cast(I); + ICmpInst::Predicate Pred = CE->getPredicate(); + ChunksVec LeftChunks = getChunks(I->getOperand(0)); + ChunksVec RightChunks = getChunks(I->getOperand(1)); + switch (Pred) { + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: { + ICmpInst::Predicate PartPred; // the predicate to use on each of the parts + llvm::Instruction::BinaryOps CombineOp; // the predicate to use to combine the subcomparisons + int Num = LeftChunks.size(); + if (Pred == ICmpInst::ICMP_EQ) { + PartPred = ICmpInst::ICMP_EQ; + CombineOp = Instruction::And; + } else { + PartPred = ICmpInst::ICMP_NE; + CombineOp = Instruction::Or; + } + // first combine 0 and 1. then combine that with 2, etc. + Value *Combined = NULL; + for (int i = 0; i < Num; i++) { + Value *Cmp = CopyDebug(new ICmpInst(I, PartPred, LeftChunks[i], RightChunks[i]), I); + Combined = !Combined ? 
Cmp : CopyDebug(BinaryOperator::Create(CombineOp, Combined, Cmp, "", I), I); + } + I->replaceAllUsesWith(Combined); + break; + } + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: { + if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { + if (CI->getZExtValue() == 0 && Pred == ICmpInst::ICMP_SLT) { + // strict < 0 is easy to do, even on non-i64, just the sign bit matters + Instruction *NewInst = new ICmpInst(I, ICmpInst::ICMP_SLT, LeftChunks[LeftChunks.size()-1], Zero); + CopyDebug(NewInst, I); + I->replaceAllUsesWith(NewInst); + return true; + } + } + Type *T = I->getOperand(0)->getType(); + assert(T->isIntegerTy() && T->getIntegerBitWidth() % 32 == 0); + int NumChunks = getNumChunks(T); + assert(NumChunks >= 2); + ICmpInst::Predicate StrictPred = Pred; + ICmpInst::Predicate UnsignedPred = Pred; + switch (Pred) { + case ICmpInst::ICMP_ULE: StrictPred = ICmpInst::ICMP_ULT; break; + case ICmpInst::ICMP_UGE: StrictPred = ICmpInst::ICMP_UGT; break; + case ICmpInst::ICMP_SLE: StrictPred = ICmpInst::ICMP_SLT; UnsignedPred = ICmpInst::ICMP_ULE; break; + case ICmpInst::ICMP_SGE: StrictPred = ICmpInst::ICMP_SGT; UnsignedPred = ICmpInst::ICMP_UGE; break; + case ICmpInst::ICMP_SLT: UnsignedPred = ICmpInst::ICMP_ULT; break; + case ICmpInst::ICMP_SGT: UnsignedPred = ICmpInst::ICMP_UGT; break; + case ICmpInst::ICMP_ULT: break; + case ICmpInst::ICMP_UGT: break; + default: assert(0); + } + // general pattern is + // a,b,c < A,B,C => c < C || (c == C && b < B) || (c == C && b == B && a < A) + Instruction *Final = CopyDebug(new ICmpInst(I, StrictPred, LeftChunks[NumChunks-1], RightChunks[NumChunks-1]), I); + for (int i = NumChunks-2; i >= 0; i--) { + Instruction *Curr = CopyDebug(new ICmpInst(I, UnsignedPred, LeftChunks[i], RightChunks[i]), I); + for (int j = NumChunks-1; j > i; j--) { + Instruction *Temp = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_EQ, LeftChunks[j], RightChunks[j]), I); + Curr = CopyDebug(BinaryOperator::Create(Instruction::And, Temp, Curr, "", I), I); + } + Final = CopyDebug(BinaryOperator::Create(Instruction::Or, Final, Curr, "", I), I); + } + I->replaceAllUsesWith(Final); + break; + } + default: assert(0); + } + break; + } + case Instruction::Select: { + SelectInst *SI = cast(I); + Value *Cond = SI->getCondition(); + ChunksVec TrueChunks = getChunks(SI->getTrueValue()); + ChunksVec FalseChunks = getChunks(SI->getFalseValue()); + unsigned Num = getNumChunks(I->getType()); + for (unsigned i = 0; i < Num; i++) { + Instruction *Part = CopyDebug(SelectInst::Create(Cond, TrueChunks[i], FalseChunks[i], "", I), I); + Chunks.push_back(Part); + } + break; + } + case Instruction::PHI: { + PHINode *Parent = cast(I); + int Num = getNumChunks(I->getType()); + int PhiNum = Parent->getNumIncomingValues(); + for (int i = 0; i < Num; i++) { + Instruction *P = CopyDebug(PHINode::Create(i32, PhiNum, "", I), I); + Chunks.push_back(P); + } + // PHI node operands may not be translated yet; we'll handle them at the end. 
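+      // (A phi of an illegal type therefore becomes, roughly, one i32 phi per
+      // 32-bit chunk, whose incoming values are only wired up once every
+      // reachable block has been visited.)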
+ Phis.push_back(Parent); + break; + } + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + BinaryOperator *BO = cast(I); + ChunksVec LeftChunks = getChunks(BO->getOperand(0)); + ChunksVec RightChunks = getChunks(BO->getOperand(1)); + int Num = getNumChunks(BO->getType()); + for (int i = 0; i < Num; i++) { + // If there's a constant operand, it's likely enough that one of the + // chunks will be a trivial operation, so it's worth calling + // SimplifyBinOp here. + if (Value *V = SimplifyBinOp(BO->getOpcode(), LeftChunks[i], RightChunks[i], *DL)) { + Chunks.push_back(V); + } else { + Chunks.push_back(CopyDebug(BinaryOperator::Create(BO->getOpcode(), LeftChunks[i], RightChunks[i], "", BO), BO)); + } + } + break; + } + case Instruction::Call: { + CallInst *CI = cast(I); + Function *F = CI->getCalledFunction(); + if (F) { + assert(okToRemainIllegal(F)); + return false; + } + Value *CV = CI->getCalledValue(); + FunctionType *OFT = NULL; + if (ConstantExpr *CE = dyn_cast(CV)) { + assert(CE); + OFT = cast(cast(CE->getType())->getElementType()); + Constant *C = CE->getOperand(0); + if (CE->getOpcode() == Instruction::BitCast) { + CV = ConstantExpr::getBitCast(C, getLegalizedFunctionType(OFT)->getPointerTo()); + } else if (CE->getOpcode() == Instruction::IntToPtr) { + CV = ConstantExpr::getIntToPtr(C, getLegalizedFunctionType(OFT)->getPointerTo()); + } else { + llvm_unreachable("Bad CE in i64 Call"); + } + } else { + // this is a function pointer call + OFT = cast(cast(CV->getType())->getElementType()); + // we need to add a bitcast + CV = new BitCastInst(CV, getLegalizedFunctionType(OFT)->getPointerTo(), "", I); + } + // create a call with space for legal args + SmallVector Args; // XXX + int Num = OFT->getNumParams(); + for (int i = 0; i < Num; i++) { + Type *T = OFT->getParamType(i); + if (!isIllegal(T)) { + Args.push_back(CI->getArgOperand(i)); + } else { + assert(T == i64); + ChunksVec ArgChunks = getChunks(CI->getArgOperand(i)); + Args.push_back(ArgChunks[0]); + Args.push_back(ArgChunks[1]); + } + } + Instruction *L = CopyDebug(CallInst::Create(CV, Args, "", I), I); + Instruction *H = NULL; + // legalize return value as well, if necessary + if (isIllegal(I->getType())) { + assert(I->getType() == i64); + ensureFuncs(); + H = CopyDebug(CallInst::Create(GetHigh, "", I), I); + Chunks.push_back(L); + Chunks.push_back(H); + } else { + I->replaceAllUsesWith(L); + } + break; + } + case Instruction::FPToUI: + case Instruction::FPToSI: { + assert(I->getType() == i64); + ensureFuncs(); + SmallVector Args; + Value *Input = I->getOperand(0); + Args.push_back(Input); + Instruction *L, *H; + if (Input->getType()->isFloatTy()) { + L = CopyDebug(CallInst::Create(FtoILow, Args, "", I), I); + H = CopyDebug(CallInst::Create(FtoIHigh, Args, "", I), I); + } else { + L = CopyDebug(CallInst::Create(DtoILow, Args, "", I), I); + H = CopyDebug(CallInst::Create(DtoIHigh, Args, "", I), I); + } + Chunks.push_back(L); + Chunks.push_back(H); + break; + } + case Instruction::BitCast: { + if (I->getType() == Type::getDoubleTy(TheModule->getContext())) { + // fall through to itofp + } else if (I->getOperand(0)->getType() == Type::getDoubleTy(TheModule->getContext())) { + // double to i64 + assert(I->getType() == i64); + ensureFuncs(); + SmallVector Args; + Args.push_back(I->getOperand(0)); + Instruction *L = CopyDebug(CallInst::Create(BDtoILow, Args, "", I), I); + Instruction *H = CopyDebug(CallInst::Create(BDtoIHigh, Args, "", I), I); + Chunks.push_back(L); + Chunks.push_back(H); + break; + } else if 
(isa(I->getOperand(0)->getType()) && !isa(I->getType())) { + unsigned NumElts = getNumChunks(I->getType()); + VectorType *IVTy = VectorType::get(i32, NumElts); + Instruction *B = CopyDebug(new BitCastInst(I->getOperand(0), IVTy, "", I), I); + for (unsigned i = 0; i < NumElts; ++i) { + Constant *Idx = ConstantInt::get(i32, i); + Instruction *Ext = CopyDebug(ExtractElementInst::Create(B, Idx, "", I), I); + Chunks.push_back(Ext); + } + break; + } else { + // no-op bitcast + assert(I->getType() == I->getOperand(0)->getType() && "possible hint: optimize with -O0 or -O2+, and not -O1"); + Chunks = getChunks(I->getOperand(0)); + break; + } + } + case Instruction::SIToFP: + case Instruction::UIToFP: { + assert(I->getOperand(0)->getType() == i64); + ensureFuncs(); + ChunksVec InputChunks = getChunks(I->getOperand(0)); + Function *F; + switch (I->getOpcode()) { + case Instruction::SIToFP: F = I->getType() == Type::getDoubleTy(TheModule->getContext()) ? SItoD : SItoF; break; + case Instruction::UIToFP: F = I->getType() == Type::getDoubleTy(TheModule->getContext()) ? UItoD : UItoF; break; + case Instruction::BitCast: { + assert(I->getType() == Type::getDoubleTy(TheModule->getContext())); + F = BItoD; + break; + } + default: assert(0); + } + Instruction *D = CopyDebug(CallInst::Create(F, InputChunks, "", I), I); + I->replaceAllUsesWith(D); + break; + } + case Instruction::Switch: { + assert(I->getOperand(0)->getType() == i64); + ChunksVec InputChunks = getChunks(I->getOperand(0)); + + // do a switch on the lower 32 bits, into a different basic block for each target, then do a branch in each of those on the high 32 bits + SwitchInst* SI = cast(I); + BasicBlock *DD = SI->getDefaultDest(); + BasicBlock *SwitchBB = I->getParent(); + Function *F = SwitchBB->getParent(); + + unsigned NumItems = SI->getNumCases(); + SwitchInst *LowSI = SwitchInst::Create(InputChunks[0], DD, NumItems, I); // same default destination: if lower bits do not match, go straight to default + CopyDebug(LowSI, I); + + typedef std::pair Pair; + typedef std::vector Vec; // vector of pairs of high 32 bits, basic block + typedef std::map Map; // maps low 32 bits to their Vec info + Map Groups; // (as two 64-bit values in the switch may share their lower bits) + + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { + BasicBlock *BB = i.getCaseSuccessor(); + uint64_t Bits = i.getCaseValue()->getZExtValue(); + uint32_t LowBits = (uint32_t)Bits; + uint32_t HighBits = (uint32_t)(Bits >> 32); + Vec& V = Groups[LowBits]; + V.push_back(Pair(HighBits, BB)); + } + + unsigned Counter = 0; + BasicBlock *InsertPoint = SwitchBB; + + for (Map::iterator GI = Groups.begin(); GI != Groups.end(); GI++) { + uint32_t LowBits = GI->first; + Vec &V = GI->second; + + BasicBlock *NewBB = BasicBlock::Create(F->getContext(), "switch64_" + utostr(Counter++), F); + NewBB->moveAfter(InsertPoint); + InsertPoint = NewBB; + LowSI->addCase(cast(ConstantInt::get(i32, LowBits)), NewBB); + + /*if (V.size() == 1) { + // just one option, create a branch + Instruction *CheckHigh = CopyDebug(new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, InputChunks[1], ConstantInt::get(i32, V[0]->first)), I); + Split.ToFix.push_back(CheckHigh); + CopyDebug(BranchInst::Create(V[0]->second, DD, CheckHigh, NewBB), I); + } else {*/ + + // multiple options, create a switch - we could also optimize and make an icmp/branch if just one, as in commented code above + SwitchInst *HighSI = SwitchInst::Create(InputChunks[1], DD, V.size(), NewBB); // same default destination: if lower 
bits do not match, go straight to default + for (unsigned i = 0; i < V.size(); i++) { + BasicBlock *BB = V[i].second; + HighSI->addCase(cast(ConstantInt::get(i32, V[i].first)), BB); + // fix phis, we used to go SwitchBB->BB, but now go SwitchBB->NewBB->BB, so we look like we arrived from NewBB. Replace the phi from the + // now unneeded SwitchBB to the new BB + // We cannot do this here right now, as phis we encounter may be in the middle of processing (empty), so we queue these. + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + PHINode *Phi = dyn_cast(I); + if (!Phi) break; + PhiBlockChange Change; + Change.DD = BB; + Change.SwitchBB = SwitchBB; + Change.NewBB = NewBB; + PhiBlockChanges.push_back(Change); + break; // we saw a phi on this BB, and pushed a Change + } + } + + // We used to go SwitchBB->DD, but now go SwitchBB->NewBB->DD, fix that like with BB above. However here we do not replace, + // as the switch BB is still possible to arrive from - we can arrive at the default if either the lower bits were wrong (we + // arrive from the switchBB) or from the NewBB if the high bits were wrong. + PhiBlockChange Change; + Change.DD = DD; + Change.SwitchBB = SwitchBB; + Change.NewBB = NewBB; + PhiBlockChanges.push_back(Change); + } + break; + } + case Instruction::AtomicRMW: { + const AtomicRMWInst *rmwi = cast(I); + ChunksVec Chunks32Bit = getChunks(I->getOperand(1)); + unsigned Num = getNumChunks(I->getType()); + assert(Num == 2 && "Only know how to handle 32-bit and 64-bit AtomicRMW instructions!"); + ensureFuncs(); + Value *Low = NULL, *High = NULL; + Function *F = NULL; + switch (rmwi->getOperation()) { + case AtomicRMWInst::Add: F = AtomicAdd; break; + case AtomicRMWInst::Sub: F = AtomicSub; break; + case AtomicRMWInst::And: F = AtomicAnd; break; + case AtomicRMWInst::Or: F = AtomicOr; break; + case AtomicRMWInst::Xor: F = AtomicXor; break; + case AtomicRMWInst::Xchg: + case AtomicRMWInst::Nand: + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + default: llvm_unreachable("Bad atomic operation"); + } + SmallVector Args; + Args.push_back(new BitCastInst(I->getOperand(0), Type::getInt8PtrTy(TheModule->getContext()), "", I)); + Args.push_back(Chunks32Bit[0]); + Args.push_back(Chunks32Bit[1]); + Low = CopyDebug(CallInst::Create(F, Args, "", I), I); + High = CopyDebug(CallInst::Create(GetHigh, "", I), I); + Chunks.push_back(Low); + Chunks.push_back(High); + break; + } + case Instruction::AtomicCmpXchg: { + assert(0 && "64-bit compare-and-exchange (__sync_bool_compare_and_swap & __sync_val_compare_and_swap) are not supported! Please directly call emscripten_atomic_cas_u64() instead in order to emulate!"); + break; + } + default: { + I->dump(); + assert(0 && "some i64 thing we can't legalize yet. 
possible hint: optimize with -O0 or -O2+, and not -O1"); + } + } + + return true; +} + +ChunksVec ExpandI64::getChunks(Value *V, bool AllowUnreachable) { + assert(isIllegal(V->getType())); + + unsigned Num = getNumChunks(V->getType()); + Type *i32 = Type::getInt32Ty(V->getContext()); + + if (isa(V)) + return ChunksVec(Num, UndefValue::get(i32)); + + if (Constant *C = dyn_cast(V)) { + ChunksVec Chunks; + for (unsigned i = 0; i < Num; i++) { + Constant *Count = ConstantInt::get(C->getType(), i * 32); + Constant *NewC = ConstantExpr::getTrunc(ConstantExpr::getLShr(C, Count), i32); + TargetLibraryInfo *TLI = 0; // TODO + if (ConstantExpr *NewCE = dyn_cast(NewC)) { + if (Constant *FoldedC = ConstantFoldConstantExpression(NewCE, *DL, TLI)) { + NewC = FoldedC; + } + } + + Chunks.push_back(NewC); + } + return Chunks; + } + + if (Splits.find(V) == Splits.end()) { + if (AllowUnreachable) + return ChunksVec(Num, UndefValue::get(i32)); + errs() << *V << "\n"; + report_fatal_error("could not find chunks for illegal value"); + } + assert(Splits[V].size() == Num); + return Splits[V]; +} + +void ExpandI64::ensureFuncs() { + if (Add != NULL) return; + + Type *i32 = Type::getInt32Ty(TheModule->getContext()); + + SmallVector ThreeArgTypes; + ThreeArgTypes.push_back(Type::getInt8PtrTy(TheModule->getContext())); + ThreeArgTypes.push_back(i32); + ThreeArgTypes.push_back(i32); + FunctionType *ThreeFunc = FunctionType::get(i32, ThreeArgTypes, false); + + AtomicAdd = TheModule->getFunction("_emscripten_atomic_fetch_and_add_u64"); + if (!AtomicAdd) { + AtomicAdd = Function::Create(ThreeFunc, GlobalValue::ExternalLinkage, + "_emscripten_atomic_fetch_and_add_u64", TheModule); + } + AtomicSub = TheModule->getFunction("_emscripten_atomic_fetch_and_sub_u64"); + if (!AtomicSub) { + AtomicSub = Function::Create(ThreeFunc, GlobalValue::ExternalLinkage, + "_emscripten_atomic_fetch_and_sub_u64", TheModule); + } + AtomicAnd = TheModule->getFunction("_emscripten_atomic_fetch_and_and_u64"); + if (!AtomicAnd) { + AtomicAnd = Function::Create(ThreeFunc, GlobalValue::ExternalLinkage, + "_emscripten_atomic_fetch_and_and_u64", TheModule); + } + AtomicOr = TheModule->getFunction("_emscripten_atomic_fetch_and_or_u64"); + if (!AtomicOr) { + AtomicOr = Function::Create(ThreeFunc, GlobalValue::ExternalLinkage, + "_emscripten_atomic_fetch_and_or_u64", TheModule); + } + AtomicXor = TheModule->getFunction("_emscripten_atomic_fetch_and_xor_u64"); + if (!AtomicXor) { + AtomicXor = Function::Create(ThreeFunc, GlobalValue::ExternalLinkage, + "_emscripten_atomic_fetch_and_xor_u64", TheModule); + } + + SmallVector FourArgTypes; + FourArgTypes.push_back(i32); + FourArgTypes.push_back(i32); + FourArgTypes.push_back(i32); + FourArgTypes.push_back(i32); + FunctionType *FourFunc = FunctionType::get(i32, FourArgTypes, false); + + Add = Function::Create(FourFunc, GlobalValue::ExternalLinkage, + "i64Add", TheModule); + Sub = Function::Create(FourFunc, GlobalValue::ExternalLinkage, + "i64Subtract", TheModule); + Mul = Function::Create(FourFunc, GlobalValue::ExternalLinkage, + "__muldi3", TheModule); + SDiv = Function::Create(FourFunc, GlobalValue::ExternalLinkage, + "__divdi3", TheModule); + UDiv = Function::Create(FourFunc, GlobalValue::ExternalLinkage, + "__udivdi3", TheModule); + SRem = Function::Create(FourFunc, GlobalValue::ExternalLinkage, + "__remdi3", TheModule); + URem = Function::Create(FourFunc, GlobalValue::ExternalLinkage, + "__uremdi3", TheModule); + LShr = Function::Create(FourFunc, GlobalValue::ExternalLinkage, + "bitshift64Lshr", 
TheModule); + AShr = Function::Create(FourFunc, GlobalValue::ExternalLinkage, + "bitshift64Ashr", TheModule); + Shl = Function::Create(FourFunc, GlobalValue::ExternalLinkage, + "bitshift64Shl", TheModule); + + if (!(GetHigh = TheModule->getFunction("getHigh32"))) { + SmallVector GetHighArgTypes; + FunctionType *GetHighFunc = FunctionType::get(i32, GetHighArgTypes, false); + GetHigh = Function::Create(GetHighFunc, GlobalValue::ExternalLinkage, + "getHigh32", TheModule); + } + + Type *V = Type::getVoidTy(TheModule->getContext()); + + SmallVector SetHighArgTypes; + SetHighArgTypes.push_back(i32); + FunctionType *SetHighFunc = FunctionType::get(V, SetHighArgTypes, false); + SetHigh = Function::Create(SetHighFunc, GlobalValue::ExternalLinkage, + "setHigh32", TheModule); + + Type *Double = Type::getDoubleTy(TheModule->getContext()); + Type *Float = Type::getFloatTy(TheModule->getContext()); + + SmallVector FtoITypes; + FtoITypes.push_back(Float); + FunctionType *FtoIFunc = FunctionType::get(i32, FtoITypes, false); + + SmallVector DtoITypes; + DtoITypes.push_back(Double); + FunctionType *DtoIFunc = FunctionType::get(i32, DtoITypes, false); + + FtoILow = Function::Create(FtoIFunc, GlobalValue::ExternalLinkage, + "FtoILow", TheModule); + FtoIHigh = Function::Create(FtoIFunc, GlobalValue::ExternalLinkage, + "FtoIHigh", TheModule); + DtoILow = Function::Create(DtoIFunc, GlobalValue::ExternalLinkage, + "DtoILow", TheModule); + DtoIHigh = Function::Create(DtoIFunc, GlobalValue::ExternalLinkage, + "DtoIHigh", TheModule); + BDtoILow = Function::Create(DtoIFunc, GlobalValue::ExternalLinkage, + "BDtoILow", TheModule); + BDtoIHigh = Function::Create(DtoIFunc, GlobalValue::ExternalLinkage, + "BDtoIHigh", TheModule); + + SmallVector ItoTypes; + ItoTypes.push_back(i32); + ItoTypes.push_back(i32); + + FunctionType *ItoFFunc = FunctionType::get(Float, ItoTypes, false); + SItoF = Function::Create(ItoFFunc, GlobalValue::ExternalLinkage, + "SItoF", TheModule); + UItoF = Function::Create(ItoFFunc, GlobalValue::ExternalLinkage, + "UItoF", TheModule); + + FunctionType *ItoDFunc = FunctionType::get(Double, ItoTypes, false); + SItoD = Function::Create(ItoDFunc, GlobalValue::ExternalLinkage, + "SItoD", TheModule); + UItoD = Function::Create(ItoDFunc, GlobalValue::ExternalLinkage, + "UItoD", TheModule); + + BItoD = Function::Create(ItoDFunc, GlobalValue::ExternalLinkage, + "BItoD", TheModule); +} + +bool ExpandI64::runOnModule(Module &M) { + TheModule = &M; + DL = &M.getDataLayout(); + Splits.clear(); + Changed = false; + + // pre pass - legalize functions + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *Func = &*Iter++; + ensureLegalFunc(Func); + } + + // first pass - split + DeadVec Dead; + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ++Iter) { + Function *Func = &*Iter; + if (Func->isDeclaration()) { + continue; + } + + // Walk the body of the function. We use reverse postorder so that we visit + // all operands of an instruction before the instruction itself. The + // exception to this is PHI nodes, which we put on a list and handle below. + ReversePostOrderTraversal RPOT(Func); + for (ReversePostOrderTraversal::rpo_iterator RI = RPOT.begin(), + RE = RPOT.end(); RI != RE; ++RI) { + BasicBlock *BB = *RI; + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *I = &*Iter++; + if (!isLegalInstruction(I)) { + if (splitInst(I)) { + Changed = true; + Dead.push_back(I); + } + } + } + } + + // Fix up PHI node operands. 
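+    // For each incoming edge of an original illegal phi, the matching chunk
+    // of the (now expanded) incoming value is added to each replacement i32
+    // phi created in splitInst.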
+ while (!Phis.empty()) { + PHINode *PN = Phis.pop_back_val(); + ChunksVec OutputChunks = getChunks(PN); + for (unsigned j = 0, je = PN->getNumIncomingValues(); j != je; ++j) { + Value *Op = PN->getIncomingValue(j); + ChunksVec InputChunks = getChunks(Op, true); + for (unsigned k = 0, ke = OutputChunks.size(); k != ke; ++k) { + PHINode *NewPN = cast(OutputChunks[k]); + NewPN->addIncoming(InputChunks[k], PN->getIncomingBlock(j)); + } + } + PN->dropAllReferences(); + } + + // Delete instructions which were replaced. We do this after the full walk + // of the instructions so that all uses are replaced first. + while (!Dead.empty()) { + Instruction *D = Dead.pop_back_val(); + D->eraseFromParent(); + } + + // Apply basic block changes to phis, now that phis are all processed (and illegal phis erased) + for (unsigned i = 0; i < PhiBlockChanges.size(); i++) { + PhiBlockChange &Change = PhiBlockChanges[i]; + for (BasicBlock::iterator I = Change.DD->begin(); I != Change.DD->end(); ++I) { + PHINode *Phi = dyn_cast(I); + if (!Phi) break; + int Index = Phi->getBasicBlockIndex(Change.SwitchBB); + assert(Index >= 0); + Phi->addIncoming(Phi->getIncomingValue(Index), Change.NewBB); + } + } + PhiBlockChanges.clear(); + + // We only visited blocks found by a DFS walk from the entry, so we haven't + // visited any unreachable blocks, and they may still contain illegal + // instructions at this point. Being unreachable, they can simply be deleted. + removeUnreachableBlocks(*Func); + } + + // post pass - clean up illegal functions that were legalized. We do this + // after the full walk of the functions so that all uses are replaced first. + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *Func = &*Iter++; + removeIllegalFunc(Func); + } + + return Changed; +} + +ModulePass *llvm::createExpandI64Pass() { + return new ExpandI64(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandIndirectBr.cpp b/lib/Target/JSBackend/NaCl/ExpandIndirectBr.cpp new file mode 100644 index 000000000000..974f0dfee16f --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandIndirectBr.cpp @@ -0,0 +1,152 @@ +//===- ExpandIndirectBr.cpp - Expand out indirectbr and blockaddress-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out indirectbr instructions and blockaddress +// ConstantExprs, which are not currently supported in PNaCl's stable +// ABI. indirectbr is used to implement computed gotos (a GNU +// extension to C). This pass replaces indirectbr instructions with +// switch instructions. +// +// The resulting use of switches might not be as fast as the original +// indirectbrs. If you are compiling a program that has a +// compile-time option for using computed gotos, it's possible that +// the program will run faster with the option turned off than with +// using computed gotos + ExpandIndirectBr (for example, if the +// program does extra work to take advantage of computed gotos). 
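+//
+// As a rough sketch of the rewrite (simplified, not the literal output), a
+// computed goto such as
+//   indirectbr i8* %addr, [label %a, label %b]
+// becomes
+//   %indirectbr_cast = ptrtoint i8* %addr to i32
+//   switch i32 %indirectbr_cast, label %indirectbr_default [ i32 1, label %a
+//                                                            i32 2, label %b ]
+// with each blockaddress constant rewritten to the small integer assigned to
+// its target block.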
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that it can expand out blockaddress + // ConstantExprs inside global variable initializers. + class ExpandIndirectBr : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandIndirectBr() : ModulePass(ID) { + initializeExpandIndirectBrPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandIndirectBr::ID = 0; +INITIALIZE_PASS(ExpandIndirectBr, "expand-indirectbr", + "Expand out indirectbr and blockaddress (computed gotos)", + false, false) + +static bool convertFunction(Function *Func) { + bool Changed = false; + IntegerType *I32 = Type::getInt32Ty(Func->getContext()); + + // Skip zero in case programs treat a null pointer as special. + uint32_t NextNum = 1; + DenseMap LabelNums; + BasicBlock *DefaultBB = NULL; + + // Replace each indirectbr with a switch. + // + // If there are multiple indirectbr instructions in the function, + // this could be expensive. While an indirectbr is usually + // converted to O(1) machine instructions, the switch we generate + // here will be O(n) in the number of target labels. + // + // However, Clang usually generates just a single indirectbr per + // function anyway when compiling C computed gotos. + // + // We could try to generate one switch to handle all the indirectbr + // instructions in the function, but that would be complicated to + // implement given that variables that are live at one indirectbr + // might not be live at others. + for (llvm::Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + if (IndirectBrInst *Br = dyn_cast(BB->getTerminator())) { + Changed = true; + + if (!DefaultBB) { + DefaultBB = BasicBlock::Create(Func->getContext(), + "indirectbr_default", Func); + new UnreachableInst(Func->getContext(), DefaultBB); + } + + // An indirectbr can list the same target block multiple times. + // Keep track of the basic blocks we've handled to avoid adding + // the same case multiple times. + DenseSet BlocksSeen; + + Value *Cast = new PtrToIntInst(Br->getAddress(), I32, + "indirectbr_cast", Br); + unsigned Count = Br->getNumSuccessors(); + SwitchInst *Switch = SwitchInst::Create(Cast, DefaultBB, Count, Br); + for (unsigned I = 0; I < Count; ++I) { + BasicBlock *Dest = Br->getSuccessor(I); + if (!BlocksSeen.insert(Dest).second) { + // Remove duplicated entries from phi nodes. + for (BasicBlock::iterator Inst = Dest->begin(); ; ++Inst) { + PHINode *Phi = dyn_cast(Inst); + if (!Phi) + break; + Phi->removeIncomingValue(Br->getParent()); + } + continue; + } + ConstantInt *Val; + if (LabelNums.count(Dest) == 0) { + Val = ConstantInt::get(I32, NextNum++); + LabelNums[Dest] = Val; + + BlockAddress *BA = BlockAddress::get(Func, Dest); + Value *ValAsPtr = ConstantExpr::getIntToPtr(Val, BA->getType()); + BA->replaceAllUsesWith(ValAsPtr); + BA->destroyConstant(); + } else { + Val = LabelNums[Dest]; + } + Switch->addCase(Val, Br->getSuccessor(I)); + } + Br->eraseFromParent(); + } + } + + // If there are any blockaddresses that are never used by an + // indirectbr, replace them with dummy values. 
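+  // A blockaddress that is not consumed by any indirectbr cannot be used as a
+  // branch target here, so an arbitrary dummy value (inttoptr of -1) is
+  // substituted.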
+ SmallVector Users(Func->user_begin(), Func->user_end()); + for (auto U : Users) { + if (BlockAddress *BA = dyn_cast(U)) { + Changed = true; + Value *DummyVal = ConstantExpr::getIntToPtr(ConstantInt::get(I32, ~0L), + BA->getType()); + BA->replaceAllUsesWith(DummyVal); + BA->destroyConstant(); + } + } + return Changed; +} + +bool ExpandIndirectBr::runOnModule(Module &M) { + bool Changed = false; + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + Changed |= convertFunction(&*Func); + } + return Changed; +} + +ModulePass *llvm::createExpandIndirectBrPass() { + return new ExpandIndirectBr(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandInsertExtractElement.cpp b/lib/Target/JSBackend/NaCl/ExpandInsertExtractElement.cpp new file mode 100644 index 000000000000..7c1c88004be2 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandInsertExtractElement.cpp @@ -0,0 +1,100 @@ +//==- ExpandInsertExtractElement.cpp - Expand vector insert and extract -=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===------------------------------------------------------------------===// +// +// This pass expands insertelement and extractelement instructions with +// variable indices, which SIMD.js doesn't natively support yet. +// +//===------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/Local.h" +#include +#include + +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + + class ExpandInsertExtractElement : public FunctionPass { + bool Changed; + + public: + static char ID; + ExpandInsertExtractElement() : FunctionPass(ID) { + initializeExpandInsertExtractElementPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + }; +} + +char ExpandInsertExtractElement::ID = 0; +INITIALIZE_PASS(ExpandInsertExtractElement, "expand-insert-extract-elements", + "Expand and lower insert and extract element operations", + false, false) + +// Utilities + +bool ExpandInsertExtractElement::runOnFunction(Function &F) { + Changed = false; + + Instruction *Entry = &*F.getEntryBlock().begin(); + Type *Int32 = Type::getInt32Ty(F.getContext()); + Constant *Zero = ConstantInt::get(Int32, 0); + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + + if (InsertElementInst *III = dyn_cast(Inst)) { + if (isa(III->getOperand(2))) + continue; + + Type *AllocaTy = III->getType(); + Instruction *A = new AllocaInst(AllocaTy, 0, "", Entry); + CopyDebug(new StoreInst(III->getOperand(0), A, III), III); + + Value *Idxs[] = { Zero, III->getOperand(2) }; + Instruction *B = CopyDebug( + GetElementPtrInst::Create(AllocaTy, A, Idxs, "", III), III); + CopyDebug(new StoreInst(III->getOperand(1), B, III), III); + + Instruction *L = CopyDebug(new LoadInst(A, "", III), III); + III->replaceAllUsesWith(L); + III->eraseFromParent(); + } else if (ExtractElementInst *EII = dyn_cast(Inst)) { + if (isa(EII->getOperand(1))) + continue; + + Type *AllocaTy = EII->getOperand(0)->getType(); + Instruction *A = new AllocaInst(AllocaTy, 0, "", Entry); + CopyDebug(new StoreInst(EII->getOperand(0), A, EII), EII); + + Value 
*Idxs[] = { Zero, EII->getOperand(1) }; + Instruction *B = CopyDebug( + GetElementPtrInst::Create(AllocaTy, A, Idxs, "", EII), EII); + Instruction *L = CopyDebug(new LoadInst(B, "", EII), EII); + EII->replaceAllUsesWith(L); + EII->eraseFromParent(); + } + } + + return Changed; +} + +FunctionPass *llvm::createExpandInsertExtractElementPass() { + return new ExpandInsertExtractElement(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandLargeIntegers.cpp b/lib/Target/JSBackend/NaCl/ExpandLargeIntegers.cpp new file mode 100644 index 000000000000..495d1f9e6d46 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandLargeIntegers.cpp @@ -0,0 +1,674 @@ +//===- ExpandLargeIntegers.cpp - Expand illegal integers for PNaCl ABI ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// A limited set of transformations to expand illegal-sized int types. +// +//===----------------------------------------------------------------------===// +// +// Legal sizes for the purposes of expansion are anything 64 bits or less. +// Operations on large integers are split into operations on smaller-sized +// integers. The low parts should always be powers of 2, but the high parts may +// not be. A subsequent pass can promote those. For now this pass only intends +// to support the uses generated by clang, which is basically just for large +// bitfields. +// +// Limitations: +// 1) It can't change function signatures or global variables. +// 3) Doesn't support mul, div/rem, switch. +// 4) Doesn't handle arrays or structs (or GEPs) with illegal types. +// 5) Doesn't handle constant expressions (it also doesn't produce them, so it +// can run after ExpandConstantExpr). +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +#define DEBUG_TYPE "nacl-expand-ints" + +// Break instructions up into no larger than 64-bit chunks. +static const unsigned kChunkBits = 64; +static const unsigned kChunkBytes = kChunkBits / CHAR_BIT; + +namespace { +class ExpandLargeIntegers : public FunctionPass { +public: + static char ID; + ExpandLargeIntegers() : FunctionPass(ID) { + initializeExpandLargeIntegersPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; +}; + +template struct LoHiPair { + T Lo, Hi; + LoHiPair() : Lo(), Hi() {} + LoHiPair(T Lo, T Hi) : Lo(Lo), Hi(Hi) {} +}; +template struct LoHiBitTriple { + T Lo, Hi, Bit; + LoHiBitTriple() : Lo(), Hi(), Bit() {} + LoHiBitTriple(T Lo, T Hi, T Bit) : Lo(Lo), Hi(Hi), Bit(Bit) {} +}; +typedef LoHiPair TypePair; +typedef LoHiPair ValuePair; +typedef LoHiPair AlignPair; +typedef LoHiBitTriple ValueTriple; + +// Information needed to patch a phi node which forward-references a value. 
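+// ValueNumber is the index of the incoming value in the lo/hi phis that must
+// be patched once Val itself has been expanded.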
+struct ForwardPHI { + Value *Val; + PHINode *Lo, *Hi; + unsigned ValueNumber; + ForwardPHI(Value *Val, PHINode *Lo, PHINode *Hi, unsigned ValueNumber) + : Val(Val), Lo(Lo), Hi(Hi), ValueNumber(ValueNumber) {} +}; +} + +char ExpandLargeIntegers::ID = 0; +INITIALIZE_PASS(ExpandLargeIntegers, "nacl-expand-ints", + "Expand integer types that are illegal in PNaCl", false, false) + +#define DIE_IF(COND, VAL, MSG) \ + do { \ + if (COND) { \ + errs() << "Unsupported: " << *(VAL) << '\n'; \ + report_fatal_error( \ + MSG " not yet supported for integer types larger than 64 bits"); \ + } \ + } while (0) + +static bool isLegalBitSize(unsigned Bits) { + assert(Bits && "Can't have zero-size integers"); + return Bits <= kChunkBits; +} + +static TypePair getExpandedIntTypes(Type *Ty) { + unsigned BitWidth = Ty->getIntegerBitWidth(); + assert(!isLegalBitSize(BitWidth)); + return {IntegerType::get(Ty->getContext(), kChunkBits), + IntegerType::get(Ty->getContext(), BitWidth - kChunkBits)}; +} + +// Return true if Val is an int which should be converted. +static bool shouldConvert(const Value *Val) { + Type *Ty = Val->getType(); + if (IntegerType *ITy = dyn_cast(Ty)) + return !isLegalBitSize(ITy->getBitWidth()); + return false; +} + +// Return a pair of constants expanded from C. +static ValuePair expandConstant(Constant *C) { + assert(shouldConvert(C)); + TypePair ExpandedTypes = getExpandedIntTypes(C->getType()); + if (isa(C)) { + return {UndefValue::get(ExpandedTypes.Lo), + UndefValue::get(ExpandedTypes.Hi)}; + } else if (ConstantInt *CInt = dyn_cast(C)) { + Constant *ShiftAmt = ConstantInt::get( + CInt->getType(), ExpandedTypes.Lo->getBitWidth(), false); + return {ConstantExpr::getTrunc(CInt, ExpandedTypes.Lo), + ConstantExpr::getTrunc(ConstantExpr::getLShr(CInt, ShiftAmt), + ExpandedTypes.Hi)}; + } + DIE_IF(true, C, "Constant value"); +} + +template +static AlignPair getAlign(const DataLayout &DL, T *I, Type *PrefAlignTy) { + unsigned LoAlign = I->getAlignment(); + if (LoAlign == 0) + LoAlign = DL.getPrefTypeAlignment(PrefAlignTy); + unsigned HiAlign = MinAlign(LoAlign, kChunkBytes); + return {LoAlign, HiAlign}; +} + +static ValuePair createBit(IRBuilder<> *IRB, const BinaryOperator *Binop, + const ValuePair &Lhs, const ValuePair &Rhs, + const TypePair &Tys, const StringRef &Name) { + auto Op = Binop->getOpcode(); + Value *Lo = IRB->CreateBinOp(Op, Lhs.Lo, Rhs.Lo, Twine(Name, ".lo")); + Value *Hi = IRB->CreateBinOp(Op, Lhs.Hi, Rhs.Hi, Twine(Name, ".hi")); + return {Lo, Hi}; +} + +static ValuePair createShl(IRBuilder<> *IRB, const BinaryOperator *Binop, + const ValuePair &Lhs, const ValuePair &Rhs, + const TypePair &Tys, const StringRef &Name) { + ConstantInt *ShlAmount = dyn_cast(Rhs.Lo); + // TODO(dschuff): Expansion of variable-sized shifts isn't supported + // because the behavior depends on whether the shift amount is less than + // the size of the low part of the expanded type, and I haven't yet + // figured out a way to do it for variable-sized shifts without splitting + // the basic block. I don't believe it's actually necessary for + // bitfields. Likewise for LShr below. + DIE_IF(!ShlAmount, Binop, "Expansion of variable-sized shifts"); + unsigned ShiftAmount = ShlAmount->getZExtValue(); + if (ShiftAmount >= Binop->getType()->getIntegerBitWidth()) + ShiftAmount = 0; // Undefined behavior. 
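+  // (Shifting by the bit width or more is undefined in LLVM IR, so any result
+  // is acceptable; treating it as a shift by zero keeps the expansion simple.)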
+ unsigned HiBits = Tys.Hi->getIntegerBitWidth(); + // |<------------Hi---------->|<-------Lo------>| + // | | | + // +--------+--------+--------+--------+--------+ + // |abcdefghijklmnopqrstuvwxyz|ABCDEFGHIJKLMNOPQ| + // +--------+--------+--------+--------+--------+ + // Possible shifts: + // |efghijklmnopqrstuvwxyzABCD|EFGHIJKLMNOPQ0000| Some Lo into Hi. + // |vwxyzABCDEFGHIJKLMNOPQ0000|00000000000000000| Lo is 0, keep some Hi. + // |DEFGHIJKLMNOPQ000000000000|00000000000000000| Lo is 0, no Hi left. + Value *Lo, *Hi; + if (ShiftAmount < kChunkBits) { + Lo = IRB->CreateShl(Lhs.Lo, ShiftAmount, Twine(Name, ".lo")); + Hi = + IRB->CreateZExtOrTrunc(IRB->CreateLShr(Lhs.Lo, kChunkBits - ShiftAmount, + Twine(Name, ".lo.shr")), + Tys.Hi, Twine(Name, ".lo.ext")); + } else { + Lo = ConstantInt::get(Tys.Lo, 0); + Hi = IRB->CreateShl( + IRB->CreateZExtOrTrunc(Lhs.Lo, Tys.Hi, Twine(Name, ".lo.ext")), + ShiftAmount - kChunkBits, Twine(Name, ".lo.shl")); + } + if (ShiftAmount < HiBits) + Hi = IRB->CreateOr( + Hi, IRB->CreateShl(Lhs.Hi, ShiftAmount, Twine(Name, ".hi.shl")), + Twine(Name, ".or")); + return {Lo, Hi}; +} + +static ValuePair createShr(IRBuilder<> *IRB, const BinaryOperator *Binop, + const ValuePair &Lhs, const ValuePair &Rhs, + const TypePair &Tys, const StringRef &Name) { + auto Op = Binop->getOpcode(); + ConstantInt *ShrAmount = dyn_cast(Rhs.Lo); + // TODO(dschuff): Expansion of variable-sized shifts isn't supported + // because the behavior depends on whether the shift amount is less than + // the size of the low part of the expanded type, and I haven't yet + // figured out a way to do it for variable-sized shifts without splitting + // the basic block. I don't believe it's actually necessary for bitfields. + DIE_IF(!ShrAmount, Binop, "Expansion of variable-sized shifts"); + bool IsArith = Op == Instruction::AShr; + unsigned ShiftAmount = ShrAmount->getZExtValue(); + if (ShiftAmount >= Binop->getType()->getIntegerBitWidth()) + ShiftAmount = 0; // Undefined behavior. + unsigned HiBitWidth = Tys.Hi->getIntegerBitWidth(); + // |<--Hi-->|<-------Lo------>| + // | | | + // +--------+--------+--------+ + // |abcdefgh|ABCDEFGHIJKLMNOPQ| + // +--------+--------+--------+ + // Possible shifts (0 is sign when doing AShr): + // |0000abcd|defgABCDEFGHIJKLM| Some Hi into Lo. + // |00000000|00abcdefgABCDEFGH| Hi is 0, keep some Lo. + // |00000000|000000000000abcde| Hi is 0, no Lo left. + Value *Lo, *Hi; + if (ShiftAmount < kChunkBits) { + Lo = IRB->CreateShl( + IsArith + ? IRB->CreateSExtOrTrunc(Lhs.Hi, Tys.Lo, Twine(Name, ".hi.ext")) + : IRB->CreateZExtOrTrunc(Lhs.Hi, Tys.Lo, Twine(Name, ".hi.ext")), + kChunkBits - ShiftAmount, Twine(Name, ".hi.shl")); + Lo = IRB->CreateOr( + Lo, IRB->CreateLShr(Lhs.Lo, ShiftAmount, Twine(Name, ".lo.shr")), + Twine(Name, ".lo")); + } else { + Lo = IRB->CreateBinOp(Op, Lhs.Hi, + ConstantInt::get(Tys.Hi, ShiftAmount - kChunkBits), + Twine(Name, ".hi.shr")); + Lo = IsArith ? IRB->CreateSExtOrTrunc(Lo, Tys.Lo, Twine(Name, ".lo.ext")) + : IRB->CreateZExtOrTrunc(Lo, Tys.Lo, Twine(Name, ".lo.ext")); + } + if (ShiftAmount < HiBitWidth) { + Hi = IRB->CreateBinOp(Op, Lhs.Hi, ConstantInt::get(Tys.Hi, ShiftAmount), + Twine(Name, ".hi")); + } else { + Hi = IsArith ? 
IRB->CreateAShr(Lhs.Hi, HiBitWidth - 1, Twine(Name, ".hi")) + : ConstantInt::get(Tys.Hi, 0); + } + return {Lo, Hi}; +} + +static Value *createCarry(IRBuilder<> *IRB, Value *Lhs, Value *Rhs, + Value *Added, Type *Ty, const StringRef &Name) { + return IRB->CreateZExt( + IRB->CreateICmpULT( + Added, + IRB->CreateSelect(IRB->CreateICmpULT(Lhs, Rhs, Twine(Name, ".cmp")), + Rhs, Lhs, Twine(Name, ".limit")), + Twine(Name, ".overflowed")), + Ty, Twine(Name, ".carry")); +} + +static ValueTriple createAdd(IRBuilder<> *IRB, const ValuePair &Lhs, + const ValuePair &Rhs, const TypePair &Tys, + const StringRef &Name, Type *HiCarryTy) { + auto Op = Instruction::Add; + // Don't propagate NUW/NSW to the lo operation: it can overflow. + Value *Lo = IRB->CreateBinOp(Op, Lhs.Lo, Rhs.Lo, Twine(Name, ".lo")); + Value *LoCarry = createCarry(IRB, Lhs.Lo, Rhs.Lo, Lo, Tys.Hi, Name); + // TODO(jfb) The hi operation could be tagged with NUW/NSW. + Value *HiAdd = IRB->CreateBinOp(Op, Lhs.Hi, Rhs.Hi, Twine(Name, ".hi")); + Value *Hi = IRB->CreateBinOp(Op, HiAdd, LoCarry, Twine(Name, ".carried")); + Value *HiCarry = HiCarryTy + ? createCarry(IRB, Lhs.Hi, Rhs.Hi, Hi, HiCarryTy, Name) + : nullptr; + return {Lo, Hi, HiCarry}; +} + +static ValuePair createSub(IRBuilder<> *IRB, const ValuePair &Lhs, + const ValuePair &Rhs, const TypePair &Tys, + const StringRef &Name) { + auto Op = Instruction::Sub; + Value *Borrowed = IRB->CreateSExt( + IRB->CreateICmpULT(Lhs.Lo, Rhs.Lo, Twine(Name, ".borrow")), Tys.Hi, + Twine(Name, ".borrowing")); + Value *Lo = IRB->CreateBinOp(Op, Lhs.Lo, Rhs.Lo, Twine(Name, ".lo")); + Value *Hi = + IRB->CreateBinOp(Instruction::Add, + IRB->CreateBinOp(Op, Lhs.Hi, Rhs.Hi, Twine(Name, ".hi")), + Borrowed, Twine(Name, ".borrowed")); + return {Lo, Hi}; +} + +static Value *createICmpEquality(IRBuilder<> *IRB, CmpInst::Predicate Pred, + const ValuePair &Lhs, const ValuePair &Rhs, + const StringRef &Name) { + assert(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE); + Value *Lo = IRB->CreateICmp(Pred, Lhs.Lo, Rhs.Lo, Twine(Name, ".lo")); + Value *Hi = IRB->CreateICmp(Pred, Lhs.Hi, Rhs.Hi, Twine(Name, ".hi")); + return IRB->CreateBinOp( + Instruction::And, Lo, Hi, + Twine(Name, Pred == CmpInst::ICMP_EQ ? 
".eq" : ".ne")); +} + +static Value *createICmp(IRBuilder<> *IRB, const ICmpInst *ICmp, + const ValuePair &Lhs, const ValuePair &Rhs, + const TypePair &Tys, const StringRef &Name) { + auto Pred = ICmp->getPredicate(); + switch (Pred) { + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_NE: + return createICmpEquality(IRB, ICmp->getPredicate(), Lhs, Rhs, Name); + + case CmpInst::ICMP_UGT: // C == 1 and Z == 0 + case CmpInst::ICMP_UGE: // C == 1 + case CmpInst::ICMP_ULT: // C == 0 and Z == 0 + case CmpInst::ICMP_ULE: // C == 0 + { + Value *Carry = createAdd(IRB, Lhs, Rhs, Tys, Name, ICmp->getType()).Bit; + if (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE) + Carry = IRB->CreateNot(Carry, Name); + if (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_ULT) + Carry = IRB->CreateBinOp( + Instruction::And, Carry, + createICmpEquality(IRB, CmpInst::ICMP_EQ, Lhs, Rhs, Name), Name); + return Carry; + } + + case CmpInst::ICMP_SGT: // N == V and Z == 0 + case CmpInst::ICMP_SGE: // N == V + case CmpInst::ICMP_SLT: // N != V + case CmpInst::ICMP_SLE: // N != V or Z == 1 + DIE_IF(true, ICmp, "Signed comparisons"); + default: + llvm_unreachable("Invalid integer comparison"); + } +} + +static ValuePair createLoad(IRBuilder<> *IRB, const DataLayout &DL, + LoadInst *Load) { + DIE_IF(!Load->isSimple(), Load, "Volatile and atomic loads"); + Value *Op = Load->getPointerOperand(); + TypePair Tys = getExpandedIntTypes(Load->getType()); + AlignPair Align = getAlign(DL, Load, Load->getType()); + Value *Loty = IRB->CreateBitCast(Op, Tys.Lo->getPointerTo(), + Twine(Op->getName(), ".loty")); + Value *Lo = + IRB->CreateAlignedLoad(Loty, Align.Lo, Twine(Load->getName(), ".lo")); + Value *HiAddr = + IRB->CreateConstGEP1_32(Loty, 1, Twine(Op->getName(), ".hi.gep")); + Value *HiTy = IRB->CreateBitCast(HiAddr, Tys.Hi->getPointerTo(), + Twine(Op->getName(), ".hity")); + Value *Hi = + IRB->CreateAlignedLoad(HiTy, Align.Hi, Twine(Load->getName(), ".hi")); + return {Lo, Hi}; +} + +static ValuePair createStore(IRBuilder<> *IRB, const DataLayout &DL, + StoreInst *Store, const ValuePair &StoreVals) { + DIE_IF(!Store->isSimple(), Store, "Volatile and atomic stores"); + Value *Ptr = Store->getPointerOperand(); + TypePair Tys = getExpandedIntTypes(Store->getValueOperand()->getType()); + AlignPair Align = getAlign(DL, Store, Store->getValueOperand()->getType()); + Value *Loty = IRB->CreateBitCast(Ptr, Tys.Lo->getPointerTo(), + Twine(Ptr->getName(), ".loty")); + Value *Lo = IRB->CreateAlignedStore(StoreVals.Lo, Loty, Align.Lo); + Value *HiAddr = + IRB->CreateConstGEP1_32(Loty, 1, Twine(Ptr->getName(), ".hi.gep")); + Value *HiTy = IRB->CreateBitCast(HiAddr, Tys.Hi->getPointerTo(), + Twine(Ptr->getName(), ".hity")); + Value *Hi = IRB->CreateAlignedStore(StoreVals.Hi, HiTy, Align.Hi); + return {Lo, Hi}; +} + +namespace { +// Holds the state for converting/replacing values. We visit instructions in +// reverse post-order, phis are therefore the only instructions which can be +// visited before the value they use. +class ConversionState { +public: + // Return the expanded values for Val. + ValuePair getConverted(Value *Val) { + assert(shouldConvert(Val)); + // Directly convert constants. 
+ if (Constant *C = dyn_cast(Val)) + return expandConstant(C); + if (RewrittenIllegals.count(Val)) { + ValuePair Found = RewrittenIllegals[Val]; + if (RewrittenLegals.count(Found.Lo)) + Found.Lo = RewrittenLegals[Found.Lo]; + if (RewrittenLegals.count(Found.Hi)) + Found.Hi = RewrittenLegals[Found.Hi]; + return Found; + } + errs() << "Value: " << *Val << "\n"; + report_fatal_error("Expanded value not found in map"); + } + + // Returns whether a converted value has been recorded. This is only useful + // for phi instructions: they can be encountered before the incoming + // instruction, whereas RPO order guarantees that other instructions always + // use converted values. + bool hasConverted(Value *Val) { + assert(shouldConvert(Val)); + return dyn_cast(Val) || RewrittenIllegals.count(Val); + } + + // Record a forward phi, temporarily setting it to use Undef. This will be + // patched up at the end of RPO. + ValuePair recordForwardPHI(Value *Val, PHINode *Lo, PHINode *Hi, + unsigned ValueNumber) { + DEBUG(dbgs() << "\tRecording as forward PHI\n"); + ForwardPHIs.push_back(ForwardPHI(Val, Lo, Hi, ValueNumber)); + return {UndefValue::get(Lo->getType()), UndefValue::get(Hi->getType())}; + } + + void recordConverted(Instruction *From, const ValuePair &To) { + DEBUG(dbgs() << "\tTo: " << *To.Lo << "\n"); + DEBUG(dbgs() << "\tAnd: " << *To.Hi << "\n"); + ToErase.push_back(From); + RewrittenIllegals[From] = To; + } + + // Replace the uses of From with To, give From's name to To, and mark To for + // deletion. + void recordConverted(Instruction *From, Value *To) { + assert(!shouldConvert(From)); + DEBUG(dbgs() << "\tTo: " << *To << "\n"); + ToErase.push_back(From); + // From does not produce an illegal value, update its users in place. + From->replaceAllUsesWith(To); + To->takeName(From); + RewrittenLegals[From] = To; + } + + void recordToErase(Instruction *TE) { + ToErase.push_back(TE); + } + + void patchForwardPHIs() { + DEBUG(if (!ForwardPHIs.empty()) dbgs() << "Patching forward PHIs:\n"); + for (ForwardPHI &F : ForwardPHIs) { + ValuePair Ops = getConverted(F.Val); + F.Lo->setIncomingValue(F.ValueNumber, Ops.Lo); + F.Hi->setIncomingValue(F.ValueNumber, Ops.Hi); + DEBUG(dbgs() << "\t" << *F.Lo << "\n\t" << *F.Hi << "\n"); + } + } + + void eraseReplacedInstructions() { + for (Instruction *I : ToErase) + I->dropAllReferences(); + for (Instruction *I : ToErase) + I->eraseFromParent(); + } + +private: + // Maps illegal values to their new converted lo/hi values. + DenseMap RewrittenIllegals; + // Maps legal values to their new converted value. + DenseMap RewrittenLegals; + // Illegal values which have already been converted, will be erased. + SmallVector ToErase; + // PHIs which were encountered but had forward references. They need to get + // patched up after RPO traversal. + SmallVector ForwardPHIs; +}; +} // Anonymous namespace + +static void convertInstruction(Instruction *Inst, ConversionState &State, + const DataLayout &DL) { + DEBUG(dbgs() << "Expanding Large Integer: " << *Inst << "\n"); + // Set the insert point *after* Inst, so that any instructions inserted here + // will be visited again. That allows iterative expansion of types > i128. 
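+ // Added illustrative sketch (the value names and the 64-bit chunk width are
+ // assumptions based on createShl above, not verified output): a constant
+ // left shift of an i128 value split into %x.lo/%x.hi expands roughly as:
+ // ```llvm
+ // ; %r = shl i128 %x, 8
+ // %r.lo     = shl i64 %x.lo, 8
+ // %r.lo.shr = lshr i64 %x.lo, 56        ; bits carried into the high half
+ // %r.hi.shl = shl i64 %x.hi, 8
+ // %r.or     = or i64 %r.lo.shr, %r.hi.shl
+ // ```
+ // The resulting lo/hi pair then replaces all uses of the original value.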
+ BasicBlock::iterator InsertPos(Inst); + IRBuilder<> IRB(&*++InsertPos); + StringRef Name = Inst->getName(); + + if (PHINode *Phi = dyn_cast(Inst)) { + unsigned N = Phi->getNumIncomingValues(); + TypePair OpTys = getExpandedIntTypes(Phi->getIncomingValue(0)->getType()); + PHINode *Lo = IRB.CreatePHI(OpTys.Lo, N, Twine(Name + ".lo")); + PHINode *Hi = IRB.CreatePHI(OpTys.Hi, N, Twine(Name + ".hi")); + for (unsigned I = 0; I != N; ++I) { + Value *InVal = Phi->getIncomingValue(I); + BasicBlock *InBB = Phi->getIncomingBlock(I); + // If the value hasn't already been converted then this is a + // forward-reference PHI which needs to be patched up after RPO traversal. + ValuePair Ops = State.hasConverted(InVal) + ? State.getConverted(InVal) + : State.recordForwardPHI(InVal, Lo, Hi, I); + Lo->addIncoming(Ops.Lo, InBB); + Hi->addIncoming(Ops.Hi, InBB); + } + State.recordConverted(Phi, {Lo, Hi}); + + } else if (ZExtInst *ZExt = dyn_cast(Inst)) { + Value *Operand = ZExt->getOperand(0); + Type *OpTy = Operand->getType(); + TypePair Tys = getExpandedIntTypes(Inst->getType()); + Value *Lo, *Hi; + if (OpTy->getIntegerBitWidth() <= kChunkBits) { + Lo = IRB.CreateZExt(Operand, Tys.Lo, Twine(Name, ".lo")); + Hi = ConstantInt::get(Tys.Hi, 0); + } else { + ValuePair Ops = State.getConverted(Operand); + Lo = Ops.Lo; + Hi = IRB.CreateZExt(Ops.Hi, Tys.Hi, Twine(Name, ".hi")); + } + State.recordConverted(ZExt, {Lo, Hi}); + + } else if (TruncInst *Trunc = dyn_cast(Inst)) { + Value *Operand = Trunc->getOperand(0); + assert(shouldConvert(Operand) && "TruncInst is expandable but not its op"); + ValuePair Ops = State.getConverted(Operand); + if (!shouldConvert(Inst)) { + Value *NewInst = IRB.CreateTrunc(Ops.Lo, Trunc->getType(), Name); + State.recordConverted(Trunc, NewInst); + } else { + TypePair Tys = getExpandedIntTypes(Trunc->getType()); + assert(Tys.Lo == getExpandedIntTypes(Operand->getType()).Lo); + Value *Lo = Ops.Lo; + Value *Hi = IRB.CreateTrunc(Ops.Hi, Tys.Hi, Twine(Name, ".hi")); + State.recordConverted(Trunc, {Lo, Hi}); + } + + } else if (BinaryOperator *Binop = dyn_cast(Inst)) { + ValuePair Lhs = State.getConverted(Binop->getOperand(0)); + ValuePair Rhs = State.getConverted(Binop->getOperand(1)); + TypePair Tys = getExpandedIntTypes(Binop->getType()); + ValuePair Conv; + switch (Binop->getOpcode()) { + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + Conv = createBit(&IRB, Binop, Lhs, Rhs, Tys, Name); + break; + case Instruction::Shl: + Conv = createShl(&IRB, Binop, Lhs, Rhs, Tys, Name); + break; + case Instruction::AShr: + case Instruction::LShr: + Conv = createShr(&IRB, Binop, Lhs, Rhs, Tys, Name); + break; + case Instruction::Add: { + ValueTriple VT = + createAdd(&IRB, Lhs, Rhs, Tys, Name, /*HiCarryTy=*/nullptr); + Conv = {VT.Lo, VT.Hi}; // Ignore Hi carry. 
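+ // Added illustrative sketch (an assumption based on createAdd/createCarry
+ // above, not verified output): an i128 addition expands roughly as:
+ // ```llvm
+ // ; %r = add i128 %a, %b
+ // %r.lo         = add i64 %a.lo, %b.lo
+ // %r.cmp        = icmp ult i64 %a.lo, %b.lo
+ // %r.limit      = select i1 %r.cmp, i64 %b.lo, i64 %a.lo
+ // %r.overflowed = icmp ult i64 %r.lo, %r.limit
+ // %r.carry      = zext i1 %r.overflowed to i64
+ // %r.hi         = add i64 %a.hi, %b.hi
+ // %r.carried    = add i64 %r.hi, %r.carry
+ // ```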
+ } break; + case Instruction::Sub: + Conv = createSub(&IRB, Lhs, Rhs, Tys, Name); + break; + default: + DIE_IF(true, Binop, "Binary operator type"); + } + State.recordConverted(Binop, Conv); + + } else if (ICmpInst *ICmp = dyn_cast(Inst)) { + ValuePair Lhs = State.getConverted(ICmp->getOperand(0)); + ValuePair Rhs = State.getConverted(ICmp->getOperand(1)); + TypePair Tys = getExpandedIntTypes(ICmp->getOperand(0)->getType()); + State.recordConverted(ICmp, createICmp(&IRB, ICmp, Lhs, Rhs, Tys, Name)); + + } else if (LoadInst *Load = dyn_cast(Inst)) { + State.recordConverted(Load, createLoad(&IRB, DL, Load)); + + } else if (StoreInst *Store = dyn_cast(Inst)) { + ValuePair StoreVals = State.getConverted(Store->getValueOperand()); + State.recordConverted(Store, createStore(&IRB, DL, Store, StoreVals)); + + } else if (SelectInst *Select = dyn_cast(Inst)) { + Value *Cond = Select->getCondition(); + ValuePair True = State.getConverted(Select->getTrueValue()); + ValuePair False = State.getConverted(Select->getFalseValue()); + Value *Lo = IRB.CreateSelect(Cond, True.Lo, False.Lo, Twine(Name, ".lo")); + Value *Hi = IRB.CreateSelect(Cond, True.Hi, False.Hi, Twine(Name, ".hi")); + State.recordConverted(Select, {Lo, Hi}); + + } else if (BitCastInst *BitCast = dyn_cast(Inst)) { + // XXX EMSCRIPTEN handle bitcast <4 x i32|float> or <2 x double> to i128 + Value *Input = BitCast->getOperand(0); + if (!Input->getType()->isVectorTy()) { + return; // we can't do anything for it, but see below on trivial casts to i128 and back, it might get handled there + } + VectorType *VT = cast(Input->getType()); + Type *ET = VT->getElementType(); + + // handle trivial casts to i128 and immediately back + if (BitCast->hasOneUse()) { + User* U = *BitCast->user_begin(); + if (BitCastInst *UserBitCast = dyn_cast(U)) { + if (UserBitCast->getType()->isVectorTy()) { + Value* Direct = Input; + if (VT != UserBitCast->getType()) { + Direct = IRB.CreateBitCast(Direct, UserBitCast->getType(), Twine(Name, "dcast")); + } + State.recordToErase(BitCast); + State.recordConverted(UserBitCast, Direct); + return; + } + } + } + + Type *I32 = Type::getInt32Ty(VT->getContext()); + + if (VT->getNumElements() == 4) { + assert(ET->isIntegerTy(32) || ET->isFloatTy()); + if (ET->isFloatTy()) { + Input = IRB.CreateBitCast(Input, VectorType::get(I32, 4), Twine(Name, "toint")); + } + } else if (VT->getNumElements() == 2) { + assert(ET->isDoubleTy()); + Input = IRB.CreateBitCast(Input, VectorType::get(I32, 4), Twine(Name, "toint")); + } else { + DIE_IF(true, Inst, "BitCast Instruction"); + } + + Value *P0 = IRB.CreateExtractElement(Input, ConstantInt::get(I32, 0), Twine(Name, ".p0")); + Value *P1 = IRB.CreateExtractElement(Input, ConstantInt::get(I32, 1), Twine(Name, ".p1")); + Value *P2 = IRB.CreateExtractElement(Input, ConstantInt::get(I32, 2), Twine(Name, ".p2")); + Value *P3 = IRB.CreateExtractElement(Input, ConstantInt::get(I32, 3), Twine(Name, ".p3")); + + Type *I64 = Type::getInt64Ty(VT->getContext()); + P0 = IRB.CreateZExt(P0, I64, Twine(Name, ".p0.64")); + P1 = IRB.CreateZExt(P1, I64, Twine(Name, ".p1.64")); + P2 = IRB.CreateZExt(P2, I64, Twine(Name, ".p2.64")); + P3 = IRB.CreateZExt(P3, I64, Twine(Name, ".p3.64")); + + Value *Lo = IRB.CreateBinOp(Instruction::BinaryOps::Or, P0, IRB.CreateBinOp(Instruction::BinaryOps::Shl, P1, ConstantInt::get(I64, 32), Twine(Name, ".mid.lo")), Twine(Name, ".lo")); + Value *Hi = IRB.CreateBinOp(Instruction::BinaryOps::Or, P2, IRB.CreateBinOp(Instruction::BinaryOps::Shl, P3, ConstantInt::get(I64, 32), 
Twine(Name, ".mid.hi")), Twine(Name, ".hi")); + State.recordConverted(BitCast, {Lo, Hi}); + + } else { + DIE_IF(true, Inst, "Instruction"); + } +} + +bool ExpandLargeIntegers::runOnFunction(Function &F) { + // Don't support changing the function arguments. Illegal function arguments + // should not be generated by clang. + for (const Argument &Arg : F.args()) + if (shouldConvert(&Arg)) + report_fatal_error("Function " + F.getName() + + " has illegal integer argument"); + + // TODO(jfb) This should loop to handle nested forward PHIs. + + ConversionState State; + DataLayout DL(F.getParent()); + bool Modified = false; + ReversePostOrderTraversal RPOT(&F); + for (ReversePostOrderTraversal::rpo_iterator FI = RPOT.begin(), + FE = RPOT.end(); + FI != FE; ++FI) { + BasicBlock *BB = *FI; + for (Instruction &I : *BB) { + // Only attempt to convert an instruction if its result or any of its + // operands are illegal. + bool ShouldConvert = shouldConvert(&I); + for (Value *Op : I.operands()) + ShouldConvert |= shouldConvert(Op); + if (ShouldConvert) { + convertInstruction(&I, State, DL); + Modified = true; + } + } + } + State.patchForwardPHIs(); + State.eraseReplacedInstructions(); + return Modified; +} + +FunctionPass *llvm::createExpandLargeIntegersPass() { + return new ExpandLargeIntegers(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandShuffleVector.cpp b/lib/Target/JSBackend/NaCl/ExpandShuffleVector.cpp new file mode 100644 index 000000000000..7212668216bd --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandShuffleVector.cpp @@ -0,0 +1,110 @@ +//===- ExpandShuffleVector.cpp - shufflevector to {insert/extract}element -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Replace all shufflevector instructions by insertelement / extractelement. +// BackendCanonicalize is able to reconstruct the shufflevector. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { +class ExpandShuffleVector : public BasicBlockPass { +public: + static char ID; // Pass identification, replacement for typeid + ExpandShuffleVector() : BasicBlockPass(ID), M(0) { + initializeExpandShuffleVectorPass(*PassRegistry::getPassRegistry()); + } + using BasicBlockPass::doInitialization; + bool doInitialization(Module &Mod) override { + M = &Mod; + return false; // Unchanged. 
+ } + bool runOnBasicBlock(BasicBlock &BB) override; + +private: + const Module *M; + void Expand(ShuffleVectorInst *Shuf, Type *Int32); +}; +} + +char ExpandShuffleVector::ID = 0; +INITIALIZE_PASS( + ExpandShuffleVector, "expand-shufflevector", + "Expand shufflevector instructions into insertelement and extractelement", + false, false) + +void ExpandShuffleVector::Expand(ShuffleVectorInst *Shuf, Type *Int32) { + Value *L = Shuf->getOperand(0); + Value *R = Shuf->getOperand(1); + assert(L->getType() == R->getType()); + VectorType *SrcVecTy = cast(L->getType()); + VectorType *DstVecTy = Shuf->getType(); + Type *ElemTy = DstVecTy->getElementType(); + SmallVector Mask = Shuf->getShuffleMask(); + unsigned NumSrcElems = SrcVecTy->getNumElements(); + unsigned NumDstElems = Mask.size(); + + // Start with an undefined vector, extract each element from either L + // or R according to the Mask, and insert it into contiguous element + // locations in the result vector. + // + // The sources for shufflevector must have the same type but the + // destination could be a narrower or wider vector with the same + // element type. + Instruction *ExtractLoc = Shuf; + Value *Res = UndefValue::get(DstVecTy); + for (unsigned Elem = 0; Elem != NumDstElems; ++Elem) { + bool IsUndef = + 0 > Mask[Elem] || static_cast(Mask[Elem]) >= NumSrcElems * 2; + bool IsL = static_cast(Mask[Elem]) < NumSrcElems; + Value *From = IsL ? L : R; + int Adjustment = IsL ? 0 : NumSrcElems; + Constant *ExtractIdx = ConstantInt::get(Int32, Mask[Elem] - Adjustment); + Constant *InsertIdx = ConstantInt::get(Int32, Elem); + Value *ElemToInsert = IsUndef ? UndefValue::get(ElemTy) + : (Value *)ExtractElementInst::Create( + From, ExtractIdx, "", ExtractLoc); + Res = InsertElementInst::Create(Res, ElemToInsert, InsertIdx, "", Shuf); + if (ExtractLoc == Shuf) + // All the extracts should be added just before the first insert we added. + ExtractLoc = cast(Res); + } + + Shuf->replaceAllUsesWith(Res); + Shuf->eraseFromParent(); +} + +bool ExpandShuffleVector::runOnBasicBlock(BasicBlock &BB) { + Type *Int32 = Type::getInt32Ty(M->getContext()); + typedef SmallVector Instructions; + Instructions Shufs; + + for (BasicBlock::iterator BBI = BB.begin(); BBI != BB.end(); ++BBI) + if (ShuffleVectorInst *S = dyn_cast(&*BBI)) + Shufs.push_back(S); + + for (Instructions::iterator S = Shufs.begin(), E = Shufs.end(); S != E; ++S) + Expand(*S, Int32); + + return !Shufs.empty(); +} + +BasicBlockPass *llvm::createExpandShuffleVectorPass() { + return new ExpandShuffleVector(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandSmallArguments.cpp b/lib/Target/JSBackend/NaCl/ExpandSmallArguments.cpp new file mode 100644 index 000000000000..a2c58034568d --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandSmallArguments.cpp @@ -0,0 +1,250 @@ +//===- ExpandSmallArguments.cpp - Expand out arguments smaller than i32----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LLVM IR allows function return types and argument types such as +// "zeroext i8" and "signext i8". 
The Language Reference says that +// zeroext "indicates to the code generator that the parameter or +// return value should be zero-extended to the extent required by the +// target's ABI (which is usually 32-bits, but is 8-bits for a i1 on +// x86-64) by the caller (for a parameter) or the callee (for a return +// value)". +// +// This can lead to non-portable behaviour when calling functions +// without C prototypes or with wrong C prototypes. +// +// In order to remove this non-portability from PNaCl, and to simplify +// the language that the PNaCl translator accepts, the +// ExpandSmallArguments pass widens integer arguments and return types +// to be at least 32 bits. The pass inserts explicit cast +// instructions (ZExtInst/SExtInst/TruncInst) as needed. +// +// The pass chooses between ZExtInst and SExtInst widening based on +// whether a "signext" attribute is present. However, in principle +// the pass could always use zero-extension, because the extent to +// which either zero-extension or sign-extension is done is up to the +// target ABI, which is up to PNaCl to specify. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because the pass recreates functions in + // order to change their arguments' types. + class ExpandSmallArguments : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandSmallArguments() : ModulePass(ID) { + initializeExpandSmallArgumentsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandSmallArguments::ID = 0; +INITIALIZE_PASS(ExpandSmallArguments, "expand-small-arguments", + "Expand function arguments to be at least 32 bits in size", + false, false) + +// Returns the normalized version of the given argument/return type. +static Type *NormalizeType(Type *Ty) { + if (IntegerType *IntTy = dyn_cast(Ty)) { + if (IntTy->getBitWidth() < 32) { + return IntegerType::get(Ty->getContext(), 32); + } + } + return Ty; +} + +// Returns the normalized version of the given function type. +static FunctionType *NormalizeFunctionType(FunctionType *FTy) { + if (FTy->isVarArg()) { + report_fatal_error( + "ExpandSmallArguments does not handle varargs functions"); + } + SmallVector ArgTypes; + for (unsigned I = 0; I < FTy->getNumParams(); ++I) { + ArgTypes.push_back(NormalizeType(FTy->getParamType(I))); + } + return FunctionType::get(NormalizeType(FTy->getReturnType()), + ArgTypes, false); +} + +// Convert the given function to use normalized argument/return types. +static bool ConvertFunction(Function *Func) { + FunctionType *FTy = Func->getFunctionType(); + FunctionType *NFTy = NormalizeFunctionType(FTy); + if (NFTy == FTy) + return false; // No change needed. + Function *NewFunc = RecreateFunction(Func, NFTy); + + // Move the arguments across to the new function. 
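+ // Added illustration of the overall effect of ConvertFunction (an assumed
+ // sketch, not verified output): a function such as
+ // ```llvm
+ // define signext i8 @f(i8 signext %c) { ... ret i8 %r }
+ // ```
+ // is recreated roughly as
+ // ```llvm
+ // define i32 @f(i32 %c) {
+ //   %c.arg_trunc = trunc i32 %c to i8   ; the original body uses the trunc
+ //   ...
+ //   %r.ret_ext = sext i8 %r to i32      ; returns are extended further below
+ //   ret i32 %r.ret_ext
+ // }
+ // ```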
+ for (Function::arg_iterator I = Func->arg_begin(), E = Func->arg_end(), + NewI = NewFunc->arg_begin(); + I != E; ++I, ++NewI) { + auto Arg = &*I; + auto NewArg = &*NewI; + NewArg->takeName(Arg); + if (Arg->getType() == NewArg->getType()) { + Arg->replaceAllUsesWith(NewArg); + } else { + Instruction *Trunc = new TruncInst( + NewArg, Arg->getType(), NewArg->getName() + ".arg_trunc", + &*NewFunc->getEntryBlock().getFirstInsertionPt()); + Arg->replaceAllUsesWith(Trunc); + } + } + + if (FTy->getReturnType() != NFTy->getReturnType()) { + // Fix up return instructions. + Instruction::CastOps CastType = + Func->getAttributes().hasAttribute(0, Attribute::SExt) ? + Instruction::SExt : Instruction::ZExt; + for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; + ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = &*Iter++; + if (ReturnInst *Ret = dyn_cast(Inst)) { + Value *Ext = CopyDebug( + CastInst::Create(CastType, Ret->getReturnValue(), + NFTy->getReturnType(), + Ret->getReturnValue()->getName() + ".ret_ext", + Ret), + Ret); + CopyDebug(ReturnInst::Create(Ret->getContext(), Ext, Ret), Ret); + Ret->eraseFromParent(); + } + } + } + } + + Func->eraseFromParent(); + return true; +} + +// Convert the given call to use normalized argument/return types. +template static bool ConvertCall(T *Call, Pass *P) { + // Don't try to change calls to intrinsics. + if (isa(Call)) + return false; + FunctionType *FTy = cast( + Call->getCalledValue()->getType()->getPointerElementType()); + FunctionType *NFTy = NormalizeFunctionType(FTy); + if (NFTy == FTy) + return false; // No change needed. + + // Convert arguments. + SmallVector Args; + for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) { + Value *Arg = Call->getArgOperand(I); + if (NFTy->getParamType(I) != FTy->getParamType(I)) { + Instruction::CastOps CastType = + Call->getAttributes().hasAttribute(I + 1, Attribute::SExt) ? 
+ Instruction::SExt : Instruction::ZExt; + Arg = CopyDebug(CastInst::Create(CastType, Arg, NFTy->getParamType(I), + "arg_ext", Call), Call); + } + Args.push_back(Arg); + } + Value *CastFunc = + CopyDebug(new BitCastInst(Call->getCalledValue(), NFTy->getPointerTo(), + Call->getName() + ".arg_cast", Call), Call); + Value *Result = NULL; + if (CallInst *OldCall = dyn_cast(Call)) { + CallInst *NewCall = CopyDebug(CallInst::Create(CastFunc, Args, "", OldCall), + OldCall); + NewCall->takeName(OldCall); + NewCall->setAttributes(OldCall->getAttributes()); + NewCall->setCallingConv(OldCall->getCallingConv()); + NewCall->setTailCall(OldCall->isTailCall()); + Result = NewCall; + + if (FTy->getReturnType() != NFTy->getReturnType()) { + Result = CopyDebug(new TruncInst(NewCall, FTy->getReturnType(), + NewCall->getName() + ".ret_trunc", Call), + Call); + } + } else if (InvokeInst *OldInvoke = dyn_cast(Call)) { + BasicBlock *Parent = OldInvoke->getParent(); + BasicBlock *NormalDest = OldInvoke->getNormalDest(); + BasicBlock *UnwindDest = OldInvoke->getUnwindDest(); + + if (FTy->getReturnType() != NFTy->getReturnType()) { + if (BasicBlock *SplitDest = SplitCriticalEdge(Parent, NormalDest)) { + NormalDest = SplitDest; + } + } + + InvokeInst *New = CopyDebug(InvokeInst::Create(CastFunc, NormalDest, + UnwindDest, Args, + "", OldInvoke), + OldInvoke); + New->takeName(OldInvoke); + + if (FTy->getReturnType() != NFTy->getReturnType()) { + Result = CopyDebug(new TruncInst(New, FTy->getReturnType(), + New->getName() + ".ret_trunc", + NormalDest->getTerminator()), + OldInvoke); + } else { + Result = New; + } + + New->setAttributes(OldInvoke->getAttributes()); + New->setCallingConv(OldInvoke->getCallingConv()); + } + Call->replaceAllUsesWith(Result); + Call->eraseFromParent(); + return true; +} + +bool ExpandSmallArguments::runOnModule(Module &M) { + bool Changed = false; + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *Func = &*Iter++; + // Don't try to change intrinsic declarations because intrinsics + // will continue to have non-normalized argument types. For + // example, memset() takes an i8 argument. It shouldn't matter + // whether we modify the types of other function declarations, but + // we don't expect to see non-intrinsic function declarations in a + // PNaCl pexe. + if (Func->empty()) + continue; + + for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; + ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E;) { + Instruction *Inst = &*Iter++; + if (CallInst *Call = dyn_cast(Inst)) { + Changed |= ConvertCall(Call, this); + } else if (InvokeInst *Invoke = dyn_cast(Inst)) { + Changed |= ConvertCall(Invoke, this); + } + } + } + + Changed |= ConvertFunction(Func); + } + return Changed; +} + +ModulePass *llvm::createExpandSmallArgumentsPass() { + return new ExpandSmallArguments(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandStructRegs.cpp b/lib/Target/JSBackend/NaCl/ExpandStructRegs.cpp new file mode 100644 index 000000000000..4922a38308e0 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandStructRegs.cpp @@ -0,0 +1,572 @@ +//===- ExpandStructRegs.cpp - Expand out variables with struct type--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out some uses of LLVM variables +// (a.k.a. 
registers) of struct type. It replaces loads and stores of +// structs with separate loads and stores of the structs' fields. The +// motivation is to omit struct types from PNaCl's stable ABI. +// +// ExpandStructRegs does not yet handle all possible uses of struct +// values. It is intended to handle the uses that Clang and the SROA +// pass generate. Clang generates struct loads and stores, along with +// extractvalue instructions, in its implementation of C++ method +// pointers, and the SROA pass sometimes converts this code to using +// insertvalue instructions too. +// +// ExpandStructRegs does not handle: +// +// * Array types. +// * Function types containing arguments or return values of struct +// type without the "byval" or "sret" attributes. Since by-value +// struct-passing generally uses "byval"/"sret", this does not +// matter. +// +// Other limitations: +// +// * ExpandStructRegs does not attempt to use memcpy() where that +// might be more appropriate than copying fields individually. +// * ExpandStructRegs does not preserve the contents of padding +// between fields when copying structs. However, the contents of +// padding fields are not defined anyway. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +#define DEBUG_TYPE "expand-struct-regs" + +using namespace llvm; + +namespace { +struct ExpandStructRegs : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + ExpandStructRegs() : FunctionPass(ID) { + initializeExpandStructRegsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); +}; +} + +char ExpandStructRegs::ID = 0; +INITIALIZE_PASS(ExpandStructRegs, "expand-struct-regs", + "Expand out variables with struct types", false, false) + +static bool DoAnotherPass(Type *Ty) { return isa(Ty); } +static bool DoAnotherPass(Value *V) { return DoAnotherPass(V->getType()); } + +static bool SplitUpPHINode(PHINode *Phi) { + StructType *STy = cast(Phi->getType()); + + Value *NewStruct = UndefValue::get(STy); + Instruction *NewStructInsertPt = &*Phi->getParent()->getFirstInsertionPt(); + + bool NeedsAnotherPass = false; + + // Create a separate PHINode for each struct field. + for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector EVIndexes; + EVIndexes.push_back(Index); + + Type *ElemTy = STy->getElementType(Index); + NeedsAnotherPass = NeedsAnotherPass || DoAnotherPass(ElemTy); + + PHINode *NewPhi = PHINode::Create(ElemTy, Phi->getNumIncomingValues(), + Phi->getName() + ".index", Phi); + CopyDebug(NewPhi, Phi); + for (unsigned PhiIndex = 0; PhiIndex < Phi->getNumIncomingValues(); + ++PhiIndex) { + BasicBlock *IncomingBB = Phi->getIncomingBlock(PhiIndex); + Value *EV = CopyDebug( + ExtractValueInst::Create(Phi->getIncomingValue(PhiIndex), EVIndexes, + Phi->getName() + ".extract", + IncomingBB->getTerminator()), + Phi); + NewPhi->addIncoming(EV, IncomingBB); + } + + // Reconstruct the original struct value. 
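+ // Added illustration (assumed shape of the rewrite, not verified output):
+ // ```llvm
+ // %p = phi { i32, i8 } [ %a, %bb1 ], [ %b, %bb2 ]
+ // ```
+ // becomes one phi per field, fed by extractvalues placed before each
+ // predecessor's terminator and reassembled by the insertvalue created below:
+ // ```llvm
+ // ; in %bb1: %p.extract = extractvalue { i32, i8 } %a, 0   (and for field 1)
+ // %p.index  = phi i32 [ %p.extract, %bb1 ], [ %p.extract1, %bb2 ]
+ // %p.insert = insertvalue { i32, i8 } undef, i32 %p.index, 0
+ // ```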
+ NewStruct = CopyDebug(InsertValueInst::Create(NewStruct, NewPhi, EVIndexes, + Phi->getName() + ".insert", + NewStructInsertPt), + Phi); + } + Phi->replaceAllUsesWith(NewStruct); + Phi->eraseFromParent(); + + return NeedsAnotherPass; +} + +static bool SplitUpSelect(SelectInst *Select) { + StructType *STy = cast(Select->getType()); + Value *NewStruct = UndefValue::get(STy); + + bool NeedsAnotherPass = false; + // Create a separate SelectInst for each struct field. + for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector EVIndexes; + EVIndexes.push_back(Index); + + Value *TrueVal = CopyDebug( + ExtractValueInst::Create(Select->getTrueValue(), EVIndexes, + Select->getName() + ".extract", Select), + Select); + Value *FalseVal = CopyDebug( + ExtractValueInst::Create(Select->getFalseValue(), EVIndexes, + Select->getName() + ".extract", Select), + Select); + Value *NewSelect = + CopyDebug(SelectInst::Create(Select->getCondition(), TrueVal, FalseVal, + Select->getName() + ".index", Select), + Select); + + NeedsAnotherPass = NeedsAnotherPass || DoAnotherPass(NewSelect); + + // Reconstruct the original struct value. + NewStruct = CopyDebug( + InsertValueInst::Create(NewStruct, NewSelect, EVIndexes, + Select->getName() + ".insert", Select), + Select); + } + Select->replaceAllUsesWith(NewStruct); + Select->eraseFromParent(); + + return NeedsAnotherPass; +} + +template +static void ProcessLoadOrStoreAttrs(InstType *Dest, InstType *Src, + StructType* STy, const unsigned Index, + const DataLayout *DL) { + CopyDebug(Dest, Src); + Dest->setVolatile(Src->isVolatile()); + if (Src->isAtomic()) { + errs() << "Use: " << *Src << "\n"; + report_fatal_error("Atomic struct loads/stores not supported"); + } + + if (!Src->getAlignment()) { + return; + } + + const StructLayout *SL = DL->getStructLayout(STy); + const unsigned Alignment = Src->getAlignment(); + Dest->setAlignment(MinAlign(Alignment, SL->getElementOffset(Index))); +} + +template +static void ProcessArrayLoadOrStoreAttrs(InstType *Dest, InstType *Src, + ArrayType* ATy, const unsigned Index, + const DataLayout *DL) { + CopyDebug(Dest, Src); + Dest->setVolatile(Src->isVolatile()); + if (Src->isAtomic()) { + errs() << "Use: " << *Src << "\n"; + report_fatal_error("Atomic struct loads/stores not supported"); + } + + if (!Src->getAlignment()) { + return; + } + + const unsigned Alignment = Src->getAlignment(); + Dest->setAlignment(MinAlign(Alignment, Index * DL->getTypeSizeInBits(ATy->getElementType()))); +} + +static bool SplitUpStore(StoreInst *Store, const DataLayout *DL) { + StructType *STy = cast(Store->getValueOperand()->getType()); + + bool NeedsAnotherPass = false; + // Create a separate store instruction for each struct field. 
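+ // Added illustration (assumed shape of the rewrite, not verified output):
+ // ```llvm
+ // store { i32, i8 } %v, { i32, i8 }* %p
+ // ```
+ // becomes, for each field:
+ // ```llvm
+ // %p.index = getelementptr { i32, i8 }, { i32, i8 }* %p, i32 0, i32 0
+ // %field   = extractvalue { i32, i8 } %v, 0
+ // store i32 %field, i32* %p.index
+ // ```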
+ for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector Indexes; + Indexes.push_back(ConstantInt::get(Store->getContext(), APInt(32, 0))); + Indexes.push_back(ConstantInt::get(Store->getContext(), APInt(32, Index))); + Value *GEP = + CopyDebug(GetElementPtrInst::Create( + STy, + Store->getPointerOperand(), Indexes, + Store->getPointerOperand()->getName() + ".index", Store), + Store); + NeedsAnotherPass = + NeedsAnotherPass || DoAnotherPass(GEP->getType()->getContainedType(0)); + + SmallVector EVIndexes; + EVIndexes.push_back(Index); + Value *Field = ExtractValueInst::Create(Store->getValueOperand(), EVIndexes, + "", Store); + StoreInst *NewStore = new StoreInst(Field, GEP, Store); + ProcessLoadOrStoreAttrs(NewStore, Store, STy, Index, DL); + } + Store->eraseFromParent(); + + return NeedsAnotherPass; +} + +static bool SplitUpLoad(LoadInst *Load, const DataLayout *DL) { + StructType *STy = cast(Load->getType()); + Value *NewStruct = UndefValue::get(STy); + + bool NeedsAnotherPass = false; + // Create a separate load instruction for each struct field. + for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector Indexes; + Indexes.push_back(ConstantInt::get(Load->getContext(), APInt(32, 0))); + Indexes.push_back(ConstantInt::get(Load->getContext(), APInt(32, Index))); + Value *GEP = + CopyDebug(GetElementPtrInst::Create(STy, + Load->getPointerOperand(), Indexes, + Load->getName() + ".index", Load), + Load); + LoadInst *NewLoad = new LoadInst(GEP, Load->getName() + ".field", Load); + + NeedsAnotherPass = NeedsAnotherPass || DoAnotherPass(NewLoad); + ProcessLoadOrStoreAttrs(NewLoad, Load, STy, Index, DL); + + // Reconstruct the struct value. + SmallVector EVIndexes; + EVIndexes.push_back(Index); + NewStruct = + CopyDebug(InsertValueInst::Create(NewStruct, NewLoad, EVIndexes, + Load->getName() + ".insert", Load), + Load); + } + Load->replaceAllUsesWith(NewStruct); + Load->eraseFromParent(); + + return NeedsAnotherPass; +} + +static bool SplitUpArrayStore(StoreInst *Store, const DataLayout *DL) { + ArrayType *ATy = cast(Store->getValueOperand()->getType()); + + bool NeedsAnotherPass = false; + // Create a separate store instruction for each struct field. + for (unsigned Index = 0; Index < ATy->getNumElements(); ++Index) { + SmallVector Indexes; + Indexes.push_back(ConstantInt::get(Store->getContext(), APInt(32, 0))); + Indexes.push_back(ConstantInt::get(Store->getContext(), APInt(32, Index))); + Value *GEP = + CopyDebug(GetElementPtrInst::Create( + ATy, + Store->getPointerOperand(), Indexes, + Store->getPointerOperand()->getName() + ".index", Store), + Store); + NeedsAnotherPass = + NeedsAnotherPass || DoAnotherPass(GEP->getType()->getContainedType(0)); + + SmallVector EVIndexes; + EVIndexes.push_back(Index); + Value *Field = ExtractValueInst::Create(Store->getValueOperand(), EVIndexes, + "", Store); + StoreInst *NewStore = new StoreInst(Field, GEP, Store); + ProcessArrayLoadOrStoreAttrs(NewStore, Store, ATy, Index, DL); + } + Store->eraseFromParent(); + + return NeedsAnotherPass; +} + +static bool SplitUpArrayLoad(LoadInst *Load, const DataLayout *DL) { + ArrayType *ATy = cast(Load->getType()); + Value *NewStruct = UndefValue::get(ATy); + + bool NeedsAnotherPass = false; + // Create a separate load instruction for each struct field. 
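+ // Added illustration (assumed shape, analogous to SplitUpLoad above but for
+ // array elements):
+ // ```llvm
+ // %v = load [2 x float], [2 x float]* %p
+ // ```
+ // becomes, for each element:
+ // ```llvm
+ // %v.index  = getelementptr [2 x float], [2 x float]* %p, i32 0, i32 0
+ // %v.field  = load float, float* %v.index
+ // %v.insert = insertvalue [2 x float] undef, float %v.field, 0
+ // ```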
+ for (unsigned Index = 0; Index < ATy->getNumElements(); ++Index) { + SmallVector Indexes; + Indexes.push_back(ConstantInt::get(Load->getContext(), APInt(32, 0))); + Indexes.push_back(ConstantInt::get(Load->getContext(), APInt(32, Index))); + Value *GEP = + CopyDebug(GetElementPtrInst::Create(ATy, + Load->getPointerOperand(), Indexes, + Load->getName() + ".index", Load), + Load); + LoadInst *NewLoad = new LoadInst(GEP, Load->getName() + ".field", Load); + + NeedsAnotherPass = NeedsAnotherPass || DoAnotherPass(NewLoad); + ProcessArrayLoadOrStoreAttrs(NewLoad, Load, ATy, Index, DL); + + // Reconstruct the struct value. + SmallVector EVIndexes; + EVIndexes.push_back(Index); + NewStruct = + CopyDebug(InsertValueInst::Create(NewStruct, NewLoad, EVIndexes, + Load->getName() + ".insert", Load), + Load); + } + Load->replaceAllUsesWith(NewStruct); + Load->eraseFromParent(); + + return NeedsAnotherPass; +} + +static bool ExpandExtractValue(ExtractValueInst *EV, + SmallVectorImpl *ToErase) { + // Search for the insertvalue instruction that inserts the struct field + // referenced by this extractvalue instruction, excluding CmpXchg which + // returns a struct and is handled by RewriteAtomics. + Value *StructVal = EV->getAggregateOperand(); + Value *ResultField = nullptr; + + // The current depth of the search. It's impossible to backtrack in our search + // tree (all prior (not in the CFG sense) extractvalues will already be + // expanded), so this variable is never reset to zero. + size_t EVIndex = 0; + + // Some intrinsics and cmpxchg returns struct vals and this pass can't do + // anything but ignore them. + if (isa(StructVal) || isa(StructVal)) + return false; + + for (;;) { + DEBUG(dbgs() << "Expanding struct value: " << *StructVal << "\n"); + + if (InsertValueInst *IV = dyn_cast(StructVal)) { + + size_t IVIndex = 0; + for (; EVIndex < EV->getIndices().size() && + IVIndex < IV->getIndices().size(); + ++IVIndex, ++EVIndex) { + + const bool Equal = + (EV->getIndices()[EVIndex] == IV->getIndices()[IVIndex]); + + if (IVIndex + 1 == IV->getIndices().size() && Equal) { + if (EVIndex + 1 == EV->getIndices().size()) { + // Exact match. We break out of all loops and ResultField will + // replace EV. + ResultField = IV->getInsertedValueOperand(); + } else { + // We've found a match, but haven't reached the end of EV's indexes. + // We continue looping through the outermost loop, and search for + // indices on the next level down (ie we increment EVIndex). + // This branch is common when encountering nested insertvalues; for + // example: + // ```llvm + // %1 = insertvalue { i32 } undef, i32 1, 0 + // %2 = insertvalue { { i32 } } %1, { i32 } %1, 0 + // %3 = extractvalue { { i32 } } %2, 0, 0 + // ``` + StructVal = IV->getInsertedValueOperand(); + ++EVIndex; + } + break; + } else if (!Equal) { + // No match. Try the next struct value in the chain. + // For example: + // ```llvm + // %1 = insertvalue { i32, i32, i32 } undef, i32 5, 0 + // %2 = insertvalue { i32, i32, i32 } %1, i32 10, 1 + // %3 = insertvalue { i32, i32, i32 } %2, i32 15, 2 + // %4 = extractvalue { i32, i32, i32 } %3, 0 + // ``` + // In this case, to expand %4, this branch will hit insertvalues %3 + // and %2 before + // it finds the solution, %1. 
+ StructVal = IV->getAggregateOperand(); + break; + } + + // One last case worth mentioning: + // ```llvm + // %aa = alloca { i32 } + // %a = insertvalue { i32 } undef, i32 1, 0 + // %b = insertvalue { { i32 } } undef, { i32 } %a, 0 + // %c = extractvalue { { i32 } } %b, 0 + // store { i32 } %c, { i32 }* %aa + // ``` + // In the case of %c, the condition of our inner loop will be false, and + // we will fall into (EVIndex == EV->getIndices().size()) + // Note that in this case, SplitStore will have inserted an extra + // extractvalue and GEP: + // ```llvm + // %aa = alloca { i32 } + // %a = insertvalue { i32 } undef, i32 1, 0 + // %b = insertvalue { { i32 } } undef, { i32 } %a, 0 + // %c.extractval = extractvalue { i32 } %a, 0 + // %aa.index = getelementptr { i32 }* %aa, i32 0, i32 0 + // store i32 %c, i32* %aa.index + // ``` + } + if (ResultField) { + // \O/ We're done with this ExtractValueInst! + break; + } else if (EVIndex == EV->getIndices().size()) { + // We've found an insertvalue that inserts at one or more levels deeper + // than this extractvalue. For example (borrowed from the tests), where + // %h is EV && %e is IV: + // ```llvm + // %e = insertvalue { { { i32, i64 } }, i64 } undef, { i32, i64 } %b, 0, 0 + // %h = extractvalue { { { i32, i64 } }, i64 } %e, 0 + // ; later on.. + // %1 = extractvalue { { i32, i64 } } %h, 0 + // ``` + // This expands to: + // ```llvm + // %e = insertvalue { { { i32, i64 } }, i64 } undef, { i32, i64 } %b, 0, 0 + // %1 = insertvalue { { i32, i64 } } undef, { i32, i64 } %b, 0 + // %h = extractvalue { { { i32, i64 } }, i64 } %e, 0 + // %2 = extractvalue { { i32, i64 } } %h, 0 + // ``` + // Then, outside the outer loop, %h is deleted: + // ```llvm + // %e = insertvalue { { { i32, i64 } }, i64 } undef, { i32, i64 } %b, 0, 0 + // %1 = insertvalue { { i32, i64 } } undef, { i32, i64 } %b, 0 + // %2 = extractvalue { { i32, i64 } } %1, 0 + // ``` + // %2 will be expanded at a later point. + // This branch used the second index in %e to create %1 (because %2 && + // %e's first indices where equal). + // + // Additionally, it's impossible to not change StructVal && not hit this + // branch (but the reverse is not true!). + + SmallVector Indices(IV->getIndices().begin() + IVIndex, + IV->getIndices().end()); + + InsertValueInst *Insert = InsertValueInst::Create( + UndefValue::get(EV->getType()), IV->getInsertedValueOperand(), + Indices, "", EV); + ToErase->push_back(Insert); + ResultField = CopyDebug(Insert, EV); + break; + } + + // At this point, StructVal must be changed. + } else if (Constant *C = dyn_cast(StructVal)) { + SmallVector Indices(EV->getIndices().begin() + EVIndex, + EV->getIndices().end()); + ResultField = ConstantExpr::getExtractValue(C, Indices); + break; + } else if (isa(StructVal)) { + ResultField = StructVal; + break; + } else { + errs() << "Value: " << *StructVal << "\n"; + report_fatal_error("Unrecognized struct value"); + } + } + + assert(ResultField); // Failsafe. + EV->replaceAllUsesWith(ResultField); + EV->eraseFromParent(); + return true; +} + +static bool ExpandExtractValues(Function &Func, bool Finalize) { + bool Changed = false; + + SmallVector ToErase; + // Expand out all the extractvalue instructions. Also collect up + // the insertvalue instructions for later deletion so that we do not + // need to make extra passes across the whole function. 
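+ // Added illustration of the net effect (an assumed sketch):
+ // ```llvm
+ // %s = insertvalue { i32, i32 } undef, i32 1, 0
+ // %t = insertvalue { i32, i32 } %s, i32 2, 1
+ // %x = extractvalue { i32, i32 } %t, 0
+ // ```
+ // After expansion, uses of %x refer directly to i32 1, and the insertvalue
+ // instructions are erased once Finalize is set.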
+ + for (auto &BB : Func) { + for (BasicBlock::iterator Iter = BB.begin(), E = BB.end(); Iter != E;) { + Instruction *Inst = &*Iter++; + if (ExtractValueInst *EV = dyn_cast(Inst)) { + Changed |= ExpandExtractValue(EV, &ToErase); + } else if (isa(Inst)) { + ToErase.push_back(Inst); + Changed = true; + } + } + } + + if (Finalize) { + // Delete the insertvalue instructions. These can reference each + // other, so we must do dropAllReferences() before doing + // eraseFromParent(), otherwise we will try to erase instructions + // that are still referenced. + for (Instruction *I : ToErase) { + I->dropAllReferences(); + } + + for (Instruction *I : ToErase) { + I->eraseFromParent(); + } + } + + return Changed; +} + +bool ExpandStructRegs::runOnFunction(Function &Func) { + bool Changed = false; + const DataLayout *DL = &Func.getParent()->getDataLayout(); + + auto SplitUpInstructions = [&]() { + bool NeedsAnotherPass; + do { + NeedsAnotherPass = false; + // Split up aggregate loads, stores and phi nodes into operations on + // scalar types. This inserts extractvalue and insertvalue + // instructions which we will expand out later. + for (Function::iterator BB = Func.begin(), E = Func.end(); BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E;) { + Instruction *Inst = &*Iter++; + if (StoreInst *Store = dyn_cast(Inst)) { + if (Store->getValueOperand()->getType()->isStructTy()) { + NeedsAnotherPass |= SplitUpStore(Store, DL); + Changed = true; + } else if (Store->getValueOperand()->getType()->isArrayTy()) { + NeedsAnotherPass |= SplitUpArrayStore(Store, DL); + Changed = true; + } + } else if (LoadInst *Load = dyn_cast(Inst)) { + if (Load->getType()->isStructTy()) { + NeedsAnotherPass |= SplitUpLoad(Load, DL); + Changed = true; + } else if (Load->getType()->isArrayTy()) { + NeedsAnotherPass |= SplitUpArrayLoad(Load, DL); + Changed = true; + } + } else if (PHINode *Phi = dyn_cast(Inst)) { + if (Phi->getType()->isStructTy()) { + NeedsAnotherPass |= SplitUpPHINode(Phi); + Changed = true; + } + } else if (SelectInst *Select = dyn_cast(Inst)) { + if (Select->getType()->isStructTy()) { + NeedsAnotherPass |= SplitUpSelect(Select); + Changed = true; + } + } + } + } + } while (NeedsAnotherPass); + }; + + SplitUpInstructions(); + Changed |= ExpandExtractValues(Func, false); + + if (Changed) { + // insertvalues that receive insertvalues may require additional splitting + // and expansion. + // TODO: do we need an arbitrary amount of such passes? + SplitUpInstructions(); + ExpandExtractValues(Func, true); + } + + return Changed; +} + +FunctionPass *llvm::createExpandStructRegsPass() { + return new ExpandStructRegs(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandTls.cpp b/lib/Target/JSBackend/NaCl/ExpandTls.cpp new file mode 100644 index 000000000000..b254672ea2a1 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandTls.cpp @@ -0,0 +1,336 @@ +//===- ExpandTls.cpp - Convert TLS variables to a concrete layout----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out uses of thread-local (TLS) variables into +// more primitive operations. +// +// A reference to the address of a TLS variable is expanded into code +// which gets the current thread's thread pointer using +// @llvm.nacl.read.tp() and adds a fixed offset. 
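+//
+// Added illustration (a sketch of the intended rewrite; the exact names and
+// indices are assumptions based on rewriteTlsVars below, not verified output):
+//
+//   %tls_raw    = call i8* @llvm.nacl.read.tp()
+//   %tls_struct = bitcast i8* %tls_raw to %tls_struct_ty*
+//   %field      = getelementptr %tls_struct_ty, %tls_struct_ty* %tls_struct,
+//                   i32 -1, i32 0, i32 2
+//   ; -1: the TLS data lives below the thread pointer; 0: initialized part
+//   ; (1 selects the zero-initialized part); 2: the variable's template index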
+// +// This pass allocates the offsets (relative to the thread pointer) +// that will be used for TLS variables. It sets up the global +// variables __tls_template_start, __tls_template_end etc. to contain +// a template for initializing TLS variables' values for each thread. +// This is a task normally performed by the linker in ELF systems. +// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + struct VarInfo { + GlobalVariable *TlsVar; + bool IsBss; // Whether variable is in zero-intialized part of template + int TemplateIndex; + }; + + class PassState { + public: + PassState(Module *M): M(M), DL(M), Offset(0), Alignment(1) {} + + Module *M; + DataLayout DL; + uint64_t Offset; + // 'Alignment' is the maximum variable alignment seen so far, in + // bytes. After visiting all TLS variables, this is the overall + // alignment required for the TLS template. + uint32_t Alignment; + }; + + class ExpandTls : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandTls() : ModulePass(ID) { + initializeExpandTlsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandTls::ID = 0; +INITIALIZE_PASS(ExpandTls, "nacl-expand-tls", + "Expand out TLS variables and fix TLS variable layout", + false, false) + +static void setGlobalVariableValue(Module &M, const char *Name, + Constant *Value) { + if (GlobalVariable *Var = M.getNamedGlobal(Name)) { + if (Var->hasInitializer()) { + report_fatal_error(std::string("Variable ") + Name + + " already has an initializer"); + } + Var->replaceAllUsesWith(ConstantExpr::getBitCast(Value, Var->getType())); + Var->eraseFromParent(); + } +} + +// Insert alignment padding into the TLS template. 
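+// For example (added note): if the running Offset is 5 and the next variable
+// needs 8-byte alignment, a [3 x i8] pad field is appended, Offset becomes 8,
+// and the template's overall Alignment is raised to 8 if it was smaller.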
+static void padToAlignment(PassState *State, + std::vector *FieldTypes, + std::vector *FieldValues, + unsigned Alignment) { + if ((State->Offset & (Alignment - 1)) != 0) { + unsigned PadSize = Alignment - (State->Offset & (Alignment - 1)); + Type *i8 = Type::getInt8Ty(State->M->getContext()); + Type *PadType = ArrayType::get(i8, PadSize); + FieldTypes->push_back(PadType); + if (FieldValues) + FieldValues->push_back(Constant::getNullValue(PadType)); + State->Offset += PadSize; + } + if (State->Alignment < Alignment) { + State->Alignment = Alignment; + } +} + +static void addVarToTlsTemplate(PassState *State, + std::vector *FieldTypes, + std::vector *FieldValues, + GlobalVariable *TlsVar) { + unsigned Alignment = State->DL.getPreferredAlignment(TlsVar); + padToAlignment(State, FieldTypes, FieldValues, Alignment); + + FieldTypes->push_back(TlsVar->getType()->getElementType()); + if (FieldValues) + FieldValues->push_back(TlsVar->getInitializer()); + State->Offset += + State->DL.getTypeAllocSize(TlsVar->getType()->getElementType()); +} + +static StructType *buildTlsTemplate(Module &M, std::vector *TlsVars) { + std::vector FieldBssTypes; + std::vector FieldInitTypes; + std::vector FieldInitValues; + PassState State(&M); + + for (Module::global_iterator GV = M.global_begin(); + GV != M.global_end(); + ++GV) { + if (GV->isThreadLocal()) { + if (!GV->hasInitializer()) { + // Since this is a whole-program transformation, "extern" TLS + // variables are not allowed at this point. + report_fatal_error(std::string("TLS variable without an initializer: ") + + GV->getName()); + } + if (!GV->getInitializer()->isNullValue()) { + addVarToTlsTemplate(&State, &FieldInitTypes, + &FieldInitValues, &*GV); + VarInfo Info; + Info.TlsVar = &*GV; + Info.IsBss = false; + Info.TemplateIndex = FieldInitTypes.size() - 1; + TlsVars->push_back(Info); + } + } + } + // Handle zero-initialized TLS variables in a second pass, because + // these should follow non-zero-initialized TLS variables. + for (Module::global_iterator GV = M.global_begin(); + GV != M.global_end(); + ++GV) { + if (GV->isThreadLocal() && GV->getInitializer()->isNullValue()) { + addVarToTlsTemplate(&State, &FieldBssTypes, NULL, &*GV); + VarInfo Info; + Info.TlsVar = &*GV; + Info.IsBss = true; + Info.TemplateIndex = FieldBssTypes.size() - 1; + TlsVars->push_back(Info); + } + } + // Add final alignment padding so that + // (struct tls_struct *) __nacl_read_tp() - 1 + // gives the correct, aligned start of the TLS variables given the + // x86-style layout we are using. This requires some more bytes to + // be memset() to zero at runtime. This wastage doesn't seem + // important gives that we're not trying to optimize packing by + // reordering to put similarly-aligned variables together. + padToAlignment(&State, &FieldBssTypes, NULL, State.Alignment); + + // We create the TLS template structs as "packed" because we insert + // alignment padding ourselves, and LLVM's implicit insertion of + // padding would interfere with ours. tls_bss_template can start at + // a non-aligned address immediately following the last field in + // tls_init_template. 
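+ // Added illustration (assumed layout, not verified output): with an
+ // initialized i32 and double plus one zero-initialized i32, the generated
+ // packed types would look roughly like:
+ //   %tls_init_template = type <{ i32, [4 x i8], double }>
+ //   %tls_bss_template  = type <{ i32, [4 x i8] }>
+ //   %tls_struct        = type <{ %tls_init_template, %tls_bss_template }>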
+ StructType *InitTemplateType = + StructType::create(M.getContext(), "tls_init_template"); + InitTemplateType->setBody(FieldInitTypes, /*isPacked=*/true); + StructType *BssTemplateType = + StructType::create(M.getContext(), "tls_bss_template"); + BssTemplateType->setBody(FieldBssTypes, /*isPacked=*/true); + + StructType *TemplateType = StructType::create(M.getContext(), "tls_struct"); + SmallVector TemplateTopFields; + TemplateTopFields.push_back(InitTemplateType); + TemplateTopFields.push_back(BssTemplateType); + TemplateType->setBody(TemplateTopFields, /*isPacked=*/true); + PointerType *TemplatePtrType = PointerType::get(TemplateType, 0); + + // We define the following symbols, which are the same as those + // defined by NaCl's original customized binutils linker scripts: + // __tls_template_start + // __tls_template_tdata_end + // __tls_template_end + // We also define __tls_template_alignment, which was not defined by + // the original linker scripts. + + const char *StartSymbol = "__tls_template_start"; + Constant *TemplateData = ConstantStruct::get(InitTemplateType, + FieldInitValues); + GlobalVariable *TemplateDataVar = + new GlobalVariable(M, InitTemplateType, /*isConstant=*/true, + GlobalValue::InternalLinkage, TemplateData); + setGlobalVariableValue(M, StartSymbol, TemplateDataVar); + TemplateDataVar->setName(StartSymbol); + + Constant *TdataEnd = ConstantExpr::getGetElementPtr( + InitTemplateType, + TemplateDataVar, + ConstantInt::get(M.getContext(), APInt(32, 1))); + setGlobalVariableValue(M, "__tls_template_tdata_end", TdataEnd); + + Constant *TotalEnd = ConstantExpr::getGetElementPtr( + TemplateType, + ConstantExpr::getBitCast(TemplateDataVar, TemplatePtrType), + ConstantInt::get(M.getContext(), APInt(32, 1))); + setGlobalVariableValue(M, "__tls_template_end", TotalEnd); + + const char *AlignmentSymbol = "__tls_template_alignment"; + Type *i32 = Type::getInt32Ty(M.getContext()); + GlobalVariable *AlignmentVar = new GlobalVariable( + M, i32, /*isConstant=*/true, + GlobalValue::InternalLinkage, + ConstantInt::get(M.getContext(), APInt(32, State.Alignment))); + setGlobalVariableValue(M, AlignmentSymbol, AlignmentVar); + AlignmentVar->setName(AlignmentSymbol); + + return TemplateType; +} + +static void rewriteTlsVars(Module &M, std::vector *TlsVars, + StructType *TemplateType) { + // Set up the intrinsic that reads the thread pointer. + Function *ReadTpFunc = Intrinsic::getDeclaration(&M, Intrinsic::nacl_read_tp); + + for (std::vector::iterator VarInfo = TlsVars->begin(); + VarInfo != TlsVars->end(); + ++VarInfo) { + GlobalVariable *Var = VarInfo->TlsVar; + while (Var->hasNUsesOrMore(1)) { + Use *U = &*Var->use_begin(); + Instruction *InsertPt = PhiSafeInsertPt(U); + Value *RawThreadPtr = CallInst::Create(ReadTpFunc, "tls_raw", InsertPt); + Value *TypedThreadPtr = new BitCastInst( + RawThreadPtr, TemplateType->getPointerTo(), "tls_struct", InsertPt); + SmallVector Indexes; + // We use -1 because we use the x86-style TLS layout in which + // the TLS data is stored at addresses below the thread pointer. + // This is largely because a check in nacl_irt_thread_create() + // in irt/irt_thread.c requires the thread pointer to be a + // self-pointer on x86-32. + // TODO(mseaborn): I intend to remove that check because it is + // non-portable. In the mean time, we want PNaCl pexes to work + // in older Chromium releases when translated to nexes. 
+ Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, -1))); + Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, VarInfo->IsBss ? 1 : 0))); + Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, VarInfo->TemplateIndex))); + Value *TlsField = GetElementPtrInst::Create( + TemplateType, TypedThreadPtr, Indexes, "field", InsertPt); + PhiSafeReplaceUses(U, TlsField); + } + VarInfo->TlsVar->eraseFromParent(); + } +} + +static void replaceFunction(Module &M, const char *Name, Value *NewFunc) { + if (Function *Func = M.getFunction(Name)) { + if (Func->hasLocalLinkage()) + return; + if (!Func->isDeclaration()) + report_fatal_error(std::string("Function already defined: ") + Name); + Func->replaceAllUsesWith(NewFunc); + Func->eraseFromParent(); + } +} + +// Provide fixed definitions for NaCl's TLS layout functions, +// __nacl_tp_*(). We adopt the x86-style layout: ExpandTls will +// output a program that uses the x86-style layout wherever it runs. +// +// This overrides the architecture-specific definitions of +// __nacl_tp_*() that PNaCl's native support code makes available to +// non-ABI-stable code. +static void defineTlsLayoutFunctions(Module &M) { + Type *i32 = Type::getInt32Ty(M.getContext()); + SmallVector ArgTypes; + ArgTypes.push_back(i32); + FunctionType *FuncType = FunctionType::get(i32, ArgTypes, /*isVarArg=*/false); + Function *NewFunc; + BasicBlock *BB; + + // Define the function as follows: + // uint32_t __nacl_tp_tdb_offset(uint32_t tdb_size) { + // return 0; + // } + // This means the thread pointer points to the TDB. + NewFunc = Function::Create(FuncType, GlobalValue::InternalLinkage, + "nacl_tp_tdb_offset", &M); + BB = BasicBlock::Create(M.getContext(), "entry", NewFunc); + ReturnInst::Create(M.getContext(), + ConstantInt::get(M.getContext(), APInt(32, 0)), BB); + replaceFunction(M, "__nacl_tp_tdb_offset", NewFunc); + + // Define the function as follows: + // uint32_t __nacl_tp_tls_offset(uint32_t tls_size) { + // return -tls_size; + // } + // This means the TLS variables are stored below the thread pointer. + NewFunc = Function::Create(FuncType, GlobalValue::InternalLinkage, + "nacl_tp_tls_offset", &M); + BB = BasicBlock::Create(M.getContext(), "entry", NewFunc); + Value *Arg = &*NewFunc->arg_begin(); + Arg->setName("size"); + Value *Result = BinaryOperator::CreateNeg(Arg, "result", BB); + ReturnInst::Create(M.getContext(), Result, BB); + replaceFunction(M, "__nacl_tp_tls_offset", NewFunc); +} + +bool ExpandTls::runOnModule(Module &M) { + ModulePass *Pass = createExpandTlsConstantExprPass(); + Pass->runOnModule(M); + delete Pass; + + std::vector TlsVars; + StructType *TemplateType = buildTlsTemplate(M, &TlsVars); + rewriteTlsVars(M, &TlsVars, TemplateType); + + defineTlsLayoutFunctions(M); + + return true; +} + +ModulePass *llvm::createExpandTlsPass() { + return new ExpandTls(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandTlsConstantExpr.cpp b/lib/Target/JSBackend/NaCl/ExpandTlsConstantExpr.cpp new file mode 100644 index 000000000000..7426ce68641e --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandTlsConstantExpr.cpp @@ -0,0 +1,107 @@ +//===- ExpandTlsConstantExpr.cpp - Convert ConstantExprs to Instructions---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is a helper used by the ExpandTls pass. 
+// +// LLVM treats the address of a TLS variable as a ConstantExpr. This +// is arguably a bug because the address of a TLS variable is *not* a +// constant: it varies between threads. +// +// See http://llvm.org/bugs/show_bug.cgi?id=14353 +// +// This is also a problem for the ExpandTls pass, which wants to use +// replaceUsesOfWith() to replace each TLS variable with an +// Instruction sequence that calls @llvm.nacl.read.tp(). This doesn't +// work if the TLS variable is used inside other ConstantExprs, +// because ConstantExprs are interned and are not associated with any +// function, whereas each Instruction must be part of a function. +// +// To fix that problem, this pass converts ConstantExprs that +// reference TLS variables into Instructions. +// +// For example, this use of a 'ptrtoint' ConstantExpr: +// +// ret i32 ptrtoint (i32* @tls_var to i32) +// +// is converted into this 'ptrtoint' Instruction: +// +// %expanded = ptrtoint i32* @tls_var to i32 +// ret i32 %expanded +// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class ExpandTlsConstantExpr : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandTlsConstantExpr() : ModulePass(ID) { + initializeExpandTlsConstantExprPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandTlsConstantExpr::ID = 0; +INITIALIZE_PASS(ExpandTlsConstantExpr, "nacl-expand-tls-constant-expr", + "Eliminate ConstantExpr references to TLS variables", + false, false) + +// This removes ConstantExpr references to the given Constant. +static void expandConstExpr(Constant *Expr) { + // First, ensure that ConstantExpr references to Expr are converted + // to Instructions so that we can modify them. + for (Use &U : Expr->uses()) + if (ConstantExpr *CE = dyn_cast(U.getUser())) + expandConstExpr(CE); + Expr->removeDeadConstantUsers(); + + if (ConstantExpr *CE = dyn_cast(Expr)) { + while (Expr->hasNUsesOrMore(1)) { + Use *U = &*Expr->use_begin(); + Instruction *NewInst = CE->getAsInstruction(); + NewInst->insertBefore(PhiSafeInsertPt(U)); + NewInst->setName("expanded"); + PhiSafeReplaceUses(U, NewInst); + } + } +} + +bool ExpandTlsConstantExpr::runOnModule(Module &M) { + for (Module::alias_iterator Iter = M.alias_begin(); + Iter != M.alias_end(); ) { + GlobalAlias *GA = &*Iter++; + if (GA->isThreadDependent()) { + GA->replaceAllUsesWith(GA->getAliasee()); + GA->eraseFromParent(); + } + } + for (Module::global_iterator Global = M.global_begin(); + Global != M.global_end(); + ++Global) { + if (Global->isThreadLocal()) { + expandConstExpr(&*Global); + } + } + return true; +} + +ModulePass *llvm::createExpandTlsConstantExprPass() { + return new ExpandTlsConstantExpr(); +} diff --git a/lib/Target/JSBackend/NaCl/ExpandUtils.cpp b/lib/Target/JSBackend/NaCl/ExpandUtils.cpp new file mode 100644 index 000000000000..96ec40d87f04 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandUtils.cpp @@ -0,0 +1,58 @@ +//===-- ExpandUtils.cpp - Helper functions for expansion passes -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +Instruction *llvm::PhiSafeInsertPt(Use *U) { + Instruction *InsertPt = cast(U->getUser()); + if (PHINode *PN = dyn_cast(InsertPt)) { + // We cannot insert instructions before a PHI node, so insert + // before the incoming block's terminator. This could be + // suboptimal if the terminator is a conditional. + InsertPt = PN->getIncomingBlock(*U)->getTerminator(); + } + return InsertPt; +} + +void llvm::PhiSafeReplaceUses(Use *U, Value *NewVal) { + User *UR = U->getUser(); + if (PHINode *PN = dyn_cast(UR)) { + // A PHI node can have multiple incoming edges from the same + // block, in which case all these edges must have the same + // incoming value. + BasicBlock *BB = PN->getIncomingBlock(*U); + for (unsigned I = 0; I < PN->getNumIncomingValues(); ++I) { + if (PN->getIncomingBlock(I) == BB) + PN->setIncomingValue(I, NewVal); + } + } else { + UR->replaceUsesOfWith(U->get(), NewVal); + } +} + +Function *llvm::RecreateFunction(Function *Func, FunctionType *NewType) { + Function *NewFunc = Function::Create(NewType, Func->getLinkage()); + NewFunc->copyAttributesFrom(Func); + Func->getParent()->getFunctionList().insert(Func->getIterator(), NewFunc); + NewFunc->takeName(Func); + NewFunc->getBasicBlockList().splice(NewFunc->begin(), + Func->getBasicBlockList()); + Func->replaceAllUsesWith( + ConstantExpr::getBitCast(NewFunc, + Func->getFunctionType()->getPointerTo())); + return NewFunc; +} diff --git a/lib/Target/JSBackend/NaCl/ExpandVarArgs.cpp b/lib/Target/JSBackend/NaCl/ExpandVarArgs.cpp new file mode 100644 index 000000000000..0afddae79de0 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ExpandVarArgs.cpp @@ -0,0 +1,324 @@ +//===- ExpandVarArgs.cpp - Expand out variable argument function calls-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out all use of variable argument functions. +// +// This pass replaces a varargs function call with a function call in +// which a pointer to the variable arguments is passed explicitly. +// The callee explicitly allocates space for the variable arguments on +// the stack using "alloca". 
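+//
+// As an illustrative sketch (names and types here are hypothetical), a call
+// such as:
+//
+//   %r = call i32 (i8*, ...) @printf(i8* %fmt, i32 %x, double %y)
+//
+// is rewritten so that the caller packs %x and %y into a stack-allocated
+// struct and passes a pointer to it as a single extra, fixed argument:
+//
+//   %vararg_buffer = alloca { i32, double }
+//   ; ... stores of %x and %y into %vararg_buffer ...
+//   %vararg_func = bitcast i32 (i8*, ...)* @printf
+//                    to i32 (i8*, { i32, double }*)*
+//   %r = call i32 %vararg_func(i8* %fmt, { i32, double }* %vararg_buffer)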
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { +class ExpandVarArgs : public ModulePass { +public: + static char ID; + ExpandVarArgs() : ModulePass(ID) { + initializeExpandVarArgsPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); +}; +} + +char ExpandVarArgs::ID = 0; +INITIALIZE_PASS(ExpandVarArgs, "expand-varargs", + "Expand out variable argument function definitions and calls", + false, false) + +static bool isEmscriptenJSArgsFunc(Module *M, StringRef Name) { + // TODO(jfb) Make these intrinsics in clang and remove the assert: these + // intrinsics should only exist for Emscripten. + bool isEmscriptenSpecial = Name.equals("emscripten_asm_const_int") || + Name.equals("emscripten_asm_const_double") || + Name.equals("emscripten_landingpad") || + Name.equals("emscripten_resume"); + assert(isEmscriptenSpecial ? Triple(M->getTargetTriple()).isOSEmscripten() + : true); + return isEmscriptenSpecial; +} + +static bool ExpandVarArgFunc(Module *M, Function *Func) { + if (Func->isDeclaration() && Func->use_empty()) + return false; // No point in doing any work. + + if (isEmscriptenJSArgsFunc(M, Func->getName())) + return false; + + Type *PtrType = Type::getInt8PtrTy(Func->getContext()); + + FunctionType *FTy = Func->getFunctionType(); + SmallVector Params(FTy->param_begin(), FTy->param_end()); + Params.push_back(PtrType); + FunctionType *NFTy = + FunctionType::get(FTy->getReturnType(), Params, /*isVarArg=*/false); + Function *NewFunc = RecreateFunction(Func, NFTy); + + // Declare the new argument as "noalias". + NewFunc->setAttributes(Func->getAttributes().addAttribute( + Func->getContext(), FTy->getNumParams() + 1, Attribute::NoAlias)); + + // Move the arguments across to the new function. + auto NewArg = NewFunc->arg_begin(); + for (Argument &Arg : Func->args()) { + Arg.replaceAllUsesWith(&*NewArg); + NewArg->takeName(&Arg); + ++NewArg; + } + // The last argument is the new `i8 * noalias %varargs`. + NewArg->setName("varargs"); + + Func->eraseFromParent(); + + // Expand out uses of llvm.va_start in this function. + for (BasicBlock &BB : *NewFunc) { + for (auto BI = BB.begin(), BE = BB.end(); BI != BE;) { + Instruction *I = &*BI++; + if (auto *VAS = dyn_cast(I)) { + IRBuilder<> IRB(VAS); + Value *Cast = IRB.CreateBitCast(VAS->getArgList(), + PtrType->getPointerTo(), "arglist"); + IRB.CreateStore(&*NewArg, Cast); + VAS->eraseFromParent(); + } + } + } + + return true; +} + +static void ExpandVAArgInst(VAArgInst *Inst, DataLayout *DL) { + Type *IntPtrTy = DL->getIntPtrType(Inst->getContext()); + auto *One = ConstantInt::get(IntPtrTy, 1); + IRBuilder<> IRB(Inst); + auto *ArgList = IRB.CreateBitCast( + Inst->getPointerOperand(), + Inst->getType()->getPointerTo()->getPointerTo(), "arglist"); + + // The caller spilled all of the va_args onto the stack in an unpacked + // struct. Each va_arg load from that struct needs to realign the element to + // its target-appropriate alignment in the struct in order to jump over + // padding that may have been in-between arguments. 
Do this with ConstantExpr + // to ensure good code gets generated, following the same approach as + // Support/MathExtras.h:alignAddr: + // ((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1) + // This assumes the alignment of the type is a power of 2 (or 1, in which case + // no realignment occurs). + auto *Ptr = IRB.CreateLoad(ArgList, "arglist_current"); + auto *AlignOf = ConstantExpr::getIntegerCast( + ConstantExpr::getAlignOf(Inst->getType()), IntPtrTy, /*isSigned=*/false); + auto *AlignMinus1 = ConstantExpr::getNUWSub(AlignOf, One); + auto *NotAlignMinus1 = IRB.CreateNot(AlignMinus1); + auto *CurrentPtr = IRB.CreateIntToPtr( + IRB.CreateAnd( + IRB.CreateNUWAdd(IRB.CreatePtrToInt(Ptr, IntPtrTy), AlignMinus1), + NotAlignMinus1), + Ptr->getType()); + + auto *Result = IRB.CreateLoad(CurrentPtr, "va_arg"); + Result->takeName(Inst); + + // Update the va_list to point to the next argument. + Value *Indexes[] = {One}; + auto *Next = IRB.CreateInBoundsGEP(CurrentPtr, Indexes, "arglist_next"); + IRB.CreateStore(Next, ArgList); + + Inst->replaceAllUsesWith(Result); + Inst->eraseFromParent(); +} + +static void ExpandVAEnd(VAEndInst *VAE) { + // va_end() is a no-op in this implementation. + VAE->eraseFromParent(); +} + +static void ExpandVACopyInst(VACopyInst *Inst) { + // va_list may have more space reserved, but we only need to + // copy a single pointer. + Type *PtrTy = Type::getInt8PtrTy(Inst->getContext())->getPointerTo(); + IRBuilder<> IRB(Inst); + auto *Src = IRB.CreateBitCast(Inst->getSrc(), PtrTy, "vacopy_src"); + auto *Dest = IRB.CreateBitCast(Inst->getDest(), PtrTy, "vacopy_dest"); + auto *CurrentPtr = IRB.CreateLoad(Src, "vacopy_currentptr"); + IRB.CreateStore(CurrentPtr, Dest); + Inst->eraseFromParent(); +} + +// ExpandVarArgCall() converts a CallInst or InvokeInst to expand out +// of varargs. It returns whether the module was modified. +template +static bool ExpandVarArgCall(Module *M, InstType *Call, DataLayout *DL) { + FunctionType *FuncType = cast( + Call->getCalledValue()->getType()->getPointerElementType()); + if (!FuncType->isFunctionVarArg()) + return false; + if (auto *F = dyn_cast(Call->getCalledValue())) + if (isEmscriptenJSArgsFunc(M, F->getName())) + return false; + + Function *F = Call->getParent()->getParent(); + LLVMContext &Ctx = M->getContext(); + + SmallVector Attrs; + Attrs.push_back(Call->getAttributes().getFnAttributes()); + Attrs.push_back(Call->getAttributes().getRetAttributes()); + + // Split argument list into fixed and variable arguments. + SmallVector FixedArgs; + SmallVector VarArgs; + SmallVector VarArgsTypes; + for (unsigned I = 0, E = FuncType->getNumParams(); I < E; ++I) { + FixedArgs.push_back(Call->getArgOperand(I)); + // AttributeSets use 1-based indexing. + Attrs.push_back(Call->getAttributes().getParamAttributes(I + 1)); + } + for (unsigned I = FuncType->getNumParams(), E = Call->getNumArgOperands(); + I < E; ++I) { + Value *ArgVal = Call->getArgOperand(I); + VarArgs.push_back(ArgVal); + bool isByVal = Call->getAttributes().hasAttribute(I + 1, Attribute::ByVal); + // For "byval" arguments we must dereference the pointer. + VarArgsTypes.push_back(isByVal ? ArgVal->getType()->getPointerElementType() + : ArgVal->getType()); + } + if (VarArgsTypes.size() == 0) { + // Some buggy code (e.g. 176.gcc in Spec2k) uses va_arg on an + // empty argument list, which gives undefined behaviour in C. To + // work around such programs, we create a dummy varargs buffer on + // the stack even though there are no arguments to put in it. 
+ // This allows va_arg to read an undefined value from the stack + // rather than crashing by reading from an uninitialized pointer. + // An alternative would be to pass a null pointer to catch the + // invalid use of va_arg. + VarArgsTypes.push_back(Type::getInt32Ty(Ctx)); + } + + // Create struct type for packing variable arguments into. + StructType *VarArgsTy = StructType::get(Ctx, VarArgsTypes); + + // Allocate space for the variable argument buffer. Do this at the + // start of the function so that we don't leak space if the function + // is called in a loop. + IRBuilder<> IRB(&*F->getEntryBlock().getFirstInsertionPt()); + auto *Buf = IRB.CreateAlloca(VarArgsTy, nullptr, "vararg_buffer"); + + // Call llvm.lifetime.start/end intrinsics to indicate that Buf is + // only used for the duration of the function call, so that the + // stack space can be reused elsewhere. + auto LifetimeStart = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start); + auto LifetimeEnd = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end); + auto *I8Ptr = Type::getInt8Ty(Ctx)->getPointerTo(); + auto *BufPtr = IRB.CreateBitCast(Buf, I8Ptr, "vararg_lifetime_bitcast"); + auto *BufSize = + ConstantInt::get(Ctx, APInt(64, DL->getTypeAllocSize(VarArgsTy))); + IRB.CreateCall(LifetimeStart, {BufSize, BufPtr}); + + // Copy variable arguments into buffer. + int Index = 0; + IRB.SetInsertPoint(Call); + for (Value *Arg : VarArgs) { + Value *Indexes[] = {ConstantInt::get(Ctx, APInt(32, 0)), + ConstantInt::get(Ctx, APInt(32, Index))}; + Value *Ptr = IRB.CreateInBoundsGEP(Buf, Indexes, "vararg_ptr"); + bool isByVal = Call->getAttributes().hasAttribute( + FuncType->getNumParams() + Index + 1, Attribute::ByVal); + if (isByVal) + IRB.CreateMemCpy(Ptr, Arg, DL->getTypeAllocSize( + Arg->getType()->getPointerElementType()), + /*Align=*/1); + else + IRB.CreateStore(Arg, Ptr); + ++Index; + } + + // Cast function to new type to add our extra pointer argument. + SmallVector ArgTypes(FuncType->param_begin(), + FuncType->param_end()); + ArgTypes.push_back(VarArgsTy->getPointerTo()); + FunctionType *NFTy = FunctionType::get(FuncType->getReturnType(), ArgTypes, + /*isVarArg=*/false); + Value *CastFunc = IRB.CreateBitCast(Call->getCalledValue(), + NFTy->getPointerTo(), "vararg_func"); + + // Create the converted function call. 
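+ // Note: for invokes, the matching llvm.lifetime.end calls are emitted at
+ // the start of both the normal and the unwind destination, since either
+ // path ends the lifetime of the vararg buffer.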
+ FixedArgs.push_back(Buf); + Instruction *NewCall; + if (auto *C = dyn_cast(Call)) { + auto *N = IRB.CreateCall(CastFunc, FixedArgs); + N->setAttributes(AttributeSet::get(Ctx, Attrs)); + NewCall = N; + IRB.CreateCall(LifetimeEnd, {BufSize, BufPtr}); + } else if (auto *C = dyn_cast(Call)) { + auto *N = IRB.CreateInvoke(CastFunc, C->getNormalDest(), C->getUnwindDest(), + FixedArgs, C->getName()); + N->setAttributes(AttributeSet::get(Ctx, Attrs)); + (IRBuilder<>(&*C->getNormalDest()->getFirstInsertionPt())) + .CreateCall(LifetimeEnd, {BufSize, BufPtr}); + (IRBuilder<>(&*C->getUnwindDest()->getFirstInsertionPt())) + .CreateCall(LifetimeEnd, {BufSize, BufPtr}); + NewCall = N; + } else { + llvm_unreachable("not a call/invoke"); + } + + NewCall->takeName(Call); + Call->replaceAllUsesWith(NewCall); + Call->eraseFromParent(); + + return true; +} + +bool ExpandVarArgs::runOnModule(Module &M) { + bool Changed = false; + DataLayout DL(&M); + + for (auto MI = M.begin(), ME = M.end(); MI != ME;) { + Function *F = &*MI++; + for (BasicBlock &BB : *F) { + for (auto BI = BB.begin(), BE = BB.end(); BI != BE;) { + Instruction *I = &*BI++; + if (auto *VI = dyn_cast(I)) { + Changed = true; + ExpandVAArgInst(VI, &DL); + } else if (auto *VAE = dyn_cast(I)) { + Changed = true; + ExpandVAEnd(VAE); + } else if (auto *VAC = dyn_cast(I)) { + Changed = true; + ExpandVACopyInst(VAC); + } else if (auto *Call = dyn_cast(I)) { + Changed |= ExpandVarArgCall(&M, Call, &DL); + } else if (auto *Call = dyn_cast(I)) { + Changed |= ExpandVarArgCall(&M, Call, &DL); + } + } + } + + if (F->isVarArg()) + Changed |= ExpandVarArgFunc(&M, F); + } + + return Changed; +} + +ModulePass *llvm::createExpandVarArgsPass() { return new ExpandVarArgs(); } diff --git a/lib/Target/JSBackend/NaCl/FixVectorLoadStoreAlignment.cpp b/lib/Target/JSBackend/NaCl/FixVectorLoadStoreAlignment.cpp new file mode 100644 index 000000000000..5a7a4998eaf4 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/FixVectorLoadStoreAlignment.cpp @@ -0,0 +1,264 @@ +//===- FixVectorLoadStoreAlignment.cpp - Vector load/store alignment ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Fix vector load/store alignment by: +// - Leaving as-is if the alignment is equal to the vector's element width. +// - Reducing the alignment to vector's element width if it's greater and the +// current alignment is a factor of the element alignment. +// - Scalarizing if the alignment is smaller than the element-wise alignment. +// +// Volatile vector load/store are handled the same, and can therefore be broken +// up as allowed by C/C++. +// +// TODO(jfb) Atomic accesses cause errors at compile-time. This could be +// implemented as a call to the C++ runtime, since 128-bit atomics +// aren't usually lock-free. 
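+//
+// Illustrative examples (hypothetical values):
+//
+//   %v = load <4 x i32>, <4 x i32>* %p, align 16  ; relaxed to "align 4"
+//   %w = load <4 x i32>, <4 x i32>* %q, align 1   ; scalarized into four
+//                                                 ; element loads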
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { +class FixVectorLoadStoreAlignment : public BasicBlockPass { +public: + static char ID; // Pass identification, replacement for typeid + FixVectorLoadStoreAlignment() : BasicBlockPass(ID), M(0), DL(0) { + initializeFixVectorLoadStoreAlignmentPass(*PassRegistry::getPassRegistry()); + } + using BasicBlockPass::doInitialization; + bool doInitialization(Module &Mod) override { + M = &Mod; + return false; // Unchanged. + } + bool runOnBasicBlock(BasicBlock &BB) override; + +private: + typedef SmallVector Instructions; + const Module *M; + const DataLayout *DL; + + /// Some sub-classes of Instruction have a non-virtual function + /// indicating which operand is the pointer operand. This template + /// function returns the pointer operand's type, and requires that + /// InstTy have a getPointerOperand function. + template + static PointerType *pointerOperandType(const InstTy *I) { + return cast(I->getPointerOperand()->getType()); + } + + /// Similar to pointerOperandType, this template function checks + /// whether the pointer operand is a pointer to a vector type. + template + static bool pointerOperandIsVectorPointer(const Instruction *I) { + return pointerOperandType(cast(I))->getElementType()->isVectorTy(); + } + + /// Returns true if one of the Instruction's operands is a pointer to + /// a vector type. This is more general than the above and assumes we + /// don't know which Instruction type is provided. + static bool hasVectorPointerOperand(const Instruction *I) { + for (User::const_op_iterator IB = I->op_begin(), IE = I->op_end(); IB != IE; + ++IB) + if (PointerType *PtrTy = dyn_cast((*IB)->getType())) + if (isa(PtrTy->getElementType())) + return true; + return false; + } + + /// Vectors are expected to be element-aligned. If they are, leave as-is; if + /// the alignment is too much then narrow the alignment (when possible); + /// otherwise return false. + template + static bool tryFixVectorAlignment(const DataLayout *DL, Instruction *I) { + InstTy *LoadStore = cast(I); + VectorType *VecTy = + cast(pointerOperandType(LoadStore)->getElementType()); + Type *ElemTy = VecTy->getElementType(); + uint64_t ElemBitSize = DL->getTypeSizeInBits(ElemTy); + uint64_t ElemByteSize = ElemBitSize / CHAR_BIT; + uint64_t CurrentByteAlign = LoadStore->getAlignment(); + bool isABIAligned = CurrentByteAlign == 0; + uint64_t VecABIByteAlign = DL->getABITypeAlignment(VecTy); + CurrentByteAlign = isABIAligned ? VecABIByteAlign : CurrentByteAlign; + + if (CHAR_BIT * ElemByteSize != ElemBitSize) + return false; // Minimum byte-size elements. + if (MinAlign(ElemByteSize, CurrentByteAlign) == ElemByteSize) { + // Element-aligned, or compatible over-aligned. Keep element-aligned. + LoadStore->setAlignment(ElemByteSize); + return true; + } + return false; // Under-aligned. 
+ } + + void visitVectorLoadStore(BasicBlock &BB, Instructions &Loads, + Instructions &Stores) const; + void scalarizeVectorLoadStore(BasicBlock &BB, const Instructions &Loads, + const Instructions &Stores) const; +}; +} // anonymous namespace + +char FixVectorLoadStoreAlignment::ID = 0; +INITIALIZE_PASS(FixVectorLoadStoreAlignment, "fix-vector-load-store-alignment", + "Ensure vector load/store have element-size alignment", + false, false) + +void FixVectorLoadStoreAlignment::visitVectorLoadStore( + BasicBlock &BB, Instructions &Loads, Instructions &Stores) const { + for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; + ++BBI) { + Instruction *I = &*BBI; + // The following list of instructions is based on mayReadOrWriteMemory. + switch (I->getOpcode()) { + case Instruction::Load: + if (pointerOperandIsVectorPointer(I)) { + if (cast(I)->isAtomic()) + report_fatal_error("unhandled: atomic vector store"); + if (!tryFixVectorAlignment(DL, I)) + Loads.push_back(I); + } + break; + case Instruction::Store: + if (pointerOperandIsVectorPointer(I)) { + if (cast(I)->isAtomic()) + report_fatal_error("unhandled: atomic vector store"); + if (!tryFixVectorAlignment(DL, I)) + Stores.push_back(I); + } + break; + case Instruction::Alloca: + case Instruction::Fence: + case Instruction::VAArg: + // Leave these memory operations as-is, even when they deal with + // vectors. + break; + case Instruction::Call: + case Instruction::Invoke: + // Call/invoke don't touch memory per-se, leave them as-is. + break; + case Instruction::AtomicCmpXchg: + if (pointerOperandIsVectorPointer(I)) + report_fatal_error( + "unhandled: atomic compare and exchange operation on vector"); + break; + case Instruction::AtomicRMW: + if (pointerOperandIsVectorPointer(I)) + report_fatal_error("unhandled: atomic RMW operation on vector"); + break; + default: + if (I->mayReadOrWriteMemory() && hasVectorPointerOperand(I)) { + errs() << "Not handled: " << *I << '\n'; + report_fatal_error( + "unexpected: vector operations which may read/write memory"); + } + break; + } + } +} + +void FixVectorLoadStoreAlignment::scalarizeVectorLoadStore( + BasicBlock &BB, const Instructions &Loads, + const Instructions &Stores) const { + for (Instructions::const_iterator IB = Loads.begin(), IE = Loads.end(); + IB != IE; ++IB) { + LoadInst *VecLoad = cast(*IB); + VectorType *LoadedVecTy = + cast(pointerOperandType(VecLoad)->getElementType()); + Type *ElemTy = LoadedVecTy->getElementType(); + + // The base of the vector is as aligned as the vector load (where + // zero means ABI alignment for the vector), whereas subsequent + // elements are as aligned as the base+offset can be. + unsigned BaseAlign = VecLoad->getAlignment() + ? VecLoad->getAlignment() + : DL->getABITypeAlignment(LoadedVecTy); + unsigned ElemAllocSize = DL->getTypeAllocSize(ElemTy); + + // Fill in the vector element by element. 
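+ // Illustrative sketch of the emitted sequence for a <2 x i32> load with
+ // "align 1" (value names are hypothetical):
+ //   %base = bitcast <2 x i32>* %p to i32*
+ //   %elt0 = load i32, i32* %base, align 1
+ //   %vec0 = insertelement <2 x i32> undef, i32 %elt0, i32 0
+ //   %gep1 = getelementptr inbounds i32, i32* %base, i32 1
+ //   %elt1 = load i32, i32* %gep1, align 1
+ //   %vec1 = insertelement <2 x i32> %vec0, i32 %elt1, i32 1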
+ IRBuilder<> IRB(VecLoad); + Value *Loaded = UndefValue::get(LoadedVecTy); + Value *Base = + IRB.CreateBitCast(VecLoad->getPointerOperand(), ElemTy->getPointerTo()); + + for (unsigned Elem = 0, NumElems = LoadedVecTy->getNumElements(); + Elem != NumElems; ++Elem) { + unsigned Align = MinAlign(BaseAlign, ElemAllocSize * Elem); + Value *GEP = IRB.CreateConstInBoundsGEP1_32(ElemTy, Base, Elem); + LoadInst *LoadedElem = + IRB.CreateAlignedLoad(GEP, Align, VecLoad->isVolatile()); + LoadedElem->setSynchScope(VecLoad->getSynchScope()); + Loaded = IRB.CreateInsertElement( + Loaded, LoadedElem, + ConstantInt::get(Type::getInt32Ty(M->getContext()), Elem)); + } + + VecLoad->replaceAllUsesWith(Loaded); + VecLoad->eraseFromParent(); + } + + for (Instructions::const_iterator IB = Stores.begin(), IE = Stores.end(); + IB != IE; ++IB) { + StoreInst *VecStore = cast(*IB); + Value *StoredVec = VecStore->getValueOperand(); + VectorType *StoredVecTy = cast(StoredVec->getType()); + Type *ElemTy = StoredVecTy->getElementType(); + + unsigned BaseAlign = VecStore->getAlignment() + ? VecStore->getAlignment() + : DL->getABITypeAlignment(StoredVecTy); + unsigned ElemAllocSize = DL->getTypeAllocSize(ElemTy); + + // Fill in the vector element by element. + IRBuilder<> IRB(VecStore); + Value *Base = IRB.CreateBitCast(VecStore->getPointerOperand(), + ElemTy->getPointerTo()); + + for (unsigned Elem = 0, NumElems = StoredVecTy->getNumElements(); + Elem != NumElems; ++Elem) { + unsigned Align = MinAlign(BaseAlign, ElemAllocSize * Elem); + Value *GEP = IRB.CreateConstInBoundsGEP1_32(ElemTy, Base, Elem); + Value *ElemToStore = IRB.CreateExtractElement( + StoredVec, ConstantInt::get(Type::getInt32Ty(M->getContext()), Elem)); + StoreInst *StoredElem = IRB.CreateAlignedStore(ElemToStore, GEP, Align, + VecStore->isVolatile()); + StoredElem->setSynchScope(VecStore->getSynchScope()); + } + + VecStore->eraseFromParent(); + } +} + +bool FixVectorLoadStoreAlignment::runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + if (!DL) + DL = &BB.getParent()->getParent()->getDataLayout(); + Instructions Loads; + Instructions Stores; + visitVectorLoadStore(BB, Loads, Stores); + if (!(Loads.empty() && Stores.empty())) { + Changed = true; + scalarizeVectorLoadStore(BB, Loads, Stores); + } + return Changed; +} + +BasicBlockPass *llvm::createFixVectorLoadStoreAlignmentPass() { + return new FixVectorLoadStoreAlignment(); +} diff --git a/lib/Target/JSBackend/NaCl/FlattenGlobals.cpp b/lib/Target/JSBackend/NaCl/FlattenGlobals.cpp new file mode 100644 index 000000000000..94da2e1c32ba --- /dev/null +++ b/lib/Target/JSBackend/NaCl/FlattenGlobals.cpp @@ -0,0 +1,546 @@ +//===- FlattenGlobals.cpp - Flatten global variable initializers-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts initializers for global variables into a +// flattened normal form which removes nested struct types and +// simplifies ConstantExprs. +// +// In this normal form, an initializer is either a SimpleElement or a +// CompoundElement. 
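+//
+// For example (an illustrative case assuming 32-bit pointers), the
+// initializer
+//
+//   @g = global { i32, i32* } { i32 259, i32* @ptr }
+//
+// is flattened into
+//
+//   @g = global <{ [4 x i8], i32 }>
+//            <{ [4 x i8] c"\03\01\00\00", i32 ptrtoint (i32* @ptr to i32) }>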
+// +// A SimpleElement is one of the following: +// +// 1) An i8 array literal or zeroinitializer: +// +// [SIZE x i8] c"DATA" +// [SIZE x i8] zeroinitializer +// +// 2) A reference to a GlobalValue (a function or global variable) +// with an optional 32-bit byte offset added to it (the addend): +// +// ptrtoint (TYPE* @GLOBAL to i32) +// add (i32 ptrtoint (TYPE* @GLOBAL to i32), i32 ADDEND) +// +// We use ptrtoint+add rather than bitcast+getelementptr because +// the constructor for getelementptr ConstantExprs performs +// constant folding which introduces more complex getelementptrs, +// and it is hard to check that they follow a normal form. +// +// For completeness, the pass also allows a BlockAddress as well as +// a GlobalValue here, although BlockAddresses are currently not +// allowed in the PNaCl ABI, so this should not be considered part +// of the normal form. +// +// A CompoundElement is a unnamed, packed struct containing only +// SimpleElements. +// +// Limitations: +// +// LLVM IR allows ConstantExprs that calculate the difference between +// two globals' addresses. FlattenGlobals rejects these because Clang +// does not generate these and because ELF does not support such +// relocations in general. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + + // Defines a (non-constant) handle that records a use of a + // constant. Used to make sure a relocation, within flattened global + // variable initializers, does not get destroyed when method + // removeDeadConstantUsers gets called. For simplicity, rather than + // defining a new (non-constant) construct, we use a return + // instruction as the handle. + typedef ReturnInst RelocUserType; + + // Define map from a relocation, appearing in the flattened global variable + // initializers, to it's corresponding use handle. + typedef DenseMap RelocMapType; + + // Define the list to hold the list of global variables being flattened. + struct FlattenedGlobal; + typedef std::vector FlattenedGlobalsVectorType; + + // Returns the corresponding relocation, for the given user handle. + Constant *getRelocUseConstant(RelocUserType *RelocUser) { + return cast(RelocUser->getReturnValue()); + } + + // The state associated with flattening globals of a module. + struct FlattenGlobalsState { + /// The module being flattened. + Module &M; + /// The data layout to be used. + DataLayout DL; + /// The relocations (within the original global variable initializers) + /// that must be kept. + RelocMapType RelocMap; + /// The list of global variables that are being flattened. + FlattenedGlobalsVectorType FlattenedGlobalsVector; + /// True if the module was modified during the "flatten globals" pass. + bool Modified; + /// The type model of a byte. + Type *ByteType; + /// The type model of the integer pointer type. + Type *IntPtrType; + /// The size of the pointer type. 
+ unsigned PtrSize; + + explicit FlattenGlobalsState(Module &M) + : M(M), DL(&M), RelocMap(), + Modified(false), + ByteType(Type::getInt8Ty(M.getContext())), + IntPtrType(DL.getIntPtrType(M.getContext())), + PtrSize(DL.getPointerSize()) + {} + + ~FlattenGlobalsState() { + // Remove added user handles. + for (RelocMapType::iterator + I = RelocMap.begin(), E = RelocMap.end(); I != E; ++I) { + delete I->second; + } + // Remove flatteners for global varaibles. + DeleteContainerPointers(FlattenedGlobalsVector); + } + + /// Collect Global variables whose initializers should be + /// flattened. Creates corresponding flattened initializers (if + /// applicable), and creates uninitialized replacement global + /// variables. + void flattenGlobalsWithInitializers(); + + /// Remove initializers from original global variables, and + /// then remove the portions of the initializers that are + /// no longer used. + void removeDeadInitializerConstants(); + + // Replace the original global variables with their flattened + // global variable counterparts. + void replaceGlobalsWithFlattenedGlobals(); + + // Builds and installs initializers for flattened global + // variables, based on the flattened initializers of the + // corresponding original global variables. + void installFlattenedGlobalInitializers(); + + // Returns the user handle associated with the reloc, so that it + // won't be deleted during the flattening process. + RelocUserType *getRelocUserHandle(Constant *Reloc) { + RelocUserType *RelocUser = RelocMap[Reloc]; + if (RelocUser == NULL) { + RelocUser = ReturnInst::Create(M.getContext(), Reloc); + RelocMap[Reloc] = RelocUser; + } + return RelocUser; + } + }; + + // A FlattenedConstant represents a global variable initializer that + // has been flattened and may be converted into the normal form. + class FlattenedConstant { + FlattenGlobalsState &State; + + // A flattened global variable initializer is represented as: + // 1) an array of bytes; + unsigned BufSize; + uint8_t *Buf; + // XXX EMSCRIPTEN: There used to be a BufEnd here. No more. + + // 2) an array of relocations. + class Reloc { + private: + unsigned RelOffset; // Offset at which the relocation is to be applied. 
+ RelocUserType *RelocUser; + public: + + unsigned getRelOffset() const { return RelOffset; } + Constant *getRelocUse() const { return getRelocUseConstant(RelocUser); } + Reloc(FlattenGlobalsState &State, unsigned RelOffset, Constant *NewVal) + : RelOffset(RelOffset), RelocUser(State.getRelocUserHandle(NewVal)) {} + + explicit Reloc(const Reloc &R) + : RelOffset(R.RelOffset), RelocUser(R.RelocUser) {} + + void operator=(const Reloc &R) { + RelOffset = R.RelOffset; + RelocUser = R.RelocUser; + } + }; + typedef SmallVector RelocArray; + RelocArray Relocs; + + const DataLayout &getDataLayout() const { return State.DL; } + + Module &getModule() const { return State.M; } + + Type *getIntPtrType() const { return State.IntPtrType; } + + Type *getByteType() const { return State.ByteType; } + + unsigned getPtrSize() const { return State.PtrSize; } + + void putAtDest(Constant *Value, uint8_t *Dest); + + Constant *dataSlice(unsigned StartPos, unsigned EndPos) const { + return ConstantDataArray::get( + getModule().getContext(), + ArrayRef(Buf + StartPos, Buf + EndPos)); + } + + Type *dataSliceType(unsigned StartPos, unsigned EndPos) const { + return ArrayType::get(getByteType(), EndPos - StartPos); + } + + public: + FlattenedConstant(FlattenGlobalsState &State, Constant *Value): + State(State), + BufSize(getDataLayout().getTypeAllocSize(Value->getType())), + Buf(new uint8_t[BufSize]) { + memset(Buf, 0, BufSize); + putAtDest(Value, Buf); + } + + ~FlattenedConstant() { + delete[] Buf; + } + + // Returns the corresponding flattened initializer. + Constant *getAsNormalFormConstant() const; + + // Returns the type of the corresponding flattened initializer; + Type *getAsNormalFormType() const; + + }; + + // Structure used to flatten a global variable. + struct FlattenedGlobal { + // The state of the flatten globals pass. + FlattenGlobalsState &State; + // The global variable to flatten. + GlobalVariable *Global; + // The replacement global variable, if known. + GlobalVariable *NewGlobal; + // True if Global has an initializer. + bool HasInitializer; + // The flattened initializer, if the initializer would not just be + // filled with zeroes. + FlattenedConstant *FlatConst; + // The type of GlobalType, when used in an initializer. + Type *GlobalType; + // The size of the initializer. + uint64_t Size; + public: + FlattenedGlobal(FlattenGlobalsState &State, GlobalVariable *Global) + : State(State), + Global(Global), + NewGlobal(NULL), + HasInitializer(Global->hasInitializer()), + FlatConst(NULL), + GlobalType(Global->getType()->getPointerElementType()), + Size(GlobalType->isSized() + ? getDataLayout().getTypeAllocSize(GlobalType) : 0) { + Type *NewType = NULL; + if (HasInitializer) { + if (Global->getInitializer()->isNullValue()) { + // Special case of NullValue. As an optimization, for large + // BSS variables, avoid allocating a buffer that would only be filled + // with zeros. + NewType = ArrayType::get(getByteType(), Size); + } else { + FlatConst = new FlattenedConstant(State, Global->getInitializer()); + NewType = FlatConst->getAsNormalFormType(); + } + } else { + NewType = ArrayType::get(getByteType(), Size); + } + NewGlobal = new GlobalVariable(getModule(), NewType, + Global->isConstant(), + Global->getLinkage(), + NULL, "", Global, + Global->getThreadLocalMode()); + NewGlobal->copyAttributesFrom(Global); + if (NewGlobal->getAlignment() == 0 && GlobalType->isSized()) + NewGlobal->setAlignment(getDataLayout(). 
+ getPrefTypeAlignment(GlobalType)); + NewGlobal->setExternallyInitialized(Global->isExternallyInitialized()); + NewGlobal->takeName(Global); + } + + ~FlattenedGlobal() { + delete FlatConst; + } + + const DataLayout &getDataLayout() const { return State.DL; } + + Module &getModule() const { return State.M; } + + Type *getByteType() const { return State.ByteType; } + + // Removes the original initializer from the global variable to be + // flattened, if applicable. + void removeOriginalInitializer() { + if (HasInitializer) Global->setInitializer(NULL); + } + + // Replaces the original global variable with the corresponding + // flattened global variable. + void replaceGlobalWithFlattenedGlobal() { + Global->replaceAllUsesWith( + ConstantExpr::getBitCast(NewGlobal, Global->getType())); + Global->eraseFromParent(); + } + + // Installs flattened initializers to the corresponding flattened + // global variable. + void installFlattenedInitializer() { + if (HasInitializer) { + Constant *NewInit = NULL; + if (FlatConst == NULL) { + // Special case of NullValue. + NewInit = ConstantAggregateZero::get(ArrayType::get(getByteType(), + Size)); + } else { + NewInit = FlatConst->getAsNormalFormConstant(); + } + NewGlobal->setInitializer(NewInit); + } + } + }; + + class FlattenGlobals : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + FlattenGlobals() : ModulePass(ID) { + initializeFlattenGlobalsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +static void ExpandConstant(const DataLayout *DL, Constant *Val, + Constant **ResultGlobal, uint64_t *ResultOffset) { + if (isa(Val) || isa(Val)) { + *ResultGlobal = Val; + *ResultOffset = 0; + } else if (isa(Val)) { + *ResultGlobal = NULL; + *ResultOffset = 0; + } else if (ConstantInt *CI = dyn_cast(Val)) { + *ResultGlobal = NULL; + *ResultOffset = CI->getZExtValue(); + } else if (ConstantExpr *CE = dyn_cast(Val)) { + ExpandConstant(DL, CE->getOperand(0), ResultGlobal, ResultOffset); + if (CE->getOpcode() == Instruction::GetElementPtr) { + auto *PtrTy = cast(CE->getOperand(0)->getType()); + SmallVector Indexes(CE->op_begin() + 1, CE->op_end()); + *ResultOffset += DL->getIndexedOffsetInType(PtrTy->getElementType(), + Indexes); + } else if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::IntToPtr) { + // Nothing more to do. + } else if (CE->getOpcode() == Instruction::PtrToInt) { + if (Val->getType()->getIntegerBitWidth() < DL->getPointerSizeInBits()) { + errs() << "Not handled: " << *CE << "\n"; + report_fatal_error("FlattenGlobals: a ptrtoint that truncates " + "a pointer is not allowed"); + } + } else { + errs() << "Not handled: " << *CE << "\n"; + report_fatal_error( + std::string("FlattenGlobals: ConstantExpr opcode not handled: ") + + CE->getOpcodeName()); + } + } else { + errs() << "Not handled: " << *Val << "\n"; + report_fatal_error("FlattenGlobals: Constant type not handled for reloc"); + } +} + +void FlattenedConstant::putAtDest(Constant *Val, uint8_t *Dest) { + uint64_t ValSize = getDataLayout().getTypeAllocSize(Val->getType()); + assert(Dest + ValSize <= Buf + BufSize); + if (isa(Val) || + isa(Val) || + isa(Val)) { + // The buffer is already zero-initialized. 
+ } else if (ConstantInt *CI = dyn_cast(Val)) { + memcpy(Dest, CI->getValue().getRawData(), ValSize); + } else if (ConstantFP *CF = dyn_cast(Val)) { + APInt Data = CF->getValueAPF().bitcastToAPInt(); + assert((Data.getBitWidth() + 7) / 8 == ValSize); + assert(Data.getBitWidth() % 8 == 0); + memcpy(Dest, Data.getRawData(), ValSize); + } else if (ConstantDataSequential *CD = + dyn_cast(Val)) { + // Note that getRawDataValues() assumes the host endianness is the same. + StringRef Data = CD->getRawDataValues(); + assert(Data.size() == ValSize); + memcpy(Dest, Data.data(), Data.size()); + } else if (isa(Val) || isa(Val) || + isa(Val)) { + uint64_t ElementSize = getDataLayout().getTypeAllocSize( + Val->getType()->getSequentialElementType()); + for (unsigned I = 0; I < Val->getNumOperands(); ++I) { + putAtDest(cast(Val->getOperand(I)), Dest + ElementSize * I); + } + } else if (ConstantStruct *CS = dyn_cast(Val)) { + const StructLayout *Layout = getDataLayout().getStructLayout(CS->getType()); + for (unsigned I = 0; I < CS->getNumOperands(); ++I) { + putAtDest(CS->getOperand(I), Dest + Layout->getElementOffset(I)); + } + } else { + Constant *GV; + uint64_t Offset; + ExpandConstant(&getDataLayout(), Val, &GV, &Offset); + if (GV) { + Constant *NewVal = ConstantExpr::getPtrToInt(GV, getIntPtrType()); + if (Offset) { + // For simplicity, require addends to be 32-bit. + if ((int64_t) Offset != (int32_t) (uint32_t) Offset) { + errs() << "Not handled: " << *Val << "\n"; + report_fatal_error( + "FlattenGlobals: Offset does not fit into 32 bits"); + } + NewVal = ConstantExpr::getAdd( + NewVal, ConstantInt::get(getIntPtrType(), Offset, + /* isSigned= */ true)); + } + Reloc NewRel(State, Dest - Buf, NewVal); + Relocs.push_back(NewRel); + } else { + memcpy(Dest, &Offset, ValSize); + } + } +} + +Constant *FlattenedConstant::getAsNormalFormConstant() const { + // Return a single SimpleElement. + if (Relocs.size() == 0) + return dataSlice(0, BufSize); + if (Relocs.size() == 1 && BufSize == getPtrSize()) { + assert(Relocs[0].getRelOffset() == 0); + return Relocs[0].getRelocUse(); + } + + // Return a CompoundElement. + SmallVector Elements; + unsigned PrevPos = 0; + for (RelocArray::const_iterator Rel = Relocs.begin(), E = Relocs.end(); + Rel != E; ++Rel) { + if (Rel->getRelOffset() > PrevPos) + Elements.push_back(dataSlice(PrevPos, Rel->getRelOffset())); + Elements.push_back(Rel->getRelocUse()); + PrevPos = Rel->getRelOffset() + getPtrSize(); + } + if (PrevPos < BufSize) + Elements.push_back(dataSlice(PrevPos, BufSize)); + return ConstantStruct::getAnon(getModule().getContext(), Elements, true); +} + +Type *FlattenedConstant::getAsNormalFormType() const { + // Return a single element type. + if (Relocs.size() == 0) + return dataSliceType(0, BufSize); + if (Relocs.size() == 1 && BufSize == getPtrSize()) { + assert(Relocs[0].getRelOffset() == 0); + return Relocs[0].getRelocUse()->getType(); + } + + // Return a compound type. 
+ SmallVector Elements; + unsigned PrevPos = 0; + for (RelocArray::const_iterator Rel = Relocs.begin(), E = Relocs.end(); + Rel != E; ++Rel) { + if (Rel->getRelOffset() > PrevPos) + Elements.push_back(dataSliceType(PrevPos, Rel->getRelOffset())); + Elements.push_back(Rel->getRelocUse()->getType()); + PrevPos = Rel->getRelOffset() + getPtrSize(); + } + if (PrevPos < BufSize) + Elements.push_back(dataSliceType(PrevPos, BufSize)); + return StructType::get(getModule().getContext(), Elements, true); +} + +char FlattenGlobals::ID = 0; +INITIALIZE_PASS(FlattenGlobals, "flatten-globals", + "Flatten global variable initializers into byte arrays", + false, false) + +void FlattenGlobalsState::flattenGlobalsWithInitializers() { + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E;) { + GlobalVariable *Global = &*I++; + // Variables with "appending" linkage must always be arrays and so + // cannot be normalized, so leave them alone. + if (Global->hasAppendingLinkage()) + continue; + Modified = true; + FlattenedGlobalsVector.push_back(new FlattenedGlobal(*this, Global)); + } +} + +void FlattenGlobalsState::removeDeadInitializerConstants() { + // Detach original initializers. + for (FlattenedGlobalsVectorType::iterator + I = FlattenedGlobalsVector.begin(), E = FlattenedGlobalsVector.end(); + I != E; ++I) { + (*I)->removeOriginalInitializer(); + } + // Do cleanup of old initializers. + for (RelocMapType::iterator I = RelocMap.begin(), E = RelocMap.end(); + I != E; ++I) { + getRelocUseConstant(I->second)->removeDeadConstantUsers(); + } + +} + +void FlattenGlobalsState::replaceGlobalsWithFlattenedGlobals() { + for (FlattenedGlobalsVectorType::iterator + I = FlattenedGlobalsVector.begin(), E = FlattenedGlobalsVector.end(); + I != E; ++I) { + (*I)->replaceGlobalWithFlattenedGlobal(); + } +} + +void FlattenGlobalsState::installFlattenedGlobalInitializers() { + for (FlattenedGlobalsVectorType::iterator + I = FlattenedGlobalsVector.begin(), E = FlattenedGlobalsVector.end(); + I != E; ++I) { + (*I)->installFlattenedInitializer(); + } +} + +bool FlattenGlobals::runOnModule(Module &M) { + FlattenGlobalsState State(M); + State.flattenGlobalsWithInitializers(); + State.removeDeadInitializerConstants(); + State.replaceGlobalsWithFlattenedGlobals(); + State.installFlattenedGlobalInitializers(); + return State.Modified; +} + +ModulePass *llvm::createFlattenGlobalsPass() { + return new FlattenGlobals(); +} diff --git a/lib/Target/JSBackend/NaCl/GlobalCleanup.cpp b/lib/Target/JSBackend/NaCl/GlobalCleanup.cpp new file mode 100644 index 000000000000..13effcb647ab --- /dev/null +++ b/lib/Target/JSBackend/NaCl/GlobalCleanup.cpp @@ -0,0 +1,116 @@ +//===- GlobalCleanup.cpp - Cleanup global symbols post-bitcode-link -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// ===---------------------------------------------------------------------===// +// +// PNaCl executables should have no external symbols or aliases. These passes +// internalize (or otherwise remove/resolve) GlobalValues and resolve all +// GlobalAliases. 
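+//
+// For instance (illustrative), a declaration such as
+//
+//   @foo = extern_weak global i32
+//
+// has its uses replaced with null and is deleted, while a definition with
+// weak linkage is simply given internal linkage.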
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { +class GlobalCleanup : public ModulePass { +public: + static char ID; + GlobalCleanup() : ModulePass(ID) { + initializeGlobalCleanupPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override; +}; + +class ResolveAliases : public ModulePass { +public: + static char ID; + ResolveAliases() : ModulePass(ID) { + initializeResolveAliasesPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override; +}; +} + +char GlobalCleanup::ID = 0; +INITIALIZE_PASS(GlobalCleanup, "nacl-global-cleanup", + "GlobalValue cleanup for PNaCl " + "(assumes all of the binary is linked statically)", + false, false) + +static bool CleanUpLinkage(GlobalValue *GV) { + // TODO(dschuff): handle the rest of the linkage types as necessary without + // running afoul of the IR verifier or breaking the native link + switch (GV->getLinkage()) { + case GlobalValue::ExternalWeakLinkage: { + auto *NullRef = Constant::getNullValue(GV->getType()); + GV->replaceAllUsesWith(NullRef); + GV->eraseFromParent(); + return true; + } + case GlobalValue::WeakAnyLinkage: { + GV->setLinkage(GlobalValue::InternalLinkage); + return true; + } + default: + // default with fall through to avoid compiler warning + return false; + } + return false; +} + +bool GlobalCleanup::runOnModule(Module &M) { + bool Modified = false; + + // Cleanup llvm.compiler.used. We leave llvm.used as-is, + // because optimization passes feed off it to understand + // what globals may/may not be optimized away. For PNaCl, + // it is removed before ABI validation by CleanupUsedGlobalsMetadata. + if (auto *GV = M.getNamedGlobal("llvm.compiler.used")) { + GV->eraseFromParent(); + Modified = true; + } + + for (auto I = M.global_begin(), E = M.global_end(); I != E;) { + GlobalVariable *GV = &*I++; + Modified |= CleanUpLinkage(GV); + } + + for (auto I = M.begin(), E = M.end(); I != E;) { + Function *F = &*I++; + Modified |= CleanUpLinkage(F); + } + + return Modified; +} + +ModulePass *llvm::createGlobalCleanupPass() { return new GlobalCleanup(); } + +char ResolveAliases::ID = 0; +INITIALIZE_PASS(ResolveAliases, "resolve-aliases", + "resolve global variable and function aliases", false, false) + +bool ResolveAliases::runOnModule(Module &M) { + bool Modified = false; + + for (auto I = M.alias_begin(), E = M.alias_end(); I != E;) { + GlobalAlias *Alias = &*I++; + Alias->replaceAllUsesWith(Alias->getAliasee()); + Alias->eraseFromParent(); + Modified = true; + } + return Modified; +} + +ModulePass *llvm::createResolveAliasesPass() { return new ResolveAliases(); } diff --git a/lib/Target/JSBackend/NaCl/GlobalizeConstantVectors.cpp b/lib/Target/JSBackend/NaCl/GlobalizeConstantVectors.cpp new file mode 100644 index 000000000000..74e866c1a9fe --- /dev/null +++ b/lib/Target/JSBackend/NaCl/GlobalizeConstantVectors.cpp @@ -0,0 +1,176 @@ +//===- GlobalizeConstantVectors.cpp - Globalize constant vector -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass replaces all constant vector operands by loads of the same +// vector value from a constant global. After this pass functions don't +// rely on ConstantVector and ConstantDataVector. +// +// The FlattenGlobals pass can be used to further simplify the globals +// that this pass creates. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" +#include +#include + +using namespace llvm; + +namespace { +// Must be a ModulePass since it adds globals. +class GlobalizeConstantVectors : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + GlobalizeConstantVectors() : ModulePass(ID), DL(0) { + initializeGlobalizeConstantVectorsPass(*PassRegistry::getPassRegistry()); + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + virtual bool runOnModule(Module &M); + +private: + typedef SmallPtrSet Constants; + typedef std::pair FunctionConstants; + typedef std::vector FunctionConstantList; + typedef DenseMap GlobalizedConstants; + const DataLayout *DL; + + void findConstantVectors(const Function &F, Constants &Cs) const; + void createGlobalConstantVectors(Module &M, const FunctionConstantList &FCs, + GlobalizedConstants &GCs) const; + void materializeConstantVectors(Function &F, const Constants &Cs, + const GlobalizedConstants &GCs) const; +}; + +const char Name[] = "constant_vector"; +} // anonymous namespace + +char GlobalizeConstantVectors::ID = 0; +INITIALIZE_PASS(GlobalizeConstantVectors, "globalize-constant-vectors", + "Replace constant vector operands with equivalent loads", false, + false) + +void GlobalizeConstantVectors::findConstantVectors(const Function &F, + Constants &Cs) const { + for (const_inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; + ++II) { + for (User::const_op_iterator OI = II->op_begin(), OE = II->op_end(); + OI != OE; ++OI) { + Value *V = OI->get(); + if (isa(V) || isa(V) || + isa(V)) + Cs.insert(cast(V)); + } + } +} + +void GlobalizeConstantVectors::createGlobalConstantVectors( + Module &M, const FunctionConstantList &FCs, + GlobalizedConstants &GCs) const { + for (FunctionConstantList::const_iterator FCI = FCs.begin(), FCE = FCs.end(); + FCI != FCE; ++FCI) { + const Constants &Cs = FCI->second; + + for (Constants::const_iterator CI = Cs.begin(), CE = Cs.end(); CI != CE; + ++CI) { + Constant *C = *CI; + if (GCs.find(C) != GCs.end()) + continue; // The vector has already been globalized. + GlobalVariable *GV = + new GlobalVariable(M, C->getType(), /* isConstant= */ true, + GlobalValue::InternalLinkage, C, Name); + GV->setAlignment(DL->getPrefTypeAlignment(C->getType())); + GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // The content is significant, not the address. + GCs[C] = GV; + } + } +} + +void GlobalizeConstantVectors::materializeConstantVectors( + Function &F, const Constants &Cs, const GlobalizedConstants &GCs) const { + // The first instruction in a function dominates all others, it is therefore a + // safe insertion point. 
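+ // Illustrative sketch (hypothetical names): an operand such as
+ //   %r = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
+ // ends up as
+ //   %constant_vector = load <4 x i32>, <4 x i32>* @constant_vector
+ //   ...
+ //   %r = add <4 x i32> %x, %constant_vector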
+ Instruction *FirstInst = F.getEntryBlock().getFirstNonPHI(); + + for (Constants::const_iterator CI = Cs.begin(), CE = Cs.end(); CI != CE; + ++CI) { + Constant *C = *CI; + GlobalizedConstants::const_iterator GVI = GCs.find(C); + assert(GVI != GCs.end()); + GlobalVariable *GV = GVI->second; + LoadInst *MaterializedGV = new LoadInst(GV, Name, /* isVolatile= */ false, + GV->getAlignment(), FirstInst); + + // Find users of the constant vector. + typedef SmallVector UserList; + UserList CVUsers; + for (auto U : C->users()) { + if (Instruction *I = dyn_cast(U)) + if (I->getParent()->getParent() != &F) + // Skip uses of the constant vector in other functions: we need to + // materialize it in every function which has a use. + continue; + if (isa(U)) + // Don't replace global uses of the constant vector: we just created a + // new one. This avoid recursive references. + // Also, it's not legal to replace a constant's operand with + // a non-constant (the load instruction). + continue; + CVUsers.push_back(U); + } + + // Replace these Users. Must be done separately to avoid invalidating the + // User iterator. + for (UserList::iterator UI = CVUsers.begin(), UE = CVUsers.end(); UI != UE; + ++UI) { + User *U = *UI; + for (User::op_iterator OI = U->op_begin(), OE = U->op_end(); OI != OE; + ++OI) + if (dyn_cast(*OI) == C) + // The current operand is a use of the constant vector, replace it + // with the materialized one. + *OI = MaterializedGV; + } + } +} + +bool GlobalizeConstantVectors::runOnModule(Module &M) { + DL = &M.getDataLayout(); + + FunctionConstantList FCs; + FCs.reserve(M.size()); + for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { + Constants Cs; + findConstantVectors(*FI, Cs); + if (!Cs.empty()) + FCs.push_back(std::make_pair(&*FI, Cs)); + } + + GlobalizedConstants GCs; + createGlobalConstantVectors(M, FCs, GCs); + + for (FunctionConstantList::const_iterator FCI = FCs.begin(), FCE = FCs.end(); + FCI != FCE; ++FCI) + materializeConstantVectors(*FCI->first, FCI->second, GCs); + + return FCs.empty(); +} + +ModulePass *llvm::createGlobalizeConstantVectorsPass() { + return new GlobalizeConstantVectors(); +} diff --git a/lib/Target/JSBackend/NaCl/InsertDivideCheck.cpp b/lib/Target/JSBackend/NaCl/InsertDivideCheck.cpp new file mode 100644 index 000000000000..7510931ce2e2 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/InsertDivideCheck.cpp @@ -0,0 +1,112 @@ +//===- InsertDivideCheck.cpp - Add divide by zero checks ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass adds a check for divide by zero before every integer DIV or REM. 
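+//
+// Illustrative example (block and value names follow the code below):
+// "%q = udiv i32 %a, %b" becomes roughly
+//
+//   %iszero = icmp eq i32 %b, 0
+//   br i1 %iszero, label %divrem.by.zero, label %guarded.divrem
+// guarded.divrem:
+//   %q = udiv i32 %a, %b
+//   ...
+// divrem.by.zero:
+//   call void @llvm.trap()
+//   unreachable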
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "add-divide-check" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class InsertDivideCheck : public FunctionPass { + public: + static char ID; + InsertDivideCheck() : FunctionPass(ID) { + initializeInsertDivideCheckPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + }; +} + +static BasicBlock *CreateTrapBlock(Function &F, DebugLoc dl) { + BasicBlock *TrapBlock = BasicBlock::Create(F.getContext(), "divrem.by.zero", + &F); + Value *TrapFn = Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap); + CallInst::Create(TrapFn, "", TrapBlock)->setDebugLoc(dl); + (new UnreachableInst(F.getContext(), TrapBlock))->setDebugLoc(dl); + return TrapBlock; +} + +bool InsertDivideCheck::runOnFunction(Function &F) { + SmallPtrSet GuardedDivs; + // If the pass finds a DIV/REM that needs to be checked for zero denominator, + // it will insert a new "trap" block, and split the block that contains the + // DIV/REM into two blocks. The new BasicBlocks are added after the current + // BasicBlock, so that if there is more than one DIV/REM in the same block, + // all are visited. + for (Function::iterator I = F.begin(); I != F.end(); I++) { + BasicBlock *BB = &*I; + + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; BI++) { + BinaryOperator *DivInst = dyn_cast(BI); + if (!DivInst || (GuardedDivs.count(DivInst) != 0)) + continue; + unsigned Opcode = DivInst->getOpcode(); + if (Opcode != Instruction::SDiv && Opcode != Instruction::UDiv && + Opcode != Instruction::SRem && Opcode != Instruction::URem) + continue; + Value *Denominator = DivInst->getOperand(1); + if (!Denominator->getType()->isIntegerTy()) + continue; + DebugLoc dl = DivInst->getDebugLoc(); + if (ConstantInt *DenomConst = dyn_cast(Denominator)) { + // Divides by constants do not need a denominator test. + if (DenomConst->isZero()) { + // For explicit divides by zero, insert a trap before DIV/REM + Value *TrapFn = Intrinsic::getDeclaration(F.getParent(), + Intrinsic::trap); + CallInst::Create(TrapFn, "", DivInst)->setDebugLoc(dl); + } + continue; + } + // Create a trap block. + BasicBlock *TrapBlock = CreateTrapBlock(F, dl); + // Move instructions in BB from DivInst to BB's end to a new block. + BasicBlock *Successor = BB->splitBasicBlock(BI, "guarded.divrem"); + // Remove the unconditional branch inserted by splitBasicBlock. + BB->getTerminator()->eraseFromParent(); + // Remember that DivInst was already processed, so that when we process + // inserted blocks later, we do not attempt to again guard it. + GuardedDivs.insert(DivInst); + // Compare the denominator with zero. + Value *Zero = ConstantInt::get(Denominator->getType(), 0); + Value *DenomIsZero = new ICmpInst(*BB, ICmpInst::ICMP_EQ, Denominator, + Zero, ""); + // Put in a condbranch to the trap block. + BranchInst::Create(TrapBlock, Successor, DenomIsZero, BB); + // BI is invalidated when we split. Stop the BasicBlock iterator. 
+ break; + } + } + + return false; +} + +char InsertDivideCheck::ID = 0; +INITIALIZE_PASS(InsertDivideCheck, "insert-divide-check", + "Insert divide by zero checks", false, false) + +FunctionPass *llvm::createInsertDivideCheckPass() { + return new InsertDivideCheck(); +} diff --git a/lib/Target/JSBackend/NaCl/InternalizeUsedGlobals.cpp b/lib/Target/JSBackend/NaCl/InternalizeUsedGlobals.cpp new file mode 100644 index 000000000000..fef6fc04be30 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/InternalizeUsedGlobals.cpp @@ -0,0 +1,67 @@ +//===- InternalizeUsedGlobals.cpp - mark used globals as internal ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The internalize pass does not mark internal globals marked as "used", +// which may be achieved with __attribute((used))__ in C++, for example. +// In PNaCl scenarios, we always perform whole program analysis, and +// the ABI requires all but entrypoint globals to be internal. This pass +// satisfies such requirements. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +namespace { + +class InternalizeUsedGlobals : public ModulePass { +public: + static char ID; + + InternalizeUsedGlobals() : ModulePass(ID) { + initializeInternalizeUsedGlobalsPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); +}; +} + +char InternalizeUsedGlobals::ID = 0; + +INITIALIZE_PASS(InternalizeUsedGlobals, "internalize-used-globals", + "Mark internal globals in the llvm.used list", false, false) + +bool InternalizeUsedGlobals::runOnModule(Module &M) { + bool Changed = false; + + SmallPtrSet Used; + collectUsedGlobalVariables(M, Used, /*CompilerUsed =*/false); + for (GlobalValue *V : Used) { + if (V->getLinkage() != GlobalValue::InternalLinkage) { + // Setting Linkage to InternalLinkage also sets the visibility to + // DefaultVisibility. + // For explicitness, we do so upfront. + V->setVisibility(GlobalValue::DefaultVisibility); + V->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + } + } + return Changed; +} + +ModulePass *llvm::createInternalizeUsedGlobalsPass() { + return new InternalizeUsedGlobals(); +} diff --git a/lib/Target/JSBackend/NaCl/LLVMBuild.txt b/lib/Target/JSBackend/NaCl/LLVMBuild.txt new file mode 100644 index 000000000000..f8b3b9eb13d4 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/JSBackend/NaCl/LLVMBuild.txt ---------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===-----------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
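Returning briefly to the InternalizeUsedGlobals pass above, here is a source-level analogy of its effect (illustrative only; build_id is an invented name, and the "after" form is shown as a comment rather than generated code):

#include <iostream>

// Kept alive through llvm.used, e.g. via __attribute__((used)) in C/C++.
__attribute__((used)) int build_id = 42;        // before the pass: external linkage
// After the pass the global stays in llvm.used but gets internal linkage,
// roughly as if it had been written:
//   __attribute__((used)) static int build_id = 42;

int main() { std::cout << build_id << "\n"; }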
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===-----------------------------------------------------------------------===; + +[component_0] +type = Library +name = PNaClTransforms +parent = JSBackend +required_libraries = Analysis Core Support IPO Scalar TransformUtils +add_to_library_groups = JSBackend diff --git a/lib/Target/JSBackend/NaCl/LowerEmAsyncify.cpp b/lib/Target/JSBackend/NaCl/LowerEmAsyncify.cpp new file mode 100644 index 000000000000..4185fd49a3e8 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/LowerEmAsyncify.cpp @@ -0,0 +1,720 @@ +//===- LowerEmAsyncify - transform asynchronous functions for Emscripten/JS -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Lu Wang +// +// In JS we don't have functions like sleep(), which is on the other hand very popuar in C/C++ etc. +// This pass tries to convert funcitons calling sleep() into a valid form in JavaScript +// The basic idea is to split the callee at the place where sleep() is called, +// then the first half may schedule the second half using setTimeout. +// But we need to pay lots of attention to analyzing/saving/restoring context variables and return values +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" // for DemoteRegToStack, removeUnreachableBlocks +#include "llvm/Transforms/Utils/PromoteMemToReg.h" // for PromoteMemToReg +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Pass.h" + +#include + +using namespace llvm; + +static cl::list +AsyncifyFunctions("emscripten-asyncify-functions", + cl::desc("Functions that call one of these functions, directly or indirectly, will be asyncified"), + cl::CommaSeparated); + +static cl::list +AsyncifyWhiteList("emscripten-asyncify-whitelist", + cl::desc("Functions that should not be asyncified"), + cl::CommaSeparated); + +namespace { + class LowerEmAsyncify: public ModulePass { + Module *TheModule; + + public: + static char ID; // Pass identification, replacement for typeid + explicit LowerEmAsyncify() : ModulePass(ID), TheModule(NULL) { + initializeLowerEmAsyncifyPass(*PassRegistry::getPassRegistry()); + } + virtual ~LowerEmAsyncify() { } + bool runOnModule(Module &M); + + private: + const DataLayout *DL; + + Type *Void, *I1, *I32, *I32Ptr; + FunctionType *VFunction, *I1Function, *I32PFunction; + FunctionType *VI32PFunction, *I32PI32Function; + FunctionType *CallbackFunctionType; + + Function *AllocAsyncCtxFunction, *ReallocAsyncCtxFunction, *FreeAsyncCtxFunction; + Function *CheckAsyncFunction; + Function *DoNotUnwindFunction, *DoNotUnwindAsyncFunction; + Function *GetAsyncReturnValueAddrFunction; + + void initTypesAndFunctions(void); + + typedef std::vector Instructions; + typedef DenseMap FunctionInstructionsMap; + typedef 
std::vector Values; + typedef SmallPtrSet BasicBlockSet; + + // all the information we want for an async call + struct AsyncCallEntry { + Instruction *AsyncCallInst; // calling an async function + BasicBlock *AfterCallBlock; // the block we should continue on after getting the return value of AsynCallInst + CallInst *AllocAsyncCtxInst; // where we allocate the async ctx before the async call, in the original function + Values ContextVariables; // those need to be saved and restored for the async call + StructType *ContextStructType; // The structure constructing all the context variables + BasicBlock *SaveAsyncCtxBlock; // the block in which we save all the variables + Function *CallbackFunc; // the callback function for this async call, which is converted from the original function + }; + + BasicBlockSet FindReachableBlocksFrom(BasicBlock *src); + + // Find everything that we should save and restore for the async call + // save them to Entry.ContextVariables + void FindContextVariables(AsyncCallEntry & Entry); + + // The essential function + // F is now in the sync form, transform it into an async form that is valid in JS + void transformAsyncFunction(Function &F, Instructions const& AsyncCalls); + + bool IsFunctionPointerCall(const Instruction *I); + }; +} + +char LowerEmAsyncify::ID = 0; +INITIALIZE_PASS(LowerEmAsyncify, "loweremasyncify", + "Lower async functions for js/emscripten", + false, false) + +bool LowerEmAsyncify::runOnModule(Module &M) { + TheModule = &M; + DL = &M.getDataLayout(); + + std::set WhiteList(AsyncifyWhiteList.begin(), AsyncifyWhiteList.end()); + + /* + * collect all the functions that should be asyncified + * any function that _might_ call an async function is also async + */ + std::vector AsyncFunctionsPending; + for(unsigned i = 0; i < AsyncifyFunctions.size(); ++i) { + std::string const& AFName = AsyncifyFunctions[i]; + Function *F = TheModule->getFunction(AFName); + if (F && !WhiteList.count(F->getName())) { + AsyncFunctionsPending.push_back(F); + } + } + + // No function needed to transform + if (AsyncFunctionsPending.empty()) return false; + + // Walk through the call graph and find all the async functions + FunctionInstructionsMap AsyncFunctionCalls; + { + // pessimistic: consider all indirect calls as possibly async + // TODO: deduce based on function types + for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); FI != FE; ++FI) { + if (WhiteList.count(FI->getName())) continue; + + bool has_indirect_call = false; + for (inst_iterator I = inst_begin(&*FI), E = inst_end(&*FI); I != E; ++I) { + if (IsFunctionPointerCall(&*I)) { + has_indirect_call = true; + AsyncFunctionCalls[&*FI].push_back(&*I); + } + } + + if (has_indirect_call) AsyncFunctionsPending.push_back(&*FI); + } + + while (!AsyncFunctionsPending.empty()) { + Function *CurFunction = AsyncFunctionsPending.back(); + AsyncFunctionsPending.pop_back(); + + for (Value::user_iterator UI = CurFunction->user_begin(), E = CurFunction->user_end(); UI != E; ++UI) { + ImmutableCallSite ICS(*UI); + if (!ICS) continue; + // we only need those instructions calling the function + // if the function address is used for other purpose, we don't care + if (CurFunction != ICS.getCalledValue()->stripPointerCasts()) continue; + // Now I is either CallInst or InvokeInst + Instruction *I = cast(*UI); + Function *F = I->getParent()->getParent(); + if (AsyncFunctionCalls.count(F) == 0) { + AsyncFunctionsPending.push_back(F); + } + AsyncFunctionCalls[F].push_back(I); + } + } + } + + // exit if no async function 
is found at all + if (AsyncFunctionCalls.empty()) return false; + + initTypesAndFunctions(); + + for (FunctionInstructionsMap::iterator I = AsyncFunctionCalls.begin(), E = AsyncFunctionCalls.end(); + I != E; ++I) { + transformAsyncFunction(*(I->first), I->second); + } + + return true; +} + +void LowerEmAsyncify::initTypesAndFunctions(void) { + // Data types + Void = Type::getVoidTy(TheModule->getContext()); + I1 = Type::getInt1Ty(TheModule->getContext()); + I32 = Type::getInt32Ty(TheModule->getContext()); + I32Ptr = Type::getInt32PtrTy(TheModule->getContext()); + + // Function types + SmallVector ArgTypes; + VFunction = FunctionType::get(Void, false); + I1Function = FunctionType::get(I1, false); + I32PFunction = FunctionType::get(I32Ptr, false); + + ArgTypes.clear(); + ArgTypes.push_back(I32Ptr); + VI32PFunction = FunctionType::get(Void, ArgTypes, false); + + ArgTypes.clear(); + ArgTypes.push_back(I32); + I32PI32Function = FunctionType::get(I32Ptr, ArgTypes, false); + + CallbackFunctionType = VI32PFunction; + + // Functions + CheckAsyncFunction = Function::Create( + I1Function, + GlobalValue::ExternalLinkage, + "emscripten_check_async", + TheModule + ); + + AllocAsyncCtxFunction = Function::Create( + I32PI32Function, + GlobalValue::ExternalLinkage, + "emscripten_alloc_async_context", + TheModule + ); + + ReallocAsyncCtxFunction = Function::Create( + I32PI32Function, + GlobalValue::ExternalLinkage, + "emscripten_realloc_async_context", + TheModule + ); + + FreeAsyncCtxFunction = Function::Create( + VI32PFunction, + GlobalValue::ExternalLinkage, + "emscripten_free_async_context", + TheModule + ); + + DoNotUnwindFunction = Function::Create( + VFunction, + GlobalValue::ExternalLinkage, + "emscripten_do_not_unwind", + TheModule + ); + + DoNotUnwindAsyncFunction = Function::Create( + VFunction, + GlobalValue::ExternalLinkage, + "emscripten_do_not_unwind_async", + TheModule + ); + + GetAsyncReturnValueAddrFunction = Function::Create( + I32PFunction, + GlobalValue::ExternalLinkage, + "emscripten_get_async_return_value_addr", + TheModule + ); +} + +LowerEmAsyncify::BasicBlockSet LowerEmAsyncify::FindReachableBlocksFrom(BasicBlock *src) { + BasicBlockSet ReachableBlockSet; + std::vector pending; + ReachableBlockSet.insert(src); + pending.push_back(src); + while (!pending.empty()) { + BasicBlock *CurBlock = pending.back(); + pending.pop_back(); + for (succ_iterator SI = succ_begin(CurBlock), SE = succ_end(CurBlock); SI != SE; ++SI) { + if (ReachableBlockSet.count(*SI) == 0) { + ReachableBlockSet.insert(*SI); + pending.push_back(*SI); + } + } + } + return ReachableBlockSet; +} + +void LowerEmAsyncify::FindContextVariables(AsyncCallEntry & Entry) { + BasicBlock *AfterCallBlock = Entry.AfterCallBlock; + + Function & F = *AfterCallBlock->getParent(); + + // Create a new entry block as if in the callback function + // theck check variables that no longer properly dominate their uses + BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "", &F, &F.getEntryBlock()); + BranchInst::Create(AfterCallBlock, EntryBlock); + + DominatorTreeWrapperPass DTW; + DTW.runOnFunction(F); + DominatorTree& DT = DTW.getDomTree(); + + // These blocks may be using some values defined at or before AsyncCallBlock + BasicBlockSet Ramifications = FindReachableBlocksFrom(AfterCallBlock); + + SmallPtrSet ContextVariables; + Values Pending; + + // Examine the instructions, find all variables that we need to store in the context + for (BasicBlockSet::iterator RI = Ramifications.begin(), RE = Ramifications.end(); RI != 
RE; ++RI) { + for (BasicBlock::iterator I = (*RI)->begin(), E = (*RI)->end(); I != E; ++I) { + for (unsigned i = 0, NumOperands = I->getNumOperands(); i < NumOperands; ++i) { + Value *O = I->getOperand(i); + if (Instruction *Inst = dyn_cast(O)) { + if (Inst == Entry.AsyncCallInst) continue; // for the original async call, we will load directly from async return value + if (ContextVariables.count(Inst) != 0) continue; // already examined + + if (!DT.dominates(Inst, I->getOperandUse(i))) { + // `I` is using `Inst`, yet `Inst` does not dominate `I` if we arrive directly at AfterCallBlock + // so we need to save `Inst` in the context + ContextVariables.insert(Inst); + Pending.push_back(Inst); + } + } else if (Argument *Arg = dyn_cast(O)) { + // count() should be as fast/slow as insert, so just insert here + ContextVariables.insert(Arg); + } + } + } + } + + // restore F + EntryBlock->eraseFromParent(); + + Entry.ContextVariables.clear(); + Entry.ContextVariables.reserve(ContextVariables.size()); + for (SmallPtrSet::iterator I = ContextVariables.begin(), E = ContextVariables.end(); I != E; ++I) { + Entry.ContextVariables.push_back(*I); + } +} + +/* + * Consider that F contains a call to G, both of which are async: + * + * function F: + * ... + * %0 = G(%1, %2, ...); + * ... + * return %%; + * + * We want to convert F and generate F__asyn_cb + * they are similar, but with minor yet important differences + * Note those `main func only` and `callback func only` instructions + +////////////////////////////////////////////////////////// + function F: + ... + ctx = alloc_ctx(len, sp); // main func only + // TODO + // we could also do this only after an async call + // but in that case we will need to pass ctx to the function + // since ctx is no longer in the top async stack frame + %0 = G(%1, %2, ...); + if (async) { // G was async + save context variables in ctx + register F.async_cb as the callback in frame + return without unwinding the stack frame + } else { // G was sync + // use %0 as normal + free_ctx(ctx); // main func only + // ctx is freed here, because so far F is still a sync function + // and we don't want any side effects + ... + async return value = %%; + return & normally unwind the stack frame // main func only + } +////////////////////////////////////////////////////////// + + * And here's F.async_cb + +////////////////////////////////////////////////////////// + function F.async_cb(ctx): + load variables from ctx // callback func only + goto resume_point; // callback func only + ... + ctx = realloc_ctx(len); // callback func only + // realloc_ctx is different from alloc_ctx + // which reused the current async stack frame + // we want to keep the saved stack pointer + %0 = G(%1, %2, ...); + if (async) { + save context variables in ctx + register F.async_cb as the callback + return without unwinding the stack frame + } else { + resume_point: + %0'= either $0 or the async return value // callback func only + ... 
+ async return value = %% + return restore the stack pointer back to the value stored in F // callback func only + // no need to free the ctx + // the scheduler will be aware of this return and handle the stack frames + } +////////////////////////////////////////////////////////// + + */ + +void LowerEmAsyncify::transformAsyncFunction(Function &F, Instructions const& AsyncCalls) { + assert(!AsyncCalls.empty()); + + // Pass 0 + // collect all the return instructions from the original function + // will use later + std::vector OrigReturns; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { + if (ReturnInst *RI = dyn_cast(&*I)) { + OrigReturns.push_back(RI); + } + } + + // Pass 1 + // Scan each async call and make the basic structure: + // All these will be cloned into the callback functions + // - allocate the async context before calling an async function + // - check async right after calling an async function, save context & return if async, continue if not + // - retrieve the async return value and free the async context if the called function turns out to be sync + std::vector AsyncCallEntries; + AsyncCallEntries.reserve(AsyncCalls.size()); + for (Instructions::const_iterator I = AsyncCalls.begin(), E = AsyncCalls.end(); I != E; ++I) { + // prepare blocks + Instruction *CurAsyncCall = *I; + + // The block containing the async call + BasicBlock *CurBlock = CurAsyncCall->getParent(); + // The block should run after the async call + BasicBlock *AfterCallBlock = SplitBlock(CurBlock, CurAsyncCall->getNextNode()); + // The block where we store the context and return + BasicBlock *SaveAsyncCtxBlock = BasicBlock::Create(TheModule->getContext(), "SaveAsyncCtx", &F, AfterCallBlock); + // return a dummy value at the end, to make the block valid + new UnreachableInst(TheModule->getContext(), SaveAsyncCtxBlock); + + // allocate the context before making the call + // we don't know the size yet, will fix it later + // we cannot insert the instruction later because, + // we need to make sure that all the instructions and blocks are fixed before we can generate DT and find context variables + // In CallHandler.h `sp` will be put as the second parameter + // such that we can take a note of the original sp + CallInst *AllocAsyncCtxInst = CallInst::Create(AllocAsyncCtxFunction, Constant::getNullValue(I32), "AsyncCtx", CurAsyncCall); + + // Right after the call + // check async and return if so + // TODO: we can define truly async functions and partial async functions + { + // remove old terminator, which came from SplitBlock + CurBlock->getTerminator()->eraseFromParent(); + // go to SaveAsyncCtxBlock if the previous call is async + // otherwise just continue to AfterCallBlock + CallInst *CheckAsync = CallInst::Create(CheckAsyncFunction, "IsAsync", CurBlock); + BranchInst::Create(SaveAsyncCtxBlock, AfterCallBlock, CheckAsync, CurBlock); + } + + // take a note of this async call + AsyncCallEntry CurAsyncCallEntry; + CurAsyncCallEntry.AsyncCallInst = CurAsyncCall; + CurAsyncCallEntry.AfterCallBlock = AfterCallBlock; + CurAsyncCallEntry.AllocAsyncCtxInst = AllocAsyncCtxInst; + CurAsyncCallEntry.SaveAsyncCtxBlock = SaveAsyncCtxBlock; + // create an empty function for the callback, which will be constructed later + CurAsyncCallEntry.CallbackFunc = Function::Create(CallbackFunctionType, F.getLinkage(), F.getName() + "__async_cb", TheModule); + AsyncCallEntries.push_back(CurAsyncCallEntry); + } + + + // Pass 2 + // analyze the context variables and construct SaveAsyncCtxBlock for each async 
call + // also calculate the size of the context and allocate the async context accordingly + for (std::vector::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { + AsyncCallEntry & CurEntry = *EI; + + // Collect everything to be saved + FindContextVariables(CurEntry); + + // Pack the variables as a struct + { + // TODO: sort them from large memeber to small ones, in order to make the struct compact even when aligned + SmallVector Types; + Types.push_back(CallbackFunctionType->getPointerTo()); + for (Values::iterator VI = CurEntry.ContextVariables.begin(), VE = CurEntry.ContextVariables.end(); VI != VE; ++VI) { + Types.push_back((*VI)->getType()); + } + CurEntry.ContextStructType = StructType::get(TheModule->getContext(), Types); + } + + // fix the size of allocation + CurEntry.AllocAsyncCtxInst->setOperand(0, + ConstantInt::get(I32, DL->getTypeStoreSize(CurEntry.ContextStructType))); + + // construct SaveAsyncCtxBlock + { + // fill in SaveAsyncCtxBlock + // temporarily remove the terminator for convenience + CurEntry.SaveAsyncCtxBlock->getTerminator()->eraseFromParent(); + assert(CurEntry.SaveAsyncCtxBlock->empty()); + + Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo(); + BitCastInst *AsyncCtxAddr = new BitCastInst(CurEntry.AllocAsyncCtxInst, AsyncCtxAddrTy, "AsyncCtxAddr", CurEntry.SaveAsyncCtxBlock); + SmallVector Indices; + // store the callback + { + Indices.push_back(ConstantInt::get(I32, 0)); + Indices.push_back(ConstantInt::get(I32, 0)); + GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(CurEntry.ContextStructType, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock); + new StoreInst(CurEntry.CallbackFunc, AsyncVarAddr, CurEntry.SaveAsyncCtxBlock); + } + // store the context variables + for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { + Indices.clear(); + Indices.push_back(ConstantInt::get(I32, 0)); + Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element is the callback function + GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(CurEntry.ContextStructType, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock); + new StoreInst(CurEntry.ContextVariables[i], AsyncVarAddr, CurEntry.SaveAsyncCtxBlock); + } + // to exit the block, we want to return without unwinding the stack frame + CallInst::Create(DoNotUnwindFunction, "", CurEntry.SaveAsyncCtxBlock); + ReturnInst::Create(TheModule->getContext(), + (F.getReturnType()->isVoidTy() ? 
0 : Constant::getNullValue(F.getReturnType())), + CurEntry.SaveAsyncCtxBlock); + } + } + + // Pass 3 + // now all the SaveAsyncCtxBlock's have been constructed + // we can clone F and construct callback functions + // we could not construct the callbacks in Pass 2 because we need _all_ those SaveAsyncCtxBlock's appear in _each_ callback + for (std::vector::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { + AsyncCallEntry & CurEntry = *EI; + + Function *CurCallbackFunc = CurEntry.CallbackFunc; + ValueToValueMapTy VMap; + + // Add the entry block + // load variables from the context + // also update VMap for CloneFunction + BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "AsyncCallbackEntry", CurCallbackFunc); + std::vector LoadedAsyncVars; + { + Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo(); + BitCastInst *AsyncCtxAddr = new BitCastInst(&*CurCallbackFunc->arg_begin(), AsyncCtxAddrTy, "AsyncCtx", EntryBlock); + SmallVector Indices; + for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { + Indices.clear(); + Indices.push_back(ConstantInt::get(I32, 0)); + Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element of AsyncCtx is the callback function + GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(CurEntry.ContextStructType, AsyncCtxAddr, Indices, "", EntryBlock); + LoadedAsyncVars.push_back(new LoadInst(AsyncVarAddr, "", EntryBlock)); + // we want the argument to be replaced by the loaded value + if (isa(CurEntry.ContextVariables[i])) + VMap[CurEntry.ContextVariables[i]] = LoadedAsyncVars.back(); + } + } + + // we don't need any argument, just leave dummy entries there to cheat CloneFunctionInto + for (Function::const_arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { + if (VMap.count(&*AI) == 0) + VMap[&*AI] = Constant::getNullValue(AI->getType()); + } + + // Clone the function + { + SmallVector Returns; + CloneFunctionInto(CurCallbackFunc, &F, VMap, false, Returns); + + // return type of the callback functions is always void + // need to fix the return type + if (!F.getReturnType()->isVoidTy()) { + // for those return instructions that are from the original function + // it means we are 'truly' leaving this function + // need to store the return value right before ruturn + for (size_t i = 0; i < OrigReturns.size(); ++i) { + ReturnInst *RI = cast(VMap[OrigReturns[i]]); + // Need to store the return value into the global area + CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", RI); + BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, F.getReturnType()->getPointerTo(), "AsyncRetValAddr", RI); + new StoreInst(RI->getOperand(0), RetValAddr, RI); + } + // we want to unwind the stack back to where it was before the original function as called + // but we don't actually need to do this here + // at this point it must be true that no callback is pended + // so the scheduler will correct the stack pointer and pop the frame + // here we just fix the return type + for (size_t i = 0; i < Returns.size(); ++i) { + ReplaceInstWithInst(Returns[i], ReturnInst::Create(TheModule->getContext())); + } + } + } + + // the callback function does not have any return value + // so clear all the attributes for return + { + AttributeSet Attrs = CurCallbackFunc->getAttributes(); + CurCallbackFunc->setAttributes( + Attrs.removeAttributes(TheModule->getContext(), AttributeSet::ReturnIndex, Attrs.getRetAttributes()) + ); + } + + // in the callback function, we 
never allocate a new async frame + // instead we reuse the existing one + for (std::vector::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { + Instruction *I = cast(VMap[EI->AllocAsyncCtxInst]); + ReplaceInstWithInst(I, CallInst::Create(ReallocAsyncCtxFunction, I->getOperand(0), "ReallocAsyncCtx")); + } + + // mapped entry point & async call + BasicBlock *ResumeBlock = cast(VMap[CurEntry.AfterCallBlock]); + Instruction *MappedAsyncCall = cast(VMap[CurEntry.AsyncCallInst]); + + // To save space, for each async call in the callback function, we just ignore the sync case, and leave it to the scheduler + // TODO need an option for this + { + for (std::vector::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { + AsyncCallEntry & CurEntry = *EI; + Instruction *MappedAsyncCallInst = cast(VMap[CurEntry.AsyncCallInst]); + BasicBlock *MappedAsyncCallBlock = MappedAsyncCallInst->getParent(); + BasicBlock *MappedAfterCallBlock = cast(VMap[CurEntry.AfterCallBlock]); + + // for the sync case of the call, go to NewBlock (instead of MappedAfterCallBlock) + BasicBlock *NewBlock = BasicBlock::Create(TheModule->getContext(), "", CurCallbackFunc, MappedAfterCallBlock); + MappedAsyncCallBlock->getTerminator()->setSuccessor(1, NewBlock); + // store the return value + if (!MappedAsyncCallInst->use_empty()) { + CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", NewBlock); + BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCallInst->getType()->getPointerTo(), "AsyncRetValAddr", NewBlock); + new StoreInst(MappedAsyncCallInst, RetValAddr, NewBlock); + } + // tell the scheduler that we want to keep the current async stack frame + CallInst::Create(DoNotUnwindAsyncFunction, "", NewBlock); + // finally we go to the SaveAsyncCtxBlock, to register the callbac, save the local variables and leave + BasicBlock *MappedSaveAsyncCtxBlock = cast(VMap[CurEntry.SaveAsyncCtxBlock]); + BranchInst::Create(MappedSaveAsyncCtxBlock, NewBlock); + } + } + + std::vector ToPromote; + // applying loaded variables in the entry block + { + BasicBlockSet ReachableBlocks = FindReachableBlocksFrom(ResumeBlock); + for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { + Value *OrigVar = CurEntry.ContextVariables[i]; + if (isa(OrigVar)) continue; // already processed + Value *CurVar = VMap[OrigVar]; + assert(CurVar != MappedAsyncCall); + if (Instruction *Inst = dyn_cast(CurVar)) { + if (ReachableBlocks.count(Inst->getParent())) { + // Inst could be either defined or loaded from the async context + // Do the dirty works in memory + // TODO: might need to check the safety first + // TODO: can we create phi directly? 
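Stepping back from the per-variable plumbing above, the overall shape of the asyncify transformation can be pictured with an ordinary C++ continuation. This is a hand-written analogy, not pass output: async_sleep, pending and f are invented names, and the real pass generates a separate F__async_cb function plus an async context instead of a lambda capture.

#include <functional>
#include <iostream>

std::function<void()> pending;      // stands in for the saved async context + registered callback

void async_sleep(std::function<void()> k) {
  pending = std::move(k);           // "save context variables and register the callback"
}

void f() {
  int x = 41;                       // a context variable that must survive the suspension
  async_sleep([x] {                 // the second half of f, i.e. what F__async_cb re-runs
    std::cout << x + 1 << "\n";     // prints 42 once the scheduler resumes us
  });
  // return without unwinding: nothing after the async call runs now
}

int main() {
  f();
  if (pending) pending();           // the scheduler invoking the callback later
}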
+ AllocaInst *Addr = DemoteRegToStack(*Inst, false); + new StoreInst(LoadedAsyncVars[i], Addr, EntryBlock); + ToPromote.push_back(Addr); + } else { + // The parent block is not reachable, which means there is no confliction + // it's safe to replace Inst with the loaded value + assert(Inst != LoadedAsyncVars[i]); // this should only happen when OrigVar is an Argument + Inst->replaceAllUsesWith(LoadedAsyncVars[i]); + } + } + } + } + + // resolve the return value of the previous async function + // it could be the value just loaded from the global area + // or directly returned by the function (in its sync case) + if (!CurEntry.AsyncCallInst->use_empty()) { + // load the async return value + CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", EntryBlock); + BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCall->getType()->getPointerTo(), "AsyncRetValAddr", EntryBlock); + LoadInst *RetVal = new LoadInst(RetValAddr, "AsyncRetVal", EntryBlock); + + AllocaInst *Addr = DemoteRegToStack(*MappedAsyncCall, false); + new StoreInst(RetVal, Addr, EntryBlock); + ToPromote.push_back(Addr); + } + + // TODO remove unreachable blocks before creating phi + + // We go right to ResumeBlock from the EntryBlock + BranchInst::Create(ResumeBlock, EntryBlock); + + /* + * Creating phi's + * Normal stack frames and async stack frames are interleaving with each other. + * In a callback function, if we call an async function, we might need to realloc the async ctx. + * at this point we don't want anything stored after the ctx, + * such that we can free and extend the ctx by simply update STACKTOP. + * Therefore we don't want any alloca's in callback functions. + * + */ + if (!ToPromote.empty()) { + DominatorTreeWrapperPass DTW; + DTW.runOnFunction(*CurCallbackFunc); + PromoteMemToReg(ToPromote, DTW.getDomTree()); + } + + removeUnreachableBlocks(*CurCallbackFunc); + } + + // Pass 4 + // Here are modifications to the original function, which we won't want to be cloned into the callback functions + for (std::vector::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { + AsyncCallEntry & CurEntry = *EI; + // remove the frame if no async functinon has been called + CallInst::Create(FreeAsyncCtxFunction, CurEntry.AllocAsyncCtxInst, "", CurEntry.AfterCallBlock->getFirstNonPHI()); + } +} + +bool LowerEmAsyncify::IsFunctionPointerCall(const Instruction *I) { + // mostly from CallHandler.h + ImmutableCallSite CS(I); + if (!CS) return false; // not call nor invoke + const Value *CV = CS.getCalledValue()->stripPointerCasts(); + return !isa(CV); +} + +ModulePass *llvm::createLowerEmAsyncifyPass() { + return new LowerEmAsyncify(); +} diff --git a/lib/Target/JSBackend/NaCl/LowerEmExceptionsPass.cpp b/lib/Target/JSBackend/NaCl/LowerEmExceptionsPass.cpp new file mode 100644 index 000000000000..e6814ec12d22 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/LowerEmExceptionsPass.cpp @@ -0,0 +1,275 @@ +//===- LowerEmExceptions - Lower exceptions for Emscripten/JS -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is based off the 'cheap' version of LowerInvoke. 
It does two things: +// +// 1) Lower +// invoke() to l1 unwind l2 +// into +// preinvoke(id); // (will clear __THREW__) +// call(); +// threw = postinvoke(id); (check __THREW__) +// br threw, l1, l2 +// +// We do this to avoid introducing a new LLVM IR type, or to try to reuse +// invoke-landingpad for our special purposes (as they are checked very +// carefully by llvm) +// +// 2) Lower landingpads to a call to emscripten_landingpad +// +// 3) Lower resume to emscripten_resume which receives non-aggregate inputs +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +using namespace llvm; + +static cl::list +Whitelist("emscripten-cxx-exceptions-whitelist", + cl::desc("Enables C++ exceptions in emscripten (see emscripten EXCEPTION_CATCHING_WHITELIST option)"), + cl::CommaSeparated); + +namespace { + class LowerEmExceptions : public ModulePass { + Function *GetHigh, *PreInvoke, *PostInvoke, *LandingPad, *Resume; + Module *TheModule; + + public: + static char ID; // Pass identification, replacement for typeid + explicit LowerEmExceptions() : ModulePass(ID), GetHigh(NULL), PreInvoke(NULL), PostInvoke(NULL), LandingPad(NULL), Resume(NULL), TheModule(NULL) { + initializeLowerEmExceptionsPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M); + }; +} + +char LowerEmExceptions::ID = 0; +INITIALIZE_PASS(LowerEmExceptions, "loweremexceptions", + "Lower invoke and unwind for js/emscripten", + false, false) + +bool canThrow(Value *V) { + if (Function *F = dyn_cast(V)) { + // intrinsics and some emscripten builtins cannot throw + if (F->isIntrinsic()) return false; + StringRef Name = F->getName(); + if (Name.startswith("emscripten_asm_")) return false; + if (Name == "setjmp" || Name == "longjmp") return false; // leave setjmp and longjmp (mostly) alone, we process them properly later + return true; + } + return true; // not a function, so an indirect call - can throw, we can't tell +} + +bool LowerEmExceptions::runOnModule(Module &M) { + TheModule = &M; + + // Add functions + + Type *i32 = Type::getInt32Ty(M.getContext()); + Type *i8 = Type::getInt8Ty(M.getContext()); + Type *i1 = Type::getInt1Ty(M.getContext()); + Type *i8P = i8->getPointerTo(); + Type *Void = Type::getVoidTy(M.getContext()); + + if (!(GetHigh = TheModule->getFunction("getHigh32"))) { + FunctionType *GetHighFunc = FunctionType::get(i32, false); + GetHigh = Function::Create(GetHighFunc, GlobalValue::ExternalLinkage, + "getHigh32", TheModule); + } + + if (!(PreInvoke = TheModule->getFunction("emscripten_preinvoke"))) { + SmallVector IntArgTypes; + IntArgTypes.push_back(i32); + FunctionType *VoidIntFunc = FunctionType::get(Void, IntArgTypes, false); + PreInvoke = Function::Create(VoidIntFunc, GlobalValue::ExternalLinkage, "emscripten_preinvoke", TheModule); + } + + if (!(PostInvoke = TheModule->getFunction("emscripten_postinvoke"))) { + SmallVector IntArgTypes; + IntArgTypes.push_back(i32); + 
FunctionType *IntIntFunc = FunctionType::get(i32, IntArgTypes, false); + PostInvoke = Function::Create(IntIntFunc, GlobalValue::ExternalLinkage, "emscripten_postinvoke", TheModule); + } + + FunctionType *LandingPadFunc = FunctionType::get(i8P, true); + LandingPad = Function::Create(LandingPadFunc, GlobalValue::ExternalLinkage, "emscripten_landingpad", TheModule); + + FunctionType *ResumeFunc = FunctionType::get(Void, true); + Resume = Function::Create(ResumeFunc, GlobalValue::ExternalLinkage, "emscripten_resume", TheModule); + + // Process + + std::set WhitelistSet(Whitelist.begin(), Whitelist.end()); + + bool Changed = false; + + unsigned InvokeId = 0; + + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *F = &*Iter++; + + std::vector ToErase; + std::set LandingPads; + + bool AllowExceptionsInFunc = WhitelistSet.empty() || (WhitelistSet.count("_" + F->getName().str()) != 0); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + // check terminator for invokes + if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + LandingPads.insert(II->getLandingPadInst()); + + bool NeedInvoke = AllowExceptionsInFunc && canThrow(II->getCalledValue()); + + if (NeedInvoke) { + // If we are calling a function that is noreturn, we must remove that attribute. The code we + // insert here does expect it to return, after we catch the exception. + if (II->doesNotReturn()) { + if (Function *F = dyn_cast(II->getCalledValue())) { + F->removeFnAttr(Attribute::NoReturn); + } + II->setAttributes(II->getAttributes().removeAttribute(TheModule->getContext(), AttributeSet::FunctionIndex, Attribute::NoReturn)); + assert(!II->doesNotReturn()); + } + + // Insert a normal call instruction folded in between pre- and post-invoke + SmallVector HelperArgs; + HelperArgs.push_back(ConstantInt::get(i32, InvokeId++)); + CallInst::Create(PreInvoke, HelperArgs, "", II); + + SmallVector CallArgs(II->op_begin(), II->op_end() - 3); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), + CallArgs, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + ToErase.push_back(II); + + CallInst *Post = CallInst::Create(PostInvoke, HelperArgs, "", II); + Instruction *Post1 = new TruncInst(Post, i1, "", II); + + // Insert a branch based on the postInvoke + BranchInst::Create(II->getUnwindDest(), II->getNormalDest(), Post1, II); + } else { + // This can't throw, and we don't need this invoke, just replace it with a call+branch + SmallVector CallArgs(II->op_begin(), II->op_end() - 3); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), + CallArgs, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + ToErase.push_back(II); + + BranchInst::Create(II->getNormalDest(), II); + + // Remove any PHI node entries from the exception destination. 
+ II->getUnwindDest()->removePredecessor(&*BB); + } + + Changed = true; + } + // scan the body of the basic block for resumes + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *I = &*Iter++; + if (ResumeInst *R = dyn_cast(I)) { + // split the input into legal values + Value *Input = R->getValue(); + ExtractValueInst *Low = ExtractValueInst::Create(Input, 0, "", R); + ExtractValueInst *High = ExtractValueInst::Create(Input, 1, "", R); + + // create a resume call + SmallVector CallArgs; + CallArgs.push_back(Low); + CallArgs.push_back(High); + CallInst::Create(Resume, CallArgs, "", R); + + new UnreachableInst(TheModule->getContext(), R); // add a terminator to the block + + ToErase.push_back(R); + } + } + } + + // Look for orphan landingpads, can occur in blocks with no predecesors + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + Instruction *I = BB->getFirstNonPHI(); + if (LandingPadInst *LP = dyn_cast(I)) { + LandingPads.insert(LP); + } + } + + // Handle all the landingpad for this function together, as multiple invokes may share a single lp + for (std::set::iterator I = LandingPads.begin(); I != LandingPads.end(); I++) { + // Replace the landingpad with a landingpad call to get the low part, and a getHigh for the high + LandingPadInst *LP = *I; + unsigned Num = LP->getNumClauses(); + SmallVector NewLPArgs; + NewLPArgs.push_back(F->getPersonalityFn()); + for (unsigned i = 0; i < Num; i++) { + Value *Arg = LP->getClause(i); + // As a temporary workaround for the lack of aggregate varargs support + // in the varargs lowering code, break out filter operands into their + // component elements. + if (LP->isFilter(i)) { + ArrayType *ATy = cast(Arg->getType()); + for (unsigned elem = 0, elemEnd = ATy->getNumElements(); elem != elemEnd; ++elem) { + Instruction *EE = ExtractValueInst::Create(Arg, makeArrayRef(elem), "", LP); + NewLPArgs.push_back(EE); + } + } else { + NewLPArgs.push_back(Arg); + } + } + NewLPArgs.push_back(LP->isCleanup() ? ConstantInt::getTrue(i1) : ConstantInt::getFalse(i1)); + CallInst *NewLP = CallInst::Create(LandingPad, NewLPArgs, "", LP); + + Instruction *High = CallInst::Create(GetHigh, "", LP); + + // New recreate an aggregate for them, which will be all simplified later (simplification cannot handle landingpad, hence all this) + InsertValueInst *IVA = InsertValueInst::Create(UndefValue::get(LP->getType()), NewLP, 0, "", LP); + InsertValueInst *IVB = InsertValueInst::Create(IVA, High, 1, "", LP); + + LP->replaceAllUsesWith(IVB); + ToErase.push_back(LP); + } + + // erase everything we no longer need in this function + for (unsigned i = 0; i < ToErase.size(); i++) ToErase[i]->eraseFromParent(); + } + + return Changed; +} + +ModulePass *llvm::createLowerEmExceptionsPass() { + return new LowerEmExceptions(); +} + diff --git a/lib/Target/JSBackend/NaCl/LowerEmSetjmp.cpp b/lib/Target/JSBackend/NaCl/LowerEmSetjmp.cpp new file mode 100644 index 000000000000..64500d286804 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/LowerEmSetjmp.cpp @@ -0,0 +1,349 @@ +//===- LowerEmSetjmp - Lower setjmp/longjmp for Emscripten/JS -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Lowers setjmp to a reasonably-performant approach for emscripten. 
The idea +// is that each block with a setjmp is broken up into the part right after +// the setjmp, and a new basic block is added which is either reached from +// the setjmp, or later from a longjmp. To handle the longjmp, all calls that +// might longjmp are checked immediately afterwards. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include +#include +#include + +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// Utilities for mem/reg: based on Reg2Mem and MemToReg + +bool valueEscapes(const Instruction *Inst) { + const BasicBlock *BB = Inst->getParent(); + for (Value::const_user_iterator UI = Inst->user_begin(),E = Inst->user_end(); + UI != E; ++UI) { + const User *U = *UI; + const Instruction *I = cast(U); + if (I->getParent() != BB || isa(I)) + return true; + } + return false; +} + +void doRegToMem(Function &F) { // see Reg2Mem.cpp + // Insert all new allocas into entry block. + BasicBlock *BBEntry = &F.getEntryBlock(); + assert(pred_begin(BBEntry) == pred_end(BBEntry) && + "Entry block to function must not have predecessors!"); + + // Find first non-alloca instruction and create insertion point. This is + // safe if block is well-formed: it always have terminator, otherwise + // we'll get and assertion. + BasicBlock::iterator I = BBEntry->begin(); + while (isa(I)) ++I; + + CastInst *AllocaInsertionPoint = + new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())), + Type::getInt32Ty(F.getContext()), + "reg2mem alloca point", &*I); + + // Find the escaped instructions. But don't create stack slots for + // allocas in entry block. 
+ std::list WorkList; + for (Function::iterator ibb = F.begin(), ibe = F.end(); + ibb != ibe; ++ibb) + for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); + iib != iie; ++iib) { + if (!(isa(iib) && iib->getParent() == BBEntry) && + valueEscapes(&*iib)) { + WorkList.push_front(&*iib); + } + } + + // Demote escaped instructions + for (std::list::iterator ilb = WorkList.begin(), + ile = WorkList.end(); ilb != ile; ++ilb) + DemoteRegToStack(**ilb, false, AllocaInsertionPoint); + + WorkList.clear(); + + // Find all phi's + for (Function::iterator ibb = F.begin(), ibe = F.end(); + ibb != ibe; ++ibb) + for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); + iib != iie; ++iib) + if (isa(iib)) + WorkList.push_front(&*iib); + + // Demote phi nodes + for (std::list::iterator ilb = WorkList.begin(), + ile = WorkList.end(); ilb != ile; ++ilb) + DemotePHIToStack(cast(*ilb), AllocaInsertionPoint); +} + +void doMemToReg(Function &F) { + std::vector Allocas; + + BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function + + DominatorTreeWrapperPass DTW; + DTW.runOnFunction(F); + DominatorTree& DT = DTW.getDomTree(); + + while (1) { + Allocas.clear(); + + // Find allocas that are safe to promote, by looking at all instructions in + // the entry node + for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast(I)) // Is it an alloca? + if (isAllocaPromotable(AI)) + Allocas.push_back(AI); + + if (Allocas.empty()) break; + + PromoteMemToReg(Allocas, DT); + } +} + +// LowerEmSetjmp + +namespace { + class LowerEmSetjmp : public ModulePass { + Module *TheModule; + + public: + static char ID; // Pass identification, replacement for typeid + explicit LowerEmSetjmp() : ModulePass(ID), TheModule(NULL) { + initializeLowerEmSetjmpPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M); + }; +} + +char LowerEmSetjmp::ID = 0; +INITIALIZE_PASS(LowerEmSetjmp, "loweremsetjmp", + "Lower setjmp and longjmp for js/emscripten", + false, false) + +bool LowerEmSetjmp::runOnModule(Module &M) { + TheModule = &M; + + Function *Setjmp = TheModule->getFunction("setjmp"); + Function *Longjmp = TheModule->getFunction("longjmp"); + if (!Setjmp && !Longjmp) return false; + + Type *i32 = Type::getInt32Ty(M.getContext()); + Type *Void = Type::getVoidTy(M.getContext()); + + // Add functions + + Function *EmSetjmp = NULL; + + if (Setjmp) { + SmallVector EmSetjmpTypes; + EmSetjmpTypes.push_back(Setjmp->getFunctionType()->getParamType(0)); + EmSetjmpTypes.push_back(i32); // extra param that says which setjmp in the function it is + FunctionType *EmSetjmpFunc = FunctionType::get(i32, EmSetjmpTypes, false); + EmSetjmp = Function::Create(EmSetjmpFunc, GlobalValue::ExternalLinkage, "emscripten_setjmp", TheModule); + } + + Function *EmLongjmp = Longjmp ? 
Function::Create(Longjmp->getFunctionType(), GlobalValue::ExternalLinkage, "emscripten_longjmp", TheModule) : NULL; + + SmallVector IntArgTypes; + IntArgTypes.push_back(i32); + FunctionType *IntIntFunc = FunctionType::get(i32, IntArgTypes, false); + FunctionType *VoidIntFunc = FunctionType::get(Void, IntArgTypes, false); + + Function *CheckLongjmp = Function::Create(IntIntFunc, GlobalValue::ExternalLinkage, "emscripten_check_longjmp", TheModule); // gets control flow + + Function *GetLongjmpResult = Function::Create(IntIntFunc, GlobalValue::ExternalLinkage, "emscripten_get_longjmp_result", TheModule); // gets int value longjmp'd + + FunctionType *VoidFunc = FunctionType::get(Void, false); + Function *PrepSetjmp = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_prep_setjmp", TheModule); + + Function *CleanupSetjmp = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_cleanup_setjmp", TheModule); + + Function *PreInvoke = TheModule->getFunction("emscripten_preinvoke"); + if (!PreInvoke) PreInvoke = Function::Create(VoidIntFunc, GlobalValue::ExternalLinkage, "emscripten_preinvoke", TheModule); + + Function *PostInvoke = TheModule->getFunction("emscripten_postinvoke"); + if (!PostInvoke) PostInvoke = Function::Create(IntIntFunc, GlobalValue::ExternalLinkage, "emscripten_postinvoke", TheModule); + + // Process all callers of setjmp and longjmp. Start with setjmp. + + typedef std::vector Phis; + typedef std::map FunctionPhisMap; + FunctionPhisMap SetjmpOutputPhis; + std::vector ToErase; + + if (Setjmp) { + for (Instruction::user_iterator UI = Setjmp->user_begin(), UE = Setjmp->user_end(); UI != UE; ++UI) { + User *U = *UI; + if (CallInst *CI = dyn_cast(U)) { + BasicBlock *SJBB = CI->getParent(); + // The tail is everything right after the call, and will be reached once when setjmp is + // called, and later when longjmp returns to the setjmp + BasicBlock *Tail = SplitBlock(SJBB, CI->getNextNode()); + // Add a phi to the tail, which will be the output of setjmp, which indicates if this is the + // first call or a longjmp back. The phi directly uses the right value based on where we + // arrive from + PHINode *SetjmpOutput = PHINode::Create(i32, 2, "", Tail->getFirstNonPHI()); + SetjmpOutput->addIncoming(ConstantInt::get(i32, 0), SJBB); // setjmp initial call returns 0 + CI->replaceAllUsesWith(SetjmpOutput); // The proper output is now this, not the setjmp call itself + // longjmp returns to the setjmp will add themselves to this phi + Phis& P = SetjmpOutputPhis[SJBB->getParent()]; + P.push_back(SetjmpOutput); + // fix call target + SmallVector Args; + Args.push_back(CI->getArgOperand(0)); + Args.push_back(ConstantInt::get(i32, P.size())); // our index in the function is our place in the array + 1 + CallInst::Create(EmSetjmp, Args, "", CI); + ToErase.push_back(CI); + } else { + errs() << **UI << "\n"; + report_fatal_error("bad use of setjmp, should only call it"); + } + } + } + + // Update longjmp FIXME: we could avoid throwing in longjmp as an optimization when longjmping back into the current function perhaps? 
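The setjmp-site rewrite above (split the block, add a PHI that starts at 0, call emscripten_setjmp with a per-site index, and later dispatch longjmps back to the matching tail) boils down to the control-flow shape below. This is a minimal runnable sketch with fake bookkeeping: fake_longjmp, pending_site and pending_value are invented stand-ins for what emscripten_longjmp, emscripten_check_longjmp and emscripten_get_longjmp_result do in the real runtime.

#include <iostream>

static int pending_site  = 0;   // which setjmp site a longjmp targeted (0 = none)
static int pending_value = 0;   // the value handed to longjmp

void fake_longjmp(int site, int value) { pending_site = site; pending_value = value; }

int f() {
  int setjmp_output = 0;        // the PHI the pass adds: 0 on the initial setjmp call
  while (true) {
    if (setjmp_output != 0)     // code after setjmp sees the longjmp'd value
      return setjmp_output;
    fake_longjmp(/*site=*/1, /*value=*/7);   // a callee that "longjmps" back to site 1
    if (pending_site == 1) {                 // emscripten_check_longjmp equivalent
      setjmp_output = pending_value;         // emscripten_get_longjmp_result equivalent
      pending_site = 0;
      continue;                              // the SwitchInst edge back to the setjmp tail
    }
    return 0;                                // no longjmp: continue normally
  }
}

int main() { std::cout << f() << "\n"; }     // prints 7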
+ + if (Longjmp) Longjmp->replaceAllUsesWith(EmLongjmp); + + // Update all setjmping functions + + unsigned InvokeId = 0; + + for (FunctionPhisMap::iterator I = SetjmpOutputPhis.begin(); I != SetjmpOutputPhis.end(); I++) { + Function *F = I->first; + Phis& P = I->second; + + CallInst::Create(PrepSetjmp, "", &*F->begin()->begin()); + + // Update each call that can longjmp so it can return to a setjmp where relevant + + for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ) { + BasicBlock *BB = &*BBI++; + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { + Instruction *I = &*Iter++; + CallInst *CI; + if ((CI = dyn_cast(I))) { + Value *V = CI->getCalledValue(); + if (V == PrepSetjmp || V == EmSetjmp || V == CheckLongjmp || V == GetLongjmpResult || V == PreInvoke || V == PostInvoke) continue; + if (Function *CF = dyn_cast(V)) if (CF->isIntrinsic()) continue; + // TODO: proper analysis of what can actually longjmp. Currently we assume anything but setjmp can. + // This may longjmp, so we need to check if it did. Split at that point, and + // envelop the call in pre/post invoke, if we need to + CallInst *After; + Instruction *Check = NULL; + if (Iter != E && (After = dyn_cast(Iter)) && After->getCalledValue() == PostInvoke) { + // use the pre|postinvoke that exceptions lowering already made + Check = &*Iter++; + } + BasicBlock *Tail = SplitBlock(BB, &*Iter); // Iter already points to the next instruction, as we need + TerminatorInst *TI = BB->getTerminator(); + if (!Check) { + // no existing pre|postinvoke, create our own + SmallVector HelperArgs; + HelperArgs.push_back(ConstantInt::get(i32, InvokeId++)); + + CallInst::Create(PreInvoke, HelperArgs, "", CI); + Check = CallInst::Create(PostInvoke, HelperArgs, "", TI); // CI is at the end of the block + + // If we are calling a function that is noreturn, we must remove that attribute. The code we + // insert here does expect it to return, after we catch the exception. + if (CI->doesNotReturn()) { + if (Function *F = dyn_cast(CI->getCalledValue())) { + F->removeFnAttr(Attribute::NoReturn); + } + CI->setAttributes(CI->getAttributes().removeAttribute(TheModule->getContext(), AttributeSet::FunctionIndex, Attribute::NoReturn)); + assert(!CI->doesNotReturn()); + } + } + + // We need to replace the terminator in Tail - SplitBlock makes BB go straight to Tail, we need to check if a longjmp occurred, and + // go to the right setjmp-tail if so + SmallVector Args; + Args.push_back(Check); + Instruction *LongjmpCheck = CallInst::Create(CheckLongjmp, Args, "", BB); + Instruction *LongjmpResult = CallInst::Create(GetLongjmpResult, Args, "", BB); + SwitchInst *SI = SwitchInst::Create(LongjmpCheck, Tail, 2, BB); + // -1 means no longjmp happened, continue normally (will hit the default switch case). 0 means a longjmp that is not ours to handle, needs a rethrow. Otherwise + // the index mean is the same as the index in P+1 (to avoid 0). + for (unsigned i = 0; i < P.size(); i++) { + SI->addCase(cast(ConstantInt::get(i32, i+1)), P[i]->getParent()); + P[i]->addIncoming(LongjmpResult, BB); + } + ToErase.push_back(TI); // new terminator is now the switch + + // we are splitting the block here, and must continue to find other calls in the block - which is now split. 
so continue + // to traverse in the Tail + BB = Tail; + Iter = BB->begin(); + E = BB->end(); + } else if (InvokeInst *CI = dyn_cast(I)) { // XXX check if target is setjmp + (void)CI; + report_fatal_error("TODO: invoke inside setjmping functions"); + } + } + } + + // add a cleanup before each return + for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ) { + BasicBlock *BB = &*BBI++; + TerminatorInst *TI = BB->getTerminator(); + if (isa(TI)) { + CallInst::Create(CleanupSetjmp, "", TI); + } + } + } + + for (unsigned i = 0; i < ToErase.size(); i++) { + ToErase[i]->eraseFromParent(); + } + + // Finally, our modifications to the cfg can break dominance of SSA variables. For example, + // if (x()) { .. setjmp() .. } + // if (y()) { .. longjmp() .. } + // We must split the longjmp block, and it can jump into the setjmp one. But that means that when + // we split the setjmp block, it's first part no longer dominates its second part - there is + // a theoretically possible control flow path where x() is false, then y() is true and we + // reach the second part of the setjmp block, without ever reaching the first part. So, + // we recalculate regs vs. mem + for (FunctionPhisMap::iterator I = SetjmpOutputPhis.begin(); I != SetjmpOutputPhis.end(); I++) { + Function *F = I->first; + doRegToMem(*F); + doMemToReg(*F); + } + + return true; +} + +ModulePass *llvm::createLowerEmSetjmpPass() { + return new LowerEmSetjmp(); +} diff --git a/lib/Target/JSBackend/NaCl/Makefile b/lib/Target/JSBackend/NaCl/Makefile new file mode 100644 index 000000000000..dcb4ab704fb7 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/Makefile @@ -0,0 +1,13 @@ +##===- lib/Target/JSBackend/NaCl/Makefile-------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMPNaClTransforms + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/JSBackend/NaCl/NoExitRuntime.cpp b/lib/Target/JSBackend/NaCl/NoExitRuntime.cpp new file mode 100644 index 000000000000..aad9f4f42eec --- /dev/null +++ b/lib/Target/JSBackend/NaCl/NoExitRuntime.cpp @@ -0,0 +1,91 @@ +//===- NoExitRuntime.cpp - Expand i64 and wider integer types -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
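Backing up to the tail end of LowerEmSetjmp above: the closing Reg2Mem/Mem2Reg round trip exists because the new longjmp edges can make an SSA value stop dominating its uses. A hand-written picture of the demote step follows (demo and slot are invented names; the real work is done by DemoteRegToStack and PromoteMemToReg):

#include <iostream>

int demo(bool took_setjmp_path) {
  int slot = 0;                  // the stack slot DemoteRegToStack introduces
  if (took_setjmp_path)
    slot = 123;                  // the original definition, now an explicit store
  // A longjmp edge may reach this point without passing through the store above;
  // reading the slot is still well defined, unlike using a value that no longer
  // dominates this use. Mem2Reg later rebuilds clean SSA with phi nodes.
  return slot;                   // the original use, now a load
}

int main() { std::cout << demo(false) << "\n"; }   // prints 0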
+// +//===------------------------------------------------------------------===// +// +//===------------------------------------------------------------------===// + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/IR/CFG.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/Local.h" +#include + +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + class NoExitRuntime : public ModulePass { + Module *TheModule; + + public: + static char ID; + NoExitRuntime() : ModulePass(ID) { + initializeNoExitRuntimePass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char NoExitRuntime::ID = 0; +INITIALIZE_PASS(NoExitRuntime, "emscripten-no-exit-runtime", + "Generate code which assumes the runtime is never exited (so atexit etc. is unneeded; see emscripten NO_EXIT_RUNTIME setting)", + false, false) + + +// Implementation of NoExitRuntime + +bool NoExitRuntime::runOnModule(Module &M) { + TheModule = &M; + + Function *AtExit = TheModule->getFunction("__cxa_atexit"); + if (!AtExit || !AtExit->isDeclaration() || AtExit->getNumUses() == 0) return false; + + // The system atexit is used - let's remove calls to it + + Type *i32 = Type::getInt32Ty(TheModule->getContext()); + Value *Zero = Constant::getNullValue(i32); + + std::vector ToErase; + + for (Instruction::user_iterator UI = AtExit->user_begin(), UE = AtExit->user_end(); UI != UE; ++UI) { + if (CallInst *CI = dyn_cast(*UI)) { + if (CI->getCalledValue() == AtExit) { + // calls to atexit can just be removed + CI->replaceAllUsesWith(Zero); + ToErase.push_back(CI); + continue; + } + } + // Possibly other uses of atexit are done - ptrtoint, etc. - but we leave those alone + } + + for (unsigned i = 0; i < ToErase.size(); i++) { + ToErase[i]->eraseFromParent(); + } + + return true; +} + +ModulePass *llvm::createNoExitRuntimePass() { + return new NoExitRuntime(); +} diff --git a/lib/Target/JSBackend/NaCl/NormalizeAlignment.cpp b/lib/Target/JSBackend/NaCl/NormalizeAlignment.cpp new file mode 100644 index 000000000000..d5419bc2caa3 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/NormalizeAlignment.cpp @@ -0,0 +1,86 @@ +//===- NormalizeAlignment.cpp - Normalize Alignment -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Normalize the alignment of loads and stores to better fit the PNaCl ABI: +// +// * On memcpy/memmove/memset intrinsic calls. +// * On regular memory accesses. +// * On atomic memory accesses. 
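For concreteness, the rule implemented below behaves like the stand-alone restatement here (illustrative only; normalize is an invented name, vector accesses are skipped because FixVectorLoadStoreAlignment already handles them, and the sample values are arbitrary):

#include <iostream>

unsigned normalize(unsigned align, unsigned typeSize,
                   bool isFloatOrDouble, bool isAtomic) {
  unsigned maxAllowed = (isFloatOrDouble || isAtomic) ? typeSize : 1;
  return (align == 0 || align >= maxAllowed) ? maxAllowed : 1;
}

int main() {
  std::cout << normalize(16, 8, true,  false) << "\n";  // over-aligned double store -> 8
  std::cout << normalize(4,  4, false, false) << "\n";  // ordinary i32 load         -> 1
  std::cout << normalize(0,  4, false, true)  << "\n";  // atomic i32, default align -> 4
}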
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { +class NormalizeAlignment : public FunctionPass { +public: + static char ID; + NormalizeAlignment() : FunctionPass(ID) { + initializeNormalizeAlignmentPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; +}; +} + +char NormalizeAlignment::ID = 0; +INITIALIZE_PASS(NormalizeAlignment, "normalize-alignment", + "Normalize the alignment of loads and stores", false, false) + +static unsigned normalizeAlignment(DataLayout *DL, unsigned Alignment, Type *Ty, + bool IsAtomic) { + unsigned MaxAllowed = 1; + if (isa(Ty)) + // Already handled properly by FixVectorLoadStoreAlignment. + return Alignment; + if (Ty->isDoubleTy() || Ty->isFloatTy() || IsAtomic) + MaxAllowed = DL->getTypeAllocSize(Ty); + // If the alignment is set to 0, this means "use the default + // alignment for the target", which we fill in explicitly. + if (Alignment == 0 || Alignment >= MaxAllowed) + return MaxAllowed; + return 1; +} + +bool NormalizeAlignment::runOnFunction(Function &F) { + DataLayout DL(F.getParent()); + bool Modified = false; + + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (auto *MemOp = dyn_cast(&I)) { + Modified = true; + Type *AlignTy = MemOp->getAlignmentCst()->getType(); + MemOp->setAlignment(ConstantInt::get(AlignTy, 1)); + } else if (auto *Load = dyn_cast(&I)) { + Modified = true; + Load->setAlignment(normalizeAlignment( + &DL, Load->getAlignment(), Load->getType(), Load->isAtomic())); + } else if (auto *Store = dyn_cast(&I)) { + Modified = true; + Store->setAlignment(normalizeAlignment( + &DL, Store->getAlignment(), Store->getValueOperand()->getType(), + Store->isAtomic())); + } + } + } + + return Modified; +} + +FunctionPass *llvm::createNormalizeAlignmentPass() { + return new NormalizeAlignment(); +} diff --git a/lib/Target/JSBackend/NaCl/PNaClSjLjEH.cpp b/lib/Target/JSBackend/NaCl/PNaClSjLjEH.cpp new file mode 100644 index 000000000000..1e333ab6b14b --- /dev/null +++ b/lib/Target/JSBackend/NaCl/PNaClSjLjEH.cpp @@ -0,0 +1,465 @@ +//===- PNaClSjLjEH.cpp - Lower C++ exception handling to use setjmp()------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The PNaClSjLjEH pass is part of an implementation of C++ exception +// handling for PNaCl that uses setjmp() and longjmp() to handle C++ +// exceptions. The pass lowers LLVM "invoke" instructions to use +// setjmp(). 
+// +// For example, consider the following C++ code fragment: +// +// int catcher_func() { +// try { +// int result = external_func(); +// return result + 100; +// } catch (MyException &exc) { +// return exc.value + 200; +// } +// } +// +// PNaClSjLjEH converts the IR for that function to the following +// pseudo-code: +// +// struct LandingPadResult { +// void *exception_obj; // For passing to __cxa_begin_catch() +// int matched_clause_id; // See ExceptionInfoWriter.cpp +// }; +// +// struct ExceptionFrame { +// union { +// jmp_buf jmpbuf; // Context for jumping to landingpad block +// struct LandingPadResult result; // Data returned to landingpad block +// }; +// struct ExceptionFrame *next; // Next frame in linked list +// int clause_list_id; // Reference to landingpad's exception info +// }; +// +// // Thread-local exception state +// __thread struct ExceptionFrame *__pnacl_eh_stack; +// +// int catcher_func() { +// struct ExceptionFrame frame; +// frame.next = __pnacl_eh_stack; +// frame.clause_list_id = 123; +// __pnacl_eh_stack = &frame; // Add frame to stack +// int result; +// if (!catcher_func_setjmp_caller(external_func, &frame.jmpbuf, &result)) { +// __pnacl_eh_stack = frame.next; // Remove frame from stack +// return result + 100; +// } else { +// // Handle exception. This is a simplification. Real code would +// // call __cxa_begin_catch() to extract the thrown object. +// MyException &exc = *(MyException *) frame.result.exception_obj; +// return exc.value + 200; +// } +// } +// +// // Helper function +// static int catcher_func_setjmp_caller(int (*func)(void), jmp_buf jmpbuf, +// int *result) { +// if (!setjmp(jmpbuf)) { +// *result = func(); +// return 0; +// } +// return 1; +// } +// +// We use a helper function so that setjmp() is not called directly +// from catcher_func(), due to a quirk of how setjmp() and longjmp() +// are specified in C. +// +// func() might modify variables (allocas) that are local to +// catcher_func() (if the variables' addresses are taken). The C +// standard says that these variables' values would become undefined +// after longjmp() returned if setjmp() were called from +// catcher_func(). Specifically, LLVM's GVN pass can optimize away +// stores to allocas between setjmp() and longjmp() (see +// pnacl-sjlj-eh-bug.ll for an example). But this only applies to +// allocas inside the caller of setjmp(), not to allocas inside the +// caller of the caller of setjmp(), so doing the setjmp() call inside +// a helper function that catcher_func() calls avoids the problem. +// +// The pass makes the following changes to IR: +// +// * Convert "invoke" and "landingpad" instructions. +// * Convert "resume" instructions into __pnacl_eh_resume() calls. +// * Replace each call to llvm.eh.typeid.for() with an integer +// constant representing the exception type. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" +#include "ExceptionInfoWriter.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that it can introduce new global variables. 
+ class PNaClSjLjEH : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + PNaClSjLjEH() : ModulePass(ID) { + initializePNaClSjLjEHPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; + + class FuncRewriter { + Type *ExceptionFrameTy; + ExceptionInfoWriter *ExcInfoWriter; + Function *Func; + + // FrameInitialized indicates whether the following variables have + // been initialized. + bool FrameInitialized; + Function *SetjmpIntrinsic; // setjmp() intrinsic function + Instruction *EHStackTlsVar; // Bitcast of thread-local __pnacl_eh_stack var + Instruction *Frame; // Frame allocated for this function + Instruction *FrameJmpBuf; // Frame's jmp_buf field + Instruction *FrameNextPtr; // Frame's next field + Instruction *FrameExcInfo; // Frame's clause_list_id field + + Function *EHResumeFunc; // __pnacl_eh_resume() function + + // Initialize values that are shared across all "invoke" + // instructions within the function. + void initializeFrame(); + + public: + FuncRewriter(Type *ExceptionFrameTy, ExceptionInfoWriter *ExcInfoWriter, + Function *Func): + ExceptionFrameTy(ExceptionFrameTy), + ExcInfoWriter(ExcInfoWriter), + Func(Func), + FrameInitialized(false), + SetjmpIntrinsic(NULL), EHStackTlsVar(NULL), + Frame(NULL), FrameJmpBuf(NULL), FrameNextPtr(NULL), FrameExcInfo(NULL), + EHResumeFunc(NULL) {} + + Value *createSetjmpWrappedCall(InvokeInst *Invoke); + void expandInvokeInst(InvokeInst *Invoke); + void expandResumeInst(ResumeInst *Resume); + void expandFunc(); + }; +} + +char PNaClSjLjEH::ID = 0; +INITIALIZE_PASS(PNaClSjLjEH, "pnacl-sjlj-eh", + "Lower C++ exception handling to use setjmp()", + false, false) + +static const int kPNaClJmpBufSize = 1024; +static const int kPNaClJmpBufAlign = 8; + +void FuncRewriter::initializeFrame() { + if (FrameInitialized) + return; + FrameInitialized = true; + Module *M = Func->getParent(); + + SetjmpIntrinsic = Intrinsic::getDeclaration(M, Intrinsic::nacl_setjmp); + + Value *EHStackTlsVarUncast = M->getGlobalVariable("__pnacl_eh_stack"); + if (!EHStackTlsVarUncast) + report_fatal_error("__pnacl_eh_stack not defined"); + EHStackTlsVar = new BitCastInst( + EHStackTlsVarUncast, ExceptionFrameTy->getPointerTo()->getPointerTo(), + "pnacl_eh_stack"); + Func->getEntryBlock().getInstList().push_front(EHStackTlsVar); + + // Allocate the new exception frame. This is reused across all + // invoke instructions in the function. + Type *I32 = Type::getInt32Ty(M->getContext()); + Frame = new AllocaInst(ExceptionFrameTy, ConstantInt::get(I32, 1), + kPNaClJmpBufAlign, "invoke_frame"); + Func->getEntryBlock().getInstList().push_front(Frame); + + // Calculate addresses of fields in the exception frame. 
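+  // Field numbering below follows ExceptionFrameTy as laid out in
+  // runOnModule(): field 0 is the inline jmp_buf byte array, field 1 is the
+  // "next" pointer, and field 2 is the clause-list id.  The leading 0 in each
+  // index list indexes the frame object that Frame points to, and the third
+  // 0 for the jmp_buf selects its first byte (yielding an i8*).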
+ Value *JmpBufIndexes[] = { ConstantInt::get(I32, 0), + ConstantInt::get(I32, 0), + ConstantInt::get(I32, 0) }; + FrameJmpBuf = GetElementPtrInst::Create( + ExceptionFrameTy, Frame, JmpBufIndexes, "invoke_jmp_buf"); + FrameJmpBuf->insertAfter(Frame); + + Value *NextPtrIndexes[] = { ConstantInt::get(I32, 0), + ConstantInt::get(I32, 1) }; + FrameNextPtr = GetElementPtrInst::Create( + ExceptionFrameTy, Frame, NextPtrIndexes, "invoke_next"); + FrameNextPtr->insertAfter(Frame); + + Value *ExcInfoIndexes[] = { ConstantInt::get(I32, 0), + ConstantInt::get(I32, 2) }; + FrameExcInfo = GetElementPtrInst::Create( + ExceptionFrameTy, Frame, ExcInfoIndexes, "exc_info_ptr"); + FrameExcInfo->insertAfter(Frame); +} + +// Creates the helper function that will do the setjmp() call and +// function call for implementing Invoke. Creates the call to the +// helper function. Returns a Value which is zero on the normal +// execution path and non-zero if the landingpad block should be +// entered. +Value *FuncRewriter::createSetjmpWrappedCall(InvokeInst *Invoke) { + Type *I32 = Type::getInt32Ty(Func->getContext()); + + // Allocate space for storing the invoke's result temporarily (so + // that the helper function can return multiple values). We don't + // need to do this if the result is unused, and we can't if its type + // is void. + Instruction *ResultAlloca = NULL; + if (!Invoke->use_empty()) { + ResultAlloca = new AllocaInst(Invoke->getType(), "invoke_result_ptr"); + Func->getEntryBlock().getInstList().push_front(ResultAlloca); + } + + // Create type for the helper function. + SmallVector ArgTypes; + for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I) + ArgTypes.push_back(Invoke->getArgOperand(I)->getType()); + ArgTypes.push_back(Invoke->getCalledValue()->getType()); + ArgTypes.push_back(FrameJmpBuf->getType()); + if (ResultAlloca) + ArgTypes.push_back(Invoke->getType()->getPointerTo()); + FunctionType *FTy = FunctionType::get(I32, ArgTypes, false); + + // Create the helper function. + Function *HelperFunc = Function::Create( + FTy, GlobalValue::InternalLinkage, Func->getName() + "_setjmp_caller"); + Func->getParent()->getFunctionList().insertAfter(Func->getIterator(), HelperFunc); + BasicBlock *EntryBB = BasicBlock::Create(Func->getContext(), "", HelperFunc); + BasicBlock *NormalBB = BasicBlock::Create(Func->getContext(), "normal", + HelperFunc); + BasicBlock *ExceptionBB = BasicBlock::Create(Func->getContext(), "exception", + HelperFunc); + + // Unpack the helper function's arguments. + Function::arg_iterator ArgIter = HelperFunc->arg_begin(); + SmallVector InnerCallArgs; + for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I) { + ArgIter->setName("arg"); + InnerCallArgs.push_back(&*ArgIter++); + } + Argument *CalleeArg = &*ArgIter++; + Argument *JmpBufArg = &*ArgIter++; + CalleeArg->setName("func_ptr"); + JmpBufArg->setName("jmp_buf"); + + // Create setjmp() call. + Value *SetjmpArgs[] = { JmpBufArg }; + CallInst *SetjmpCall = CallInst::Create(SetjmpIntrinsic, SetjmpArgs, + "invoke_sj", EntryBB); + CopyDebug(SetjmpCall, Invoke); + // Setting the "returns_twice" attribute here prevents optimization + // passes from inlining HelperFunc into its caller. + SetjmpCall->setCanReturnTwice(); + // Check setjmp()'s result. 
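+  // Roughly, the helper assembled here and below looks like this (an
+  // illustrative sketch only; value names match the ones created in this
+  // function):
+  //
+  //   entry:      %invoke_sj = call i32 @llvm.nacl.setjmp(i8* %jmp_buf)
+  //               %invoke_sj_is_zero = icmp eq i32 %invoke_sj, 0
+  //               br i1 %invoke_sj_is_zero, label %normal, label %exception
+  //   normal:     %result = call ... %func_ptr(%arg, ...)  ; stored to %result_ptr if used
+  //               ret i32 0
+  //   exception:  ret i32 1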
+ Value *IsZero = CopyDebug(new ICmpInst(*EntryBB, CmpInst::ICMP_EQ, SetjmpCall, + ConstantInt::get(I32, 0), + "invoke_sj_is_zero"), Invoke); + CopyDebug(BranchInst::Create(NormalBB, ExceptionBB, IsZero, EntryBB), Invoke); + // Handle the normal, non-exceptional code path. + CallInst *InnerCall = CallInst::Create(CalleeArg, InnerCallArgs, "", + NormalBB); + CopyDebug(InnerCall, Invoke); + InnerCall->setAttributes(Invoke->getAttributes()); + InnerCall->setCallingConv(Invoke->getCallingConv()); + if (ResultAlloca) { + InnerCall->setName("result"); + Argument *ResultArg = &*ArgIter++; + ResultArg->setName("result_ptr"); + CopyDebug(new StoreInst(InnerCall, ResultArg, NormalBB), Invoke); + } + ReturnInst::Create(Func->getContext(), ConstantInt::get(I32, 0), NormalBB); + // Handle the exceptional code path. + ReturnInst::Create(Func->getContext(), ConstantInt::get(I32, 1), ExceptionBB); + + // Create the outer call to the helper function. + SmallVector OuterCallArgs; + for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I) + OuterCallArgs.push_back(Invoke->getArgOperand(I)); + OuterCallArgs.push_back(Invoke->getCalledValue()); + OuterCallArgs.push_back(FrameJmpBuf); + if (ResultAlloca) + OuterCallArgs.push_back(ResultAlloca); + CallInst *OuterCall = CallInst::Create(HelperFunc, OuterCallArgs, + "invoke_is_exc", Invoke); + CopyDebug(OuterCall, Invoke); + + // Retrieve the function return value stored in the alloca. We only + // need to do this on the non-exceptional path, but we currently do + // it unconditionally because that is simpler. + if (ResultAlloca) { + Value *Result = new LoadInst(ResultAlloca, "", Invoke); + Result->takeName(Invoke); + Invoke->replaceAllUsesWith(Result); + } + return OuterCall; +} + +static void convertInvokeToCall(InvokeInst *Invoke) { + SmallVector CallArgs(Invoke->op_begin(), Invoke->op_end() - 3); + // Insert a normal call instruction. + CallInst *NewCall = CallInst::Create(Invoke->getCalledValue(), + CallArgs, "", Invoke); + CopyDebug(NewCall, Invoke); + NewCall->takeName(Invoke); + NewCall->setCallingConv(Invoke->getCallingConv()); + NewCall->setAttributes(Invoke->getAttributes()); + Invoke->replaceAllUsesWith(NewCall); + + // Insert an unconditional branch to the normal destination. + BranchInst::Create(Invoke->getNormalDest(), Invoke); + // Remove any PHI node entries from the exception destination. + Invoke->getUnwindDest()->removePredecessor(Invoke->getParent()); + Invoke->eraseFromParent(); +} + +void FuncRewriter::expandInvokeInst(InvokeInst *Invoke) { + // Calls to ReturnsTwice functions, i.e. setjmp(), can't be moved + // into a helper function. setjmp() can't throw an exception + // anyway, so convert the invoke to a call. + if (Invoke->hasFnAttr(Attribute::ReturnsTwice)) { + convertInvokeToCall(Invoke); + return; + } + + initializeFrame(); + + LandingPadInst *LP = Invoke->getLandingPadInst(); + Type *I32 = Type::getInt32Ty(Func->getContext()); + Value *ExcInfo = ConstantInt::get( + I32, ExcInfoWriter->getIDForLandingPadClauseList(LP)); + + // Append the new frame to the list. + Value *OldList = CopyDebug( + new LoadInst(EHStackTlsVar, "old_eh_stack", Invoke), Invoke); + CopyDebug(new StoreInst(OldList, FrameNextPtr, Invoke), Invoke); + CopyDebug(new StoreInst(ExcInfo, FrameExcInfo, Invoke), Invoke); + CopyDebug(new StoreInst(Frame, EHStackTlsVar, Invoke), Invoke); + Value *IsException = createSetjmpWrappedCall(Invoke); + // Restore the old frame list. 
We only need to do this on the + // non-exception code path, but we currently do it unconditionally + // because that is simpler. (The PNaCl C++ runtime library restores + // the old frame list on the exceptional path; doing it again here + // redundantly is OK.) + CopyDebug(new StoreInst(OldList, EHStackTlsVar, Invoke), Invoke); + + Value *IsZero = CopyDebug(new ICmpInst(Invoke, CmpInst::ICMP_EQ, IsException, + ConstantInt::get(I32, 0), + "invoke_sj_is_zero"), Invoke); + CopyDebug(BranchInst::Create(Invoke->getNormalDest(), Invoke->getUnwindDest(), + IsZero, Invoke), + Invoke); + + Invoke->eraseFromParent(); +} + +void FuncRewriter::expandResumeInst(ResumeInst *Resume) { + if (!EHResumeFunc) { + EHResumeFunc = Func->getParent()->getFunction("__pnacl_eh_resume"); + if (!EHResumeFunc) + report_fatal_error("__pnacl_eh_resume() not defined"); + } + + // The "resume" instruction gets passed the landingpad's full result + // (struct LandingPadResult above). Extract the exception_obj field + // to pass to __pnacl_eh_resume(), which doesn't need the + // matched_clause_id field. + unsigned Indexes[] = { 0 }; + Value *ExceptionPtr = + CopyDebug(ExtractValueInst::Create(Resume->getValue(), Indexes, + "resume_exc", Resume), Resume); + + // Cast to the pointer type that __pnacl_eh_resume() expects. + if (EHResumeFunc->getFunctionType()->getFunctionNumParams() != 1) + report_fatal_error("Bad type for __pnacl_eh_resume()"); + Type *ArgType = EHResumeFunc->getFunctionType()->getFunctionParamType(0); + ExceptionPtr = new BitCastInst(ExceptionPtr, ArgType, "resume_cast", Resume); + + Value *Args[] = { ExceptionPtr }; + CopyDebug(CallInst::Create(EHResumeFunc, Args, "", Resume), Resume); + new UnreachableInst(Func->getContext(), Resume); + Resume->eraseFromParent(); +} + +void FuncRewriter::expandFunc() { + Type *I32 = Type::getInt32Ty(Func->getContext()); + + // We need to do two passes: When we process an invoke we need to + // look at its landingpad, so we can't remove the landingpads until + // all the invokes have been processed. + for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { + Instruction *Inst = &*Iter++; + if (InvokeInst *Invoke = dyn_cast(Inst)) { + expandInvokeInst(Invoke); + } else if (ResumeInst *Resume = dyn_cast(Inst)) { + expandResumeInst(Resume); + } else if (IntrinsicInst *Intrinsic = dyn_cast(Inst)) { + if (Intrinsic->getIntrinsicID() == Intrinsic::eh_typeid_for) { + Value *ExcType = Intrinsic->getArgOperand(0); + Value *Val = ConstantInt::get( + I32, ExcInfoWriter->getIDForExceptionType(ExcType)); + Intrinsic->replaceAllUsesWith(Val); + Intrinsic->eraseFromParent(); + } + } + } + } + for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { + Instruction *Inst = &*Iter++; + if (LandingPadInst *LP = dyn_cast(Inst)) { + initializeFrame(); + Value *LPPtr = new BitCastInst( + FrameJmpBuf, LP->getType()->getPointerTo(), "landingpad_ptr", LP); + Value *LPVal = CopyDebug(new LoadInst(LPPtr, "", LP), LP); + LPVal->takeName(LP); + LP->replaceAllUsesWith(LPVal); + LP->eraseFromParent(); + } + } + } +} + +bool PNaClSjLjEH::runOnModule(Module &M) { + Type *JmpBufTy = ArrayType::get(Type::getInt8Ty(M.getContext()), + kPNaClJmpBufSize); + + // Define "struct ExceptionFrame". 
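+  // With kPNaClJmpBufSize and the fields added below, this is effectively
+  // (sketch):
+  //
+  //   %ExceptionFrame = type { [1024 x i8], %ExceptionFrame*, i32 }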
+ StructType *ExceptionFrameTy = StructType::create(M.getContext(), + "ExceptionFrame"); + Type *ExceptionFrameFields[] = { + JmpBufTy, // jmp_buf + ExceptionFrameTy->getPointerTo(), // struct ExceptionFrame *next + Type::getInt32Ty(M.getContext()) // Exception info (clause list ID) + }; + ExceptionFrameTy->setBody(ExceptionFrameFields); + + ExceptionInfoWriter ExcInfoWriter(&M.getContext()); + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + FuncRewriter Rewriter(ExceptionFrameTy, &ExcInfoWriter, &*Func); + Rewriter.expandFunc(); + } + ExcInfoWriter.defineGlobalVariables(&M); + return true; +} + +ModulePass *llvm::createPNaClSjLjEHPass() { + return new PNaClSjLjEH(); +} diff --git a/lib/Target/JSBackend/NaCl/PromoteI1Ops.cpp b/lib/Target/JSBackend/NaCl/PromoteI1Ops.cpp new file mode 100644 index 000000000000..2bb23b217a52 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/PromoteI1Ops.cpp @@ -0,0 +1,170 @@ +//===- PromoteI1Ops.cpp - Promote various operations on the i1 type--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out various operations on the i1 type so that +// these i1 operations do not need to be supported by the PNaCl +// translator. +// +// This is similar to the PromoteIntegers pass in that it removes uses +// of an unusual-size integer type. The difference is that i1 remains +// a valid type in other operations. i1 can still be used in phi +// nodes, "select" instructions, in "sext" and "zext", and so on. In +// contrast, the integer types that PromoteIntegers removes are not +// allowed in any context by PNaCl's ABI verifier. +// +// This pass expands out the following: +// +// * i1 loads and stores. +// * All i1 comparisons and arithmetic operations, with the exception +// of "and", "or" and "xor", because these are used in practice and +// don't overflow. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class PromoteI1Ops : public BasicBlockPass { + public: + static char ID; // Pass identification, replacement for typeid + PromoteI1Ops() : BasicBlockPass(ID) { + initializePromoteI1OpsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnBasicBlock(BasicBlock &BB); + }; +} + +char PromoteI1Ops::ID = 0; +INITIALIZE_PASS(PromoteI1Ops, "nacl-promote-i1-ops", + "Promote various operations on the i1 type", + false, false) + +static Value *promoteValue(Value *Val, bool SignExt, Instruction *InsertPt) { + Instruction::CastOps CastType = + SignExt ? 
Instruction::SExt : Instruction::ZExt; + return CopyDebug(CastInst::Create(CastType, Val, + Type::getInt8Ty(Val->getContext()), + Val->getName() + ".expand_i1_val", + InsertPt), InsertPt); +} + +bool PromoteI1Ops::runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + + Type *I1Ty = Type::getInt1Ty(BB.getContext()); + Type *I8Ty = Type::getInt8Ty(BB.getContext()); + + // Rewrite boolean Switch terminators: + if (SwitchInst *Switch = dyn_cast(BB.getTerminator())) { + Value *Condition = Switch->getCondition(); + Type *ConditionTy = Condition->getType(); + if (ConditionTy->isIntegerTy(1)) { + ConstantInt *False = + cast(ConstantInt::getFalse(ConditionTy)); + ConstantInt *True = + cast(ConstantInt::getTrue(ConditionTy)); + + SwitchInst::CaseIt FalseCase = Switch->findCaseValue(False); + SwitchInst::CaseIt TrueCase = Switch->findCaseValue(True); + + BasicBlock *FalseBlock = FalseCase.getCaseSuccessor(); + BasicBlock *TrueBlock = TrueCase.getCaseSuccessor(); + BasicBlock *DefaultDest = Switch->getDefaultDest(); + + if (TrueBlock && FalseBlock) { + // impossible destination + DefaultDest->removePredecessor(Switch->getParent()); + } + + if (!TrueBlock) { + TrueBlock = DefaultDest; + } + if (!FalseBlock) { + FalseBlock = DefaultDest; + } + + CopyDebug(BranchInst::Create(TrueBlock, FalseBlock, Condition, Switch), + Switch); + Switch->eraseFromParent(); + } + } + + for (BasicBlock::iterator Iter = BB.begin(), E = BB.end(); Iter != E; ) { + Instruction *Inst = &*Iter++; + if (LoadInst *Load = dyn_cast(Inst)) { + if (Load->getType() == I1Ty) { + Changed = true; + Value *Ptr = CopyDebug( + new BitCastInst( + Load->getPointerOperand(), I8Ty->getPointerTo(), + Load->getPointerOperand()->getName() + ".i8ptr", Load), Load); + LoadInst *NewLoad = new LoadInst( + Ptr, Load->getName() + ".pre_trunc", Load); + CopyDebug(NewLoad, Load); + CopyLoadOrStoreAttrs(NewLoad, Load); + Value *Result = CopyDebug(new TruncInst(NewLoad, I1Ty, "", Load), Load); + Result->takeName(Load); + Load->replaceAllUsesWith(Result); + Load->eraseFromParent(); + } + } else if (StoreInst *Store = dyn_cast(Inst)) { + if (Store->getValueOperand()->getType() == I1Ty) { + Changed = true; + Value *Ptr = CopyDebug( + new BitCastInst( + Store->getPointerOperand(), I8Ty->getPointerTo(), + Store->getPointerOperand()->getName() + ".i8ptr", Store), + Store); + Value *Val = promoteValue(Store->getValueOperand(), false, Store); + StoreInst *NewStore = new StoreInst(Val, Ptr, Store); + CopyDebug(NewStore, Store); + CopyLoadOrStoreAttrs(NewStore, Store); + Store->eraseFromParent(); + } + } else if (BinaryOperator *Op = dyn_cast(Inst)) { + if (Op->getType() == I1Ty && + !(Op->getOpcode() == Instruction::And || + Op->getOpcode() == Instruction::Or || + Op->getOpcode() == Instruction::Xor)) { + Value *Arg1 = promoteValue(Op->getOperand(0), false, Op); + Value *Arg2 = promoteValue(Op->getOperand(1), false, Op); + Value *NewOp = CopyDebug( + BinaryOperator::Create( + Op->getOpcode(), Arg1, Arg2, + Op->getName() + ".pre_trunc", Op), Op); + Value *Result = CopyDebug(new TruncInst(NewOp, I1Ty, "", Op), Op); + Result->takeName(Op); + Op->replaceAllUsesWith(Result); + Op->eraseFromParent(); + } + } else if (ICmpInst *Op = dyn_cast(Inst)) { + if (Op->getOperand(0)->getType() == I1Ty) { + Value *Arg1 = promoteValue(Op->getOperand(0), Op->isSigned(), Op); + Value *Arg2 = promoteValue(Op->getOperand(1), Op->isSigned(), Op); + Value *Result = CopyDebug( + new ICmpInst(Op, Op->getPredicate(), Arg1, Arg2, ""), Op); + Result->takeName(Op); + 
Op->replaceAllUsesWith(Result); + Op->eraseFromParent(); + } + } + } + return Changed; +} + +BasicBlockPass *llvm::createPromoteI1OpsPass() { + return new PromoteI1Ops(); +} diff --git a/lib/Target/JSBackend/NaCl/PromoteIntegers.cpp b/lib/Target/JSBackend/NaCl/PromoteIntegers.cpp new file mode 100644 index 000000000000..761f409b33a6 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/PromoteIntegers.cpp @@ -0,0 +1,737 @@ +//===- PromoteIntegers.cpp - Promote illegal integers for PNaCl ABI -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// A limited set of transformations to promote illegal-sized int types. +// +//===----------------------------------------------------------------------===// +// +// Legal sizes are currently 1, 8, and large power-of-two sizes. Operations on +// illegal integers are changed to operate on the next-higher legal size. +// +// It maintains no invariants about the upper bits (above the size of the +// original type); therefore before operations which can be affected by the +// value of these bits (e.g. cmp, select, lshr), the upper bits of the operands +// are cleared. +// +// Limitations: +// 1) It can't change function signatures or global variables +// 2) Doesn't handle arrays or structs with illegal types +// 3) Doesn't handle constant expressions (it also doesn't produce them, so it +// can run after ExpandConstantExpr) +// +//===----------------------------------------------------------------------===// + +#include "SimplifiedFuncTypeMap.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +static Type *getPromotedType(Type *Ty); + +namespace { + +class TypeMap : public SimplifiedFuncTypeMap { +protected: + MappingResult getSimpleFuncType(LLVMContext &Ctx, StructMap &Tentatives, + FunctionType *OldFnTy) override { + ParamTypeVector NewArgTypes; + + auto Ret = getPromotedArgType(Ctx, OldFnTy->getReturnType(), Tentatives); + bool Changed = Ret.isChanged(); + for (auto &ArgTy : OldFnTy->params()) { + auto NewArgTy = getPromotedArgType(Ctx, ArgTy, Tentatives); + NewArgTypes.push_back(NewArgTy); + Changed |= NewArgTy.isChanged(); + } + + auto *NewFctType = FunctionType::get(Ret, NewArgTypes, OldFnTy->isVarArg()); + return {NewFctType, Changed}; + } + +private: + MappingResult getPromotedArgType(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives) { + if (Ty->isIntegerTy()) { + auto *NTy = getPromotedType(Ty); + return {NTy, NTy != Ty}; + } + return getSimpleAggregateTypeInternal(Ctx, Ty, Tentatives); + } +}; + +class PromoteIntegers : public ModulePass { +public: + static char ID; + + PromoteIntegers() : ModulePass(ID) { + initializePromoteIntegersPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + +private: + typedef DenseMap DebugMap; + TypeMap TypeMapper; + + bool ensureCompliantSignature(LLVMContext &Ctx, Function *OldFct, Module &M); +}; +} // anonymous namespace + +char PromoteIntegers::ID = 0; + +INITIALIZE_PASS(PromoteIntegers, "nacl-promote-ints", + "Promote integer types which are illegal in 
PNaCl", false, + false) + +static bool isLegalSize(unsigned Size) { + return Size == 1 || (Size >= 8 && isPowerOf2_32(Size)); +} + +static Type *getPromotedIntType(IntegerType *Ty) { + auto Width = Ty->getBitWidth(); + if (isLegalSize(Width)) + return Ty; + assert(Width < (1ull << (sizeof(Width) * CHAR_BIT - 1)) && + "width can't be rounded to the next power of two"); + return IntegerType::get(Ty->getContext(), + Width < 8 ? 8 : NextPowerOf2(Width)); +} + +// Return a legal integer type, promoting to a larger size if necessary. +static Type *getPromotedType(Type *Ty) { + assert(isa(Ty) && "Trying to convert a non-integer type"); + return getPromotedIntType(cast(Ty)); +} + +// Return true if Val is an int which should be converted. +static bool shouldConvert(Value *Val) { + if (auto *ITy = dyn_cast(Val->getType())) + return !isLegalSize(ITy->getBitWidth()); + return false; +} + +// Return a constant which has been promoted to a legal size. +static Value *convertConstant(Constant *C, bool SignExt) { + assert(shouldConvert(C)); + Type *ProTy = getPromotedType(C->getType()); + // ConstantExpr of a Constant yields a Constant, not a ConstantExpr. + return SignExt ? ConstantExpr::getSExt(C, ProTy) + : ConstantExpr::getZExt(C, ProTy); +} + +namespace { +// Holds the state for converting/replacing values. Conversion is done in one +// pass, with each value requiring conversion possibly having two stages. When +// an instruction needs to be replaced (i.e. it has illegal operands or result) +// a new instruction is created, and the pass calls getConverted to get its +// operands. If the original operand has already been converted, the new value +// is returned. Otherwise, a placeholder is created and used in the new +// instruction. After a new instruction is created to replace an illegal one, +// recordConverted is called to register the replacement. All users are updated, +// and if there is a placeholder, its users are also updated. +// +// recordConverted also queues the old value for deletion. +// +// This strategy avoids the need for recursion or worklists for conversion. +class ConversionState { +public: + // Return the promoted value for Val. If Val has not yet been converted, + // return a placeholder, which will be converted later. + Value *getConverted(Value *Val) { + if (!shouldConvert(Val)) + return Val; + if (isa(Val)) + report_fatal_error("Can't convert illegal GlobalVariables"); + if (RewrittenMap.count(Val)) + return RewrittenMap[Val]; + + // Directly convert constants. + if (auto *C = dyn_cast(Val)) + return convertConstant(C, /*SignExt=*/false); + + // No converted value available yet, so create a placeholder. + auto *P = new Argument(getPromotedType(Val->getType())); + + RewrittenMap[Val] = P; + Placeholders[Val] = P; + return P; + } + + // Replace the uses of From with To, replace the uses of any + // placeholders for From, and optionally give From's name to To. + // Also mark To for deletion. + void recordConverted(Instruction *From, Value *To, bool TakeName = true) { + ToErase.push_back(From); + if (!shouldConvert(From)) { + // From does not produce an illegal value, update its users in place. + From->replaceAllUsesWith(To); + } else { + // From produces an illegal value, so its users will be replaced. When + // replacements are created they will use values returned by getConverted. + if (Placeholders.count(From)) { + // Users of the placeholder can be updated in place. 
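+      // (The placeholder was handed out by getConverted() while no converted
+      // value for From existed yet; pointing it at To here stitches those
+      // earlier uses to the real replacement.)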
+ Placeholders[From]->replaceAllUsesWith(To); + Placeholders.erase(From); + } + RewrittenMap[From] = To; + } + if (TakeName) { + To->takeName(From); + } + } + + void eraseReplacedInstructions() { + for (Instruction *E : ToErase) + E->dropAllReferences(); + for (Instruction *E : ToErase) + E->eraseFromParent(); + } + +private: + // Maps illegal values to their new converted values (or placeholders + // if no new value is available yet) + DenseMap RewrittenMap; + // Maps illegal values with no conversion available yet to their placeholders + DenseMap Placeholders; + // Illegal values which have already been converted, will be erased. + SmallVector ToErase; +}; +} // anonymous namespace + +// Create a BitCast instruction from the original Value being cast. These +// instructions aren't replaced by convertInstruction because they are pointer +// types (which are always valid), but their uses eventually lead to an invalid +// type. +static Value *CreateBitCast(IRBuilder<> *IRB, Value *From, Type *ToTy, + const Twine &Name) { + if (auto *BC = dyn_cast(From)) + return CreateBitCast(IRB, BC->getOperand(0), ToTy, Name); + return IRB->CreateBitCast(From, ToTy, Name); +} + +// Split an illegal load into multiple legal loads and return the resulting +// promoted value. The size of the load is assumed to be a multiple of 8. +// +// \param BaseAlign Alignment of the base load. +// \param Offset Offset from the base load. +static Value *splitLoad(DataLayout *DL, LoadInst *Inst, ConversionState &State, + unsigned BaseAlign, unsigned Offset) { + if (Inst->isVolatile() || Inst->isAtomic()) + report_fatal_error("Can't split volatile/atomic loads"); + if (DL->getTypeSizeInBits(Inst->getType()) % 8 != 0) + report_fatal_error("Loads must be a multiple of 8 bits"); + + auto *OrigPtr = State.getConverted(Inst->getPointerOperand()); + // OrigPtr is a placeholder in recursive calls, and so has no name. + if (OrigPtr->getName().empty()) + OrigPtr->setName(Inst->getPointerOperand()->getName()); + unsigned Width = DL->getTypeSizeInBits(Inst->getType()); + auto *NewType = getPromotedType(Inst->getType()); + unsigned LoWidth = PowerOf2Floor(Width); + assert(isLegalSize(LoWidth)); + + auto *LoType = IntegerType::get(Inst->getContext(), LoWidth); + auto *HiType = IntegerType::get(Inst->getContext(), Width - LoWidth); + IRBuilder<> IRB(Inst); + + auto *BCLo = CreateBitCast(&IRB, OrigPtr, LoType->getPointerTo(), + OrigPtr->getName() + ".loty"); + auto *LoadLo = IRB.CreateAlignedLoad(BCLo, MinAlign(BaseAlign, Offset), + Inst->getName() + ".lo"); + auto *LoExt = IRB.CreateZExt(LoadLo, NewType, LoadLo->getName() + ".ext"); + auto *GEPHi = IRB.CreateConstGEP1_32(BCLo, 1, OrigPtr->getName() + ".hi"); + auto *BCHi = CreateBitCast(&IRB, GEPHi, HiType->getPointerTo(), + OrigPtr->getName() + ".hity"); + + auto HiOffset = (Offset + LoWidth) / CHAR_BIT; + auto *LoadHi = IRB.CreateAlignedLoad(BCHi, MinAlign(BaseAlign, HiOffset), + Inst->getName() + ".hi"); + auto *Hi = !isLegalSize(Width - LoWidth) + ? 
splitLoad(DL, LoadHi, State, BaseAlign, HiOffset) + : LoadHi; + + auto *HiExt = IRB.CreateZExt(Hi, NewType, Hi->getName() + ".ext"); + auto *HiShift = IRB.CreateShl(HiExt, LoWidth, HiExt->getName() + ".sh"); + auto *Result = IRB.CreateOr(LoExt, HiShift); + + State.recordConverted(Inst, Result); + + return Result; +} + +static Value *splitStore(DataLayout *DL, StoreInst *Inst, + ConversionState &State, unsigned BaseAlign, + unsigned Offset) { + if (Inst->isVolatile() || Inst->isAtomic()) + report_fatal_error("Can't split volatile/atomic stores"); + if (DL->getTypeSizeInBits(Inst->getValueOperand()->getType()) % 8 != 0) + report_fatal_error("Stores must be a multiple of 8 bits"); + + auto *OrigPtr = State.getConverted(Inst->getPointerOperand()); + // OrigPtr is now a placeholder in recursive calls, and so has no name. + if (OrigPtr->getName().empty()) + OrigPtr->setName(Inst->getPointerOperand()->getName()); + auto *OrigVal = State.getConverted(Inst->getValueOperand()); + unsigned Width = DL->getTypeSizeInBits(Inst->getValueOperand()->getType()); + unsigned LoWidth = PowerOf2Floor(Width); + assert(isLegalSize(LoWidth)); + + auto *LoType = IntegerType::get(Inst->getContext(), LoWidth); + auto *HiType = IntegerType::get(Inst->getContext(), Width - LoWidth); + IRBuilder<> IRB(Inst); + + auto *BCLo = CreateBitCast(&IRB, OrigPtr, LoType->getPointerTo(), + OrigPtr->getName() + ".loty"); + auto *LoTrunc = IRB.CreateTrunc(OrigVal, LoType, OrigVal->getName() + ".lo"); + IRB.CreateAlignedStore(LoTrunc, BCLo, MinAlign(BaseAlign, Offset)); + + auto HiOffset = (Offset + LoWidth) / CHAR_BIT; + auto *HiLShr = + IRB.CreateLShr(OrigVal, LoWidth, OrigVal->getName() + ".hi.sh"); + auto *GEPHi = IRB.CreateConstGEP1_32(BCLo, 1, OrigPtr->getName() + ".hi"); + auto *HiTrunc = IRB.CreateTrunc(HiLShr, HiType, OrigVal->getName() + ".hi"); + auto *BCHi = CreateBitCast(&IRB, GEPHi, HiType->getPointerTo(), + OrigPtr->getName() + ".hity"); + + auto *StoreHi = + IRB.CreateAlignedStore(HiTrunc, BCHi, MinAlign(BaseAlign, HiOffset)); + Value *Hi = StoreHi; + + if (!isLegalSize(Width - LoWidth)) { + // HiTrunc is still illegal, and is redundant with the truncate in the + // recursive call, so just get rid of it. If HiTrunc is a constant then the + // IRB will have just returned a shifted, truncated constant, which is + // already uniqued (and does not need to be RAUWed), and recordConverted + // expects constants. + if (!isa(HiTrunc)) + State.recordConverted(cast(HiTrunc), HiLShr, + /*TakeName=*/false); + Hi = splitStore(DL, StoreHi, State, BaseAlign, HiOffset); + } + State.recordConverted(Inst, Hi, /*TakeName=*/false); + return Hi; +} + +// Return a converted value with the bits of the operand above the size of the +// original type cleared. +static Value *getClearConverted(Value *Operand, Instruction *InsertPt, + ConversionState &State) { + auto *OrigType = Operand->getType(); + auto *OrigInst = dyn_cast(Operand); + Operand = State.getConverted(Operand); + // If the operand is a constant, it will have been created by + // ConversionState.getConverted, which zero-extends by default. 
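+  // For example, an illegal i24 constant is promoted with zext, so its upper
+  // i32 bits are already zero and no extra masking is needed.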
+ if (isa(Operand)) + return Operand; + Instruction *NewInst = BinaryOperator::Create( + Instruction::And, Operand, + ConstantInt::get( + getPromotedType(OrigType), + APInt::getLowBitsSet(getPromotedType(OrigType)->getIntegerBitWidth(), + OrigType->getIntegerBitWidth())), + Operand->getName() + ".clear", InsertPt); + if (OrigInst) + CopyDebug(NewInst, OrigInst); + return NewInst; +} + +// Return a value with the bits of the operand above the size of the original +// type equal to the sign bit of the original operand. The new operand is +// assumed to have been legalized already. +// This is done by shifting the sign bit of the smaller value up to the MSB +// position in the larger size, and then arithmetic-shifting it back down. +static Value *getSignExtend(Value *Operand, Value *OrigOperand, + Instruction *InsertPt) { + // If OrigOperand was a constant, NewOperand will have been created by + // ConversionState.getConverted, which zero-extends by default. But that is + // wrong here, so replace it with a sign-extended constant. + if (Constant *C = dyn_cast(OrigOperand)) + return convertConstant(C, /*SignExt=*/true); + Type *OrigType = OrigOperand->getType(); + ConstantInt *ShiftAmt = + ConstantInt::getSigned(cast(getPromotedType(OrigType)), + getPromotedType(OrigType)->getIntegerBitWidth() - + OrigType->getIntegerBitWidth()); + BinaryOperator *Shl = + BinaryOperator::Create(Instruction::Shl, Operand, ShiftAmt, + Operand->getName() + ".getsign", InsertPt); + if (Instruction *Inst = dyn_cast(OrigOperand)) + CopyDebug(Shl, Inst); + return CopyDebug(BinaryOperator::Create(Instruction::AShr, Shl, ShiftAmt, + Operand->getName() + ".signed", + InsertPt), + Shl); +} + +static void convertInstruction(DataLayout *DL, Instruction *Inst, + ConversionState &State) { + if (SExtInst *Sext = dyn_cast(Inst)) { + Value *Op = Sext->getOperand(0); + Value *NewInst = nullptr; + // If the operand to be extended is illegal, we first need to fill its + // upper bits with its sign bit. + if (shouldConvert(Op)) { + NewInst = getSignExtend(State.getConverted(Op), Op, Sext); + } + // If the converted type of the operand is the same as the converted + // type of the result, we won't actually be changing the type of the + // variable, just its value. + if (getPromotedType(Op->getType()) != getPromotedType(Sext->getType())) { + NewInst = CopyDebug( + new SExtInst(NewInst ? NewInst : State.getConverted(Op), + getPromotedType(cast(Sext->getType())), + Sext->getName() + ".sext", Sext), + Sext); + } + assert(NewInst && "Failed to convert sign extension"); + State.recordConverted(Sext, NewInst); + } else if (ZExtInst *Zext = dyn_cast(Inst)) { + Value *Op = Zext->getOperand(0); + Value *NewInst = nullptr; + if (shouldConvert(Op)) { + NewInst = getClearConverted(Op, Zext, State); + } + // If the converted type of the operand is the same as the converted + // type of the result, we won't actually be changing the type of the + // variable, just its value. + if (getPromotedType(Op->getType()) != getPromotedType(Zext->getType())) { + NewInst = CopyDebug( + CastInst::CreateZExtOrBitCast( + NewInst ? NewInst : State.getConverted(Op), + getPromotedType(cast(Zext->getType())), "", Zext), + Zext); + } + assert(NewInst); + State.recordConverted(Zext, NewInst); + } else if (TruncInst *Trunc = dyn_cast(Inst)) { + Value *Op = Trunc->getOperand(0); + Value *NewInst; + // If the converted type of the operand is the same as the converted + // type of the result, we don't actually need to change the type of the + // variable, just its value. 
However, because we don't care about the values + // of the upper bits until they are consumed, truncation can be a no-op. + if (getPromotedType(Op->getType()) != getPromotedType(Trunc->getType())) { + NewInst = CopyDebug( + new TruncInst(State.getConverted(Op), + getPromotedType(cast(Trunc->getType())), + State.getConverted(Op)->getName() + ".trunc", Trunc), + Trunc); + } else { + NewInst = State.getConverted(Op); + } + State.recordConverted(Trunc, NewInst); + } else if (LoadInst *Load = dyn_cast(Inst)) { + if (shouldConvert(Load)) { + unsigned BaseAlign = Load->getAlignment() == 0 + ? DL->getABITypeAlignment(Load->getType()) + : Load->getAlignment(); + splitLoad(DL, Load, State, BaseAlign, /*Offset=*/0); + } + } else if (StoreInst *Store = dyn_cast(Inst)) { + if (shouldConvert(Store->getValueOperand())) { + unsigned BaseAlign = + Store->getAlignment() == 0 + ? DL->getABITypeAlignment(Store->getValueOperand()->getType()) + : Store->getAlignment(); + splitStore(DL, Store, State, BaseAlign, /*Offset=*/0); + } + } else if (isa(Inst) || isa(Inst) || + isa(Inst)) { + for (unsigned I = 0; I < Inst->getNumOperands(); I++) { + auto *Arg = Inst->getOperand(I); + if (shouldConvert(Arg)) + Inst->setOperand(I, State.getConverted(Arg)); + } + if (shouldConvert(Inst)) { + Inst->mutateType(getPromotedType(Inst->getType())); + } + } else if (auto *Ret = dyn_cast(Inst)) { + auto *NewRet = ReturnInst::Create( + Ret->getContext(), State.getConverted(Ret->getReturnValue()), Inst); + State.recordConverted(Ret, NewRet); + } else if (auto *Resume = dyn_cast(Inst)) { + auto *NewRes = + ResumeInst::Create(State.getConverted(Resume->getValue()), Inst); + State.recordConverted(Ret, NewRes); + } else if (BinaryOperator *Binop = dyn_cast(Inst)) { + Value *NewInst = nullptr; + switch (Binop->getOpcode()) { + case Instruction::AShr: { + // The AShr operand needs to be sign-extended to the promoted size + // before shifting. Because the sign-extension is implemented with + // with AShr, it can be combined with the original operation. + Value *Op = Binop->getOperand(0); + Value *ShiftAmount = nullptr; + APInt SignShiftAmt = + APInt(getPromotedType(Op->getType())->getIntegerBitWidth(), + getPromotedType(Op->getType())->getIntegerBitWidth() - + Op->getType()->getIntegerBitWidth()); + NewInst = CopyDebug( + BinaryOperator::Create( + Instruction::Shl, State.getConverted(Op), + ConstantInt::get(getPromotedType(Op->getType()), SignShiftAmt), + State.getConverted(Op)->getName() + ".getsign", Binop), + Binop); + if (ConstantInt *C = + dyn_cast(State.getConverted(Binop->getOperand(1)))) { + ShiftAmount = ConstantInt::get(getPromotedType(Op->getType()), + SignShiftAmt + C->getValue()); + } else { + // Clear the upper bits of the original shift amount, and add back the + // amount we shifted to get the sign bit. + ShiftAmount = getClearConverted(Binop->getOperand(1), Binop, State); + ShiftAmount = + CopyDebug(BinaryOperator::Create( + Instruction::Add, ShiftAmount, + ConstantInt::get( + getPromotedType(Binop->getOperand(1)->getType()), + SignShiftAmt), + State.getConverted(Op)->getName() + ".shamt", Binop), + Binop); + } + NewInst = CopyDebug( + BinaryOperator::Create(Instruction::AShr, NewInst, ShiftAmount, + Binop->getName() + ".result", Binop), + Binop); + break; + } + + case Instruction::LShr: + case Instruction::Shl: { + // For LShr, clear the upper bits of the operand before shifting them + // down into the valid part of the value. + Value *Op = Binop->getOpcode() == Instruction::LShr + ? 
getClearConverted(Binop->getOperand(0), Binop, State) + : State.getConverted(Binop->getOperand(0)); + NewInst = BinaryOperator::Create( + Binop->getOpcode(), Op, + // Clear the upper bits of the shift amount. + getClearConverted(Binop->getOperand(1), Binop, State), + Binop->getName() + ".result", Binop); + break; + } + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // These operations don't care about the state of the upper bits. + NewInst = CopyDebug( + BinaryOperator::Create(Binop->getOpcode(), + State.getConverted(Binop->getOperand(0)), + State.getConverted(Binop->getOperand(1)), + Binop->getName() + ".result", Binop), + Binop); + break; + case Instruction::UDiv: + case Instruction::URem: + NewInst = + CopyDebug(BinaryOperator::Create( + Binop->getOpcode(), + getClearConverted(Binop->getOperand(0), Binop, State), + getClearConverted(Binop->getOperand(1), Binop, State), + Binop->getName() + ".result", Binop), + Binop); + break; + case Instruction::SDiv: + case Instruction::SRem: + NewInst = + CopyDebug(BinaryOperator::Create( + Binop->getOpcode(), + getSignExtend(State.getConverted(Binop->getOperand(0)), + Binop->getOperand(0), Binop), + getSignExtend(State.getConverted(Binop->getOperand(1)), + Binop->getOperand(0), Binop), + Binop->getName() + ".result", Binop), + Binop); + break; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::BinaryOpsEnd: + // We should not see FP operators here. + errs() << *Inst << "\n"; + llvm_unreachable("Cannot handle binary operator"); + break; + } + if (isa(NewInst)) { + cast(NewInst) + ->setHasNoUnsignedWrap(Binop->hasNoUnsignedWrap()); + cast(NewInst) + ->setHasNoSignedWrap(Binop->hasNoSignedWrap()); + } + State.recordConverted(Binop, NewInst); + } else if (ICmpInst *Cmp = dyn_cast(Inst)) { + Value *Op0, *Op1; + // For signed compares, operands are sign-extended to their + // promoted type. For unsigned or equality compares, the upper bits are + // cleared. 
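+    // Illustrative example (an i24 carried in an i32): the 24-bit value -1 is
+    // the pattern 0xFFFFFF with unspecified upper bits.
+    //  * signed compares: getSignExtend() emits shl 8 + ashr 8, giving
+    //    0xFFFFFFFF, so the compare still sees -1.
+    //  * unsigned/equality compares: getClearConverted() masks with
+    //    0x00FFFFFF, so stale upper bits cannot flip the result.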
+ if (Cmp->isSigned()) { + Op0 = getSignExtend(State.getConverted(Cmp->getOperand(0)), + Cmp->getOperand(0), Cmp); + Op1 = getSignExtend(State.getConverted(Cmp->getOperand(1)), + Cmp->getOperand(1), Cmp); + } else { + Op0 = getClearConverted(Cmp->getOperand(0), Cmp, State); + Op1 = getClearConverted(Cmp->getOperand(1), Cmp, State); + } + Instruction *NewInst = + CopyDebug(new ICmpInst(Cmp, Cmp->getPredicate(), Op0, Op1, ""), Cmp); + State.recordConverted(Cmp, NewInst); + } else if (SelectInst *Select = dyn_cast(Inst)) { + Instruction *NewInst = CopyDebug( + SelectInst::Create( + Select->getCondition(), State.getConverted(Select->getTrueValue()), + State.getConverted(Select->getFalseValue()), "", Select), + Select); + State.recordConverted(Select, NewInst); + } else if (PHINode *Phi = dyn_cast(Inst)) { + PHINode *NewPhi = PHINode::Create(getPromotedType(Phi->getType()), + Phi->getNumIncomingValues(), "", Phi); + CopyDebug(NewPhi, Phi); + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I < E; ++I) { + NewPhi->addIncoming(State.getConverted(Phi->getIncomingValue(I)), + Phi->getIncomingBlock(I)); + } + State.recordConverted(Phi, NewPhi); + } else if (SwitchInst *Switch = dyn_cast(Inst)) { + Value *Condition = getClearConverted(Switch->getCondition(), Switch, State); + SwitchInst *NewInst = SwitchInst::Create( + Condition, Switch->getDefaultDest(), Switch->getNumCases(), Switch); + CopyDebug(NewInst, Switch); + for (SwitchInst::CaseIt I = Switch->case_begin(), E = Switch->case_end(); + I != E; ++I) { + NewInst->addCase(cast(convertConstant(I.getCaseValue(), + /*SignExt=*/false)), + I.getCaseSuccessor()); + } + Switch->eraseFromParent(); + } else { + errs() << *Inst << "\n"; + llvm_unreachable("unhandled instruction"); + } +} + +static bool processFunction(Function &F, DataLayout &DL) { + ConversionState State; + bool Modified = false; // XXX Emscripten: Fixed use of an uninitialized variable. + for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + for (auto BBI = FI->begin(), BBE = FI->end(); BBI != BBE;) { + Instruction *Inst = &*BBI++; + // Only attempt to convert an instruction if its result or any of its + // operands are illegal. + bool ShouldConvert = shouldConvert(Inst); + for (auto OI = Inst->op_begin(), OE = Inst->op_end(); OI != OE; ++OI) + ShouldConvert |= shouldConvert(cast(OI)); + + if (ShouldConvert) { + convertInstruction(&DL, Inst, State); + Modified = true; + } + } + } + State.eraseReplacedInstructions(); + + if (Modified) + // Clean up bitcasts that were create with constexprs in them. + std::unique_ptr(createExpandConstantExprPass()) + ->runOnFunction(F); + return Modified; +} + +bool PromoteIntegers::ensureCompliantSignature( + LLVMContext &Ctx, Function *OldFct, Module &M) { + + auto *NewFctType = cast( + TypeMapper.getSimpleType(Ctx, OldFct->getFunctionType())); + if (NewFctType == OldFct->getFunctionType()) + return false; + + auto *NewFct = Function::Create(NewFctType, OldFct->getLinkage(), "", &M); + + NewFct->takeName(OldFct); + NewFct->copyAttributesFrom(OldFct); + for (auto UseIter = OldFct->use_begin(), E = OldFct->use_end(); + E != UseIter;) { + Use &FctUse = *(UseIter++); + // Types are not going to match after this. 
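+    // Use::set() is used here instead of replaceAllUsesWith() because RAUW
+    // asserts that the old and new values have identical types, which is
+    // deliberately not the case for OldFct and NewFct.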
+ FctUse.set(NewFct); + } + + if (OldFct->empty()) + return true; + + NewFct->getBasicBlockList().splice(NewFct->begin(), + OldFct->getBasicBlockList()); + IRBuilder<> Builder(&*NewFct->getEntryBlock().getFirstInsertionPt()); + + auto OldArgIter = OldFct->getArgumentList().begin(); + for (auto &NewArg : NewFct->getArgumentList()) { + Argument *OldArg = &*OldArgIter++; + + if (OldArg->getType() != NewArg.getType()) { + if (NewArg.getType()->isIntegerTy()) { + auto *Replacement = Builder.CreateTrunc(&NewArg, OldArg->getType()); + Replacement->takeName(OldArg); + NewArg.setName(Replacement->getName() + ".exp"); + OldArg->replaceAllUsesWith(Replacement); + } else { + // Blindly replace the type of the uses, this is some composite + // like a function type. + NewArg.takeName(OldArg); + for (auto UseIter = OldArg->use_begin(), E = OldArg->use_end(); + E != UseIter;) { + Use &AUse = *(UseIter++); + AUse.set(&NewArg); + } + } + } else { + NewArg.takeName(OldArg); + OldArg->replaceAllUsesWith(&NewArg); + } + } + + return true; +} + +bool PromoteIntegers::runOnModule(Module &M) { + DataLayout DL(&M); + LLVMContext &Ctx = M.getContext(); + bool Modified = false; + + // Change function signatures first. + for (auto I = M.begin(), E = M.end(); I != E;) { + Function *F = &*I++; + bool Changed = ensureCompliantSignature(Ctx, F, M); + if (Changed) + F->eraseFromParent(); + Modified |= Changed; + } + + for (auto &F : M.getFunctionList()) + Modified |= processFunction(F, DL); + + return Modified; +} + +ModulePass *llvm::createPromoteIntegersPass() { return new PromoteIntegers(); } diff --git a/lib/Target/JSBackend/NaCl/RemoveAsmMemory.cpp b/lib/Target/JSBackend/NaCl/RemoveAsmMemory.cpp new file mode 100644 index 000000000000..f06933b6dd2a --- /dev/null +++ b/lib/Target/JSBackend/NaCl/RemoveAsmMemory.cpp @@ -0,0 +1,70 @@ +//===- RemoveAsmMemory.cpp - Remove ``asm("":::"memory")`` ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass removes all instances of ``asm("":::"memory")``. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Pass.h" +#include + +using namespace llvm; + +namespace { +class RemoveAsmMemory : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + RemoveAsmMemory() : FunctionPass(ID) { + initializeRemoveAsmMemoryPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; +}; + +class AsmDirectivesVisitor : public InstVisitor { +public: + AsmDirectivesVisitor() : ModifiedFunction(false) {} + ~AsmDirectivesVisitor() {} + bool modifiedFunction() const { return ModifiedFunction; } + + /// Only Call Instructions are ever inline assembly directives. 
+ void visitCallInst(CallInst &CI); + +private: + bool ModifiedFunction; + + AsmDirectivesVisitor(const AsmDirectivesVisitor &) = delete; + AsmDirectivesVisitor &operator=(const AsmDirectivesVisitor &) = delete; +}; +} + +char RemoveAsmMemory::ID = 0; +INITIALIZE_PASS(RemoveAsmMemory, "remove-asm-memory", + "remove all instances of ``asm(\"\":::\"memory\")``", false, + false) + +bool RemoveAsmMemory::runOnFunction(Function &F) { + AsmDirectivesVisitor AV; + AV.visit(F); + return AV.modifiedFunction(); +} + +void AsmDirectivesVisitor::visitCallInst(CallInst &CI) { + llvm_unreachable("no longer maintained"); +} + +namespace llvm { +FunctionPass *createRemoveAsmMemoryPass() { return new RemoveAsmMemory(); } +} diff --git a/lib/Target/JSBackend/NaCl/ReplacePtrsWithInts.cpp b/lib/Target/JSBackend/NaCl/ReplacePtrsWithInts.cpp new file mode 100644 index 000000000000..86f311915b36 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ReplacePtrsWithInts.cpp @@ -0,0 +1,593 @@ +//===- ReplacePtrsWithInts.cpp - Convert pointer values to integer values--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass strips out aggregate pointer types and replaces them with +// the integer type iPTR, which is i32 for PNaCl (though this pass +// will allow iPTR to be i64 if the DataLayout specifies 64-bit +// pointers). +// +// This pass relies on -simplify-allocas to transform allocas into arrays of +// bytes. +// +// The pass converts IR to the following normal form: +// +// All inttoptr and ptrtoint instructions use the same integer size +// (iPTR), so they do not implicitly truncate or zero-extend. +// +// Pointer types only appear in the following instructions: +// * loads and stores: the pointer operand is a NormalizedPtr. +// * function calls: the function operand is a NormalizedPtr. +// * intrinsic calls: any pointer arguments are NormalizedPtrs. +// * alloca +// * bitcast and inttoptr: only used as part of a NormalizedPtr. +// * ptrtoint: the operand is an InherentPtr. +// +// Where an InherentPtr is defined as a pointer value that is: +// * an alloca; +// * a GlobalValue (a function or global variable); or +// * an intrinsic call. +// +// And a NormalizedPtr is defined as a pointer value that is: +// * an inttoptr instruction; +// * an InherentPtr; or +// * a bitcast of an InherentPtr. +// +// This pass currently strips out lifetime markers (that is, calls to +// the llvm.lifetime.start/end intrinsics) and invariant markers +// (calls to llvm.invariant.start/end). +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because the pass must recreate functions in + // order to change their argument and return types. 
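+  // For example (a sketch only, not literal pass output), a function
+  //
+  //   define i8* @f(i32* %p)
+  //
+  // is recreated as
+  //
+  //   define i32 @f(i32 %p)
+  //
+  // with ptrtoint/inttoptr casts inserted at the points listed in the file
+  // comment above wherever a real pointer is still required.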
+ struct ReplacePtrsWithInts : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ReplacePtrsWithInts() : ModulePass(ID) { + initializeReplacePtrsWithIntsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; + + // FunctionConverter stores the state for mapping old instructions + // (of pointer type) to converted instructions (of integer type) + // within a function, and provides methods for doing the conversion. + class FunctionConverter { + // Int type that pointer types are to be replaced with, typically i32. + Type *IntPtrType; + + struct RewrittenVal { + RewrittenVal(): Placeholder(NULL), NewIntVal(NULL) {} + Value *Placeholder; + Value *NewIntVal; + }; + // Maps from old values (of pointer type) to converted values (of + // IntPtrType type). + DenseMap RewriteMap; + + public: + FunctionConverter(Type *IntPtrType) : IntPtrType(IntPtrType) {} + + // Returns the normalized version of the given type, converting + // pointer types to IntPtrType. + Type *convertType(Type *Ty); + // Returns the normalized version of the given function type by + // normalizing the function's argument types. + FunctionType *convertFuncType(FunctionType *FTy); + + // Records that 'To' is the normalized version of 'From'. If 'To' + // is not of pointer type, no type conversion is required, so this + // can take the short cut of replacing 'To' with 'From'. + void recordConverted(Value *From, Value *To); + void recordConvertedAndErase(Instruction *From, Value *To); + + // Returns Val with no-op casts (those that convert between + // IntPtrType and pointer types) stripped off. + Value *stripNoopCasts(Value *Val); + + // Returns the normalized version of the given value. + // + // If the conversion of Val has been deferred, this returns a + // placeholder object, which will later be replaceAllUsesWith'd to + // the final value. Since replaceAllUsesWith does not work on + // references by metadata nodes, this can be bypassed using + // BypassPlaceholder to get the real converted value, assuming it + // is available. + Value *convert(Value *Val, bool BypassPlaceholder = false); + // Returns the NormalizedPtr form of the given pointer value. + // Inserts conversion instructions at InsertPt. + Value *convertBackToPtr(Value *Val, Instruction *InsertPt); + // Returns the NormalizedPtr form of the given function pointer. + // Inserts conversion instructions at InsertPt. + Value *convertFunctionPtr(Value *Callee, Instruction *InsertPt); + // Converts an instruction without recreating it, by wrapping its + // operands and result. + void convertInPlace(Instruction *Inst); + + void eraseReplacedInstructions(); + + // List of instructions whose deletion has been deferred. 
+ SmallVector ToErase; + }; +} + +Type *FunctionConverter::convertType(Type *Ty) { + if (Ty->isPointerTy()) + return IntPtrType; + return Ty; +} + +FunctionType *FunctionConverter::convertFuncType(FunctionType *FTy) { + SmallVector ArgTypes; + for (FunctionType::param_iterator ArgTy = FTy->param_begin(), + E = FTy->param_end(); ArgTy != E; ++ArgTy) { + ArgTypes.push_back(convertType(*ArgTy)); + } + return FunctionType::get(convertType(FTy->getReturnType()), ArgTypes, + FTy->isVarArg()); +} + +void FunctionConverter::recordConverted(Value *From, Value *To) { + if (!From->getType()->isPointerTy()) { + From->replaceAllUsesWith(To); + return; + } + RewrittenVal *RV = &RewriteMap[From]; + assert(!RV->NewIntVal); + RV->NewIntVal = To; +} + +void FunctionConverter::recordConvertedAndErase(Instruction *From, Value *To) { + recordConverted(From, To); + // There may still be references to this value, so defer deleting it. + ToErase.push_back(From); +} + +Value *FunctionConverter::stripNoopCasts(Value *Val) { + SmallPtrSet Visited; + for (;;) { + if (!Visited.insert(Val).second) { + // It is possible to get a circular reference in unreachable + // basic blocks. Handle this case for completeness. + return UndefValue::get(Val->getType()); + } + if (CastInst *Cast = dyn_cast(Val)) { + Value *Src = Cast->getOperand(0); + if ((isa(Cast) && Cast->getType()->isPointerTy()) || + (isa(Cast) && Cast->getType() == IntPtrType) || + (isa(Cast) && Src->getType() == IntPtrType)) { + Val = Src; + continue; + } + } + return Val; + } +} + +Value *FunctionConverter::convert(Value *Val, bool BypassPlaceholder) { + Val = stripNoopCasts(Val); + if (!Val->getType()->isPointerTy()) + return Val; + if (Constant *C = dyn_cast(Val)) + return ConstantExpr::getPtrToInt(C, IntPtrType); + RewrittenVal *RV = &RewriteMap[Val]; + if (BypassPlaceholder) { + assert(RV->NewIntVal); + return RV->NewIntVal; + } + if (!RV->Placeholder) + RV->Placeholder = new Argument(convertType(Val->getType())); + return RV->Placeholder; +} + +Value *FunctionConverter::convertBackToPtr(Value *Val, Instruction *InsertPt) { + Type *NewTy = + convertType(Val->getType()->getPointerElementType())->getPointerTo(); + return new IntToPtrInst(convert(Val), NewTy, "", InsertPt); +} + +Value *FunctionConverter::convertFunctionPtr(Value *Callee, + Instruction *InsertPt) { + FunctionType *FuncType = cast( + Callee->getType()->getPointerElementType()); + return new IntToPtrInst(convert(Callee), + convertFuncType(FuncType)->getPointerTo(), + "", InsertPt); +} + +static bool ShouldLeaveAlone(Value *V) { + if (Function *F = dyn_cast(V)) + return F->isIntrinsic(); + if (isa(V)) + return true; + return false; +} + +void FunctionConverter::convertInPlace(Instruction *Inst) { + // Convert operands. + for (unsigned I = 0; I < Inst->getNumOperands(); ++I) { + Value *Arg = Inst->getOperand(I); + if (Arg->getType()->isPointerTy() && !ShouldLeaveAlone(Arg)) { + Value *Conv = convert(Arg); + Inst->setOperand(I, new IntToPtrInst(Conv, Arg->getType(), "", Inst)); + } + } + // Convert result. 
+ if (Inst->getType()->isPointerTy()) { + Instruction *Cast = new PtrToIntInst( + Inst, convertType(Inst->getType()), Inst->getName() + ".asint"); + Cast->insertAfter(Inst); + recordConverted(Inst, Cast); + } +} + +void FunctionConverter::eraseReplacedInstructions() { + bool Error = false; + for (DenseMap::iterator I = RewriteMap.begin(), + E = RewriteMap.end(); I != E; ++I) { + if (I->second.Placeholder) { + if (I->second.NewIntVal) { + I->second.Placeholder->replaceAllUsesWith(I->second.NewIntVal); + } else { + errs() << "Not converted: " << *I->first << "\n"; + Error = true; + } + } + } + if (Error) + report_fatal_error("Case not handled in ReplacePtrsWithInts"); + + // Delete the placeholders in a separate pass. This means that if + // one placeholder is accidentally rewritten to another, we will get + // a useful error message rather than accessing a dangling pointer. + for (DenseMap::iterator I = RewriteMap.begin(), + E = RewriteMap.end(); I != E; ++I) { + delete I->second.Placeholder; + } + + // We must do dropAllReferences() before doing eraseFromParent(), + // otherwise we will try to erase instructions that are still + // referenced. + for (SmallVectorImpl::iterator I = ToErase.begin(), + E = ToErase.end(); + I != E; ++I) { + (*I)->dropAllReferences(); + } + for (SmallVectorImpl::iterator I = ToErase.begin(), + E = ToErase.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } +} + +// Remove attributes that only apply to pointer arguments. Returns +// the updated AttributeSet. +static AttributeSet RemovePointerAttrs(LLVMContext &Context, + AttributeSet Attrs) { + SmallVector AttrList; + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + unsigned Index = Attrs.getSlotIndex(Slot); + AttrBuilder AB; + for (AttributeSet::iterator Attr = Attrs.begin(Slot), E = Attrs.end(Slot); + Attr != E; ++Attr) { + if (!Attr->isEnumAttribute()) { + continue; + } + switch (Attr->getKindAsEnum()) { + // ByVal and StructRet should already have been removed by the + // ExpandByVal pass. + case Attribute::ByVal: + case Attribute::StructRet: + case Attribute::Nest: + Attrs.dump(); + report_fatal_error("ReplacePtrsWithInts cannot handle " + "byval, sret or nest attrs"); + break; + // Strip these attributes because they apply only to pointers. This pass + // rewrites pointer arguments, thus these parameter attributes are + // meaningless. Also, they are rejected by the PNaCl module verifier. 
+ case Attribute::NoCapture: + case Attribute::NoAlias: + case Attribute::ReadNone: + case Attribute::ReadOnly: + case Attribute::NonNull: + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + break; + default: + AB.addAttribute(*Attr); + } + } + AttrList.push_back(AttributeSet::get(Context, Index, AB)); + } + return AttributeSet::get(Context, AttrList); +} + +static void ConvertInstruction(DataLayout *DL, Type *IntPtrType, + FunctionConverter *FC, Instruction *Inst) { + if (ReturnInst *Ret = dyn_cast(Inst)) { + Value *Result = Ret->getReturnValue(); + if (Result) + Result = FC->convert(Result); + CopyDebug(ReturnInst::Create(Ret->getContext(), Result, Ret), Inst); + Ret->eraseFromParent(); + } else if (PHINode *Phi = dyn_cast(Inst)) { + PHINode *Phi2 = PHINode::Create(FC->convertType(Phi->getType()), + Phi->getNumIncomingValues(), + "", Phi); + CopyDebug(Phi2, Phi); + for (unsigned I = 0; I < Phi->getNumIncomingValues(); ++I) { + Phi2->addIncoming(FC->convert(Phi->getIncomingValue(I)), + Phi->getIncomingBlock(I)); + } + Phi2->takeName(Phi); + FC->recordConvertedAndErase(Phi, Phi2); + } else if (SelectInst *Op = dyn_cast(Inst)) { + Instruction *Op2 = SelectInst::Create(Op->getCondition(), + FC->convert(Op->getTrueValue()), + FC->convert(Op->getFalseValue()), + "", Op); + CopyDebug(Op2, Op); + Op2->takeName(Op); + FC->recordConvertedAndErase(Op, Op2); + } else if (isa(Inst) || isa(Inst)) { + Value *Arg = FC->convert(Inst->getOperand(0)); + Type *ResultTy = FC->convertType(Inst->getType()); + unsigned ArgSize = Arg->getType()->getIntegerBitWidth(); + unsigned ResultSize = ResultTy->getIntegerBitWidth(); + Value *Result; + // We avoid using IRBuilder's CreateZExtOrTrunc() here because it + // constant-folds ptrtoint ConstantExprs. This leads to creating + // ptrtoints of non-IntPtrType type, which is not what we want, + // because we want truncation/extension to be done explicitly by + // separate instructions. + if (ArgSize == ResultSize) { + Result = Arg; + } else { + Instruction::CastOps CastType = + ArgSize > ResultSize ? 
Instruction::Trunc : Instruction::ZExt; + Result = CopyDebug(CastInst::Create(CastType, Arg, ResultTy, "", Inst), + Inst); + } + if (Result != Arg) + Result->takeName(Inst); + FC->recordConvertedAndErase(Inst, Result); + } else if (isa(Inst)) { + if (Inst->getType()->isPointerTy()) { + FC->ToErase.push_back(Inst); + } + } else if (ICmpInst *Cmp = dyn_cast(Inst)) { + Value *Cmp2 = CopyDebug(new ICmpInst(Inst, Cmp->getPredicate(), + FC->convert(Cmp->getOperand(0)), + FC->convert(Cmp->getOperand(1)), ""), + Inst); + Cmp2->takeName(Cmp); + Cmp->replaceAllUsesWith(Cmp2); + Cmp->eraseFromParent(); + } else if (LoadInst *Load = dyn_cast(Inst)) { + Value *Ptr = FC->convertBackToPtr(Load->getPointerOperand(), Inst); + LoadInst *Result = new LoadInst(Ptr, "", Inst); + Result->takeName(Inst); + CopyDebug(Result, Inst); + CopyLoadOrStoreAttrs(Result, Load); + FC->recordConvertedAndErase(Inst, Result); + } else if (StoreInst *Store = dyn_cast(Inst)) { + Value *Ptr = FC->convertBackToPtr(Store->getPointerOperand(), Inst); + StoreInst *Result = new StoreInst(FC->convert(Store->getValueOperand()), + Ptr, Inst); + CopyDebug(Result, Inst); + CopyLoadOrStoreAttrs(Result, Store); + Inst->eraseFromParent(); + } else if (CallInst *Call = dyn_cast(Inst)) { + if (IntrinsicInst *ICall = dyn_cast(Inst)) { + if (ICall->getIntrinsicID() == Intrinsic::lifetime_start || + ICall->getIntrinsicID() == Intrinsic::lifetime_end || + ICall->getIntrinsicID() == Intrinsic::invariant_start) { + // Remove alloca lifetime markers for now. This is because + // the GVN pass can introduce lifetime markers taking PHI + // nodes as arguments. If ReplacePtrsWithInts converts the + // PHI node to int type, we will render those lifetime markers + // ineffective. But dropping a subset of lifetime markers is + // not safe in general. So, until LLVM better defines the + // semantics of lifetime markers, we drop them all. See: + // https://code.google.com/p/nativeclient/issues/detail?id=3443 + // We do the same for invariant.start/end because they work in + // a similar way. + Inst->eraseFromParent(); + } else { + FC->convertInPlace(Inst); + } + } else if (isa(Call->getCalledValue())) { + FC->convertInPlace(Inst); + } else { + SmallVector Args; + for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) + Args.push_back(FC->convert(Call->getArgOperand(I))); + CallInst *NewCall = CallInst::Create( + FC->convertFunctionPtr(Call->getCalledValue(), Call), + Args, "", Inst); + CopyDebug(NewCall, Call); + NewCall->setAttributes(RemovePointerAttrs(Call->getContext(), + Call->getAttributes())); + NewCall->setCallingConv(Call->getCallingConv()); + NewCall->setTailCall(Call->isTailCall()); + NewCall->takeName(Call); + FC->recordConvertedAndErase(Call, NewCall); + } + } else if (InvokeInst *Call = dyn_cast(Inst)) { + SmallVector Args; + for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) + Args.push_back(FC->convert(Call->getArgOperand(I))); + InvokeInst *NewCall = InvokeInst::Create( + FC->convertFunctionPtr(Call->getCalledValue(), Call), + Call->getNormalDest(), + Call->getUnwindDest(), + Args, "", Inst); + CopyDebug(NewCall, Call); + NewCall->setAttributes(RemovePointerAttrs(Call->getContext(), + Call->getAttributes())); + NewCall->setCallingConv(Call->getCallingConv()); + NewCall->takeName(Call); + FC->recordConvertedAndErase(Call, NewCall); + } else if (// Handle these instructions as a convenience to allow + // the pass to be used in more situations, even though we + // don't expect them in PNaCl's stable ABI. 
+ isa(Inst) || + isa(Inst) || + isa(Inst) || + isa(Inst) || + isa(Inst) || + isa(Inst) || + // These atomics only operate on integer pointers, not + // other pointers, so we don't need to recreate the + // instruction. + isa(Inst) || + isa(Inst)) { + FC->convertInPlace(Inst); + } +} + +// Convert ptrtoint+inttoptr to a bitcast because it's shorter and +// because some intrinsics work on bitcasts but not on +// ptrtoint+inttoptr, in particular: +// * llvm.lifetime.start/end (although we strip these out) +// * llvm.eh.typeid.for +static void SimplifyCasts(Instruction *Inst, Type *IntPtrType) { + if (IntToPtrInst *Cast1 = dyn_cast(Inst)) { + if (PtrToIntInst *Cast2 = dyn_cast(Cast1->getOperand(0))) { + assert(Cast2->getType() == IntPtrType); + Value *V = Cast2->getPointerOperand(); + if (V->getType() != Cast1->getType()) + V = new BitCastInst(V, Cast1->getType(), V->getName() + ".bc", Cast1); + Cast1->replaceAllUsesWith(V); + if (Cast1->use_empty()) + Cast1->eraseFromParent(); + if (Cast2->use_empty()) + Cast2->eraseFromParent(); + } + } +} + +static void CleanUpFunction(Function *Func, Type *IntPtrType) { + // Remove the ptrtoint/bitcast ConstantExprs we introduced for + // referencing globals. + FunctionPass *Pass = createExpandConstantExprPass(); + Pass->runOnFunction(*Func); + delete Pass; + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + SimplifyCasts(&*Iter++, IntPtrType); + } + } + // Cleanup pass. + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = &*Iter++; + // Add names to inttoptrs to make the output more readable. The + // placeholder values get in the way of doing this earlier when + // the inttoptrs are created. + if (isa(Inst)) + Inst->setName(Inst->getOperand(0)->getName() + ".asptr"); + // Remove ptrtoints that were introduced for allocas but not used. + if (isa(Inst) && Inst->use_empty()) + Inst->eraseFromParent(); + } + } +} + +char ReplacePtrsWithInts::ID = 0; +INITIALIZE_PASS(ReplacePtrsWithInts, "replace-ptrs-with-ints", + "Convert pointer values to integer values", + false, false) + +bool ReplacePtrsWithInts::runOnModule(Module &M) { + DataLayout DL(&M); + Type *IntPtrType = DL.getIntPtrType(M.getContext()); + + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *OldFunc = &*Iter++; + // Intrinsics' types must be left alone. + if (OldFunc->isIntrinsic()) + continue; + + FunctionConverter FC(IntPtrType); + FunctionType *NFTy = FC.convertFuncType(OldFunc->getFunctionType()); + OldFunc->setAttributes(RemovePointerAttrs(M.getContext(), + OldFunc->getAttributes())); + Function *NewFunc = RecreateFunction(OldFunc, NFTy); + + // Move the arguments across to the new function. + for (Function::arg_iterator Arg = OldFunc->arg_begin(), + E = OldFunc->arg_end(), NewArg = NewFunc->arg_begin(); + Arg != E; ++Arg, ++NewArg) { + FC.recordConverted(&*Arg, &*NewArg); + NewArg->takeName(&*Arg); + } + + // invariant.end calls refer to invariant.start calls, so we must + // remove the former first. 
+ for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + if (IntrinsicInst *ICall = dyn_cast(Iter++)) { + if (ICall->getIntrinsicID() == Intrinsic::invariant_end) + ICall->eraseFromParent(); + } + } + } + + // Convert the function body. + for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + ConvertInstruction(&DL, IntPtrType, &FC, &*Iter++); + } + } + FC.eraseReplacedInstructions(); + + OldFunc->eraseFromParent(); + } + // Now that all functions have their normalized types, we can remove + // various casts. + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + CleanUpFunction(&*Func, IntPtrType); + // Delete the now-unused bitcast ConstantExprs that we created so + // that they don't interfere with StripDeadPrototypes. + Func->removeDeadConstantUsers(); + } + return true; +} + +ModulePass *llvm::createReplacePtrsWithIntsPass() { + return new ReplacePtrsWithInts(); +} diff --git a/lib/Target/JSBackend/NaCl/ResolvePNaClIntrinsics.cpp b/lib/Target/JSBackend/NaCl/ResolvePNaClIntrinsics.cpp new file mode 100644 index 000000000000..616866782014 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/ResolvePNaClIntrinsics.cpp @@ -0,0 +1,489 @@ +//===- ResolvePNaClIntrinsics.cpp - Resolve calls to PNaCl intrinsics ----====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass resolves calls to PNaCl stable bitcode intrinsics. It is +// normally run in the PNaCl translator. +// +// Running AddPNaClExternalDeclsPass is a precondition for running this +// pass. They are separate because one is a ModulePass and the other is +// a FunctionPass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/NaClAtomicIntrinsics.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/Local.h" +#if defined(PNACL_BROWSER_TRANSLATOR) +#include "native_client/src/untrusted/nacl/pnacl.h" +#endif + +using namespace llvm; + +namespace { +class ResolvePNaClIntrinsics : public FunctionPass { +public: + ResolvePNaClIntrinsics() : FunctionPass(ID) { + initializeResolvePNaClIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + static char ID; + bool runOnFunction(Function &F) override; + + /// Interface specifying how intrinsic calls should be resolved. Each + /// intrinsic call handled by the implementor will be visited by the + /// doResolve method. + class CallResolver { + public: + /// Called once per \p Call to the intrinsic in the module. + /// Returns true if the Function was changed. + bool resolve(IntrinsicInst *Call) { + // To be a well-behaving FunctionPass, don't touch uses in other + // functions. 
These will be handled when the pass manager gets to + // those functions. + if (Call->getParent()->getParent() == &F) + return doResolve(Call); + return false; + } + Function *getDeclaration() const { return doGetDeclaration(); } + std::string getName() { return Intrinsic::getName(IntrinsicID); } + + protected: + Function &F; + Module *M; + Intrinsic::ID IntrinsicID; + + CallResolver(Function &F, Intrinsic::ID IntrinsicID) + : F(F), M(F.getParent()), IntrinsicID(IntrinsicID) {} + virtual ~CallResolver() {} + + /// The following pure virtual methods must be defined by + /// implementors, and will be called once per intrinsic call. + /// NOTE: doGetDeclaration() should only "get" the intrinsic declaration + /// and not *add* decls to the module. Declarations should be added + /// up front by the AddPNaClExternalDecls module pass. + virtual Function *doGetDeclaration() const = 0; + /// Returns true if the Function was changed. + virtual bool doResolve(IntrinsicInst *Call) = 0; + + private: + CallResolver(const CallResolver &) = delete; + CallResolver &operator=(const CallResolver &) = delete; + }; + +private: + /// Visit all calls matching the \p Resolver's declaration, and invoke + /// the CallResolver methods on each of them. + bool visitCalls(CallResolver &Resolver); +}; + +/// Rewrite intrinsic calls to another function. +class IntrinsicCallToFunctionCall : + public ResolvePNaClIntrinsics::CallResolver { +public: + IntrinsicCallToFunctionCall(Function &F, Intrinsic::ID IntrinsicID, + const char *TargetFunctionName) + : CallResolver(F, IntrinsicID), + TargetFunction(M->getFunction(TargetFunctionName)) { + // Expect to find the target function for this intrinsic already + // declared, even if it is never used. + if (!TargetFunction) + report_fatal_error(std::string( + "Expected to find external declaration of ") + TargetFunctionName); + } + ~IntrinsicCallToFunctionCall() override {} + +private: + Function *TargetFunction; + + Function *doGetDeclaration() const override { + return Intrinsic::getDeclaration(M, IntrinsicID); + } + + bool doResolve(IntrinsicInst *Call) override { + Call->setCalledFunction(TargetFunction); + if (IntrinsicID == Intrinsic::nacl_setjmp) { + // The "returns_twice" attribute is required for correctness, + // otherwise the backend will reuse stack slots in a way that is + // incorrect for setjmp(). See: + // https://code.google.com/p/nativeclient/issues/detail?id=3733 + Call->setCanReturnTwice(); + } + return true; + } + + IntrinsicCallToFunctionCall(const IntrinsicCallToFunctionCall &) = delete; + IntrinsicCallToFunctionCall & + operator=(const IntrinsicCallToFunctionCall &) = delete; +}; + +/// Rewrite intrinsic calls to a constant whose value is determined by a +/// functor. This functor is called once per Call, and returns a +/// Constant that should replace the Call. 
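+/// A minimal illustrative functor satisfying this contract (the real one
+/// used below is IsLockFreeToConstant) could look like:
+///   struct AlwaysOne {
+///     Constant *operator()(CallInst *Call) {
+///       return ConstantInt::get(Call->getType(), 1);
+///     }
+///   };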
+template +class ConstantCallResolver : public ResolvePNaClIntrinsics::CallResolver { +public: + ConstantCallResolver(Function &F, Intrinsic::ID IntrinsicID, + Callable Functor) + : CallResolver(F, IntrinsicID), Functor(Functor) {} + ~ConstantCallResolver() override {} + +private: + Callable Functor; + + Function *doGetDeclaration() const override { + return Intrinsic::getDeclaration(M, IntrinsicID); + } + + bool doResolve(IntrinsicInst *Call) override { + Constant *C = Functor(Call); + Call->replaceAllUsesWith(C); + Call->eraseFromParent(); + return true; + } + + ConstantCallResolver(const ConstantCallResolver &) = delete; + ConstantCallResolver &operator=(const ConstantCallResolver &) = delete; +}; + +/// Resolve __nacl_atomic_is_lock_free to true/false at translation +/// time. PNaCl's currently supported platforms all support lock-free atomics at +/// byte sizes {1,2,4,8} except for MIPS and asmjs architectures that supports +/// lock-free atomics at byte sizes {1,2,4}, and the alignment of the pointer is +/// always expected to be natural (as guaranteed by C11 and C++11). PNaCl's +/// Module-level ABI verification checks that the byte size is constant and in +/// {1,2,4,8}. +struct IsLockFreeToConstant { + Constant *operator()(CallInst *Call) { + uint64_t MaxLockFreeByteSize = 8; + const APInt &ByteSize = + cast(Call->getOperand(0))->getUniqueInteger(); + +# if defined(PNACL_BROWSER_TRANSLATOR) + switch (__builtin_nacl_target_arch()) { + case PnaclTargetArchitectureX86_32: + case PnaclTargetArchitectureX86_64: + case PnaclTargetArchitectureARM_32: + break; + case PnaclTargetArchitectureMips_32: + MaxLockFreeByteSize = 4; + break; + default: + errs() << "Architecture: " << Triple::getArchTypeName(Arch) << "\n"; + report_fatal_error("is_lock_free: unhandled architecture"); + } +# else + switch (Arch) { + case Triple::x86: + case Triple::x86_64: + case Triple::arm: + break; + case Triple::mipsel: + case Triple::asmjs: + MaxLockFreeByteSize = 4; + break; + default: + errs() << "Architecture: " << Triple::getArchTypeName(Arch) << "\n"; + report_fatal_error("is_lock_free: unhandled architecture"); + } +# endif + + bool IsLockFree = ByteSize.ule(MaxLockFreeByteSize); + auto *C = ConstantInt::get(Call->getType(), IsLockFree); + return C; + } + + Triple::ArchType Arch; + IsLockFreeToConstant(Module *M) + : Arch(Triple(M->getTargetTriple()).getArch()) {} + IsLockFreeToConstant() = delete; +}; + +/// Rewrite atomic intrinsics to LLVM IR instructions. +class AtomicCallResolver : public ResolvePNaClIntrinsics::CallResolver { +public: + AtomicCallResolver(Function &F, + const NaCl::AtomicIntrinsics::AtomicIntrinsic *I) + : CallResolver(F, I->ID), I(I) {} + ~AtomicCallResolver() override {} + +private: + const NaCl::AtomicIntrinsics::AtomicIntrinsic *I; + + Function *doGetDeclaration() const override { return I->getDeclaration(M); } + + bool doResolve(IntrinsicInst *Call) override { + // Assume the @llvm.nacl.atomic.* intrinsics follow the PNaCl ABI: + // this should have been checked by the verifier. 
+ bool isVolatile = false; + SynchronizationScope SS = CrossThread; + Instruction *I; + SmallVector MaybeDead; + + switch (Call->getIntrinsicID()) { + default: + llvm_unreachable("unknown atomic intrinsic"); + case Intrinsic::nacl_atomic_load: + I = new LoadInst(Call->getArgOperand(0), "", isVolatile, + alignmentFromPointer(Call->getArgOperand(0)), + thawMemoryOrder(Call->getArgOperand(1)), SS, Call); + break; + case Intrinsic::nacl_atomic_store: + I = new StoreInst(Call->getArgOperand(0), Call->getArgOperand(1), + isVolatile, + alignmentFromPointer(Call->getArgOperand(1)), + thawMemoryOrder(Call->getArgOperand(2)), SS, Call); + break; + case Intrinsic::nacl_atomic_rmw: + I = new AtomicRMWInst(thawRMWOperation(Call->getArgOperand(0)), + Call->getArgOperand(1), Call->getArgOperand(2), + thawMemoryOrder(Call->getArgOperand(3)), SS, Call); + break; + case Intrinsic::nacl_atomic_cmpxchg: + I = new AtomicCmpXchgInst( + Call->getArgOperand(0), Call->getArgOperand(1), + Call->getArgOperand(2), thawMemoryOrder(Call->getArgOperand(3)), + thawMemoryOrder(Call->getArgOperand(4)), SS, Call); + + // cmpxchg returns struct { T loaded, i1 success } whereas the PNaCl + // intrinsic only returns the loaded value. The Call can't simply be + // replaced. Identify loaded+success structs that can be replaced by the + // cmxpchg's returned struct. + { + Instruction *Loaded = nullptr; + Instruction *Success = nullptr; + for (User *CallUser : Call->users()) { + if (auto ICmp = dyn_cast(CallUser)) { + // Identify comparisons for cmpxchg's success. + if (ICmp->getPredicate() != CmpInst::ICMP_EQ) + continue; + Value *LHS = ICmp->getOperand(0); + Value *RHS = ICmp->getOperand(1); + Value *Old = I->getOperand(1); + if (RHS != Old && LHS != Old) // Call is either RHS or LHS. + continue; // The comparison isn't checking for cmpxchg's success. + + // Recognize the pattern creating struct { T loaded, i1 success }: + // it can be replaced by cmpxchg's result. + for (User *InsUser : ICmp->users()) { + if (!isa(InsUser) || + cast(InsUser)->getParent() != Call->getParent()) + continue; // Different basic blocks, don't be clever. + auto Ins = dyn_cast(InsUser); + if (!Ins) + continue; + auto InsTy = dyn_cast(Ins->getType()); + if (!InsTy) + continue; + if (!InsTy->isLayoutIdentical(cast(I->getType()))) + continue; // Not a struct { T loaded, i1 success }. + if (Ins->getNumIndices() != 1 || Ins->getIndices()[0] != 1) + continue; // Not an insert { T, i1 } %something, %success, 1. + auto TIns = dyn_cast(Ins->getAggregateOperand()); + if (!TIns) + continue; // T wasn't inserted into the struct, don't be clever. + if (!isa(TIns->getAggregateOperand())) + continue; // Not an insert into an undef value, don't be clever. + if (TIns->getInsertedValueOperand() != Call) + continue; // Not inserting the loaded value. + if (TIns->getNumIndices() != 1 || TIns->getIndices()[0] != 0) + continue; // Not an insert { T, i1 } undef, %loaded, 0. + // Hooray! This is the struct you're looking for. + + // Keep track of values extracted from the struct, instead of + // recreating them. 
+ for (User *StructUser : Ins->users()) { + if (auto Extract = dyn_cast(StructUser)) { + MaybeDead.push_back(Extract); + if (!Loaded && Extract->getIndices()[0] == 0) { + Loaded = cast(StructUser); + Loaded->moveBefore(Call); + } else if (!Success && Extract->getIndices()[0] == 1) { + Success = cast(StructUser); + Success->moveBefore(Call); + } + } + } + + MaybeDead.push_back(Ins); + MaybeDead.push_back(TIns); + Ins->replaceAllUsesWith(I); + } + + MaybeDead.push_back(ICmp); + if (!Success) + Success = ExtractValueInst::Create(I, 1, "success", Call); + ICmp->replaceAllUsesWith(Success); + } + } + + // Clean up remaining uses of the loaded value, if any. Later code will + // try to replace Call with I, make sure the types match. + if (Call->hasNUsesOrMore(1)) { + if (!Loaded) + Loaded = ExtractValueInst::Create(I, 0, "loaded", Call); + I = Loaded; + } else { + I = nullptr; + } + + if (Loaded) + MaybeDead.push_back(Loaded); + if (Success) + MaybeDead.push_back(Success); + } + break; + case Intrinsic::nacl_atomic_fence: + I = new FenceInst(M->getContext(), + thawMemoryOrder(Call->getArgOperand(0)), SS, Call); + break; + case Intrinsic::nacl_atomic_fence_all: { + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(M->getContext()), false); + std::string AsmString; // Empty. + std::string Constraints("~{memory}"); + bool HasSideEffect = true; + CallInst *Asm = CallInst::Create( + InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect), "", Call); + Asm->setDebugLoc(Call->getDebugLoc()); + I = new FenceInst(M->getContext(), AtomicOrdering::SequentiallyConsistent, SS, Asm); + Asm = CallInst::Create( + InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect), "", I); + Asm->setDebugLoc(Call->getDebugLoc()); + } break; + } + + if (I) { + I->setName(Call->getName()); + I->setDebugLoc(Call->getDebugLoc()); + Call->replaceAllUsesWith(I); + } + Call->eraseFromParent(); + + // Remove dead code. + for (Instruction *Kill : MaybeDead) + if (isInstructionTriviallyDead(Kill)) + Kill->eraseFromParent(); + + return true; + } + + unsigned alignmentFromPointer(const Value *Ptr) const { + auto *PtrType = cast(Ptr->getType()); + unsigned BitWidth = PtrType->getElementType()->getIntegerBitWidth(); + return BitWidth / 8; + } + + AtomicOrdering thawMemoryOrder(const Value *MemoryOrder) const { + auto MO = static_cast( + cast(MemoryOrder)->getUniqueInteger().getLimitedValue()); + switch (MO) { + // Only valid values should pass validation. + default: llvm_unreachable("unknown memory order"); + case NaCl::MemoryOrderRelaxed: return AtomicOrdering::Monotonic; + // TODO Consume is unspecified by LLVM's internal IR. + case NaCl::MemoryOrderConsume: return AtomicOrdering::SequentiallyConsistent; + case NaCl::MemoryOrderAcquire: return AtomicOrdering::Acquire; + case NaCl::MemoryOrderRelease: return AtomicOrdering::Release; + case NaCl::MemoryOrderAcquireRelease: return AtomicOrdering::AcquireRelease; + case NaCl::MemoryOrderSequentiallyConsistent: return AtomicOrdering::SequentiallyConsistent; + } + } + + AtomicRMWInst::BinOp thawRMWOperation(const Value *Operation) const { + auto Op = static_cast( + cast(Operation)->getUniqueInteger().getLimitedValue()); + switch (Op) { + // Only valid values should pass validation. 
+ default: llvm_unreachable("unknown atomic RMW operation"); + case NaCl::AtomicAdd: return AtomicRMWInst::Add; + case NaCl::AtomicSub: return AtomicRMWInst::Sub; + case NaCl::AtomicOr: return AtomicRMWInst::Or; + case NaCl::AtomicAnd: return AtomicRMWInst::And; + case NaCl::AtomicXor: return AtomicRMWInst::Xor; + case NaCl::AtomicExchange: return AtomicRMWInst::Xchg; + } + } + + AtomicCallResolver(const AtomicCallResolver &); + AtomicCallResolver &operator=(const AtomicCallResolver &); +}; +} + +bool ResolvePNaClIntrinsics::visitCalls( + ResolvePNaClIntrinsics::CallResolver &Resolver) { + bool Changed = false; + Function *IntrinsicFunction = Resolver.getDeclaration(); + if (!IntrinsicFunction) + return false; + + SmallVector Calls; + for (User *U : IntrinsicFunction->users()) { + // At this point, the only uses of the intrinsic can be calls, since we + // assume this pass runs on bitcode that passed ABI verification. + auto *Call = dyn_cast(U); + if (!Call) + report_fatal_error("Expected use of intrinsic to be a call: " + + Resolver.getName()); + Calls.push_back(Call); + } + + for (IntrinsicInst *Call : Calls) + Changed |= Resolver.resolve(Call); + + return Changed; +} + +bool ResolvePNaClIntrinsics::runOnFunction(Function &F) { + Module *M = F.getParent(); + LLVMContext &C = M->getContext(); + bool Changed = false; + + IntrinsicCallToFunctionCall SetJmpResolver(F, Intrinsic::nacl_setjmp, + "setjmp"); + IntrinsicCallToFunctionCall LongJmpResolver(F, Intrinsic::nacl_longjmp, + "longjmp"); + Changed |= visitCalls(SetJmpResolver); + Changed |= visitCalls(LongJmpResolver); + + NaCl::AtomicIntrinsics AI(C); + NaCl::AtomicIntrinsics::View V = AI.allIntrinsicsAndOverloads(); + for (auto I = V.begin(), E = V.end(); I != E; ++I) { + AtomicCallResolver AtomicResolver(F, I); + Changed |= visitCalls(AtomicResolver); + } + + ConstantCallResolver IsLockFreeResolver( + F, Intrinsic::nacl_atomic_is_lock_free, IsLockFreeToConstant(M)); + Changed |= visitCalls(IsLockFreeResolver); + + return Changed; +} + +char ResolvePNaClIntrinsics::ID = 0; +INITIALIZE_PASS(ResolvePNaClIntrinsics, "resolve-pnacl-intrinsics", + "Resolve PNaCl intrinsic calls", false, false) + +FunctionPass *llvm::createResolvePNaClIntrinsicsPass() { + return new ResolvePNaClIntrinsics(); +} diff --git a/lib/Target/JSBackend/NaCl/RewriteAtomics.cpp b/lib/Target/JSBackend/NaCl/RewriteAtomics.cpp new file mode 100644 index 000000000000..c7f17a4f72cb --- /dev/null +++ b/lib/Target/JSBackend/NaCl/RewriteAtomics.cpp @@ -0,0 +1,411 @@ +//===- RewriteAtomics.cpp - Stabilize instructions used for concurrency ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass encodes atomics, volatiles and fences using NaCl intrinsics +// instead of LLVM's regular IR instructions. +// +// All of the above are transformed into one of the +// @llvm.nacl.atomic.* intrinsics. 
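+//
+// As an illustrative sketch (the visit* methods below define the exact
+// encoding), an atomic load such as:
+//   %res = load atomic i32, i32* %ptr seq_cst, align 4
+// becomes a call to the corresponding NaCl intrinsic:
+//   %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 %order)
+// where %order denotes the constant i32 encoding of the NaCl::MemoryOrder
+// chosen by freezeMemoryOrder().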
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/NaClAtomicIntrinsics.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" +#include +#include + +using namespace llvm; + +static cl::opt PNaClMemoryOrderSeqCstOnly( + "pnacl-memory-order-seq-cst-only", + cl::desc("PNaCl should upgrade all atomic memory orders to seq_cst"), + cl::init(false)); + +namespace { + +class RewriteAtomics : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + RewriteAtomics() : ModulePass(ID) { + // This is a module pass because it may have to introduce + // intrinsic declarations into the module and modify a global function. + initializeRewriteAtomicsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); +}; + +template std::string ToStr(const T &V) { + std::string S; + raw_string_ostream OS(S); + OS << const_cast(V); + return OS.str(); +} + +class AtomicVisitor : public InstVisitor { +public: + AtomicVisitor(Module &M, Pass &P) + : M(M), C(M.getContext()), + TD(M.getDataLayout()), AI(C), + ModifiedModule(false) {} + ~AtomicVisitor() {} + bool modifiedModule() const { return ModifiedModule; } + + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I); + void visitAtomicRMWInst(AtomicRMWInst &I); + void visitFenceInst(FenceInst &I); + +private: + Module &M; + LLVMContext &C; + const DataLayout TD; + NaCl::AtomicIntrinsics AI; + bool ModifiedModule; + + AtomicVisitor() = delete; + AtomicVisitor(const AtomicVisitor &) = delete; + AtomicVisitor &operator=(const AtomicVisitor &) = delete; + + /// Create an integer constant holding a NaCl::MemoryOrder that can be + /// passed as an argument to one of the @llvm.nacl.atomic.* + /// intrinsics. This function may strengthen the ordering initially + /// specified by the instruction \p I for stability purpose. + template + ConstantInt *freezeMemoryOrder(const Instruction &I, AtomicOrdering O) const; + std::pair + freezeMemoryOrder(const AtomicCmpXchgInst &I, AtomicOrdering S, + AtomicOrdering F) const; + + /// Sanity-check that instruction \p I which has pointer and value + /// parameters have matching sizes \p BitSize for the type-pointed-to + /// and the value's type \p T. + void checkSizeMatchesType(const Instruction &I, unsigned BitSize, + const Type *T) const; + + /// Verify that loads and stores are at least naturally aligned. Use + /// byte alignment because converting to bits could truncate the + /// value. + void checkAlignment(const Instruction &I, unsigned ByteAlignment, + unsigned ByteSize) const; + + /// Create a cast before Instruction \p I from \p Src to \p Dst with \p Name. + CastInst *createCast(Instruction &I, Value *Src, Type *Dst, Twine Name) const; + + /// Try to find the atomic intrinsic of with its \p ID and \OverloadedType. + /// Report fatal error on failure. 
+ const NaCl::AtomicIntrinsics::AtomicIntrinsic * + findAtomicIntrinsic(const Instruction &I, Intrinsic::ID ID, + Type *OverloadedType) const; + + /// Helper function which rewrites a single instruction \p I to a + /// particular \p intrinsic with overloaded type \p OverloadedType, + /// and argument list \p Args. Will perform a bitcast to the proper \p + /// DstType, if different from \p OverloadedType. + void replaceInstructionWithIntrinsicCall( + Instruction &I, const NaCl::AtomicIntrinsics::AtomicIntrinsic *Intrinsic, + Type *DstType, Type *OverloadedType, ArrayRef Args); + + /// Most atomics instructions deal with at least one pointer, this + /// struct automates some of this and has generic sanity checks. + template struct PointerHelper { + Value *P; + Type *OriginalPET; + Type *PET; + unsigned BitSize; + PointerHelper(const AtomicVisitor &AV, Instruction &I) + : P(I.getPointerOperand()) { + if (I.getPointerAddressSpace() != 0) + report_fatal_error("unhandled pointer address space " + + Twine(I.getPointerAddressSpace()) + " for atomic: " + + ToStr(I)); + assert(P->getType()->isPointerTy() && "expected a pointer"); + PET = OriginalPET = P->getType()->getPointerElementType(); + BitSize = AV.TD.getTypeSizeInBits(OriginalPET); + if (!OriginalPET->isIntegerTy()) { + // The pointer wasn't to an integer type. We define atomics in + // terms of integers, so bitcast the pointer to an integer of + // the proper width. + Type *IntNPtr = Type::getIntNPtrTy(AV.C, BitSize); + P = AV.createCast(I, P, IntNPtr, P->getName() + ".cast"); + PET = P->getType()->getPointerElementType(); + } + AV.checkSizeMatchesType(I, BitSize, PET); + } + }; +}; +} + +char RewriteAtomics::ID = 0; +INITIALIZE_PASS(RewriteAtomics, "nacl-rewrite-atomics", + "rewrite atomics, volatiles and fences into stable " + "@llvm.nacl.atomics.* intrinsics", + false, false) + +bool RewriteAtomics::runOnModule(Module &M) { + AtomicVisitor AV(M, *this); + AV.visit(M); + return AV.modifiedModule(); +} + +template +ConstantInt *AtomicVisitor::freezeMemoryOrder(const Instruction &I, + AtomicOrdering O) const { + NaCl::MemoryOrder AO = NaCl::MemoryOrderInvalid; + + // TODO Volatile load/store are promoted to sequentially consistent + // for now. We could do something weaker. + if (const LoadInst *L = dyn_cast(&I)) { + if (L->isVolatile()) + AO = NaCl::MemoryOrderSequentiallyConsistent; + } else if (const StoreInst *S = dyn_cast(&I)) { + if (S->isVolatile()) + AO = NaCl::MemoryOrderSequentiallyConsistent; + } + + if (AO == NaCl::MemoryOrderInvalid) { + switch (O) { + case AtomicOrdering::NotAtomic: llvm_unreachable("unexpected memory order"); + // Monotonic is a strict superset of Unordered. Both can therefore + // map to Relaxed ordering, which is in the C11/C++11 standard. + case AtomicOrdering::Unordered: AO = NaCl::MemoryOrderRelaxed; break; + case AtomicOrdering::Monotonic: AO = NaCl::MemoryOrderRelaxed; break; + // TODO Consume is currently unspecified by LLVM's internal IR. + case AtomicOrdering::Acquire: AO = NaCl::MemoryOrderAcquire; break; + case AtomicOrdering::Release: AO = NaCl::MemoryOrderRelease; break; + case AtomicOrdering::AcquireRelease: AO = NaCl::MemoryOrderAcquireRelease; break; + case AtomicOrdering::SequentiallyConsistent: + AO = NaCl::MemoryOrderSequentiallyConsistent; break; + } + } + + // TODO For now only acquire/release/acq_rel/seq_cst are allowed. 
+ if (PNaClMemoryOrderSeqCstOnly || AO == NaCl::MemoryOrderRelaxed) + AO = NaCl::MemoryOrderSequentiallyConsistent; + + return ConstantInt::get(Type::getInt32Ty(C), AO); +} + +std::pair +AtomicVisitor::freezeMemoryOrder(const AtomicCmpXchgInst &I, AtomicOrdering S, + AtomicOrdering F) const { + if (S == AtomicOrdering::Release || (S == AtomicOrdering::AcquireRelease && F != AtomicOrdering::Acquire)) + // According to C++11's [atomics.types.operations.req], cmpxchg with release + // success memory ordering must have relaxed failure memory ordering, which + // PNaCl currently disallows. The next-strongest ordering is acq_rel which + // is also an invalid failure ordering, we therefore have to change the + // success ordering to seq_cst, which can then fail as seq_cst. + S = F = AtomicOrdering::SequentiallyConsistent; + if (F == AtomicOrdering::Unordered || F == AtomicOrdering::Monotonic) // Both are treated as relaxed. + F = AtomicCmpXchgInst::getStrongestFailureOrdering(S); + return std::make_pair(freezeMemoryOrder(I, S), freezeMemoryOrder(I, F)); +} + +void AtomicVisitor::checkSizeMatchesType(const Instruction &I, unsigned BitSize, + const Type *T) const { + Type *IntType = Type::getIntNTy(C, BitSize); + if (IntType && T == IntType) + return; + report_fatal_error("unsupported atomic type " + ToStr(*T) + " of size " + + Twine(BitSize) + " bits in: " + ToStr(I)); +} + +void AtomicVisitor::checkAlignment(const Instruction &I, unsigned ByteAlignment, + unsigned ByteSize) const { + if (ByteAlignment < ByteSize) + report_fatal_error("atomic load/store must be at least naturally aligned, " + "got " + + Twine(ByteAlignment) + ", bytes expected at least " + + Twine(ByteSize) + " bytes, in: " + ToStr(I)); +} + +CastInst *AtomicVisitor::createCast(Instruction &I, Value *Src, Type *Dst, + Twine Name) const { + Type *SrcT = Src->getType(); + Instruction::CastOps Op = SrcT->isIntegerTy() && Dst->isPointerTy() + ? Instruction::IntToPtr + : SrcT->isPointerTy() && Dst->isIntegerTy() + ? 
Instruction::PtrToInt + : Instruction::BitCast; + if (!CastInst::castIsValid(Op, Src, Dst)) + report_fatal_error("cannot emit atomic instruction while converting type " + + ToStr(*SrcT) + " to " + ToStr(*Dst) + " for " + Name + + " in " + ToStr(I)); + return CastInst::Create(Op, Src, Dst, Name, &I); +} + +const NaCl::AtomicIntrinsics::AtomicIntrinsic * +AtomicVisitor::findAtomicIntrinsic(const Instruction &I, Intrinsic::ID ID, + Type *OverloadedType) const { + if (const NaCl::AtomicIntrinsics::AtomicIntrinsic *Intrinsic = + AI.find(ID, OverloadedType)) + return Intrinsic; + report_fatal_error("unsupported atomic instruction: " + ToStr(I)); +} + +void AtomicVisitor::replaceInstructionWithIntrinsicCall( + Instruction &I, const NaCl::AtomicIntrinsics::AtomicIntrinsic *Intrinsic, + Type *DstType, Type *OverloadedType, ArrayRef Args) { + std::string Name(I.getName()); + Function *F = Intrinsic->getDeclaration(&M); + CallInst *Call = CallInst::Create(F, Args, "", &I); + Call->setDebugLoc(I.getDebugLoc()); + Instruction *Res = Call; + + assert((I.getType()->isStructTy() == isa(&I)) && + "cmpxchg returns a struct, and other instructions don't"); + if (auto S = dyn_cast(I.getType())) { + assert(S->getNumElements() == 2 && + "cmpxchg returns a struct with two elements"); + assert(S->getElementType(0) == DstType && + "cmpxchg struct's first member should be the value type"); + assert(S->getElementType(1) == Type::getInt1Ty(C) && + "cmpxchg struct's second member should be the success flag"); + // Recreate struct { T value, i1 success } after the call. + auto Success = CmpInst::Create( + Instruction::ICmp, CmpInst::ICMP_EQ, Res, + cast(&I)->getCompareOperand(), "success", &I); + Res = InsertValueInst::Create( + InsertValueInst::Create(UndefValue::get(S), Res, 0, + Name + ".insert.value", &I), + Success, 1, Name + ".insert.success", &I); + } else if (!Call->getType()->isVoidTy() && DstType != OverloadedType) { + // The call returns a value which needs to be cast to a non-integer. + Res = createCast(I, Call, DstType, Name + ".cast"); + Res->setDebugLoc(I.getDebugLoc()); + } + + I.replaceAllUsesWith(Res); + I.eraseFromParent(); + Call->setName(Name); + ModifiedModule = true; +} + +/// %res = load {atomic|volatile} T* %ptr memory_order, align sizeof(T) +/// becomes: +/// %res = call T @llvm.nacl.atomic.load.i(%ptr, memory_order) +void AtomicVisitor::visitLoadInst(LoadInst &I) { + return; // XXX EMSCRIPTEN + if (I.isSimple()) + return; + PointerHelper PH(*this, I); + const NaCl::AtomicIntrinsics::AtomicIntrinsic *Intrinsic = + findAtomicIntrinsic(I, Intrinsic::nacl_atomic_load, PH.PET); + checkAlignment(I, I.getAlignment(), PH.BitSize / CHAR_BIT); + Value *Args[] = {PH.P, freezeMemoryOrder(I, I.getOrdering())}; + replaceInstructionWithIntrinsicCall(I, Intrinsic, PH.OriginalPET, PH.PET, + Args); +} + +/// store {atomic|volatile} T %val, T* %ptr memory_order, align sizeof(T) +/// becomes: +/// call void @llvm.nacl.atomic.store.i(%val, %ptr, memory_order) +void AtomicVisitor::visitStoreInst(StoreInst &I) { + return; // XXX EMSCRIPTEN + if (I.isSimple()) + return; + PointerHelper PH(*this, I); + const NaCl::AtomicIntrinsics::AtomicIntrinsic *Intrinsic = + findAtomicIntrinsic(I, Intrinsic::nacl_atomic_store, PH.PET); + checkAlignment(I, I.getAlignment(), PH.BitSize / CHAR_BIT); + Value *V = I.getValueOperand(); + if (!V->getType()->isIntegerTy()) { + // The store isn't of an integer type. We define atomics in terms of + // integers, so bitcast the value to store to an integer of the + // proper width. 
+ CastInst *Cast = createCast(I, V, Type::getIntNTy(C, PH.BitSize), + V->getName() + ".cast"); + Cast->setDebugLoc(I.getDebugLoc()); + V = Cast; + } + checkSizeMatchesType(I, PH.BitSize, V->getType()); + Value *Args[] = {V, PH.P, freezeMemoryOrder(I, I.getOrdering())}; + replaceInstructionWithIntrinsicCall(I, Intrinsic, PH.OriginalPET, PH.PET, + Args); +} + +/// %res = atomicrmw OP T* %ptr, T %val memory_order +/// becomes: +/// %res = call T @llvm.nacl.atomic.rmw.i(OP, %ptr, %val, memory_order) +void AtomicVisitor::visitAtomicRMWInst(AtomicRMWInst &I) { + return; // XXX EMSCRIPTEN + NaCl::AtomicRMWOperation Op; + switch (I.getOperation()) { + default: report_fatal_error("unsupported atomicrmw operation: " + ToStr(I)); + case AtomicRMWInst::Add: Op = NaCl::AtomicAdd; break; + case AtomicRMWInst::Sub: Op = NaCl::AtomicSub; break; + case AtomicRMWInst::And: Op = NaCl::AtomicAnd; break; + case AtomicRMWInst::Or: Op = NaCl::AtomicOr; break; + case AtomicRMWInst::Xor: Op = NaCl::AtomicXor; break; + case AtomicRMWInst::Xchg: Op = NaCl::AtomicExchange; break; + } + PointerHelper PH(*this, I); + const NaCl::AtomicIntrinsics::AtomicIntrinsic *Intrinsic = + findAtomicIntrinsic(I, Intrinsic::nacl_atomic_rmw, PH.PET); + checkSizeMatchesType(I, PH.BitSize, I.getValOperand()->getType()); + Value *Args[] = {ConstantInt::get(Type::getInt32Ty(C), Op), PH.P, + I.getValOperand(), freezeMemoryOrder(I, I.getOrdering())}; + replaceInstructionWithIntrinsicCall(I, Intrinsic, PH.OriginalPET, PH.PET, + Args); +} + +/// %res = cmpxchg [weak] T* %ptr, T %old, T %new, memory_order_success +/// memory_order_failure +/// %val = extractvalue { T, i1 } %res, 0 +/// %success = extractvalue { T, i1 } %res, 1 +/// becomes: +/// %val = call T @llvm.nacl.atomic.cmpxchg.i( +/// %object, %expected, %desired, memory_order_success, +/// memory_order_failure) +/// %success = icmp eq %old, %val +/// Note: weak is currently dropped if present, the cmpxchg is always strong. +void AtomicVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { + PointerHelper PH(*this, I); + const NaCl::AtomicIntrinsics::AtomicIntrinsic *Intrinsic = + findAtomicIntrinsic(I, Intrinsic::nacl_atomic_cmpxchg, PH.PET); + checkSizeMatchesType(I, PH.BitSize, I.getCompareOperand()->getType()); + checkSizeMatchesType(I, PH.BitSize, I.getNewValOperand()->getType()); + auto Order = + freezeMemoryOrder(I, I.getSuccessOrdering(), I.getFailureOrdering()); + Value *Args[] = {PH.P, I.getCompareOperand(), I.getNewValOperand(), + Order.first, Order.second}; + replaceInstructionWithIntrinsicCall(I, Intrinsic, PH.OriginalPET, PH.PET, + Args); +} + +/// fence memory_order +/// becomes: +/// call void @llvm.nacl.atomic.fence(memory_order) +/// and +/// call void asm sideeffect "", "~{memory}"() +/// fence seq_cst +/// call void asm sideeffect "", "~{memory}"() +/// becomes: +/// call void asm sideeffect "", "~{memory}"() +/// call void @llvm.nacl.atomic.fence.all() +/// call void asm sideeffect "", "~{memory}"() +/// Note that the assembly gets eliminated by the -remove-asm-memory pass. 
+void AtomicVisitor::visitFenceInst(FenceInst &I) { + return; // XXX EMSCRIPTEN +} + +ModulePass *llvm::createRewriteAtomicsPass() { return new RewriteAtomics(); } diff --git a/lib/Target/JSBackend/NaCl/RewriteLLVMIntrinsics.cpp b/lib/Target/JSBackend/NaCl/RewriteLLVMIntrinsics.cpp new file mode 100644 index 000000000000..119b85aaa1a4 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/RewriteLLVMIntrinsics.cpp @@ -0,0 +1,149 @@ +//===- RewriteLLVMIntrinsics.cpp - Rewrite LLVM intrinsics to other values ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces calls to LLVM intrinsics that are *not* part of the +// PNaCl stable bitcode ABI into simpler values. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" +#include + +using namespace llvm; + +namespace { +class RewriteLLVMIntrinsics : public ModulePass { +public: + static char ID; + RewriteLLVMIntrinsics() : ModulePass(ID) { + // This is a module pass because this makes it easier to access uses + // of global intrinsic functions. + initializeRewriteLLVMIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + + /// Rewrite an intrinsic to something different. + class IntrinsicRewriter { + public: + Function *function() const { return F; } + /// Called once per \p Call of the Intrinsic Function. + void rewriteCall(CallInst *Call) { doRewriteCall(Call); } + + protected: + IntrinsicRewriter(Module &M, Intrinsic::ID IntrinsicID) + : F(Intrinsic::getDeclaration(&M, IntrinsicID)) {} + virtual ~IntrinsicRewriter() {} + /// This pure virtual method must be defined by implementors, and + /// will be called by rewriteCall. + virtual void doRewriteCall(CallInst *Call) = 0; + + Function *F; + + private: + IntrinsicRewriter() = delete; + IntrinsicRewriter(const IntrinsicRewriter &) = delete; + IntrinsicRewriter &operator=(const IntrinsicRewriter &) = delete; + }; + +private: + /// Visit all uses of a Function, rewrite it using the \p Rewriter, + /// and then delete the Call. Later delete the Function from the + /// Module. Returns true if the Module was changed. + bool visitUses(IntrinsicRewriter &Rewriter); +}; + +/// Rewrite a Call to nothing. +class ToNothing : public RewriteLLVMIntrinsics::IntrinsicRewriter { +public: + ToNothing(Module &M, Intrinsic::ID IntrinsicID) + : IntrinsicRewriter(M, IntrinsicID) {} + virtual ~ToNothing() {} + +protected: + virtual void doRewriteCall(CallInst *Call) { + // Nothing to do: the visit does the deletion. + } +}; + +/// Rewrite a Call to a ConstantInt of the same type. 
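+/// As used in runOnModule() below, for example, every call to
+/// @llvm.flt.rounds:
+///   %mode = call i32 @llvm.flt.rounds()
+/// has its uses replaced with the constant i32 1 (round-to-nearest).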
+class ToConstantInt : public RewriteLLVMIntrinsics::IntrinsicRewriter { +public: + ToConstantInt(Module &M, Intrinsic::ID IntrinsicID, uint64_t Value) + : IntrinsicRewriter(M, IntrinsicID), Value(Value), + RetType(function()->getFunctionType()->getReturnType()) {} + virtual ~ToConstantInt() {} + +protected: + virtual void doRewriteCall(CallInst *Call) { + Constant *C = ConstantInt::get(RetType, Value); + Call->replaceAllUsesWith(C); + } + +private: + uint64_t Value; + Type *RetType; +}; +} + +char RewriteLLVMIntrinsics::ID = 0; +INITIALIZE_PASS(RewriteLLVMIntrinsics, "rewrite-llvm-intrinsic-calls", + "Rewrite LLVM intrinsic calls to simpler expressions", false, + false) + +bool RewriteLLVMIntrinsics::runOnModule(Module &M) { + // Replace all uses of the @llvm.flt.rounds intrinsic with the constant + // "1" (round-to-nearest). Until we add a second intrinsic like + // @llvm.set.flt.round it is impossible to have a rounding mode that is + // not the initial rounding mode (round-to-nearest). We can remove + // this rewrite after adding a set() intrinsic. + ToConstantInt FltRoundsRewriter(M, Intrinsic::flt_rounds, 1); + + // Remove all @llvm.prefetch intrinsics. + ToNothing PrefetchRewriter(M, Intrinsic::prefetch); + ToNothing AssumeRewriter(M, Intrinsic::assume); + + return visitUses(FltRoundsRewriter) | visitUses(PrefetchRewriter) + | visitUses(AssumeRewriter); +} + +bool RewriteLLVMIntrinsics::visitUses(IntrinsicRewriter &Rewriter) { + Function *F = Rewriter.function(); + SmallVector Calls; + for (User *U : F->users()) { + if (CallInst *Call = dyn_cast(U)) { + Calls.push_back(Call); + } else { + // Intrinsics we care about currently don't need to handle this case. + std::string S; + raw_string_ostream OS(S); + OS << "Taking the address of this intrinsic is invalid: " << *U; + report_fatal_error(OS.str()); + } + } + + for (auto Call : Calls) { + Rewriter.rewriteCall(Call); + Call->eraseFromParent(); + } + + F->eraseFromParent(); + return !Calls.empty(); +} + +ModulePass *llvm::createRewriteLLVMIntrinsicsPass() { + return new RewriteLLVMIntrinsics(); +} diff --git a/lib/Target/JSBackend/NaCl/RewritePNaClLibraryCalls.cpp b/lib/Target/JSBackend/NaCl/RewritePNaClLibraryCalls.cpp new file mode 100644 index 000000000000..c3f1e9409a92 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/RewritePNaClLibraryCalls.cpp @@ -0,0 +1,545 @@ +//===- RewritePNaClLibraryCalls.cpp - PNaCl library calls to intrinsics ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces calls to known library functions with calls to intrinsics +// that are part of the PNaCl stable bitcode ABI. 
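+//
+// As an illustrative sketch (the rewrite* methods below define the exact
+// handling, including argument casts and extra intrinsic arguments), a
+// call to the C library's setjmp:
+//   %ret = call i32 @setjmp(i8* %env)
+// is redirected to the stable intrinsic:
+//   %ret = call i32 @llvm.nacl.setjmp(i8* %env)
+// and calls to memcpy/memmove/memset are likewise redirected to the
+// corresponding @llvm.memcpy/@llvm.memmove/@llvm.memset intrinsics.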
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" +#include + +using namespace llvm; + +namespace { + class RewritePNaClLibraryCalls : public ModulePass { + public: + static char ID; + RewritePNaClLibraryCalls() : + ModulePass(ID), TheModule(NULL), Context(NULL), SetjmpIntrinsic(NULL), + LongjmpIntrinsic(NULL), MemcpyIntrinsic(NULL), + MemmoveIntrinsic(NULL), MemsetIntrinsic(NULL) { + // This is a module pass because it may have to introduce + // intrinsic declarations into the module and modify globals. + initializeRewritePNaClLibraryCallsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + private: + typedef void (RewritePNaClLibraryCalls::*RewriteCallFunc)(CallInst *); + typedef void (RewritePNaClLibraryCalls::*PopulateWrapperFunc)(Function *); + + /// Handles a certain pattern of library function -> intrinsic rewrites. + /// Currently all library functions this pass knows how to rewrite fall into + /// this pattern. + /// RewriteLibraryCall performs the rewrite for a single library function + /// and is customized by its arguments. + /// + /// \p LibraryFunctionName Name of the library function to look for. + /// \p CorrectFunctionType is the correct type of this library function. + /// \p CallRewriter Method that rewrites the library function call into an + /// intrinsic call. + /// \p OnlyCallsAllowed Only calls to this library function are allowed. + /// \p WrapperPopulator called to populate the body of the library function + /// with a wrapped intrinsic call. + bool RewriteLibraryCall( + const char *LibraryFunctionName, + FunctionType *CorrectFunctionType, + RewriteCallFunc CallRewriter, + bool OnlyCallsAllowed, + PopulateWrapperFunc WrapperPopulator); + + /// Two function types are compatible if they have compatible return types + /// and the same number of compatible parameters. Return types and + /// parameters are compatible if they are exactly the same type or both are + /// pointer types. + static bool compatibleFunctionTypes(FunctionType *FTy1, FunctionType *FTy2); + static bool compatibleParamOrRetTypes(Type *Ty1, Type *Ty2); + + void rewriteSetjmpCall(CallInst *Call); + void rewriteLongjmpCall(CallInst *Call); + void rewriteMemcpyCall(CallInst *Call); + void rewriteMemmoveCall(CallInst *Call); + void rewriteMemsetCall(CallInst *Call); + + void populateSetjmpWrapper(Function *SetjmpFunc); + void populateLongjmpWrapper(Function *LongjmpFunc); + void populateMemcpyWrapper(Function *MemcpyFunc); + void populateMemmoveWrapper(Function *MemmoveFunc); + void populateMemsetWrapper(Function *MemsetFunc); + + /// Generic implementation of populating a wrapper function. + /// Initially, the function exists in the module as a declaration with + /// unnamed arguments. This method is called with a NULL-terminated list + /// of argument names that get assigned in the generated IR for + /// readability. + void populateWrapperCommon( + Function *Func, + StringRef FuncName, + RewriteCallFunc CallRewriter, + bool CallCannotReturn, + ...); + + /// Find and cache known intrinsics. 
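To make the wrapper mechanism concrete: when a program takes the address of longjmp (a non-call use), RewriteLibraryCall keeps the function and asks populateWrapperCommon to give it a body. The result, sketched here in illustrative IR rather than copied from a test in this patch, looks like:

// define internal void @longjmp(i64* %env, i32 %val) {
// entry:
//   %jmp_buf_i8 = bitcast i64* %env to i8*
//   call void @llvm.nacl.longjmp(i8* %jmp_buf_i8, i32 %val)
//   unreachable
// }

The self-call emitted by populateWrapperCommon is immediately rewritten by rewriteLongjmpCall, so the finished wrapper contains only the intrinsic call, and the unreachable terminator reflects CallCannotReturn.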
+ Function *findSetjmpIntrinsic(); + Function *findLongjmpIntrinsic(); + Function *findMemcpyIntrinsic(); + Function *findMemmoveIntrinsic(); + Function *findMemsetIntrinsic(); + + /// Cached data that remains the same throughout a module run. + Module *TheModule; + LLVMContext *Context; + + /// These are cached but computed lazily. + Function *SetjmpIntrinsic; + Function *LongjmpIntrinsic; + Function *MemcpyIntrinsic; + Function *MemmoveIntrinsic; + Function *MemsetIntrinsic; + }; +} + +char RewritePNaClLibraryCalls::ID = 0; +INITIALIZE_PASS(RewritePNaClLibraryCalls, "rewrite-pnacl-library-calls", + "Rewrite PNaCl library calls to stable intrinsics", + false, false) + +bool RewritePNaClLibraryCalls::RewriteLibraryCall( + const char *LibraryFunctionName, + FunctionType *CorrectFunctionType, + RewriteCallFunc CallRewriter, + bool OnlyCallsAllowed, + PopulateWrapperFunc WrapperPopulator) { + bool Changed = false; + + Function *LibFunc = TheModule->getFunction(LibraryFunctionName); + + // Iterate over all uses of this function, if it exists in the module with + // external linkage. If it exists but the linkage is not external, this may + // come from code that defines its own private function with the same name + // and doesn't actually include the standard libc header declaring it. + // In such a case we leave the code as it is. + // + // Another case we need to handle here is this function having the wrong + // prototype (incompatible with the C library function prototype, and hence + // incompatible with the intrinsic). In general, this is undefined behavior, + // but we can't fail compilation because some workflows rely on it + // compiling correctly (for example, autoconf). The solution is: + // When the declared type of the function in the module is not correct, we + // re-create the function with the correct prototype and replace all calls + // to this new function (casted to the old function type). Effectively this + // delays the undefined behavior until run-time. + if (LibFunc && LibFunc->hasExternalLinkage()) { + if (!compatibleFunctionTypes(LibFunc->getFunctionType(), + CorrectFunctionType)) { + // Use the RecreateFunction utility to create a new function with the + // correct prototype. RecreateFunction also RAUWs the function with + // proper bitcasts. + // + // One interesting case that may arise is when the original module had + // calls to both a correct and an incorrect version of the library + // function. Depending on the linking order, either version could be + // selected as the global declaration in the module, so even valid calls + // could end up being bitcast-ed from the incorrect to the correct + // function type. The RecreateFunction call below will eliminate such + // bitcasts (because the new type matches the call type), but dead + // constant expressions may be left behind. + // These are cleaned up with removeDeadConstantUsers. + Function *NewFunc = RecreateFunction(LibFunc, CorrectFunctionType); + LibFunc->eraseFromParent(); + NewFunc->setLinkage(Function::InternalLinkage); + Changed = true; + NewFunc->removeDeadConstantUsers(); + LibFunc = NewFunc; + } + + // Handle all uses that are calls. These are simply replaced with + // equivalent intrinsic calls. + { + SmallVector Calls; + for (User *U : LibFunc->users()) + // users() will also provide call instructions in which the used value + // is an argument, and not the value being called. Make sure we rewrite + // only actual calls to LibFunc here. 
+ if (CallInst *Call = dyn_cast(U)) + if (Call->getCalledValue() == LibFunc) + Calls.push_back(Call); + + for (auto Call : Calls) + (this->*(CallRewriter))(Call); + + Changed |= !Calls.empty(); + } + + if (LibFunc->use_empty()) { + LibFunc->eraseFromParent(); + } else if (OnlyCallsAllowed) { + // If additional uses remain, these aren't calls. + report_fatal_error(Twine("Taking the address of ") + + LibraryFunctionName + " is invalid"); + } else { + // If non-call uses remain and allowed for this function, populate it + // with a wrapper. + (this->*(WrapperPopulator))(LibFunc); + LibFunc->setLinkage(Function::InternalLinkage); + Changed = true; + } + } + + return Changed; +} + +bool RewritePNaClLibraryCalls::runOnModule(Module &M) { + TheModule = &M; + Context = &TheModule->getContext(); + bool Changed = false; + + Type *Int8PtrTy = Type::getInt8PtrTy(*Context); + Type *Int64PtrTy = Type::getInt64PtrTy(*Context); + Type *Int32Ty = Type::getInt32Ty(*Context); + Type *VoidTy = Type::getVoidTy(*Context); + + Type *SetjmpParams[] = { Int64PtrTy }; + FunctionType *SetjmpFunctionType = FunctionType::get(Int32Ty, SetjmpParams, + false); + Changed |= RewriteLibraryCall( + "setjmp", + SetjmpFunctionType, + &RewritePNaClLibraryCalls::rewriteSetjmpCall, + true, + &RewritePNaClLibraryCalls::populateSetjmpWrapper); + + Type *LongjmpParams[] = { Int64PtrTy, Int32Ty }; + FunctionType *LongjmpFunctionType = FunctionType::get(VoidTy, LongjmpParams, + false); + Changed |= RewriteLibraryCall( + "longjmp", + LongjmpFunctionType, + &RewritePNaClLibraryCalls::rewriteLongjmpCall, + false, + &RewritePNaClLibraryCalls::populateLongjmpWrapper); + + Type *MemsetParams[] = { Int8PtrTy, Int32Ty, Int32Ty }; + FunctionType *MemsetFunctionType = FunctionType::get(Int8PtrTy, MemsetParams, + false); + Changed |= RewriteLibraryCall( + "memset", + MemsetFunctionType, + &RewritePNaClLibraryCalls::rewriteMemsetCall, + false, + &RewritePNaClLibraryCalls::populateMemsetWrapper); + + Type *MemcpyParams[] = { Int8PtrTy, Int8PtrTy, Int32Ty }; + FunctionType *MemcpyFunctionType = FunctionType::get(Int8PtrTy, MemcpyParams, + false); + Changed |= RewriteLibraryCall( + "memcpy", + MemcpyFunctionType, + &RewritePNaClLibraryCalls::rewriteMemcpyCall, + false, + &RewritePNaClLibraryCalls::populateMemcpyWrapper); + + Type *MemmoveParams[] = { Int8PtrTy, Int8PtrTy, Int32Ty }; + FunctionType *MemmoveFunctionType = FunctionType::get(Int8PtrTy, + MemmoveParams, + false); + Changed |= RewriteLibraryCall( + "memmove", + MemmoveFunctionType, + &RewritePNaClLibraryCalls::rewriteMemmoveCall, + false, + &RewritePNaClLibraryCalls::populateMemmoveWrapper); + + return Changed; +} + +bool RewritePNaClLibraryCalls::compatibleFunctionTypes(FunctionType *FTy1, + FunctionType *FTy2) { + if (FTy1->getNumParams() != FTy2->getNumParams()) { + return false; + } + + if (!compatibleParamOrRetTypes(FTy1->getReturnType(), + FTy2->getReturnType())) { + return false; + } + + for (unsigned I = 0, End = FTy1->getNumParams(); I != End; ++I) { + if (!compatibleParamOrRetTypes(FTy1->getParamType(I), + FTy2->getParamType(I))) { + return false; + } + } + + return true; +} + +bool RewritePNaClLibraryCalls::compatibleParamOrRetTypes(Type *Ty1, + Type *Ty2) { + return (Ty1 == Ty2 || (Ty1->isPointerTy() && Ty2->isPointerTy())); +} + +void RewritePNaClLibraryCalls::rewriteSetjmpCall(CallInst *Call) { + // Find the intrinsic function. + Function *NaClSetjmpFunc = findSetjmpIntrinsic(); + // Cast the jmp_buf argument to the type NaClSetjmpCall expects. 
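In illustrative IR (not taken from a test in this patch), the rewrite performed here turns

//   %r = call i32 @setjmp(i64* %env)
// into
//   %jmp_buf_i8 = bitcast i64* %env to i8*
//   %r = call i32 @llvm.nacl.setjmp(i8* %jmp_buf_i8)

The original call is erased and its uses now point at the intrinsic call, which takes over the name and debug location of the original.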
+ Type *PtrTy = NaClSetjmpFunc->getFunctionType()->getParamType(0); + BitCastInst *JmpBufCast = new BitCastInst(Call->getArgOperand(0), PtrTy, + "jmp_buf_i8", Call); + const DebugLoc &DLoc = Call->getDebugLoc(); + JmpBufCast->setDebugLoc(DLoc); + + // Emit the updated call. + Value *Args[] = { JmpBufCast }; + CallInst *NaClSetjmpCall = CallInst::Create(NaClSetjmpFunc, Args, "", Call); + NaClSetjmpCall->setDebugLoc(DLoc); + NaClSetjmpCall->takeName(Call); + + // Replace the original call. + Call->replaceAllUsesWith(NaClSetjmpCall); + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::rewriteLongjmpCall(CallInst *Call) { + // Find the intrinsic function. + Function *NaClLongjmpFunc = findLongjmpIntrinsic(); + // Cast the jmp_buf argument to the type NaClLongjmpCall expects. + Type *PtrTy = NaClLongjmpFunc->getFunctionType()->getParamType(0); + BitCastInst *JmpBufCast = new BitCastInst(Call->getArgOperand(0), PtrTy, + "jmp_buf_i8", Call); + const DebugLoc &DLoc = Call->getDebugLoc(); + JmpBufCast->setDebugLoc(DLoc); + + // Emit the call. + Value *Args[] = { JmpBufCast, Call->getArgOperand(1) }; + CallInst *NaClLongjmpCall = CallInst::Create(NaClLongjmpFunc, Args, "", Call); + NaClLongjmpCall->setDebugLoc(DLoc); + // No takeName here since longjmp is a void call that does not get assigned to + // a value. + + // Remove the original call. There's no need for RAUW because longjmp + // returns void. + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::rewriteMemcpyCall(CallInst *Call) { + Function *MemcpyIntrinsic = findMemcpyIntrinsic(); + // dest, src, len, align, isvolatile + Value *Args[] = { Call->getArgOperand(0), + Call->getArgOperand(1), + Call->getArgOperand(2), + ConstantInt::get(Type::getInt32Ty(*Context), 1), + ConstantInt::get(Type::getInt1Ty(*Context), 0) }; + CallInst *MemcpyIntrinsicCall = CallInst::Create(MemcpyIntrinsic, + Args, "", Call); + MemcpyIntrinsicCall->setDebugLoc(Call->getDebugLoc()); + + // libc memcpy returns the source pointer, but the LLVM intrinsic doesn't; if + // the return value has actual uses, just replace them with the dest + // argument itself. + Call->replaceAllUsesWith(Call->getArgOperand(0)); + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::rewriteMemmoveCall(CallInst *Call) { + Function *MemmoveIntrinsic = findMemmoveIntrinsic(); + // dest, src, len, align, isvolatile + Value *Args[] = { Call->getArgOperand(0), + Call->getArgOperand(1), + Call->getArgOperand(2), + ConstantInt::get(Type::getInt32Ty(*Context), 1), + ConstantInt::get(Type::getInt1Ty(*Context), 0) }; + CallInst *MemmoveIntrinsicCall = CallInst::Create(MemmoveIntrinsic, + Args, "", Call); + MemmoveIntrinsicCall->setDebugLoc(Call->getDebugLoc()); + + // libc memmove returns the source pointer, but the LLVM intrinsic doesn't; if + // the return value has actual uses, just replace them with the dest + // argument itself. + Call->replaceAllUsesWith(Call->getArgOperand(0)); + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::rewriteMemsetCall(CallInst *Call) { + Function *MemsetIntrinsic = findMemsetIntrinsic(); + // libc memset has 'int c' for the filler byte, but the LLVM intrinsic uses + // a i8; truncation is required. 
+ TruncInst *ByteTrunc = new TruncInst(Call->getArgOperand(1), + Type::getInt8Ty(*Context), + "trunc_byte", Call); + + const DebugLoc &DLoc = Call->getDebugLoc(); + ByteTrunc->setDebugLoc(DLoc); + + // dest, val, len, align, isvolatile + Value *Args[] = { Call->getArgOperand(0), + ByteTrunc, + Call->getArgOperand(2), + ConstantInt::get(Type::getInt32Ty(*Context), 1), + ConstantInt::get(Type::getInt1Ty(*Context), 0) }; + CallInst *MemsetIntrinsicCall = CallInst::Create(MemsetIntrinsic, + Args, "", Call); + MemsetIntrinsicCall->setDebugLoc(DLoc); + + // libc memset returns the source pointer, but the LLVM intrinsic doesn't; if + // the return value has actual uses, just replace them with the dest + // argument itself. + Call->replaceAllUsesWith(Call->getArgOperand(0)); + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::populateWrapperCommon( + Function *Func, + StringRef FuncName, + RewriteCallFunc CallRewriter, + bool CallCannotReturn, + ...) { + if (!Func->isDeclaration()) { + report_fatal_error(Twine("Expected ") + FuncName + + " to be declared, not defined"); + } + + // Populate the function body with code. + BasicBlock *BB = BasicBlock::Create(*Context, "entry", Func); + + // Collect and name the function arguments. + Function::arg_iterator FuncArgs = Func->arg_begin(); + SmallVector Args; + va_list ap; + va_start(ap, CallCannotReturn); + while (true) { + // Iterate over the varargs until a terminated NULL is encountered. + const char *ArgName = va_arg(ap, const char *); + if (!ArgName) + break; + Value *Arg = &*FuncArgs++; + Arg->setName(ArgName); + Args.push_back(Arg); + } + va_end(ap); + + // Emit a call to self, and then call CallRewriter to rewrite it to the + // intrinsic. This is done in order to keep the call rewriting logic in a + // single place. + CallInst *SelfCall = CallInst::Create(Func, Args, "", BB); + + if (CallCannotReturn) { + new UnreachableInst(*Context, BB); + } else if (Func->getReturnType()->isVoidTy()) { + ReturnInst::Create(*Context, BB); + } else { + ReturnInst::Create(*Context, SelfCall, BB); + } + + (this->*(CallRewriter))(SelfCall); +} + +void RewritePNaClLibraryCalls::populateSetjmpWrapper(Function *SetjmpFunc) { + populateWrapperCommon( + /* Func */ SetjmpFunc, + /* FuncName */ "setjmp", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteSetjmpCall, + /* CallCannotReturn */ false, + /* ... */ "env", NULL); +} + +void RewritePNaClLibraryCalls::populateLongjmpWrapper(Function *LongjmpFunc) { + populateWrapperCommon( + /* Func */ LongjmpFunc, + /* FuncName */ "longjmp", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteLongjmpCall, + /* CallCannotReturn */ true, + /* ... */ "env", "val", NULL); +} + +void RewritePNaClLibraryCalls::populateMemcpyWrapper(Function *MemcpyFunc) { + populateWrapperCommon( + /* Func */ MemcpyFunc, + /* FuncName */ "memcpy", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteMemcpyCall, + /* CallCannotReturn */ false, + /* ... */ "dest", "src", "len", NULL); +} + +void RewritePNaClLibraryCalls::populateMemmoveWrapper(Function *MemmoveFunc) { + populateWrapperCommon( + /* Func */ MemmoveFunc, + /* FuncName */ "memmove", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteMemmoveCall, + /* CallCannotReturn */ false, + /* ... 
*/ "dest", "src", "len", NULL); +} + +void RewritePNaClLibraryCalls::populateMemsetWrapper(Function *MemsetFunc) { + populateWrapperCommon( + /* Func */ MemsetFunc, + /* FuncName */ "memset", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteMemsetCall, + /* CallCannotReturn */ false, + /* ... */ "dest", "val", "len", NULL); +} + +Function *RewritePNaClLibraryCalls::findSetjmpIntrinsic() { + if (!SetjmpIntrinsic) { + SetjmpIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::nacl_setjmp); + } + return SetjmpIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findLongjmpIntrinsic() { + if (!LongjmpIntrinsic) { + LongjmpIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::nacl_longjmp); + } + return LongjmpIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findMemcpyIntrinsic() { + if (!MemcpyIntrinsic) { + Type *Tys[] = { Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt32Ty(*Context) }; + MemcpyIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::memcpy, Tys); + } + return MemcpyIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findMemmoveIntrinsic() { + if (!MemmoveIntrinsic) { + Type *Tys[] = { Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt32Ty(*Context) }; + MemmoveIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::memmove, Tys); + } + return MemmoveIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findMemsetIntrinsic() { + if (!MemsetIntrinsic) { + Type *Tys[] = { Type::getInt8PtrTy(*Context), Type::getInt32Ty(*Context) }; + MemsetIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::memset, Tys); + } + return MemsetIntrinsic; +} + +ModulePass *llvm::createRewritePNaClLibraryCallsPass() { + return new RewritePNaClLibraryCalls(); +} diff --git a/lib/Target/JSBackend/NaCl/SimplifiedFuncTypeMap.cpp b/lib/Target/JSBackend/NaCl/SimplifiedFuncTypeMap.cpp new file mode 100644 index 000000000000..5e09e1ac4c29 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/SimplifiedFuncTypeMap.cpp @@ -0,0 +1,140 @@ +//===-- SimplifiedFuncTypeMap.cpp - Consistent type remapping----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SimplifiedFuncTypeMap.h" + +using namespace llvm; + +Type *SimplifiedFuncTypeMap::getSimpleType(LLVMContext &Ctx, Type *Ty) { + auto Found = MappedTypes.find(Ty); + if (Found != MappedTypes.end()) { + return Found->second; + } + + StructMap Tentatives; + auto Ret = getSimpleAggregateTypeInternal(Ctx, Ty, Tentatives); + assert(Tentatives.size() == 0); + + if (!Ty->isStructTy()) { + // Structs are memoized in getSimpleAggregateTypeInternal. + MappedTypes[Ty] = Ret; + } + return Ret; +} + +// Transforms any type that could transitively reference a function pointer +// into a simplified type. +// We enter this function trying to determine the mapping of a type. Because +// of how structs are handled (not interned by llvm - see further comments +// below) we may be working with temporary types - types (pointers, for example) +// transitively referencing "tentative" structs. For that reason, we do not +// memoize anything here, except for structs. The latter is so that we avoid +// unnecessary repeated creation of types (pointers, function types, etc), +// as we try to map a given type. 
+SimplifiedFuncTypeMap::MappingResult +SimplifiedFuncTypeMap::getSimpleAggregateTypeInternal(LLVMContext &Ctx, + Type *Ty, + StructMap &Tentatives) { + // Leverage the map for types we encounter on the way. + auto Found = MappedTypes.find(Ty); + if (Found != MappedTypes.end()) { + return {Found->second, Found->second != Ty}; + } + + if (auto *OldFnTy = dyn_cast(Ty)) { + return getSimpleFuncType(Ctx, Tentatives, OldFnTy); + } + + if (auto PtrTy = dyn_cast(Ty)) { + auto NewTy = getSimpleAggregateTypeInternal( + Ctx, PtrTy->getPointerElementType(), Tentatives); + + return {NewTy->getPointerTo(PtrTy->getAddressSpace()), NewTy.isChanged()}; + } + + if (auto ArrTy = dyn_cast(Ty)) { + auto NewTy = getSimpleAggregateTypeInternal( + Ctx, ArrTy->getArrayElementType(), Tentatives); + return {ArrayType::get(NewTy, ArrTy->getArrayNumElements()), + NewTy.isChanged()}; + } + + if (auto VecTy = dyn_cast(Ty)) { + auto NewTy = getSimpleAggregateTypeInternal( + Ctx, VecTy->getVectorElementType(), Tentatives); + return {VectorType::get(NewTy, VecTy->getVectorNumElements()), + NewTy.isChanged()}; + } + + // LLVM doesn't intern identified structs (the ones with a name). This, + // together with the fact that such structs can be recursive, + // complicates things a bit. We want to make sure that we only change + // "unsimplified" structs (those that somehow reference funcs that + // are not simple). + // We don't want to change "simplified" structs, otherwise converting + // instruction types will become trickier. + if (auto StructTy = dyn_cast(Ty)) { + ParamTypeVector ElemTypes; + if (!StructTy->isLiteral()) { + // Literals - struct without a name - cannot be recursive, so we + // don't need to form tentatives. + auto Found = Tentatives.find(StructTy); + + // Having a tentative means we are in a recursion trying to map this + // particular struct, so arriving back to it is not a change. + // We will determine if this struct is actually + // changed by checking its other fields. + if (Found != Tentatives.end()) { + return {Found->second, false}; + } + // We have never seen this struct, so we start a tentative. + std::string NewName = StructTy->getStructName(); + NewName += ".simplified"; + StructType *Tentative = StructType::create(Ctx, NewName); + Tentatives[StructTy] = Tentative; + + bool Changed = isChangedStruct(Ctx, StructTy, ElemTypes, Tentatives); + + Tentatives.erase(StructTy); + // We can now decide the mapping of the struct. We will register it + // early with MappedTypes, to avoid leaking tentatives unnecessarily. + // We are leaking the created struct here, but there is no way to + // correctly delete it. + if (!Changed) { + return {MappedTypes[StructTy] = StructTy, false}; + } else { + Tentative->setBody(ElemTypes, StructTy->isPacked()); + return {MappedTypes[StructTy] = Tentative, true}; + } + } else { + bool Changed = isChangedStruct(Ctx, StructTy, ElemTypes, Tentatives); + return {MappedTypes[StructTy] = + StructType::get(Ctx, ElemTypes, StructTy->isPacked()), + Changed}; + } + } + + // Anything else stays the same. 
+ return {Ty, false}; +} + +bool SimplifiedFuncTypeMap::isChangedStruct(LLVMContext &Ctx, + StructType *StructTy, + ParamTypeVector &ElemTypes, + StructMap &Tentatives) { + bool Changed = false; + unsigned StructElemCount = StructTy->getStructNumElements(); + for (unsigned I = 0; I < StructElemCount; I++) { + auto NewElem = getSimpleAggregateTypeInternal( + Ctx, StructTy->getStructElementType(I), Tentatives); + ElemTypes.push_back(NewElem); + Changed |= NewElem.isChanged(); + } + return Changed; +} \ No newline at end of file diff --git a/lib/Target/JSBackend/NaCl/SimplifiedFuncTypeMap.h b/lib/Target/JSBackend/NaCl/SimplifiedFuncTypeMap.h new file mode 100644 index 000000000000..3847a27247ec --- /dev/null +++ b/lib/Target/JSBackend/NaCl/SimplifiedFuncTypeMap.h @@ -0,0 +1,61 @@ +//===-- SimplifiedFuncTypeMap.h - Consistent type remapping------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SIMPLIFIEDFUNCTYPEMAP_H +#define LLVM_SIMPLIFIEDFUNCTYPEMAP_H + +#include +#include "llvm/IR/DerivedTypes.h" + +namespace llvm { +// SimplifiedFuncTypeMap provides a consistent type map, given a rule +// for mapping function types - which is provided by implementing +// getSimpleFuncType. +// A few transformations require changing function types, for example +// SimplifyStructRegSignatures or PromoteIntegers. When doing so, we also +// want to change any references to function types - for example structs +// with fields typed as function pointer(s). Structs are not interned by LLVM, +// which is what SimplifiedFuncTypeMap addresses. +class SimplifiedFuncTypeMap { +public: + typedef DenseMap StructMap; + Type *getSimpleType(LLVMContext &Ctx, Type *Ty); + virtual ~SimplifiedFuncTypeMap() {} + +protected: + class MappingResult { + public: + MappingResult(Type *ATy, bool Chg) { + Ty = ATy; + Changed = Chg; + } + bool isChanged() { return Changed; } + Type *operator->() { return Ty; } + operator Type *() { return Ty; } + + private: + Type *Ty; + bool Changed; + }; + + virtual MappingResult getSimpleFuncType(LLVMContext &Ctx, + StructMap &Tentatives, + FunctionType *OldFnTy) = 0; + + typedef SmallVector ParamTypeVector; + DenseMap MappedTypes; + + MappingResult getSimpleAggregateTypeInternal(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives); + + bool isChangedStruct(LLVMContext &Ctx, StructType *StructTy, + ParamTypeVector &ElemTypes, StructMap &Tentatives); +}; +} +#endif // LLVM_SIMPLIFIEDFUNCTYPEMAP_H diff --git a/lib/Target/JSBackend/NaCl/SimplifyAllocas.cpp b/lib/Target/JSBackend/NaCl/SimplifyAllocas.cpp new file mode 100644 index 000000000000..9b9789619deb --- /dev/null +++ b/lib/Target/JSBackend/NaCl/SimplifyAllocas.cpp @@ -0,0 +1,147 @@ +//===- SimplifyAllocas.cpp - Simplify allocas to arrays of bytes --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simplify all allocas into allocas of byte arrays. 
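The effect of this pass, in illustrative IR: an alloca such as "%buf = alloca %struct.foo, i32 %n, align 8" becomes "%buf = alloca i8, i32 %bytes, align 8" (where %bytes is sizeof(%struct.foo) * %n), plus a bitcast back to %struct.foo* that takes over all uses. The standalone sketch below is an assumption about how the same rewrite could be expressed with IRBuilder; the helper name is invented, and this is not code from the pass itself, which builds the instructions directly.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper mirroring what runOnBasicBlock below does for a single
// alloca: compute the byte size, allocate a flat i8 array, and bitcast back.
static void lowerAllocaSketch(AllocaInst *Old, const DataLayout &DL) {
  IRBuilder<> B(Old);
  Type *IntPtrTy = DL.getIntPtrType(Old->getContext());
  Type *ElemTy = Old->getType()->getPointerElementType();
  Value *Bytes = B.CreateMul(
      ConstantInt::get(IntPtrTy, DL.getTypeAllocSize(ElemTy)),
      B.CreateZExtOrTrunc(Old->getArraySize(), IntPtrTy));
  AllocaInst *Flat = B.CreateAlloca(B.getInt8Ty(), Bytes);
  Flat->setAlignment(Old->getAlignment() ? Old->getAlignment()
                                         : DL.getPrefTypeAlignment(ElemTy));
  Value *Cast = B.CreateBitCast(Flat, Old->getType(), "bc");
  Flat->takeName(Old);
  Old->replaceAllUsesWith(Cast);
  Old->eraseFromParent();
}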
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +namespace { +class SimplifyAllocas : public BasicBlockPass { +public: + static char ID; // Pass identification, replacement for typeid + SimplifyAllocas() + : BasicBlockPass(ID), Initialized(false), M(nullptr), IntPtrType(nullptr), + Int8Type(nullptr), DL(nullptr) { + initializeSimplifyAllocasPass(*PassRegistry::getPassRegistry()); + } + +private: + bool Initialized; + const Module *M; + Type *IntPtrType; + Type *Int8Type; + const DataLayout *DL; + + using llvm::Pass::doInitialization; + bool doInitialization(Function &F) override { + if (!Initialized) { + M = F.getParent(); + DL = &M->getDataLayout(); + IntPtrType = DL->getIntPtrType(M->getContext()); + Int8Type = Type::getInt8Ty(M->getContext()); + Initialized = true; + return true; + } + return false; + } + + AllocaInst *findAllocaFromCast(CastInst *CInst) { + Value *Op0 = CInst->getOperand(0); + while (!llvm::isa(Op0)) { + auto *NextCast = llvm::dyn_cast(Op0); + if (NextCast && NextCast->isNoopCast(IntPtrType)) { + Op0 = NextCast->getOperand(0); + } else { + return nullptr; + } + } + return llvm::cast(Op0); + } + + bool runOnBasicBlock(BasicBlock &BB) override { + bool Changed = false; + for (BasicBlock::iterator I = BB.getFirstInsertionPt(), E = BB.end(); + I != E;) { + Instruction *Inst = &*I++; + if (AllocaInst *Alloca = dyn_cast(Inst)) { + Changed = true; + Type *ElementTy = Alloca->getType()->getPointerElementType(); + Constant *ElementSize = + ConstantInt::get(IntPtrType, DL->getTypeAllocSize(ElementTy)); + // Expand out alloca's built-in multiplication. + Value *MulSize; + if (ConstantInt *C = dyn_cast(Alloca->getArraySize())) { + const APInt Value = + C->getValue().zextOrTrunc(IntPtrType->getScalarSizeInBits()); + MulSize = ConstantExpr::getMul(ElementSize, + ConstantInt::get(IntPtrType, Value)); + } else { + Value *ArraySize = Alloca->getArraySize(); + if (ArraySize->getType() != IntPtrType) { + // We assume ArraySize is always positive, and thus is unsigned. + assert(!isa(ArraySize) || + !cast(ArraySize)->isNegative()); + ArraySize = + CastInst::CreateIntegerCast(ArraySize, IntPtrType, + /* isSigned = */ false, "", Alloca); + } + MulSize = CopyDebug( + BinaryOperator::Create(Instruction::Mul, ElementSize, ArraySize, + Alloca->getName() + ".alloca_mul", Alloca), + Alloca); + } + unsigned Alignment = Alloca->getAlignment(); + if (Alignment == 0) + Alignment = DL->getPrefTypeAlignment(ElementTy); + AllocaInst *Tmp = + new AllocaInst(Int8Type, MulSize, Alignment, "", Alloca); + CopyDebug(Tmp, Alloca); + Tmp->takeName(Alloca); + BitCastInst *BC = new BitCastInst(Tmp, Alloca->getType(), + Tmp->getName() + ".bc", Alloca); + CopyDebug(BC, Alloca); + Alloca->replaceAllUsesWith(BC); + Alloca->eraseFromParent(); + } + else if (auto *Call = dyn_cast(Inst)) { + if (Call->getIntrinsicID() == Intrinsic::dbg_declare) { + // dbg.declare's first argument is a special metadata that wraps a + // value, and RAUW works on those. It is supposed to refer to the + // alloca that represents the variable's storage, but the alloca + // simplification may have RAUWed it to use the bitcast. 
+ // Fix it up here by recreating the metadata to use the new alloca. + auto *MV = cast(Call->getArgOperand(0)); + // Sometimes dbg.declare points to an argument instead of an alloca. + if (auto *VM = dyn_cast(MV->getMetadata())) { + if (auto *CInst = dyn_cast(VM->getValue())) { + if (AllocaInst *Alloca = findAllocaFromCast(CInst)) { + Call->setArgOperand( + 0, + MetadataAsValue::get(Inst->getContext(), + ValueAsMetadata::get(Alloca))); + Changed = true; + } + } + } + } + } + } + return Changed; + } +}; +} +char SimplifyAllocas::ID = 0; + +INITIALIZE_PASS(SimplifyAllocas, "simplify-allocas", + "Simplify allocas to arrays of bytes", false, false) + +BasicBlockPass *llvm::createSimplifyAllocasPass() { + return new SimplifyAllocas(); +} diff --git a/lib/Target/JSBackend/NaCl/SimplifyStructRegSignatures.cpp b/lib/Target/JSBackend/NaCl/SimplifyStructRegSignatures.cpp new file mode 100644 index 000000000000..70d5e7763cd6 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/SimplifyStructRegSignatures.cpp @@ -0,0 +1,597 @@ +//===- SimplifyStructRegSignatures.cpp - struct regs to struct pointers----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces function signatures exposing struct registers +// to byval pointer-based signatures. +// +// There are 2 types of signatures that are thus changed: +// +// @foo(%some_struct %val) -> @foo(%some_struct* byval %val) +// and +// %someStruct @bar() -> void @bar(%someStruct* sret, ) +// +// Such function types may appear in other type declarations, for example: +// +// %a_struct = type { void (%some_struct)*, i32 } +// +// We map such types to corresponding types, mapping the function types +// appropriately: +// +// %a_struct.0 = type { void (%some_struct*)*, i32 } +//===----------------------------------------------------------------------===// +#include "SimplifiedFuncTypeMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/PassInfo.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +using namespace llvm; +namespace { +static const unsigned int TypicalFuncArity = 8; +static bool shouldPromote(const Type *Ty) { + return Ty->isAggregateType(); +} +// Utility class. For any given type, get the associated type that is free of +// struct register arguments. 
+class TypeMapper : public SimplifiedFuncTypeMap { +protected: + MappingResult getSimpleFuncType(LLVMContext &Ctx, StructMap &Tentatives, + FunctionType *OldFnTy) override { + Type *OldRetType = OldFnTy->getReturnType(); + Type *NewRetType = OldRetType; + Type *Void = Type::getVoidTy(Ctx); + ParamTypeVector NewArgs; + bool Changed = false; + // Struct register returns become the first parameter of the new FT. + // The new FT has void for the return type + if (shouldPromote(OldRetType)) { + NewRetType = Void; + Changed = true; + NewArgs.push_back(getSimpleArgumentType(Ctx, OldRetType, Tentatives)); + } + for (auto OldParam : OldFnTy->params()) { + auto NewType = getSimpleArgumentType(Ctx, OldParam, Tentatives); + Changed |= NewType.isChanged(); + NewArgs.push_back(NewType); + } + Type *NewFuncType = + FunctionType::get(NewRetType, NewArgs, OldFnTy->isVarArg()); + return {NewFuncType, Changed}; + } +private: + // Get the simplified type of a function argument. + MappingResult getSimpleArgumentType(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives) { + // struct registers become pointers to simple structs + if (shouldPromote(Ty)) { + return {PointerType::get( + getSimpleAggregateTypeInternal(Ctx, Ty, Tentatives), 0), + true}; + } + return getSimpleAggregateTypeInternal(Ctx, Ty, Tentatives); + } +}; +// This is a ModulePass because the pass recreates functions in +// order to change their signatures. +class SimplifyStructRegSignatures : public ModulePass { +public: + static char ID; + SimplifyStructRegSignatures() : ModulePass(ID) { + initializeSimplifyStructRegSignaturesPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); +private: + TypeMapper Mapper; + DenseSet FunctionsToDelete; + SetVector CallsToPatch; + SetVector InvokesToPatch; + DenseMap FunctionMap; + + struct FunctionAddressing { + Value *Temp; + Function *Old; + FunctionAddressing(Value *Temp, Function *Old) : Temp(Temp), Old(Old) {} + }; + std::vector FunctionAddressings; + + bool + simplifyFunction(LLVMContext &Ctx, Function *OldFunc); + void scheduleInstructionsForCleanup(Function *NewFunc); + template + void fixCallSite(LLVMContext &Ctx, TCall *Call, unsigned PreferredAlignment); + void fixFunctionBody(LLVMContext &Ctx, Function *OldFunc, Function *NewFunc); + template + TCall *fixCallTargetAndArguments(LLVMContext &Ctx, IRBuilder<> &Builder, + TCall *OldCall, Value *NewTarget, + FunctionType *NewType, + BasicBlock::iterator AllocaInsPoint, + Value *ExtraArg = nullptr); + void checkNoUnsupportedInstructions(LLVMContext &Ctx, Function *Fct); +}; +} +char SimplifyStructRegSignatures::ID = 0; +INITIALIZE_PASS( + SimplifyStructRegSignatures, "simplify-struct-reg-signatures", + "Simplify function signatures by removing struct register parameters", + false, false) +// Update the arg names for a newly created function. +static void UpdateArgNames(Function *OldFunc, Function *NewFunc) { + auto NewArgIter = NewFunc->arg_begin(); + if (shouldPromote(OldFunc->getReturnType())) { + NewArgIter->setName("retVal"); + NewArgIter++; + } + for (const Argument &OldArg : OldFunc->args()) { + Argument *NewArg = &*NewArgIter++; + NewArg->setName(OldArg.getName() + + (shouldPromote(OldArg.getType()) ? ".ptr" : "")); + } +} +// Replace all uses of an old value with a new one, disregarding the type. We +// correct the types after we wire the new parameters in, in fixFunctionBody. 
+static void BlindReplace(Value *Old, Value *New) { + for (auto UseIter = Old->use_begin(), E = Old->use_end(); E != UseIter;) { + Use &AUse = *(UseIter++); + AUse.set(New); + } +} +// Adapt the body of a function for the new arguments. +static void ConvertArgumentValue(Value *Old, Value *New, Instruction *InsPoint, + const bool IsAggregateToPtr) { + if (Old == New) + return; + if (Old->getType() == New->getType()) { + Old->replaceAllUsesWith(New); + New->takeName(Old); + return; + } + BlindReplace(Old, (IsAggregateToPtr + ? new LoadInst(New, Old->getName() + ".sreg", InsPoint) + : New)); +} +// Fix returns. Return true if fixes were needed. +static void FixReturn(Function *OldFunc, Function *NewFunc) { + Argument *FirstNewArg = &*NewFunc->getArgumentList().begin(); + for (auto BIter = NewFunc->begin(), LastBlock = NewFunc->end(); + LastBlock != BIter;) { + BasicBlock *BB = &*BIter++; + for (auto IIter = BB->begin(), LastI = BB->end(); LastI != IIter;) { + Instruction *Instr = &*IIter++; + if (ReturnInst *Ret = dyn_cast(Instr)) { + auto RetVal = Ret->getReturnValue(); + IRBuilder<> Builder(Ret); + StoreInst *Store = Builder.CreateStore(RetVal, FirstNewArg); + Store->setAlignment(FirstNewArg->getParamAlignment()); + Builder.CreateRetVoid(); + Ret->eraseFromParent(); + } + } + } +} +/// In the next two functions, `RetIndex` is the index of the possibly promoted +/// return. +/// Ie if the return is promoted, `RetIndex` should be `1`, else `0`. +static AttributeSet CopyRetAttributes(LLVMContext &C, const DataLayout &DL, + const AttributeSet From, Type *RetTy, + const unsigned RetIndex) { + AttributeSet NewAttrs; + if (RetIndex != 0) { + NewAttrs = NewAttrs.addAttribute(C, RetIndex, Attribute::StructRet); + NewAttrs = NewAttrs.addAttribute(C, RetIndex, Attribute::NonNull); + NewAttrs = NewAttrs.addAttribute(C, RetIndex, Attribute::NoCapture); + if (RetTy->isSized()) { + NewAttrs = NewAttrs.addDereferenceableAttr(C, RetIndex, + DL.getTypeAllocSize(RetTy)); + } + } else { + NewAttrs = NewAttrs.addAttributes(C, RetIndex, From.getRetAttributes()); + } + auto FnAttrs = From.getFnAttributes(); + if (RetIndex != 0) { + FnAttrs = FnAttrs.removeAttribute(C, AttributeSet::FunctionIndex, + Attribute::ReadOnly); + FnAttrs = FnAttrs.removeAttribute(C, AttributeSet::FunctionIndex, + Attribute::ReadNone); + } + NewAttrs = NewAttrs.addAttributes(C, AttributeSet::FunctionIndex, FnAttrs); + return NewAttrs; +} +/// Iff the argument in question was promoted, `NewArgTy` should be non-null. 
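For instance, with a promoted return the FixReturn helper above rewrites each return in the new function; in illustrative IR,

//   ret %pair %v
// becomes
//   store %pair %v, %pair* %retVal
//   ret void
// where %retVal is the prepended sret argument and the store alignment is
// taken from that argument's parameter alignment, when one is present.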
+static AttributeSet CopyArgAttributes(AttributeSet NewAttrs, LLVMContext &C, + const DataLayout &DL, + const AttributeSet From, + const unsigned OldArg, Type *NewArgTy, + const unsigned RetIndex) { + const unsigned NewIndex = RetIndex + OldArg + 1; + if (!NewArgTy) { + const unsigned OldIndex = OldArg + 1; + auto OldAttrs = From.getParamAttributes(OldIndex); + if (OldAttrs.getNumSlots() == 0) { + return NewAttrs; + } + // move the params to the new index position: + unsigned OldSlot = 0; + for (; OldSlot < OldAttrs.getNumSlots(); ++OldSlot) { + if (OldAttrs.getSlotIndex(OldSlot) == OldIndex) { + break; + } + } + assert(OldSlot != OldAttrs.getNumSlots()); + AttrBuilder B(AttributeSet(), NewIndex); + for (auto II = OldAttrs.begin(OldSlot), IE = OldAttrs.end(OldSlot); + II != IE; ++II) { + B.addAttribute(*II); + } + auto Attrs = AttributeSet::get(C, NewIndex, B); + NewAttrs = NewAttrs.addAttributes(C, NewIndex, Attrs); + return NewAttrs; + } else { + NewAttrs = NewAttrs.addAttribute(C, NewIndex, Attribute::NonNull); + NewAttrs = NewAttrs.addAttribute(C, NewIndex, Attribute::NoCapture); + NewAttrs = NewAttrs.addAttribute(C, NewIndex, Attribute::ReadOnly); + if (NewArgTy->isSized()) { + NewAttrs = NewAttrs.addDereferenceableAttr(C, NewIndex, + DL.getTypeAllocSize(NewArgTy)); + } + return NewAttrs; + } +} +// TODO (mtrofin): is this comprehensive? +template +void CopyCallAttributesAndMetadata(TCall *Orig, TCall *NewCall) { + NewCall->setCallingConv(Orig->getCallingConv()); + NewCall->setAttributes(NewCall->getAttributes().addAttributes( + Orig->getContext(), AttributeSet::FunctionIndex, + Orig->getAttributes().getFnAttributes())); + NewCall->takeName(Orig); +} +static InvokeInst *CreateCallFrom(InvokeInst *Orig, Value *Target, + ArrayRef &Args, + IRBuilder<> &Builder) { + auto Ret = Builder.CreateInvoke(Target, Orig->getNormalDest(), + Orig->getUnwindDest(), Args); + CopyCallAttributesAndMetadata(Orig, Ret); + return Ret; +} +static CallInst *CreateCallFrom(CallInst *Orig, Value *Target, + ArrayRef &Args, IRBuilder<> &Builder) { + CallInst *Ret = Builder.CreateCall(Target, Args); + Ret->setTailCallKind(Orig->getTailCallKind()); + CopyCallAttributesAndMetadata(Orig, Ret); + return Ret; +} +// Insert Alloca at a specified location (normally, beginning of function) +// to avoid memory leaks if reason for inserting the Alloca +// (typically a call/invoke) is in a loop. +static AllocaInst *InsertAllocaAtLocation(IRBuilder<> &Builder, + BasicBlock::iterator &AllocaInsPoint, + Type *ValType) { + auto SavedInsPoint = Builder.GetInsertPoint(); + Builder.SetInsertPoint(&*AllocaInsPoint); + auto *Alloca = Builder.CreateAlloca(ValType); + AllocaInsPoint = Builder.GetInsertPoint(); + Builder.SetInsertPoint(&*SavedInsPoint); + return Alloca; +} +// Fix a call site by handing return type changes and/or parameter type and +// attribute changes. 
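Concretely, in illustrative IR (not from a test in this patch), a call that both returns and passes a struct register, such as

//   %v = call %pair @getpair(%pair %a)
// is rewritten into
//   %v = alloca %pair                  ; sret slot, placed in the entry block
//   %a.ptr = alloca %pair              ; argument slot, also in the entry block
//   store %pair %a, %pair* %a.ptr
//   call void @getpair(%pair* %v, %pair* %a.ptr)
//   %v.sreg = load %pair, %pair* %v
// and every use of the original call now reads %v.sreg instead.

The new call site also carries the sret/nonnull/nocapture (and, for promoted arguments, readonly) attributes added by CopyRetAttributes and CopyArgAttributes, and the sret alloca is given the data layout's stack alignment.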
+template +void SimplifyStructRegSignatures::fixCallSite(LLVMContext &Ctx, TCall *OldCall, + unsigned PreferredAlignment) { + Value *NewTarget = OldCall->getCalledValue(); + bool IsTargetFunction = false; + if (Function *CalledFunc = dyn_cast(NewTarget)) { + NewTarget = this->FunctionMap[CalledFunc]; + IsTargetFunction = true; + } + assert(NewTarget); + auto *NewType = cast( + Mapper.getSimpleType(Ctx, NewTarget->getType())->getPointerElementType()); + IRBuilder<> Builder(OldCall); + if (!IsTargetFunction) { + NewTarget = Builder.CreateBitCast(NewTarget, NewType->getPointerTo()); + } + auto *OldRetType = OldCall->getType(); + const bool IsSRet = + !OldCall->getType()->isVoidTy() && NewType->getReturnType()->isVoidTy(); + auto AllocaInsPoint = + OldCall->getParent()->getParent()->getEntryBlock().getFirstInsertionPt(); + if (IsSRet) { + auto *Alloca = InsertAllocaAtLocation(Builder, AllocaInsPoint, OldRetType); + Alloca->takeName(OldCall); + Alloca->setAlignment(PreferredAlignment); + auto *NewCall = fixCallTargetAndArguments(Ctx, Builder, OldCall, NewTarget, + NewType, AllocaInsPoint, Alloca); + assert(NewCall); + if (auto *Invoke = dyn_cast(OldCall)) + Builder.SetInsertPoint(&*Invoke->getNormalDest()->getFirstInsertionPt()); + auto *Load = Builder.CreateLoad(Alloca, Alloca->getName() + ".sreg"); + Load->setAlignment(Alloca->getAlignment()); + OldCall->replaceAllUsesWith(Load); + } else { + auto *NewCall = fixCallTargetAndArguments(Ctx, Builder, OldCall, NewTarget, + NewType, AllocaInsPoint); + OldCall->replaceAllUsesWith(NewCall); + } + OldCall->eraseFromParent(); +} +template +TCall *SimplifyStructRegSignatures::fixCallTargetAndArguments( + LLVMContext &Ctx, IRBuilder<> &Builder, TCall *OldCall, Value *NewTarget, + FunctionType *NewType, BasicBlock::iterator AllocaInsPoint, + Value *ExtraArg) { + SmallVector NewArgs; + const DataLayout &DL = OldCall->getParent() // BB + ->getParent() // F + ->getParent() // M + ->getDataLayout(); + const AttributeSet OldSet = OldCall->getAttributes(); + unsigned argOffset = ExtraArg ? 1 : 0; + const unsigned RetSlot = AttributeSet::ReturnIndex + argOffset; + if (ExtraArg) + NewArgs.push_back(ExtraArg); + AttributeSet NewSet = + CopyRetAttributes(Ctx, DL, OldSet, OldCall->getType(), RetSlot); + // Go over the argument list used in the call/invoke, in order to + // correctly deal with varargs scenarios. + unsigned NumActualParams = OldCall->getNumArgOperands(); + unsigned VarargMark = NewType->getNumParams(); + for (unsigned ArgPos = 0; ArgPos < NumActualParams; ArgPos++) { + Use &OldArgUse = OldCall->getOperandUse(ArgPos); + Value *OldArg = OldArgUse; + Type *OldArgType = OldArg->getType(); + unsigned NewArgPos = OldArgUse.getOperandNo() + argOffset; + Type *NewArgType = NewArgPos < VarargMark ? NewType->getFunctionParamType(NewArgPos) : nullptr; + Type *InnerNewArgType = nullptr; + if (OldArgType != NewArgType && shouldPromote(OldArgType)) { + if (NewArgPos >= VarargMark) { + errs() << *OldCall << '\n'; + report_fatal_error("Aggregate register vararg is not supported"); + } + auto *Alloca = + InsertAllocaAtLocation(Builder, AllocaInsPoint, OldArgType); + Alloca->setName(OldArg->getName() + ".ptr"); + Builder.CreateStore(OldArg, Alloca); + NewArgs.push_back(Alloca); + InnerNewArgType = NewArgType->getPointerElementType(); + } else if (NewArgType && OldArgType != NewArgType && isa(OldArg)) { + // If a function pointer has a changed type due to struct reg changes, it will still have + // the wrong type here, since we may have not changed that method yet. 
We'll fix it up + // later, and meanwhile place an undef of the right type in that slot. + Value *Temp = UndefValue::get(NewArgType); + FunctionAddressings.emplace_back(Temp, cast(OldArg)); + NewArgs.push_back(Temp); + } else if (NewArgType && OldArgType != NewArgType && OldArgType->isPointerTy()) { + // This would be a function ptr or would have a function type nested in + // it. + NewArgs.push_back(Builder.CreatePointerCast(OldArg, NewArgType)); + } else { + NewArgs.push_back(OldArg); + } + NewSet = CopyArgAttributes(NewSet, Ctx, DL, OldSet, ArgPos, InnerNewArgType, + RetSlot); + } + + if (isa(NewTarget)) { + Type* NewPointerType = PointerType::get(NewType, 0); + if (NewPointerType != OldCall->getType()) { + // This is a function pointer, and it has the wrong type after our + // changes. Bitcast it. + NewTarget = Builder.CreateBitCast(NewTarget, NewPointerType, ".casttarget"); + } + } + + ArrayRef ArrRef = NewArgs; + TCall *NewCall = CreateCallFrom(OldCall, NewTarget, ArrRef, Builder); + NewCall->setAttributes(NewSet); + return NewCall; +} +void +SimplifyStructRegSignatures::scheduleInstructionsForCleanup(Function *NewFunc) { + for (auto &BBIter : NewFunc->getBasicBlockList()) { + for (auto &IIter : BBIter.getInstList()) { + if (CallInst *Call = dyn_cast(&IIter)) { + if (Function *F = dyn_cast(Call->getCalledValue())) { + if (F->isIntrinsic()) { + continue; + } + } + CallsToPatch.insert(Call); + } else if (InvokeInst *Invoke = dyn_cast(&IIter)) { + InvokesToPatch.insert(Invoke); + } + } + } +} +// Change function body in the light of type changes. +void SimplifyStructRegSignatures::fixFunctionBody(LLVMContext &Ctx, + Function *OldFunc, + Function *NewFunc) { + const DataLayout &DL = OldFunc->getParent()->getDataLayout(); + bool returnWasFixed = shouldPromote(OldFunc->getReturnType()); + const AttributeSet OldSet = OldFunc->getAttributes(); + const unsigned RetSlot = AttributeSet::ReturnIndex + (returnWasFixed ? 1 : 0); + AttributeSet NewSet = + CopyRetAttributes(Ctx, DL, OldSet, OldFunc->getReturnType(), RetSlot); + Instruction *InsPoint = &*NewFunc->begin()->begin(); + auto NewArgIter = NewFunc->arg_begin(); + // Advance one more if we used to return a struct register. + if (returnWasFixed) + NewArgIter++; + // Wire new parameters in. + unsigned ArgIndex = 0; + for (auto ArgIter = OldFunc->arg_begin(), E = OldFunc->arg_end(); + E != ArgIter; ArgIndex++) { + Argument *OldArg = &*ArgIter++; + Argument *NewArg = &*NewArgIter++; + const bool IsAggregateToPtr = + shouldPromote(OldArg->getType()) && NewArg->getType()->isPointerTy(); + if (!NewFunc->empty()) { + ConvertArgumentValue(OldArg, NewArg, InsPoint, IsAggregateToPtr); + } + Type *Inner = nullptr; + if (IsAggregateToPtr) { + Inner = NewArg->getType()->getPointerElementType(); + } + NewSet = + CopyArgAttributes(NewSet, Ctx, DL, OldSet, ArgIndex, Inner, RetSlot); + } + NewFunc->setAttributes(NewSet); + // Now fix instruction types. We know that each value could only possibly be + // of a simplified type. 
At the end of this, call sites will be invalid, but + // we handle that afterwards, to make sure we have all the functions changed + // first (so that calls have valid targets) + for (auto BBIter = NewFunc->begin(), LBlock = NewFunc->end(); + LBlock != BBIter;) { + auto Block = BBIter++; + for (auto IIter = Block->begin(), LIns = Block->end(); LIns != IIter;) { + auto Instr = IIter++; + auto *NewTy = Mapper.getSimpleType(Ctx, Instr->getType()); + Instr->mutateType(NewTy); + if (isa(Instr) || + isa(Instr)) { + continue; + } + for (unsigned OpI = 0; OpI < Instr->getNumOperands(); OpI++) { + if(Constant *C = dyn_cast(Instr->getOperand(OpI))) { + auto *NewTy = Mapper.getSimpleType(Ctx, C->getType()); + if (NewTy == C->getType()) { continue; } + const auto CastOp = CastInst::getCastOpcode(C, false, NewTy, false); + auto *NewOp = ConstantExpr::getCast(CastOp, C, NewTy); + Instr->setOperand(OpI, NewOp); + } + } + } + } + if (returnWasFixed) + FixReturn(OldFunc, NewFunc); +} +// Ensure function is simplified, returning true if the function +// had to be changed. +bool SimplifyStructRegSignatures::simplifyFunction( + LLVMContext &Ctx, Function *OldFunc) { + auto *OldFT = OldFunc->getFunctionType(); + auto *NewFT = cast(Mapper.getSimpleType(Ctx, OldFT)); + Function *&AssociatedFctLoc = FunctionMap[OldFunc]; + if (NewFT != OldFT) { + auto *NewFunc = Function::Create(NewFT, OldFunc->getLinkage()); + AssociatedFctLoc = NewFunc; + OldFunc->getParent()->getFunctionList().insert(OldFunc->getIterator(), NewFunc); + NewFunc->takeName(OldFunc); + UpdateArgNames(OldFunc, NewFunc); + NewFunc->getBasicBlockList().splice(NewFunc->begin(), + OldFunc->getBasicBlockList()); + fixFunctionBody(Ctx, OldFunc, NewFunc); + Constant *Cast = ConstantExpr::getPointerCast(NewFunc, OldFunc->getType()); + OldFunc->replaceAllUsesWith(Cast); + FunctionsToDelete.insert(OldFunc); + } else { + AssociatedFctLoc = OldFunc; + } + scheduleInstructionsForCleanup(AssociatedFctLoc); + return NewFT != OldFT; +} +bool SimplifyStructRegSignatures::runOnModule(Module &M) { + bool Changed = false; + unsigned PreferredAlignment = 0; + PreferredAlignment = M.getDataLayout().getStackAlignment(); + LLVMContext &Ctx = M.getContext(); + // Change function signatures and fix a changed function body by + // wiring the new arguments. Call sites are unchanged at this point. + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E;) { + Function *Func = &*Iter++; + if (Func->isIntrinsic()) { + // Can't rewrite intrinsics. + continue; + } + checkNoUnsupportedInstructions(Ctx, Func); + Changed |= simplifyFunction(Ctx, Func); + } + // Fix call sites. + for (auto &CallToFix : CallsToPatch) { + fixCallSite(Ctx, CallToFix, PreferredAlignment); + } + for (auto &InvokeToFix : InvokesToPatch) { + fixCallSite(Ctx, InvokeToFix, PreferredAlignment); + } + + // Update taking of a function's address from a parameter + for (auto &Addressing : FunctionAddressings) { + Value *Temp = Addressing.Temp; + Function *Old = Addressing.Old; + Function *New = FunctionMap[Old]; + assert(New); + Temp->replaceAllUsesWith(New); + } + + // Remaining uses of functions we modified (like in a global vtable) + // can be handled via a constantexpr bitcast + for (auto &Old : FunctionsToDelete) { + Function *New = FunctionMap[Old]; + assert(New); + Old->replaceAllUsesWith(ConstantExpr::getBitCast(New, Old->getType())); + } + + // Delete leftover functions - the ones with old signatures. 
+ for (auto &ToDelete : FunctionsToDelete) { + ToDelete->eraseFromParent(); + } + return Changed; +} +void +SimplifyStructRegSignatures::checkNoUnsupportedInstructions(LLVMContext &Ctx, + Function *Fct) { + for (auto &BB : Fct->getBasicBlockList()) { + for (auto &Inst : BB.getInstList()) { + if (auto *Landing = dyn_cast(&Inst)) { + auto *LType = Fct->getPersonalityFn()->getType(); + if (LType != Mapper.getSimpleType(Ctx, LType)) { + errs() << *Landing << '\n'; + report_fatal_error("Landing pads with aggregate register " + "signatures are not supported."); + } + } else if (auto *Resume = dyn_cast(&Inst)) { + auto *RType = Resume->getValue()->getType(); + if (RType != Mapper.getSimpleType(Ctx, RType)) { + errs() << *Resume << '\n'; + report_fatal_error( + "Resumes with aggregate register signatures are not supported."); + } + } + } + } +} +ModulePass *llvm::createSimplifyStructRegSignaturesPass() { + return new SimplifyStructRegSignatures(); +} diff --git a/lib/Target/JSBackend/NaCl/StripAttributes.cpp b/lib/Target/JSBackend/NaCl/StripAttributes.cpp new file mode 100644 index 000000000000..97051b277e47 --- /dev/null +++ b/lib/Target/JSBackend/NaCl/StripAttributes.cpp @@ -0,0 +1,246 @@ +//===- StripAttributes.cpp - Remove attributes not supported by PNaCl------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass strips out attributes that are not supported by PNaCl's +// stable ABI. Currently, this strips out: +// +// * Function and argument attributes from functions and function +// calls. +// * Calling conventions from functions and function calls. +// * The "align" attribute on functions. +// * The "unnamed_addr" attribute on functions and global variables. +// * The distinction between "internal" and "private" linkage. +// * "protected" and "internal" visibility of functions and globals. +// * All sections are stripped. A few sections cause warnings. +// * The arithmetic attributes "nsw", "nuw" and "exact". +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/IR/CallSite.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that it can modify attributes of global + // variables. 
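As a before/after illustration (sketched IR, not taken from a test in this patch), the pass turns a definition such as

// define private fastcc i32 @f(i32 inreg %x) alwaysinline section ".text.hot" align 16 { ... }
// into
// define internal i32 @f(i32 %x) { ... }

Call sites are likewise reset to the C calling convention with their attributes cleared, and the nsw/nuw/exact flags are dropped from arithmetic operators.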
+ class StripAttributes : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + StripAttributes() : ModulePass(ID) { + initializeStripAttributesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + }; +} + +char StripAttributes::ID = 0; +INITIALIZE_PASS(StripAttributes, "nacl-strip-attributes", + "Strip out attributes that are not part of PNaCl's ABI", + false, false) + +static void CheckAttributes(AttributeSet Attrs) { + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + for (AttributeSet::iterator Attr = Attrs.begin(Slot), E = Attrs.end(Slot); + Attr != E; ++Attr) { + if (!Attr->isEnumAttribute()) { + continue; + } + switch (Attr->getKindAsEnum()) { + // The vast majority of attributes are hints that can safely + // be removed, so don't complain if we see attributes we don't + // recognize. + default: + + // The following attributes can affect calling conventions. + // Rather than complaining, we just strip these out. + // ExpandSmallArguments should have rendered SExt/ZExt + // meaningless since the function arguments will be at least + // 32-bit. + case Attribute::InReg: + case Attribute::SExt: + case Attribute::ZExt: + // These attributes influence ABI decisions that should not be + // visible to PNaCl pexes. + case Attribute::NonLazyBind: // Only relevant to dynamic linking. + case Attribute::NoRedZone: + case Attribute::StackAlignment: + + // The following attributes are just hints, which can be + // safely removed. + case Attribute::AlwaysInline: + case Attribute::InlineHint: + case Attribute::MinSize: + case Attribute::NoAlias: + case Attribute::NoBuiltin: + case Attribute::NoCapture: + case Attribute::NoDuplicate: + case Attribute::NoImplicitFloat: + case Attribute::NoInline: + case Attribute::NoReturn: + case Attribute::OptimizeForSize: + case Attribute::ReadNone: + case Attribute::ReadOnly: + + // PNaCl does not support -fstack-protector in the translator. + case Attribute::StackProtect: + case Attribute::StackProtectReq: + case Attribute::StackProtectStrong: + // PNaCl does not support ASan in the translator. + case Attribute::SanitizeAddress: + case Attribute::SanitizeThread: + case Attribute::SanitizeMemory: + + // The Language References cites setjmp() as an example of a + // function which returns twice, and says ReturnsTwice is + // necessary to disable optimizations such as tail calls. + // However, in the PNaCl ABI, setjmp() is an intrinsic, and + // user-defined functions are not allowed to return twice. + case Attribute::ReturnsTwice: + + // NoUnwind is not a hint if it causes unwind info to be + // omitted, since this will prevent C++ exceptions from + // propagating. In the future, when PNaCl supports zero-cost + // C++ exception handling using unwind info, we might allow + // NoUnwind and UWTable. Alternatively, we might continue to + // disallow them, and just generate unwind info for all + // functions. + case Attribute::NoUnwind: + case Attribute::UWTable: + break; + + // A few attributes can change program behaviour if removed, + // so check for these. 
+ case Attribute::ByVal: + case Attribute::StructRet: + case Attribute::Alignment: + Attrs.dump(); + report_fatal_error( + "Attribute should already have been removed by ExpandByVal"); + + case Attribute::Naked: + case Attribute::Nest: + Attrs.dump(); + report_fatal_error("Unsupported attribute"); + } + } + } +} + +static const char* ShouldWarnAboutSection(const char* Section) { + static const char* SpecialSections[] = { + ".init_array", + ".init", + ".fini_array", + ".fini", + + // Java/LSB: + ".jcr", + + // LSB: + ".ctors", + ".dtors", + }; + + for (auto CheckSection : SpecialSections) { + if (strcmp(Section, CheckSection) == 0) { + return CheckSection; + } + } + + return nullptr; +} + +void stripGlobalValueAttrs(GlobalValue *GV) { + // In case source code uses __attribute__((visibility("hidden"))) or + // __attribute__((visibility("protected"))), strip these attributes. + GV->setVisibility(GlobalValue::DefaultVisibility); + + GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + + if (GV->hasSection()) { + const char *Section = GV->getSection().data(); + // check for a few special cases + if (const char *WarnSection = ShouldWarnAboutSection(Section)) { + errs() << "Warning: " << GV->getName() << + " will have its section (" << + WarnSection << ") stripped.\n"; + } + + if(GlobalObject* GO = dyn_cast(GV)) { + GO->setSection(""); + } + // Nothing we can do if GV isn't a GlobalObject. + } + + // Convert "private" linkage to "internal" to reduce the number of + // linkage types that need to be represented in PNaCl's wire format. + // + // We convert "private" to "internal" rather than vice versa because + // "private" symbols are omitted from the nexe's symbol table, which + // would get in the way of debugging when an unstripped pexe is + // translated offline. + if (GV->getLinkage() == GlobalValue::PrivateLinkage) + GV->setLinkage(GlobalValue::InternalLinkage); +} + +void stripFunctionAttrs(DataLayout *DL, Function *F) { + CheckAttributes(F->getAttributes()); + F->setAttributes(AttributeSet()); + F->setCallingConv(CallingConv::C); + F->setAlignment(0); + + for (BasicBlock &BB : *F) { + for (Instruction &I : BB) { + CallSite Call(&I); + if (Call) { + CheckAttributes(Call.getAttributes()); + Call.setAttributes(AttributeSet()); + Call.setCallingConv(CallingConv::C); + } else if (OverflowingBinaryOperator *Op = + dyn_cast(&I)) { + cast(Op)->setHasNoUnsignedWrap(false); + cast(Op)->setHasNoSignedWrap(false); + } else if (PossiblyExactOperator *Op = + dyn_cast(&I)) { + cast(Op)->setIsExact(false); + } + } + } +} + +bool StripAttributes::runOnModule(Module &M) { + DataLayout DL(&M); + for (Function &F : M) + // Avoid stripping attributes from intrinsics because the + // constructor for Functions just adds them back again. It would + // be confusing if the attributes were sometimes present on + // intrinsics and sometimes not. 
+ if (!F.isIntrinsic()) { + stripGlobalValueAttrs(&F); + stripFunctionAttrs(&DL, &F); + } + + for (GlobalVariable &GV : M.globals()) + stripGlobalValueAttrs(&GV); + + return true; +} + +ModulePass *llvm::createStripAttributesPass() { + return new StripAttributes(); +} diff --git a/lib/Target/JSBackend/NaCl/StripMetadata.cpp b/lib/Target/JSBackend/NaCl/StripMetadata.cpp new file mode 100644 index 000000000000..bf8eb92c699d --- /dev/null +++ b/lib/Target/JSBackend/NaCl/StripMetadata.cpp @@ -0,0 +1,163 @@ +//===- StripMetadata.cpp - Strip non-stable non-debug metadata ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The StripMetadata transformation strips instruction attachment +// metadata, such as !tbaa and !prof metadata. +// TODO: Strip NamedMetadata too. +// +// It does not strip debug metadata. Debug metadata is used by debug +// intrinsic functions and calls to those intrinsic functions. Use the +// -strip-debug or -strip pass to strip that instead. +// +// The goal of this pass is to reduce bitcode ABI surface area. +// We don't know yet which kind of metadata is considered stable. +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class StripMetadata : public ModulePass { + public: + static char ID; + StripMetadata() : ModulePass(ID), ShouldStripModuleFlags(false) { + initializeStripMetadataPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + + protected: + bool ShouldStripModuleFlags; + }; + + class StripModuleFlags : public StripMetadata { + public: + static char ID; + StripModuleFlags() : StripMetadata() { + initializeStripModuleFlagsPass(*PassRegistry::getPassRegistry()); + ShouldStripModuleFlags = true; + } + }; + +// In certain cases, linked bitcode files can have DISupbrogram metadata which +// points to a Function that has no dbg attachments. This causes problem later +// (e.g. in inlining). See https://llvm.org/bugs/show_bug.cgi?id=23874 +// Until that bug is fixed upstream (the fix will involve infrastructure that we +// don't have in our branch yet) we have to ensure we don't expose this case +// to further optimizations. So we'd like to strip out such debug info. +// Unfortunately once created the metadata is not easily deleted or even +// modified; the best we can easily do is to set the Function object it points +// to to null. Fortunately this is legitimate (declarations have no Function +// either) and should be workable until the fix lands. 
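+// All three passes declared in this file are normally run late in the
+// pipeline. A minimal scheduling sketch (hypothetical driver code; the real
+// pipeline is assembled by the pnacl/emscripten driver):
+//
+//   llvm::legacy::PassManager PM;
+//   PM.add(createStripMetadataPass());   // or createStripModuleFlagsPass()
+//   PM.add(createStripDanglingDISubprogramsPass());
+//   PM.run(M);
+//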
+class StripDanglingDISubprograms : public ModulePass { + public: + static char ID; + StripDanglingDISubprograms() : ModulePass(ID) { + initializeStripDanglingDISubprogramsPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override; +}; +} + +char StripMetadata::ID = 0; +INITIALIZE_PASS(StripMetadata, "strip-metadata", + "Strip all non-stable non-debug metadata from a module.", + false, false) + +char StripModuleFlags::ID = 0; +INITIALIZE_PASS(StripModuleFlags, "strip-module-flags", + "Strip all non-stable non-debug metadata from a module, " + "including the llvm.module.flags metadata.", + false, false) + +char StripDanglingDISubprograms::ID = 0; +INITIALIZE_PASS(StripDanglingDISubprograms, "strip-dangling-disubprograms", + "Strip DISubprogram metadata for functions with no debug info", + false, false) + +ModulePass *llvm::createStripMetadataPass() { + return new StripMetadata(); +} + +ModulePass *llvm::createStripModuleFlagsPass() { + return new StripModuleFlags(); +} + +ModulePass *llvm::createStripDanglingDISubprogramsPass() { + return new StripDanglingDISubprograms(); +} + +static bool IsWhitelistedMetadata(const NamedMDNode *node, + bool StripModuleFlags) { + // Leave debug metadata to the -strip-debug pass. + return (node->getName().startswith("llvm.dbg.") || + // "Debug Info Version" is in llvm.module.flags. + (!StripModuleFlags && node->getName().equals("llvm.module.flags"))); +} + +static bool DoStripMetadata(Module &M, bool StripModuleFlags) { + bool Changed = false; + + if (!StripModuleFlags) + for (Function &F : M) + for (BasicBlock &B : F) + for (Instruction &I : B) { + SmallVector, 8> InstMeta; + // Let the debug metadata be stripped by the -strip-debug pass. + I.getAllMetadataOtherThanDebugLoc(InstMeta); + for (size_t i = 0; i < InstMeta.size(); ++i) { + I.setMetadata(InstMeta[i].first, NULL); + Changed = true; + } + } + + // Strip unsupported named metadata. + SmallVector ToErase; + for (Module::NamedMDListType::iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); I != E; ++I) { + if (!IsWhitelistedMetadata(&*I, StripModuleFlags)) + ToErase.push_back(&*I); + } + for (size_t i = 0; i < ToErase.size(); ++i) + M.eraseNamedMetadata(ToErase[i]); + + return Changed; +} + +bool StripMetadata::runOnModule(Module &M) { + return DoStripMetadata(M, ShouldStripModuleFlags); +} + +static bool functionHasDbgAttachment(const Function &F) { + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + if (I.getDebugLoc()) { + return true; + } + } + } + return false; +} + +bool StripDanglingDISubprograms::runOnModule(Module &M) { + NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu"); + if (!CU_Nodes) + return false; + + return false; // TODO: we don't need this anymore +} diff --git a/lib/Target/JSBackend/OptPasses.h b/lib/Target/JSBackend/OptPasses.h new file mode 100644 index 000000000000..86e3c707e1c3 --- /dev/null +++ b/lib/Target/JSBackend/OptPasses.h @@ -0,0 +1,24 @@ +//===-- JSTargetMachine.h - TargetMachine for the JS Backend ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef OPT_PASSES_H
+#define OPT_PASSES_H
+
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+  extern FunctionPass *createEmscriptenSimplifyAllocasPass();
+  extern ModulePass *createEmscriptenRemoveLLVMAssumePass();
+  extern FunctionPass *createEmscriptenExpandBigSwitchesPass();
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/JSBackend/Relooper.cpp b/lib/Target/JSBackend/Relooper.cpp
new file mode 100644
index 000000000000..00415f2d27e9
--- /dev/null
+++ b/lib/Target/JSBackend/Relooper.cpp
@@ -0,0 +1,1432 @@
+// We are implementing the Relooper C API, so always export from this file.
+#ifndef RELOOPERDLL_EXPORTS
+#define RELOOPERDLL_EXPORTS
+#endif
+
+#include "Relooper.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <list>
+#include <stack>
+#include <string>
+
+// uncomment these to get LLVM errs() debugging support
+//#include <llvm/Support/raw_ostream.h>
+//using namespace llvm;
+
+template <class T, class U> static bool contains(const T& container, const U& contained) {
+  return container.count(contained);
+}
+
+#if DEBUG
+static void PrintDebug(const char *Format, ...);
+#define DebugDump(x, ...) Debugging::Dump(x, __VA_ARGS__)
+#else
+#define PrintDebug(x, ...)
+#define DebugDump(x, ...)
+#endif
+
+#define INDENTATION 1
+
+struct Indenter {
+  static int CurrIndent;
+
+  static void Indent() { CurrIndent++; }
+  static void Unindent() { CurrIndent--; }
+};
+
+static void PrintIndented(const char *Format, ...);
+static void PutIndented(const char *String);
+
+static char *OutputBufferRoot = NULL;
+static char *OutputBuffer = NULL;
+static int OutputBufferSize = 0;
+static int OutputBufferOwned = false;
+
+static int LeftInOutputBuffer() {
+  return OutputBufferSize - (OutputBuffer - OutputBufferRoot);
+}
+
+static bool EnsureOutputBuffer(int Needed) { // ensures the output buffer is sufficient. returns true if no problem happened
+  Needed++; // ensure the trailing \0 is not forgotten
+  int Left = LeftInOutputBuffer();
+  if (!OutputBufferOwned) {
+    assert(Needed < Left);
+  } else {
+    // we own the buffer, and can resize if necessary
+    if (Needed >= Left) {
+      int Offset = OutputBuffer - OutputBufferRoot;
+      int TotalNeeded = OutputBufferSize + Needed - Left + 10240;
+      int NewSize = OutputBufferSize;
+      while (NewSize < TotalNeeded) NewSize = NewSize + (NewSize/2);
+      //printf("resize %d => %d\n", OutputBufferSize, NewSize);
+      OutputBufferRoot = (char*)realloc(OutputBufferRoot, NewSize);
+      assert(OutputBufferRoot);
+      OutputBuffer = OutputBufferRoot + Offset;
+      OutputBufferSize = NewSize;
+      return false;
+    }
+  }
+  return true;
+}
+
+void PrintIndented(const char *Format, ...) {
+  assert(OutputBuffer);
+  EnsureOutputBuffer(Indenter::CurrIndent*INDENTATION);
+  for (int i = 0; i < Indenter::CurrIndent*INDENTATION; i++, OutputBuffer++) *OutputBuffer = ' ';
+  int Written;
+  while (1) { // write and potentially resize buffer until we have enough room
+    int Left = LeftInOutputBuffer();
+    va_list Args;
+    va_start(Args, Format);
+    Written = vsnprintf(OutputBuffer, Left, Format, Args);
+    va_end(Args);
+#ifdef _MSC_VER
+    // VC CRT specific: vsnprintf returns -1 on failure, other runtimes return the number of characters that would have been
+    // written. On VC, if we get -1, count the number of characters manually.
+ if (Written < 0) { + va_start(Args, Format); + Written = _vscprintf(Format, Args); + va_end(Args); + } +#endif + + if (EnsureOutputBuffer(Written)) break; + } + OutputBuffer += Written; +} + +void PutIndented(const char *String) { + assert(OutputBuffer); + EnsureOutputBuffer(Indenter::CurrIndent*INDENTATION); + for (int i = 0; i < Indenter::CurrIndent*INDENTATION; i++, OutputBuffer++) *OutputBuffer = ' '; + int Needed = strlen(String)+1; + EnsureOutputBuffer(Needed); + strcpy(OutputBuffer, String); + OutputBuffer += strlen(String); + *OutputBuffer++ = '\n'; + *OutputBuffer = 0; +} + +static int AsmJS = 0; + +// Indenter + +int Indenter::CurrIndent = 1; + +// Branch + +Branch::Branch(const char *ConditionInit, const char *CodeInit) : Ancestor(NULL), Labeled(true) { + Condition = ConditionInit ? strdup(ConditionInit) : NULL; + Code = CodeInit ? strdup(CodeInit) : NULL; +} + +Branch::~Branch() { + free(static_cast(const_cast(Condition))); + free(static_cast(const_cast(Code))); +} + +void Branch::Render(Block *Target, bool SetLabel) { + if (Code) PrintIndented("%s\n", Code); + if (SetLabel) PrintIndented("label = %d;\n", Target->Id); + if (Ancestor) { + if (Type == Break || Type == Continue) { + if (Labeled) { + PrintIndented("%s L%d;\n", Type == Break ? "break" : "continue", Ancestor->Id); + } else { + PrintIndented("%s;\n", Type == Break ? "break" : "continue"); + } + } + } +} + +// Block + +Block::Block(const char *CodeInit, const char *BranchVarInit) : Parent(NULL), Id(-1), IsCheckedMultipleEntry(false) { + Code = strdup(CodeInit); + BranchVar = BranchVarInit ? strdup(BranchVarInit) : NULL; +} + +Block::~Block() { + free(static_cast(const_cast(Code))); + free(static_cast(const_cast(BranchVar))); + for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin(); iter != ProcessedBranchesOut.end(); iter++) { + delete iter->second; + } +} + +void Block::AddBranchTo(Block *Target, const char *Condition, const char *Code) { + assert(!contains(BranchesOut, Target)); // cannot add more than one branch to the same target + BranchesOut[Target] = new Branch(Condition, Code); +} + +void Block::Render(bool InLoop) { + if (IsCheckedMultipleEntry && InLoop) { + PrintIndented("label = 0;\n"); + } + + if (Code) { + // Print code in an indented manner, even over multiple lines + char *Start = const_cast(Code); + while (*Start) { + char *End = strchr(Start, '\n'); + if (End) *End = 0; + PutIndented(Start); + if (End) *End = '\n'; else break; + Start = End+1; + } + } + + if (!ProcessedBranchesOut.size()) return; + + bool SetLabel = true; // in some cases it is clear we can avoid setting label, see later + bool ForceSetLabel = Shape::IsEmulated(Parent); + + // A setting of the label variable (label = x) is necessary if it can + // cause an impact. The main case is where we set label to x, then elsewhere + // we check if label is equal to that value, i.e., that label is an entry + // in a multiple block. We also need to reset the label when we enter + // that block, so that each setting is a one-time action: consider + // + // while (1) { + // if (check) label = 1; + // if (label == 1) { label = 0 } + // } + // + // (Note that this case is impossible due to fusing, but that is not + // material here.) So setting to 0 is important just to clear the 1 for + // future iterations. + // TODO: When inside a loop, if necessary clear the label variable + // once on the top, and never do settings that are in effect clears + + // Fusing: If the next is a Multiple, we can fuse it with this block. 
Note + // that we must be the Inner of a Simple, so fusing means joining a Simple + // to a Multiple. What happens there is that all options in the Multiple + // *must* appear in the Simple (the Simple is the only one reaching the + // Multiple), so we can remove the Multiple and add its independent groups + // into the Simple's branches. + MultipleShape *Fused = Shape::IsMultiple(Parent->Next); + if (Fused) { + PrintDebug("Fusing Multiple to Simple\n", 0); + Parent->Next = Parent->Next->Next; + Fused->UseSwitch = false; // TODO: emit switches here + Fused->RenderLoopPrefix(); + + // When the Multiple has the same number of groups as we have branches, + // they will all be fused, so it is safe to not set the label at all + if (SetLabel && Fused->InnerMap.size() == ProcessedBranchesOut.size()) { + SetLabel = false; + } + } + + Block *DefaultTarget(NULL); // The block we branch to without checking the condition, if none of the other conditions held. + + // Find the default target, the one without a condition + for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin(); iter != ProcessedBranchesOut.end(); iter++) { + if (!iter->second->Condition) { + assert(!DefaultTarget); // Must be exactly one default + DefaultTarget = iter->first; + } + } + assert(DefaultTarget); // Since each block *must* branch somewhere, this must be set + + bool useSwitch = BranchVar != NULL; + + if (useSwitch) { + PrintIndented("switch (%s) {\n", BranchVar); + } + + std::string RemainingConditions; + bool First = !useSwitch; // when using a switch, there is no special first + for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin();; iter++) { + Block *Target; + Branch *Details; + if (iter != ProcessedBranchesOut.end()) { + Target = iter->first; + if (Target == DefaultTarget) continue; // done at the end + Details = iter->second; + assert(Details->Condition); // must have a condition if this is not the default target + } else { + Target = DefaultTarget; + Details = ProcessedBranchesOut[DefaultTarget]; + } + bool SetCurrLabel = (SetLabel && Target->IsCheckedMultipleEntry) || ForceSetLabel; + bool HasFusedContent = Fused && contains(Fused->InnerMap, Target->Id); + bool HasContent = SetCurrLabel || Details->Type != Branch::Direct || HasFusedContent || Details->Code; + if (iter != ProcessedBranchesOut.end()) { + // If there is nothing to show in this branch, omit the condition + if (useSwitch) { + PrintIndented("%s {\n", Details->Condition); + } else { + if (HasContent) { + PrintIndented("%sif (%s) {\n", First ? 
"" : "} else ", Details->Condition); + First = false; + } else { + if (RemainingConditions.size() > 0) RemainingConditions += " && "; + RemainingConditions += "!("; + if (BranchVar) { + RemainingConditions += BranchVar; + RemainingConditions += " == "; + } + RemainingConditions += Details->Condition; + RemainingConditions += ")"; + } + } + } else { + // this is the default + if (useSwitch) { + PrintIndented("default: {\n"); + } else { + if (HasContent) { + if (RemainingConditions.size() > 0) { + if (First) { + PrintIndented("if (%s) {\n", RemainingConditions.c_str()); + First = false; + } else { + PrintIndented("} else if (%s) {\n", RemainingConditions.c_str()); + } + } else if (!First) { + PrintIndented("} else {\n"); + } + } + } + } + if (!First) Indenter::Indent(); + Details->Render(Target, SetCurrLabel); + if (HasFusedContent) { + Fused->InnerMap.find(Target->Id)->second->Render(InLoop); + } else if (Details->Type == Branch::Nested) { + // Nest the parent content here, and remove it from showing up afterwards as Next + assert(Parent->Next); + Parent->Next->Render(InLoop); + Parent->Next = NULL; + } + if (useSwitch && iter != ProcessedBranchesOut.end()) { + PrintIndented("break;\n"); + } + if (!First) Indenter::Unindent(); + if (useSwitch) { + PrintIndented("}\n"); + } + if (iter == ProcessedBranchesOut.end()) break; + } + if (!First) PrintIndented("}\n"); + + if (Fused) { + Fused->RenderLoopPostfix(); + } +} + +// MultipleShape + +void MultipleShape::RenderLoopPrefix() { + if (Breaks) { + if (UseSwitch) { + if (Labeled) { + PrintIndented("L%d: ", Id); + } + } else { + if (Labeled) { + PrintIndented("L%d: do {\n", Id); + } else { + PrintIndented("do {\n"); + } + Indenter::Indent(); + } + } +} + +void MultipleShape::RenderLoopPostfix() { + if (Breaks && !UseSwitch) { + Indenter::Unindent(); + PrintIndented("} while(0);\n"); + } +} + +void MultipleShape::Render(bool InLoop) { + RenderLoopPrefix(); + + if (!UseSwitch) { + // emit an if-else chain + bool First = true; + for (IdShapeMap::iterator iter = InnerMap.begin(); iter != InnerMap.end(); iter++) { + if (AsmJS) { + PrintIndented("%sif ((label|0) == %d) {\n", First ? "" : "else ", iter->first); + } else { + PrintIndented("%sif (label == %d) {\n", First ? 
"" : "else ", iter->first); + } + First = false; + Indenter::Indent(); + iter->second->Render(InLoop); + Indenter::Unindent(); + PrintIndented("}\n"); + } + } else { + // emit a switch + if (AsmJS) { + PrintIndented("switch (label|0) {\n"); + } else { + PrintIndented("switch (label) {\n"); + } + Indenter::Indent(); + for (IdShapeMap::iterator iter = InnerMap.begin(); iter != InnerMap.end(); iter++) { + PrintIndented("case %d: {\n", iter->first); + Indenter::Indent(); + iter->second->Render(InLoop); + PrintIndented("break;\n"); + Indenter::Unindent(); + PrintIndented("}\n"); + } + Indenter::Unindent(); + PrintIndented("}\n"); + } + + RenderLoopPostfix(); + if (Next) Next->Render(InLoop); +} + +// LoopShape + +void LoopShape::Render(bool InLoop) { + if (Labeled) { + PrintIndented("L%d: while(1) {\n", Id); + } else { + PrintIndented("while(1) {\n"); + } + Indenter::Indent(); + Inner->Render(true); + Indenter::Unindent(); + PrintIndented("}\n"); + if (Next) Next->Render(InLoop); +} + +// EmulatedShape + +void EmulatedShape::Render(bool InLoop) { + PrintIndented("label = %d;\n", Entry->Id); + if (Labeled) { + PrintIndented("L%d: ", Id); + } + PrintIndented("while(1) {\n"); + Indenter::Indent(); + PrintIndented("switch(label|0) {\n"); + Indenter::Indent(); + for (BlockSet::iterator iter = Blocks.begin(); iter != Blocks.end(); iter++) { + Block *Curr = *iter; + PrintIndented("case %d: {\n", Curr->Id); + Indenter::Indent(); + Curr->Render(InLoop); + PrintIndented("break;\n"); + Indenter::Unindent(); + PrintIndented("}\n"); + } + Indenter::Unindent(); + PrintIndented("}\n"); + Indenter::Unindent(); + PrintIndented("}\n"); + if (Next) Next->Render(InLoop); +} + +// Relooper + +Relooper::Relooper() : Root(NULL), Emulate(false), MinSize(false), BlockIdCounter(1), ShapeIdCounter(0) { // block ID 0 is reserved for clearings +} + +Relooper::~Relooper() { + for (unsigned i = 0; i < Blocks.size(); i++) delete Blocks[i]; + for (unsigned i = 0; i < Shapes.size(); i++) delete Shapes[i]; +} + +void Relooper::AddBlock(Block *New, int Id) { + New->Id = Id == -1 ? BlockIdCounter++ : Id; + Blocks.push_back(New); +} + +struct RelooperRecursor { + Relooper *Parent; + RelooperRecursor(Relooper *ParentInit) : Parent(ParentInit) {} +}; + +typedef std::list BlockList; + +void Relooper::Calculate(Block *Entry) { + // Scan and optimize the input + struct PreOptimizer : public RelooperRecursor { + PreOptimizer(Relooper *Parent) : RelooperRecursor(Parent) {} + BlockSet Live; + + void FindLive(Block *Root) { + BlockList ToInvestigate; + ToInvestigate.push_back(Root); + while (ToInvestigate.size() > 0) { + Block *Curr = ToInvestigate.front(); + ToInvestigate.pop_front(); + if (contains(Live, Curr)) continue; + Live.insert(Curr); + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + ToInvestigate.push_back(iter->first); + } + } + } + + // If a block has multiple entries but no exits, and it is small enough, it is useful to split it. + // A common example is a C++ function where everything ends up at a final exit block and does some + // RAII cleanup. 
Without splitting, we will be forced to introduce labelled loops to allow + // reaching the final block + void SplitDeadEnds() { + unsigned TotalCodeSize = 0; + for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) { + Block *Curr = *iter; + TotalCodeSize += strlen(Curr->Code); + } + BlockSet Splits; + BlockSet Removed; + //DebugDump(Live, "before"); + for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) { + Block *Original = *iter; + if (Original->BranchesIn.size() <= 1 || Original->BranchesOut.size() > 0) continue; // only dead ends, for now + if (contains(Original->BranchesOut, Original)) continue; // cannot split a looping node + if (strlen(Original->Code)*(Original->BranchesIn.size()-1) > TotalCodeSize/5) continue; // if splitting increases raw code size by a significant amount, abort + // Split the node (for simplicity, we replace all the blocks, even though we could have reused the original) + PrintDebug("Splitting block %d\n", Original->Id); + for (BlockSet::iterator iter = Original->BranchesIn.begin(); iter != Original->BranchesIn.end(); iter++) { + Block *Prior = *iter; + Block *Split = new Block(Original->Code, Original->BranchVar); + Parent->AddBlock(Split, Original->Id); + Split->BranchesIn.insert(Prior); + Branch *Details = Prior->BranchesOut[Original]; + Prior->BranchesOut[Split] = new Branch(Details->Condition, Details->Code); + delete Details; + Prior->BranchesOut.erase(Original); + for (BlockBranchMap::iterator iter = Original->BranchesOut.begin(); iter != Original->BranchesOut.end(); iter++) { + Block *Post = iter->first; + Branch *Details = iter->second; + Split->BranchesOut[Post] = new Branch(Details->Condition, Details->Code); + Post->BranchesIn.insert(Split); + } + Splits.insert(Split); + Removed.insert(Original); + } + for (BlockBranchMap::iterator iter = Original->BranchesOut.begin(); iter != Original->BranchesOut.end(); iter++) { + Block *Post = iter->first; + Post->BranchesIn.erase(Original); + } + //DebugDump(Live, "mid"); + } + for (BlockSet::iterator iter = Splits.begin(); iter != Splits.end(); iter++) { + Live.insert(*iter); + } + for (BlockSet::iterator iter = Removed.begin(); iter != Removed.end(); iter++) { + Live.erase(*iter); + } + //DebugDump(Live, "after"); + } + }; + PreOptimizer Pre(this); + Pre.FindLive(Entry); + + // Add incoming branches from live blocks, ignoring dead code + for (unsigned i = 0; i < Blocks.size(); i++) { + Block *Curr = Blocks[i]; + if (!contains(Pre.Live, Curr)) continue; + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + iter->first->BranchesIn.insert(Curr); + } + } + + if (!Emulate && !MinSize) Pre.SplitDeadEnds(); + + // Recursively process the graph + + struct Analyzer : public RelooperRecursor { + Analyzer(Relooper *Parent) : RelooperRecursor(Parent) {} + + // Add a shape to the list of shapes in this Relooper calculation + void Notice(Shape *New) { + New->Id = Parent->ShapeIdCounter++; + Parent->Shapes.push_back(New); + } + + // Create a list of entries from a block. 
If LimitTo is provided, only results in that set + // will appear + void GetBlocksOut(Block *Source, BlockSet& Entries, BlockSet *LimitTo=NULL) { + for (BlockBranchMap::iterator iter = Source->BranchesOut.begin(); iter != Source->BranchesOut.end(); iter++) { + if (!LimitTo || contains(*LimitTo, iter->first)) { + Entries.insert(iter->first); + } + } + } + + // Converts/processes all branchings to a specific target + void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, BlockSet &From) { + PrintDebug("Solipsizing branches into %d\n", Target->Id); + DebugDump(From, " relevant to solipsize: "); + for (BlockSet::iterator iter = Target->BranchesIn.begin(); iter != Target->BranchesIn.end();) { + Block *Prior = *iter; + if (!contains(From, Prior)) { + iter++; + continue; + } + Branch *PriorOut = Prior->BranchesOut[Target]; + PriorOut->Ancestor = Ancestor; + PriorOut->Type = Type; + if (MultipleShape *Multiple = Shape::IsMultiple(Ancestor)) { + Multiple->Breaks++; // We are breaking out of this Multiple, so need a loop + } + iter++; // carefully increment iter before erasing + Target->BranchesIn.erase(Prior); + Target->ProcessedBranchesIn.insert(Prior); + Prior->BranchesOut.erase(Target); + Prior->ProcessedBranchesOut[Target] = PriorOut; + PrintDebug(" eliminated branch from %d\n", Prior->Id); + } + } + + Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { + PrintDebug("creating simple block with block #%d\n", Inner->Id); + SimpleShape *Simple = new SimpleShape; + Notice(Simple); + Simple->Inner = Inner; + Inner->Parent = Simple; + if (Blocks.size() > 1) { + Blocks.erase(Inner); + GetBlocksOut(Inner, NextEntries, &Blocks); + BlockSet JustInner; + JustInner.insert(Inner); + for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) { + Solipsize(*iter, Branch::Direct, Simple, JustInner); + } + } + return Simple; + } + + Shape *MakeEmulated(BlockSet &Blocks, Block *Entry, BlockSet &NextEntries) { + PrintDebug("creating emulated block with entry #%d and everything it can reach, %d blocks\n", Entry->Id, Blocks.size()); + EmulatedShape *Emulated = new EmulatedShape; + Notice(Emulated); + Emulated->Entry = Entry; + for (BlockSet::iterator iter = Blocks.begin(); iter != Blocks.end(); iter++) { + Block *Curr = *iter; + Emulated->Blocks.insert(Curr); + Curr->Parent = Emulated; + Solipsize(Curr, Branch::Continue, Emulated, Blocks); + } + Blocks.clear(); + return Emulated; + } + + Shape *MakeLoop(BlockSet &Blocks, BlockSet& Entries, BlockSet &NextEntries) { + // Find the inner blocks in this loop. Proceed backwards from the entries until + // you reach a seen block, collecting as you go. + BlockSet InnerBlocks; + BlockSet Queue = Entries; + while (Queue.size() > 0) { + Block *Curr = *(Queue.begin()); + Queue.erase(Queue.begin()); + if (!contains(InnerBlocks, Curr)) { + // This element is new, mark it as inner and remove from outer + InnerBlocks.insert(Curr); + Blocks.erase(Curr); + // Add the elements prior to it + for (BlockSet::iterator iter = Curr->BranchesIn.begin(); iter != Curr->BranchesIn.end(); iter++) { + Queue.insert(*iter); + } +#if 0 + // Add elements it leads to, if they are dead ends. 
There is no reason not to hoist dead ends + // into loops, as it can avoid multiple entries after the loop + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + Block *Target = iter->first; + if (Target->BranchesIn.size() <= 1 && Target->BranchesOut.size() == 0) { + Queue.insert(Target); + } + } +#endif + } + } + assert(InnerBlocks.size() > 0); + + for (BlockSet::iterator iter = InnerBlocks.begin(); iter != InnerBlocks.end(); iter++) { + Block *Curr = *iter; + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + Block *Possible = iter->first; + if (!contains(InnerBlocks, Possible)) { + NextEntries.insert(Possible); + } + } + } + +#if 0 + // We can avoid multiple next entries by hoisting them into the loop. + if (NextEntries.size() > 1) { + BlockBlockSetMap IndependentGroups; + FindIndependentGroups(NextEntries, IndependentGroups, &InnerBlocks); + + while (IndependentGroups.size() > 0 && NextEntries.size() > 1) { + Block *Min = NULL; + int MinSize = 0; + for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) { + Block *Entry = iter->first; + BlockSet &Blocks = iter->second; + if (!Min || Blocks.size() < MinSize) { // TODO: code size, not # of blocks + Min = Entry; + MinSize = Blocks.size(); + } + } + // check how many new entries this would cause + BlockSet &Hoisted = IndependentGroups[Min]; + bool abort = false; + for (BlockSet::iterator iter = Hoisted.begin(); iter != Hoisted.end() && !abort; iter++) { + Block *Curr = *iter; + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + Block *Target = iter->first; + if (!contains(Hoisted, Target) && !contains(NextEntries, Target)) { + // abort this hoisting + abort = true; + break; + } + } + } + if (abort) { + IndependentGroups.erase(Min); + continue; + } + // hoist this entry + PrintDebug("hoisting %d into loop\n", Min->Id); + NextEntries.erase(Min); + for (BlockSet::iterator iter = Hoisted.begin(); iter != Hoisted.end(); iter++) { + Block *Curr = *iter; + InnerBlocks.insert(Curr); + Blocks.erase(Curr); + } + IndependentGroups.erase(Min); + } + } +#endif + + PrintDebug("creating loop block:\n", 0); + DebugDump(InnerBlocks, " inner blocks:"); + DebugDump(Entries, " inner entries:"); + DebugDump(Blocks, " outer blocks:"); + DebugDump(NextEntries, " outer entries:"); + + LoopShape *Loop = new LoopShape(); + Notice(Loop); + + // Solipsize the loop, replacing with break/continue and marking branches as Processed (will not affect later calculations) + // A. Branches to the loop entries become a continue to this shape + for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { + Solipsize(*iter, Branch::Continue, Loop, InnerBlocks); + } + // B. Branches to outside the loop (a next entry) become breaks on this shape + for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) { + Solipsize(*iter, Branch::Break, Loop, InnerBlocks); + } + // Finish up + Shape *Inner = Process(InnerBlocks, Entries, NULL); + Loop->Inner = Inner; + return Loop; + } + + // For each entry, find the independent group reachable by it. The independent group is + // the entry itself, plus all the blocks it can reach that cannot be directly reached by another entry. Note that we + // ignore directly reaching the entry itself by another entry. 
+ // @param Ignore - previous blocks that are irrelevant + void FindIndependentGroups(BlockSet &Entries, BlockBlockSetMap& IndependentGroups, BlockSet *Ignore=NULL) { + typedef std::map BlockBlockMap; + + struct HelperClass { + BlockBlockSetMap& IndependentGroups; + BlockBlockMap Ownership; // For each block, which entry it belongs to. We have reached it from there. + + HelperClass(BlockBlockSetMap& IndependentGroupsInit) : IndependentGroups(IndependentGroupsInit) {} + void InvalidateWithChildren(Block *New) { // TODO: rename New + BlockList ToInvalidate; // Being in the list means you need to be invalidated + ToInvalidate.push_back(New); + while (ToInvalidate.size() > 0) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Block *Owner = Ownership[Invalidatee]; + if (contains(IndependentGroups, Owner)) { // Owner may have been invalidated, do not add to IndependentGroups! + IndependentGroups[Owner].erase(Invalidatee); + } + if (Ownership[Invalidatee]) { // may have been seen before and invalidated already + Ownership[Invalidatee] = NULL; + for (BlockBranchMap::iterator iter = Invalidatee->BranchesOut.begin(); iter != Invalidatee->BranchesOut.end(); iter++) { + Block *Target = iter->first; + BlockBlockMap::iterator Known = Ownership.find(Target); + if (Known != Ownership.end()) { + Block *TargetOwner = Known->second; + if (TargetOwner) { + ToInvalidate.push_back(Target); + } + } + } + } + } + } + }; + HelperClass Helper(IndependentGroups); + + // We flow out from each of the entries, simultaneously. + // When we reach a new block, we add it as belonging to the one we got to it from. + // If we reach a new block that is already marked as belonging to someone, it is reachable by + // two entries and is not valid for any of them. Remove it and all it can reach that have been + // visited. + + BlockList Queue; // Being in the queue means we just added this item, and we need to add its children + for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { + Block *Entry = *iter; + Helper.Ownership[Entry] = Entry; + IndependentGroups[Entry].insert(Entry); + Queue.push_back(Entry); + } + while (Queue.size() > 0) { + Block *Curr = Queue.front(); + Queue.pop_front(); + Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership map if we are in the queue + if (!Owner) continue; // we have been invalidated meanwhile after being reached from two entries + // Add all children + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + Block *New = iter->first; + BlockBlockMap::iterator Known = Helper.Ownership.find(New); + if (Known == Helper.Ownership.end()) { + // New node. Add it, and put it in the queue + Helper.Ownership[New] = Owner; + IndependentGroups[Owner].insert(New); + Queue.push_back(New); + continue; + } + Block *NewOwner = Known->second; + if (!NewOwner) continue; // We reached an invalidated node + if (NewOwner != Owner) { + // Invalidate this and all reachable that we have seen - we reached this from two locations + Helper.InvalidateWithChildren(New); + } + // otherwise, we have the same owner, so do nothing + } + } + + // Having processed all the interesting blocks, we remain with just one potential issue: + // If a->b, and a was invalidated, but then b was later reached by someone else, we must + // invalidate b. 
To check for this, we go over all elements in the independent groups, + // if an element has a parent which does *not* have the same owner, we must remove it + // and all its children. + + for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { + BlockSet &CurrGroup = IndependentGroups[*iter]; + BlockList ToInvalidate; + for (BlockSet::iterator iter = CurrGroup.begin(); iter != CurrGroup.end(); iter++) { + Block *Child = *iter; + for (BlockSet::iterator iter = Child->BranchesIn.begin(); iter != Child->BranchesIn.end(); iter++) { + Block *Parent = *iter; + if (Ignore && contains(*Ignore, Parent)) continue; + if (Helper.Ownership[Parent] != Helper.Ownership[Child]) { + ToInvalidate.push_back(Child); + } + } + } + while (ToInvalidate.size() > 0) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Helper.InvalidateWithChildren(Invalidatee); + } + } + + // Remove empty groups + for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { + if (IndependentGroups[*iter].size() == 0) { + IndependentGroups.erase(*iter); + } + } + +#if DEBUG + PrintDebug("Investigated independent groups:\n"); + for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) { + DebugDump(iter->second, " group: "); + } +#endif + } + + Shape *MakeMultiple(BlockSet &Blocks, BlockSet& Entries, BlockBlockSetMap& IndependentGroups, Shape *Prev, BlockSet &NextEntries) { + PrintDebug("creating multiple block with %d inner groups\n", IndependentGroups.size()); + bool Fused = !!(Shape::IsSimple(Prev)); + MultipleShape *Multiple = new MultipleShape(); + Notice(Multiple); + BlockSet CurrEntries; + for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) { + Block *CurrEntry = iter->first; + BlockSet &CurrBlocks = iter->second; + PrintDebug(" multiple group with entry %d:\n", CurrEntry->Id); + DebugDump(CurrBlocks, " "); + // Create inner block + CurrEntries.clear(); + CurrEntries.insert(CurrEntry); + for (BlockSet::iterator iter = CurrBlocks.begin(); iter != CurrBlocks.end(); iter++) { + Block *CurrInner = *iter; + // Remove the block from the remaining blocks + Blocks.erase(CurrInner); + // Find new next entries and fix branches to them + for (BlockBranchMap::iterator iter = CurrInner->BranchesOut.begin(); iter != CurrInner->BranchesOut.end();) { + Block *CurrTarget = iter->first; + BlockBranchMap::iterator Next = iter; + Next++; + if (!contains(CurrBlocks, CurrTarget)) { + NextEntries.insert(CurrTarget); + Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); + } + iter = Next; // increment carefully because Solipsize can remove us + } + } + Multiple->InnerMap[CurrEntry->Id] = Process(CurrBlocks, CurrEntries, NULL); + // If we are not fused, then our entries will actually be checked + if (!Fused) { + CurrEntry->IsCheckedMultipleEntry = true; + } + } + DebugDump(Blocks, " remaining blocks after multiple:"); + // Add entries not handled as next entries, they are deferred + for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { + Block *Entry = *iter; + if (!contains(IndependentGroups, Entry)) { + NextEntries.insert(Entry); + } + } + // The multiple has been created, we can decide how to implement it + if (Multiple->InnerMap.size() >= 10) { + Multiple->UseSwitch = true; + Multiple->Breaks++; // switch captures breaks + } + return Multiple; + } + + // Main function. 
+ // Process a set of blocks with specified entries, returns a shape + // The Make* functions receive a NextEntries. If they fill it with data, those are the entries for the + // ->Next block on them, and the blocks are what remains in Blocks (which Make* modify). In this way + // we avoid recursing on Next (imagine a long chain of Simples, if we recursed we could blow the stack). + Shape *Process(BlockSet &Blocks, BlockSet& InitialEntries, Shape *Prev) { + PrintDebug("Process() called\n", 0); + BlockSet *Entries = &InitialEntries; + BlockSet TempEntries[2]; + int CurrTempIndex = 0; + BlockSet *NextEntries; + Shape *Ret = NULL; + #define Make(call) \ + Shape *Temp = call; \ + if (Prev) Prev->Next = Temp; \ + if (!Ret) Ret = Temp; \ + if (!NextEntries->size()) { PrintDebug("Process() returning\n", 0); return Ret; } \ + Prev = Temp; \ + Entries = NextEntries; \ + continue; + while (1) { + PrintDebug("Process() running\n", 0); + DebugDump(Blocks, " blocks : "); + DebugDump(*Entries, " entries: "); + + CurrTempIndex = 1-CurrTempIndex; + NextEntries = &TempEntries[CurrTempIndex]; + NextEntries->clear(); + + if (Entries->size() == 0) return Ret; + if (Entries->size() == 1) { + Block *Curr = *(Entries->begin()); + if (Parent->Emulate) { + Make(MakeEmulated(Blocks, Curr, *NextEntries)); + } + if (Curr->BranchesIn.size() == 0) { + // One entry, no looping ==> Simple + Make(MakeSimple(Blocks, Curr, *NextEntries)); + } + // One entry, looping ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + } + + // More than one entry, try to eliminate through a Multiple groups of + // independent blocks from an entry/ies. It is important to remove through + // multiples as opposed to looping since the former is more performant. + BlockBlockSetMap IndependentGroups; + FindIndependentGroups(*Entries, IndependentGroups); + + PrintDebug("Independent groups: %d\n", IndependentGroups.size()); + + if (IndependentGroups.size() > 0) { + // We can handle a group in a multiple if its entry cannot be reached by another group. + // Note that it might be reachable by itself - a loop. But that is fine, we will create + // a loop inside the multiple block (which is the performant order to do it). + for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end();) { + Block *Entry = iter->first; + BlockSet &Group = iter->second; + BlockBlockSetMap::iterator curr = iter++; // iterate carefully, we may delete + for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); iterBranch != Entry->BranchesIn.end(); iterBranch++) { + Block *Origin = *iterBranch; + if (!contains(Group, Origin)) { + // Reached from outside the group, so we cannot handle this + PrintDebug("Cannot handle group with entry %d because of incoming branch from %d\n", Entry->Id, Origin->Id); + IndependentGroups.erase(curr); + break; + } + } + } + + // As an optimization, if we have 2 independent groups, and one is a small dead end, we can handle only that dead end. + // The other then becomes a Next - without nesting in the code and recursion in the analysis. + // TODO: if the larger is the only dead end, handle that too + // TODO: handle >2 groups + // TODO: handle not just dead ends, but also that do not branch to the NextEntries. However, must be careful + // there since we create a Next, and that Next can prevent eliminating a break (since we no longer + // naturally reach the same place), which may necessitate a one-time loop, which makes the unnesting + // pointless. 
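+        //
+        // Illustrative example (hypothetical CFG): we have two entries, A and
+        // B. A's group just does some cleanup and returns (a dead end), while
+        // B's group is the rest of the function. Handling only {A} in the
+        // Multiple lets B become a Next entry, so the output is roughly
+        //
+        //   if (label == A) { A's code }
+        //   B's code ...
+        //
+        // rather than nesting all of B's code inside the Multiple as well.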
+ if (IndependentGroups.size() == 2) { + // Find the smaller one + BlockBlockSetMap::iterator iter = IndependentGroups.begin(); + Block *SmallEntry = iter->first; + int SmallSize = iter->second.size(); + iter++; + Block *LargeEntry = iter->first; + int LargeSize = iter->second.size(); + if (SmallSize != LargeSize) { // ignore the case where they are identical - keep things symmetrical there + if (SmallSize > LargeSize) { + Block *Temp = SmallEntry; + SmallEntry = LargeEntry; + LargeEntry = Temp; // Note: we did not flip the Sizes too, they are now invalid. TODO: use the smaller size as a limit? + } + // Check if dead end + bool DeadEnd = true; + BlockSet &SmallGroup = IndependentGroups[SmallEntry]; + for (BlockSet::iterator iter = SmallGroup.begin(); iter != SmallGroup.end(); iter++) { + Block *Curr = *iter; + for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { + Block *Target = iter->first; + if (!contains(SmallGroup, Target)) { + DeadEnd = false; + break; + } + } + if (!DeadEnd) break; + } + if (DeadEnd) { + PrintDebug("Removing nesting by not handling large group because small group is dead end\n", 0); + IndependentGroups.erase(LargeEntry); + } + } + } + + PrintDebug("Handleable independent groups: %d\n", IndependentGroups.size()); + + if (IndependentGroups.size() > 0) { + // Some groups removable ==> Multiple + Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, *NextEntries)); + } + } + // No independent groups, must be loopable ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + } + } + }; + + // Main + + BlockSet AllBlocks; + for (BlockSet::iterator iter = Pre.Live.begin(); iter != Pre.Live.end(); iter++) { + Block *Curr = *iter; + AllBlocks.insert(Curr); +#if DEBUG + PrintDebug("Adding block %d (%s)\n", Curr->Id, Curr->Code); +#endif + } + + BlockSet Entries; + Entries.insert(Entry); + Root = Analyzer(this).Process(AllBlocks, Entries, NULL); + assert(Root); + + // Post optimizations + + struct PostOptimizer { + Relooper *Parent; + std::stack *Closure; + + PostOptimizer(Relooper *ParentInit) : Parent(ParentInit), Closure(NULL) {} + + #define RECURSE_Multiple(shape, func) \ + for (IdShapeMap::iterator iter = shape->InnerMap.begin(); iter != shape->InnerMap.end(); iter++) { \ + func(iter->second); \ + } + #define RECURSE_Loop(shape, func) \ + func(shape->Inner); + #define RECURSE(shape, func) RECURSE_##shape(shape, func); + + #define SHAPE_SWITCH(var, simple, multiple, loop) \ + if (SimpleShape *Simple = Shape::IsSimple(var)) { \ + (void)Simple; \ + simple; \ + } else if (MultipleShape *Multiple = Shape::IsMultiple(var)) { \ + (void)Multiple; \ + multiple; \ + } else if (LoopShape *Loop = Shape::IsLoop(var)) { \ + (void)Loop; \ + loop; \ + } + + // Find the blocks that natural control flow can get us directly to, or through a multiple that we ignore + void FollowNaturalFlow(Shape *S, BlockSet &Out) { + SHAPE_SWITCH(S, { + Out.insert(Simple->Inner); + }, { + for (IdShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) { + FollowNaturalFlow(iter->second, Out); + } + FollowNaturalFlow(Multiple->Next, Out); + }, { + FollowNaturalFlow(Loop->Inner, Out); + }); + } + + void FindNaturals(Shape *Root, Shape *Otherwise=NULL) { + if (Root->Next) { + Root->Natural = Root->Next; + FindNaturals(Root->Next, Otherwise); + } else { + Root->Natural = Otherwise; + } + + SHAPE_SWITCH(Root, { + }, { + for (IdShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != 
Multiple->InnerMap.end(); iter++) { + FindNaturals(iter->second, Root->Natural); + } + }, { + FindNaturals(Loop->Inner, Loop->Inner); + }); + } + + // Remove unneeded breaks and continues. + // A flow operation is trivially unneeded if the shape we naturally get to by normal code + // execution is the same as the flow forces us to. + void RemoveUnneededFlows(Shape *Root, Shape *Natural=NULL, LoopShape *LastLoop=NULL, unsigned Depth=0) { + BlockSet NaturalBlocks; + FollowNaturalFlow(Natural, NaturalBlocks); + Shape *Next = Root; + while (Next) { + Root = Next; + Next = NULL; + SHAPE_SWITCH(Root, { + if (Simple->Inner->BranchVar) LastLoop = NULL; // a switch clears out the loop (TODO: only for breaks, not continue) + + if (Simple->Next) { + if (!Simple->Inner->BranchVar && Simple->Inner->ProcessedBranchesOut.size() == 2 && Depth < 20) { + // If there is a next block, we already know at Simple creation time to make direct branches, + // and we can do nothing more in general. But, we try to optimize the case of a break and + // a direct: This would normally be if (break?) { break; } .. but if we + // make sure to nest the else, we can save the break, if (!break?) { .. } . This is also + // better because the more canonical nested form is easier to further optimize later. The + // downside is more nesting, which adds to size in builds with whitespace. + // Note that we avoid switches, as it complicates control flow and is not relevant + // for the common case we optimize here. + bool Found = false; + bool Abort = false; + for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) { + Block *Target = iter->first; + Branch *Details = iter->second; + if (Details->Type == Branch::Break) { + Found = true; + if (!contains(NaturalBlocks, Target)) Abort = true; + } else if (Details->Type != Branch::Direct) { + Abort = true; + } + } + if (Found && !Abort) { + for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) { + Branch *Details = iter->second; + if (Details->Type == Branch::Break) { + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = Shape::IsMultiple(Details->Ancestor)) { + Multiple->Breaks--; + } + } else { + assert(Details->Type == Branch::Direct); + Details->Type = Branch::Nested; + } + } + } + Depth++; // this optimization increases depth, for us and all our next chain (i.e., until this call returns) + } + Next = Simple->Next; + } else { + // If there is no next then Natural is where we will + // go to by doing nothing, so we can potentially optimize some branches to direct. 
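+            //
+            // For example (illustrative output): a branch rendered as
+            //
+            //   L1: do { ... break L1; ... } while(0);
+            //   <code we would fall through to anyway>
+            //
+            // can have its break turned into a Direct branch (and, once Breaks
+            // drops to 0, the do-while wrapper is no longer emitted) because
+            // the break target is exactly the natural fall-through.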
+ for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) { + Block *Target = iter->first; + Branch *Details = iter->second; + if (Details->Type != Branch::Direct && contains(NaturalBlocks, Target)) { // note: cannot handle split blocks + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = Shape::IsMultiple(Details->Ancestor)) { + Multiple->Breaks--; + } + } else if (Details->Type == Branch::Break && LastLoop && LastLoop->Natural == Details->Ancestor->Natural) { + // it is important to simplify breaks, as simpler breaks enable other optimizations + Details->Labeled = false; + if (MultipleShape *Multiple = Shape::IsMultiple(Details->Ancestor)) { + Multiple->Breaks--; + } + } + } + } + }, { + for (IdShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) { + RemoveUnneededFlows(iter->second, Multiple->Next, Multiple->Breaks ? NULL : LastLoop, Depth+1); + } + Next = Multiple->Next; + }, { + RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth+1); + Next = Loop->Next; + }); + } + } + + // After we know which loops exist, we can calculate which need to be labeled + void FindLabeledLoops(Shape *Root) { + bool First = Closure == NULL; + if (First) { + Closure = new std::stack; + } + std::stack &LoopStack = *Closure; + + Shape *Next = Root; + while (Next) { + Root = Next; + Next = NULL; + + SHAPE_SWITCH(Root, { + MultipleShape *Fused = Shape::IsMultiple(Root->Next); + // If we are fusing a Multiple with a loop into this Simple, then visit it now + if (Fused && Fused->Breaks) { + LoopStack.push(Fused); + } + if (Simple->Inner->BranchVar) { + LoopStack.push(NULL); // a switch means breaks are now useless, push a dummy + } + if (Fused) { + if (Fused->UseSwitch) { + LoopStack.push(NULL); // a switch means breaks are now useless, push a dummy + } + RECURSE_Multiple(Fused, FindLabeledLoops); + } + for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) { + Branch *Details = iter->second; + if (Details->Type == Branch::Break || Details->Type == Branch::Continue) { + assert(LoopStack.size() > 0); + if (Details->Ancestor != LoopStack.top() && Details->Labeled) { + LabeledShape *Labeled = Shape::IsLabeled(Details->Ancestor); + Labeled->Labeled = true; + } else { + Details->Labeled = false; + } + } + } + if (Fused && Fused->UseSwitch) { + LoopStack.pop(); + } + if (Simple->Inner->BranchVar) { + LoopStack.pop(); + } + if (Fused && Fused->Breaks) { + LoopStack.pop(); + } + if (Fused) { + Next = Fused->Next; + } else { + Next = Root->Next; + } + }, { + if (Multiple->Breaks) { + LoopStack.push(Multiple); + } + RECURSE(Multiple, FindLabeledLoops); + if (Multiple->Breaks) { + LoopStack.pop(); + } + Next = Root->Next; + }, { + LoopStack.push(Loop); + RECURSE(Loop, FindLabeledLoops); + LoopStack.pop(); + Next = Root->Next; + }); + } + + if (First) { + delete Closure; + } + } + + void Process(Shape *Root) { + FindNaturals(Root); + RemoveUnneededFlows(Root); + FindLabeledLoops(Root); + } + }; + + PrintDebug("=== Optimizing shapes ===\n", 0); + + PostOptimizer(this).Process(Root); +} + +void Relooper::Render() { + OutputBuffer = OutputBufferRoot; + assert(Root); + Root->Render(false); +} + +void Relooper::SetOutputBuffer(char *Buffer, int Size) { + OutputBufferRoot = OutputBuffer = Buffer; + OutputBufferSize = Size; + OutputBufferOwned = false; +} + +void Relooper::MakeOutputBuffer(int Size) { + 
if (OutputBufferRoot && OutputBufferSize >= Size && OutputBufferOwned) return; + OutputBufferRoot = OutputBuffer = (char*)malloc(Size); + OutputBufferSize = Size; + OutputBufferOwned = true; +} + +char *Relooper::GetOutputBuffer() { + return OutputBufferRoot; +} + +void Relooper::SetAsmJSMode(int On) { + AsmJS = On; +} + +#if DEBUG +// Debugging + +void Debugging::Dump(BlockSet &Blocks, const char *prefix) { + if (prefix) printf("%s ", prefix); + for (BlockSet::iterator iter = Blocks.begin(); iter != Blocks.end(); iter++) { + Block *Curr = *iter; + printf("%d:\n", Curr->Id); + for (BlockBranchMap::iterator iter2 = Curr->BranchesOut.begin(); iter2 != Curr->BranchesOut.end(); iter2++) { + Block *Other = iter2->first; + printf(" -> %d\n", Other->Id); + assert(contains(Other->BranchesIn, Curr)); + } + } +} + +void Debugging::Dump(Shape *S, const char *prefix) { + if (prefix) printf("%s ", prefix); + if (!S) { + printf(" (null)\n"); + return; + } + printf(" %d ", S->Id); + SHAPE_SWITCH(S, { + printf("<< Simple with block %d\n", Simple->Inner->Id); + }, { + printf("<< Multiple\n"); + for (IdShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) { + printf(" with entry %d\n", iter->first); + } + }, { + printf("<< Loop\n"); + }); +} + +static void PrintDebug(const char *Format, ...) { + printf("// "); + va_list Args; + va_start(Args, Format); + vprintf(Format, Args); + va_end(Args); +} +#endif + +// C API - useful for binding to other languages + +typedef std::map VoidIntMap; +VoidIntMap __blockDebugMap__; // maps block pointers in currently running code to block ids, for generated debug output + +extern "C" { + +RELOOPERDLL_API void rl_set_output_buffer(char *buffer, int size) { +#if DEBUG + printf("#include \"Relooper.h\"\n"); + printf("int main() {\n"); + printf(" char buffer[100000];\n"); + printf(" rl_set_output_buffer(buffer);\n"); +#endif + Relooper::SetOutputBuffer(buffer, size); +} + +RELOOPERDLL_API void rl_make_output_buffer(int size) { + Relooper::SetOutputBuffer((char*)malloc(size), size); +} + +RELOOPERDLL_API void rl_set_asm_js_mode(int on) { + Relooper::SetAsmJSMode(on); +} + +RELOOPERDLL_API void *rl_new_block(const char *text, const char *branch_var) { + Block *ret = new Block(text, branch_var); +#if DEBUG + printf(" void *b%d = rl_new_block(\"// code %d\");\n", ret->Id, ret->Id); + __blockDebugMap__[ret] = ret->Id; + printf(" block_map[%d] = b%d;\n", ret->Id, ret->Id); +#endif + return ret; +} + +RELOOPERDLL_API void rl_delete_block(void *block) { +#if DEBUG + printf(" rl_delete_block(block_map[%d]);\n", ((Block*)block)->Id); +#endif + delete (Block*)block; +} + +RELOOPERDLL_API void rl_block_add_branch_to(void *from, void *to, const char *condition, const char *code) { +#if DEBUG + printf(" rl_block_add_branch_to(block_map[%d], block_map[%d], %s%s%s, %s%s%s);\n", ((Block*)from)->Id, ((Block*)to)->Id, condition ? "\"" : "", condition ? condition : "NULL", condition ? "\"" : "", code ? "\"" : "", code ? code : "NULL", code ? 
"\"" : ""); +#endif + ((Block*)from)->AddBranchTo((Block*)to, condition, code); +} + +RELOOPERDLL_API void *rl_new_relooper() { +#if DEBUG + printf(" void *block_map[10000];\n"); + printf(" void *rl = rl_new_relooper();\n"); +#endif + return new Relooper; +} + +RELOOPERDLL_API void rl_delete_relooper(void *relooper) { + delete (Relooper*)relooper; +} + +RELOOPERDLL_API void rl_relooper_add_block(void *relooper, void *block) { +#if DEBUG + printf(" rl_relooper_add_block(rl, block_map[%d]);\n", ((Block*)block)->Id); +#endif + ((Relooper*)relooper)->AddBlock((Block*)block); +} + +RELOOPERDLL_API void rl_relooper_calculate(void *relooper, void *entry) { +#if DEBUG + printf(" rl_relooper_calculate(rl, block_map[%d]);\n", ((Block*)entry)->Id); + printf(" rl_relooper_render(rl);\n"); + printf(" rl_delete_relooper(rl);\n"); + printf(" puts(buffer);\n"); + printf(" return 0;\n"); + printf("}\n"); +#endif + ((Relooper*)relooper)->Calculate((Block*)entry); +} + +RELOOPERDLL_API void rl_relooper_render(void *relooper) { + ((Relooper*)relooper)->Render(); +} + +} diff --git a/lib/Target/JSBackend/Relooper.h b/lib/Target/JSBackend/Relooper.h new file mode 100644 index 000000000000..776095e4c26a --- /dev/null +++ b/lib/Target/JSBackend/Relooper.h @@ -0,0 +1,376 @@ +/* +This is an optimized C++ implemention of the Relooper algorithm originally +developed as part of Emscripten. This implementation includes optimizations +added since the original academic paper [1] was published about it, and is +written in an LLVM-friendly way with the goal of inclusion in upstream +LLVM. + +[1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In Proceedings of the ACM international conference companion on Object oriented programming systems languages and applications companion (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 http://doi.acm.org/10.1145/2048147.2048224 +*/ + +#include +#include +#include +#include + +#ifdef __cplusplus + +#include +#include +#include +#include + +struct Block; +struct Shape; + +// Info about a branching from one block to another +struct Branch { + enum FlowType { + Direct = 0, // We will directly reach the right location through other means, no need for continue or break + Break = 1, + Continue = 2, + Nested = 3 // This code is directly reached, but we must be careful to ensure it is nested in an if - it is not reached + // unconditionally, other code paths exist alongside it that we need to make sure do not intertwine + }; + Shape *Ancestor; // If not NULL, this shape is the relevant one for purposes of getting to the target block. We break or continue on it + Branch::FlowType Type; // If Ancestor is not NULL, this says whether to break or continue + bool Labeled; // If a break or continue, whether we need to use a label + const char *Condition; // The condition for which we branch. For example, "my_var == 1". Conditions are checked one by one. One of the conditions should have NULL as the condition, in which case it is the default + const char *Code; // If provided, code that is run right before the branch is taken. 
This is useful for phis + + Branch(const char *ConditionInit, const char *CodeInit=NULL); + ~Branch(); + + // Prints out the branch + void Render(Block *Target, bool SetLabel); +}; + +// like std::set, except that begin() -> end() iterates in the +// order that elements were added to the set (not in the order +// of operator<(T, T)) +template<typename T> +struct InsertOrderedSet +{ + std::map<T, typename std::list<T>::iterator> Map; + std::list<T> List; + + typedef typename std::list<T>::iterator iterator; + iterator begin() { return List.begin(); } + iterator end() { return List.end(); } + + void erase(const T& val) { + auto it = Map.find(val); + if (it != Map.end()) { + List.erase(it->second); + Map.erase(it); + } + } + + void erase(iterator position) { + Map.erase(*position); + List.erase(position); + } + + // cheating a bit, not returning the iterator + void insert(const T& val) { + auto it = Map.find(val); + if (it == Map.end()) { + List.push_back(val); + Map.insert(std::make_pair(val, --List.end())); + } + } + + size_t size() const { return Map.size(); } + + void clear() { + Map.clear(); + List.clear(); + } + + size_t count(const T& val) const { return Map.count(val); } + + InsertOrderedSet() {} + InsertOrderedSet(const InsertOrderedSet& other) { + for (auto i : other.List) { + insert(i); // inserting manually creates proper iterators + } + } + InsertOrderedSet& operator=(const InsertOrderedSet& other) { + abort(); // TODO, watch out for iterators + } +}; + +// like std::map, except that begin() -> end() iterates in the +// order that elements were added to the map (not in the order +// of operator<(Key, Key)) +template<typename Key, typename T> +struct InsertOrderedMap +{ + std::map<Key, typename std::list<std::pair<Key, T>>::iterator> Map; + std::list<std::pair<Key, T>> List; + + T& operator[](const Key& k) { + auto it = Map.find(k); + if (it == Map.end()) { + List.push_back(std::make_pair(k, T())); + auto e = --List.end(); + Map.insert(std::make_pair(k, e)); + return e->second; + } + return it->second->second; + } + + typedef typename std::list<std::pair<Key, T>>::iterator iterator; + iterator begin() { return List.begin(); } + iterator end() { return List.end(); } + + void erase(const Key& k) { + auto it = Map.find(k); + if (it != Map.end()) { + List.erase(it->second); + Map.erase(it); + } + } + + void erase(iterator position) { + erase(position->first); + } + + size_t size() const { return Map.size(); } + size_t count(const Key& k) const { return Map.count(k); } + + InsertOrderedMap() {} + InsertOrderedMap(InsertOrderedMap& other) { + abort(); // TODO, watch out for iterators + } + InsertOrderedMap& operator=(const InsertOrderedMap& other) { + abort(); // TODO, watch out for iterators + } +}; + + +typedef InsertOrderedSet<Block*> BlockSet; +typedef InsertOrderedMap<Block*, Branch*> BlockBranchMap; + +// Represents a basic block of code - some instructions that end with a +// control flow modifier (a branch, return or throw). +struct Block { + // Branches become processed after we finish the shape relevant to them. For example, + // when we recreate a loop, branches to the loop start become continues and are now + // processed. When we calculate what shape to generate from a set of blocks, we ignore + // processed branches. + // Blocks own the Branch objects they use, and destroy them when done. + BlockBranchMap BranchesOut; + BlockSet BranchesIn; + BlockBranchMap ProcessedBranchesOut; + BlockSet ProcessedBranchesIn; + Shape *Parent; // The shape we are directly inside + int Id; // A unique identifier, defined when added to relooper.
Note that this uniquely identifies a *logical* block - if we split it, the two instances have the same content *and* the same Id + const char *Code; // The string representation of the code in this block. Owning pointer (we copy the input) + const char *BranchVar; // A variable whose value determines where we go; if this is not NULL, emit a switch on that variable + bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching us requires setting the label variable + + Block(const char *CodeInit, const char *BranchVarInit); + ~Block(); + + void AddBranchTo(Block *Target, const char *Condition, const char *Code=NULL); + + // Prints out the instructions code and branchings + void Render(bool InLoop); +}; + +// Represents a structured control flow shape, one of +// +// Simple: No control flow at all, just instructions. If several +// blocks, then +// +// Multiple: A shape with more than one entry. If the next block to +// be entered is among them, we run it and continue to +// the next shape, otherwise we continue immediately to the +// next shape. +// +// Loop: An infinite loop. +// +// Emulated: Control flow is managed by a switch in a loop. This +// is necessary in some cases, for example when control +// flow is not known until runtime (indirect branches, +// setjmp returns, etc.) +// + +struct SimpleShape; +struct LabeledShape; +struct MultipleShape; +struct LoopShape; +struct EmulatedShape; + +struct Shape { + int Id; // A unique identifier. Used to identify loops, labels are Lx where x is the Id. Defined when added to relooper + Shape *Next; // The shape that will appear in the code right after this one + Shape *Natural; // The shape that control flow gets to naturally (if there is Next, then this is Next) + + enum ShapeType { + Simple, + Multiple, + Loop, + Emulated + }; + ShapeType Type; + + Shape(ShapeType TypeInit) : Id(-1), Next(NULL), Type(TypeInit) {} + virtual ~Shape() {} + + virtual void Render(bool InLoop) = 0; + + static SimpleShape *IsSimple(Shape *It) { return It && It->Type == Simple ? (SimpleShape*)It : NULL; } + static MultipleShape *IsMultiple(Shape *It) { return It && It->Type == Multiple ? (MultipleShape*)It : NULL; } + static LoopShape *IsLoop(Shape *It) { return It && It->Type == Loop ? (LoopShape*)It : NULL; } + static LabeledShape *IsLabeled(Shape *It) { return IsMultiple(It) || IsLoop(It) ? (LabeledShape*)It : NULL; } + static EmulatedShape *IsEmulated(Shape *It) { return It && It->Type == Emulated ? (EmulatedShape*)It : NULL; } +}; + +struct SimpleShape : public Shape { + Block *Inner; + + SimpleShape() : Shape(Simple), Inner(NULL) {} + void Render(bool InLoop) override { + Inner->Render(InLoop); + if (Next) Next->Render(InLoop); + } +}; + +// A shape that may be implemented with a labeled loop. +struct LabeledShape : public Shape { + bool Labeled; // If we have a loop, whether it needs to be labeled + + LabeledShape(ShapeType TypeInit) : Shape(TypeInit), Labeled(false) {} +}; + +// Blocks with the same id were split and are identical, so we just care about ids in Multiple entries +typedef std::map<int, Shape*> IdShapeMap; + +struct MultipleShape : public LabeledShape { + IdShapeMap InnerMap; // entry block ID -> shape + int Breaks; // If we have branches on us, we need a loop (or a switch).
This is a counter of requirements, + // if we optimize it to 0, the loop is unneeded + bool UseSwitch; // Whether to switch on label as opposed to an if-else chain + + MultipleShape() : LabeledShape(Multiple), Breaks(0), UseSwitch(false) {} + + void RenderLoopPrefix(); + void RenderLoopPostfix(); + + void Render(bool InLoop) override; +}; + +struct LoopShape : public LabeledShape { + Shape *Inner; + + LoopShape() : LabeledShape(Loop), Inner(NULL) {} + void Render(bool InLoop) override; +}; + +// TODO EmulatedShape is only partially functional. Currently it can be used for the +// entire set of blocks being relooped, but not subsets. +struct EmulatedShape : public LabeledShape { + Block *Entry; + BlockSet Blocks; + + EmulatedShape() : LabeledShape(Emulated) { Labeled = true; } + void Render(bool InLoop) override; +}; + +// Implements the relooper algorithm for a function's blocks. +// +// Usage: +// 1. Instantiate this struct. +// 2. Call AddBlock with the blocks you have. Each should already +// have its branchings out specified (the branchings in will +// be calculated by the relooper). +// 3. Call Render(). +// +// Implementation details: The Relooper instance has +// ownership of the blocks and shapes, and frees them when done. +struct Relooper { + std::deque<Block*> Blocks; + std::deque<Shape*> Shapes; + Shape *Root; + bool Emulate; + bool MinSize; + int BlockIdCounter; + int ShapeIdCounter; + + Relooper(); + ~Relooper(); + + void AddBlock(Block *New, int Id=-1); + + // Calculates the shapes + void Calculate(Block *Entry); + + // Renders the result. + void Render(); + + // Sets the global buffer all printing goes to. Must call this or MakeOutputBuffer. + // XXX: this is deprecated, see MakeOutputBuffer + static void SetOutputBuffer(char *Buffer, int Size); + + // Creates an internal output buffer. Must call this or SetOutputBuffer. Size is + // a hint for the initial size of the buffer; it can be resized later on demand. + // For that reason it is recommended over SetOutputBuffer.
+ static void MakeOutputBuffer(int Size); + + static char *GetOutputBuffer(); + + // Sets asm.js mode on or off (default is off) + static void SetAsmJSMode(int On); + + // Sets whether we must emulate everything with switch-loop code + void SetEmulate(int E) { Emulate = E; } + + // Sets us to try to minimize size + void SetMinSize(bool MinSize_) { MinSize = MinSize_; } +}; + +typedef InsertOrderedMap<Block*, BlockSet> BlockBlockSetMap; + +#if DEBUG +struct Debugging { + static void Dump(BlockSet &Blocks, const char *prefix=NULL); + static void Dump(Shape *S, const char *prefix=NULL); +}; +#endif + +#endif // __cplusplus + +// C API - useful for binding to other languages + +#ifdef _WIN32 + #ifdef RELOOPERDLL_EXPORTS + #define RELOOPERDLL_API __declspec(dllexport) + #else + #define RELOOPERDLL_API __declspec(dllimport) + #endif +#else + #define RELOOPERDLL_API +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +RELOOPERDLL_API void rl_set_output_buffer(char *buffer, int size); +RELOOPERDLL_API void rl_make_output_buffer(int size); +RELOOPERDLL_API void rl_set_asm_js_mode(int on); +RELOOPERDLL_API void *rl_new_block(const char *text, const char *branch_var); +RELOOPERDLL_API void rl_delete_block(void *block); +RELOOPERDLL_API void rl_block_add_branch_to(void *from, void *to, const char *condition, const char *code); +RELOOPERDLL_API void *rl_new_relooper(); +RELOOPERDLL_API void rl_delete_relooper(void *relooper); +RELOOPERDLL_API void rl_relooper_add_block(void *relooper, void *block); +RELOOPERDLL_API void rl_relooper_calculate(void *relooper, void *entry); +RELOOPERDLL_API void rl_relooper_render(void *relooper); + +#ifdef __cplusplus +} +#endif + diff --git a/lib/Target/JSBackend/RemoveLLVMAssume.cpp b/lib/Target/JSBackend/RemoveLLVMAssume.cpp new file mode 100644 index 000000000000..e4af79747b91 --- /dev/null +++ b/lib/Target/JSBackend/RemoveLLVMAssume.cpp @@ -0,0 +1,64 @@ +//===-- RemoveLLVMAssume.cpp ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===-----------------------------------------------------------------------===// +// +//===-----------------------------------------------------------------------===// + +#include "OptPasses.h" + +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Local.h" + +namespace llvm { + +// Remove all uses of llvm.assume; we don't need them anymore +struct RemoveLLVMAssume : public ModulePass { + static char ID; // Pass identification, replacement for typeid + RemoveLLVMAssume() : ModulePass(ID) {} + // XXX initialize..(*PassRegistry::getPassRegistry()); } + + bool runOnModule(Module &M) override; + + const char *getPassName() const override { return "RemoveLLVMAssume"; } +}; + +char RemoveLLVMAssume::ID = 0; + +bool RemoveLLVMAssume::runOnModule(Module &M) { + bool Changed = false; + + Function *LLVMAssume = M.getFunction("llvm.assume"); + + if (LLVMAssume) { + SmallVector<CallInst *, 8> Assumes; + for (Instruction::user_iterator UI = LLVMAssume->user_begin(), UE = LLVMAssume->user_end(); UI != UE; ++UI) { + User *U = *UI; + if (CallInst *CI = dyn_cast<CallInst>(U)) { + Assumes.push_back(CI); + } + } + + for (auto CI : Assumes) { + Value *V = CI->getOperand(0); + CI->eraseFromParent(); + RecursivelyDeleteTriviallyDeadInstructions(V); // the single operand is likely dead + } + } + return Changed; +} + +// + +extern ModulePass *createEmscriptenRemoveLLVMAssumePass() { + return new RemoveLLVMAssume(); +} + +} // End llvm namespace diff --git a/lib/Target/JSBackend/SimplifyAllocas.cpp b/lib/Target/JSBackend/SimplifyAllocas.cpp new file mode 100644 index 000000000000..e9091c8ae15e --- /dev/null +++ b/lib/Target/JSBackend/SimplifyAllocas.cpp @@ -0,0 +1,107 @@ +//===-- SimplifyAllocas.cpp - Alloca optimization ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-----------------------------------------------------------------------===// +// +// There shouldn't be any opportunities for this pass to do anything if the +// regular LLVM optimizer passes are run. However, it does make things nicer +// at -O0. +// +//===-----------------------------------------------------------------------===// + +#include "OptPasses.h" + +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Constants.h" + +namespace llvm { + +/* + * Find cases where an alloca is used only to load and store a single value, + * even though it is bitcast. Then replace it with a direct alloca of that + * simple type, and avoid the bitcasts.
+ */ + +struct SimplifyAllocas : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + SimplifyAllocas() : FunctionPass(ID) {} + // XXX initialize..(*PassRegistry::getPassRegistry()); } + + bool runOnFunction(Function &Func) override; + + const char *getPassName() const override { return "SimplifyAllocas"; } +}; + +char SimplifyAllocas::ID = 0; + +bool SimplifyAllocas::runOnFunction(Function &Func) { + bool Changed = false; + Type *i32 = Type::getInt32Ty(Func.getContext()); + std::vector<Instruction*> ToRemove; // removing can invalidate our iterators, so do it all at the end + for (Function::iterator B = Func.begin(), E = Func.end(); B != E; ++B) { + for (BasicBlock::iterator BI = B->begin(), BE = B->end(); BI != BE; ) { + Instruction *I = &*BI++; + AllocaInst *AI = dyn_cast<AllocaInst>(I); + if (!AI) continue; + if (!isa<ConstantInt>(AI->getArraySize())) continue; + bool Fail = false; + Type *ActualType = NULL; + #define CHECK_TYPE(TT) { \ + Type *T = TT; \ + if (!ActualType) { \ + ActualType = T; \ + } else { \ + if (T != ActualType) Fail = true; \ + } \ + } + std::vector<Instruction*> Aliases; // the bitcasts of this alloca + for (Instruction::user_iterator UI = AI->user_begin(), UE = AI->user_end(); UI != UE && !Fail; ++UI) { + Instruction *U = cast<Instruction>(*UI); + if (U->getOpcode() != Instruction::BitCast) { Fail = true; break; } + // bitcasting just to do loads and stores is ok + for (Instruction::user_iterator BUI = U->user_begin(), BUE = U->user_end(); BUI != BUE && !Fail; ++BUI) { + Instruction *BU = cast<Instruction>(*BUI); + if (BU->getOpcode() == Instruction::Load) { + CHECK_TYPE(BU->getType()); + break; + } + if (BU->getOpcode() != Instruction::Store) { Fail = true; break; } + CHECK_TYPE(BU->getOperand(0)->getType()); + if (BU->getOperand(0) == U) { Fail = true; break; } + } + if (!Fail) Aliases.push_back(U); + } + if (!Fail && Aliases.size() > 0 && ActualType) { + // success, replace the alloca and the bitcast aliases with a single simple alloca + AllocaInst *NA = new AllocaInst(ActualType, ConstantInt::get(i32, 1), "", I); + NA->takeName(AI); + NA->setAlignment(AI->getAlignment()); + NA->setDebugLoc(AI->getDebugLoc()); + for (unsigned i = 0; i < Aliases.size(); i++) { + Aliases[i]->replaceAllUsesWith(NA); + ToRemove.push_back(Aliases[i]); + } + ToRemove.push_back(AI); + Changed = true; + } + } + } + for (unsigned i = 0; i < ToRemove.size(); i++) { + ToRemove[i]->eraseFromParent(); + } + return Changed; +} + +// + +extern FunctionPass *createEmscriptenSimplifyAllocasPass() { + return new SimplifyAllocas(); +} + +} // End llvm namespace diff --git a/lib/Target/JSBackend/TargetInfo/CMakeLists.txt b/lib/Target/JSBackend/TargetInfo/CMakeLists.txt new file mode 100644 index 000000000000..29994eb8f95e --- /dev/null +++ b/lib/Target/JSBackend/TargetInfo/CMakeLists.txt @@ -0,0 +1,5 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMJSBackendInfo + JSBackendTargetInfo.cpp + ) diff --git a/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp b/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp new file mode 100644 index 000000000000..cdf9752a07e6 --- /dev/null +++ b/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp @@ -0,0 +1,20 @@ +//===-- JSBackendTargetInfo.cpp - JSBackend Target Implementation -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===--------------------------------------------------------------------===// + +#include "JSTargetMachine.h" +#include "MCTargetDesc/JSBackendMCTargetDesc.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheJSBackendTarget; + +extern "C" void LLVMInitializeJSBackendTargetInfo() { + RegisterTarget<Triple::asmjs, /*HasJIT=*/false> X(TheJSBackendTarget, "js", "JavaScript (asm.js, emscripten) backend"); +} diff --git a/lib/Target/JSBackend/TargetInfo/LLVMBuild.txt b/lib/Target/JSBackend/TargetInfo/LLVMBuild.txt new file mode 100644 index 000000000000..732058260970 --- /dev/null +++ b/lib/Target/JSBackend/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/JSBackend/TargetInfo/LLVMBuild.txt ---------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===-----------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===-----------------------------------------------------------------------===; + +[component_0] +type = Library +name = JSBackendInfo +parent = JSBackend +required_libraries = MC Support Target +add_to_library_groups = JSBackend diff --git a/lib/Target/JSBackend/TargetInfo/Makefile b/lib/Target/JSBackend/TargetInfo/Makefile new file mode 100644 index 000000000000..45f0230efcae --- /dev/null +++ b/lib/Target/JSBackend/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/JSBackend/TargetInfo/Makefile -----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===---------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMJSBackendInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 43621629dd25..5fabde06d09b 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -24,6 +24,7 @@ subdirectories = AArch64 AVR BPF + JSBackend Lanai Hexagon MSP430 diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index d75ed206ad23..66fce3934084 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -81,6 +81,9 @@ static bool mergeConstants(Module &M) { bool MadeChange = false; + // XXX EMSCRIPTEN: mark @__init_array_start as not to be touched + const GlobalValue *InitArrayStart = M.getNamedGlobal("__init_array_start"); + // Iterate constant merging while we are still making progress. Merging two // constants together may allow us to merge other constants together if the // second level constants have initializers which point to the globals that @@ -92,6 +95,10 @@ static bool mergeConstants(Module &M) { GVI != E; ) { GlobalVariable *GV = &*GVI++; + // XXX EMSCRIPTEN: mark @__init_array_start as not to be touched + if (GV == InitArrayStart) + continue; + + // If this GV is dead, remove it.
GV->removeDeadConstantUsers(); if (GV->use_empty() && GV->hasLocalLinkage()) { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bfd73f4bbac5..e869da991c1a 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2058,6 +2058,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // smaller constant, which will be target friendly. unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); if (LHSI->hasOneUse() && + // @LOCALMOD-BEGIN + // We don't want to introduce non-power-of-two integer sizes for PNaCl's + // stable wire format, so modify this transformation for NaCl. + isPowerOf2_32(TypeBits - Amt) && (TypeBits - Amt) >= 8 && + // @LOCALMOD-END Amt != 0 && RHSV.countTrailingZeros() >= Amt) { Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); Constant *NCI = ConstantExpr::getTrunc( diff --git a/readme-emscripten-fastcomp.txt b/readme-emscripten-fastcomp.txt new file mode 100644 index 000000000000..3df85ca78c08 --- /dev/null +++ b/readme-emscripten-fastcomp.txt @@ -0,0 +1,18 @@ +source info: + +llvm: + +origin https://chromium.googlesource.com/native_client/pnacl-llvm + +commit 7026af7138fccfb256456b04b375d39b025bdb7c +Author: Karl Schimpf +Date: Thu Nov 21 10:34:00 2013 -0800 + +clang: + +origin https://chromium.googlesource.com/native_client/pnacl-clang + +commit a963b803407c9d1cac644cc425004e0ccd28fa45 +Author: JF Bastien +Date: Fri Nov 8 15:52:28 2013 -0800 + diff --git a/test/CodeGen/JS/aliases.ll b/test/CodeGen/JS/aliases.ll new file mode 100644 index 000000000000..11ebda5ff21c --- /dev/null +++ b/test/CodeGen/JS/aliases.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +@.str = private unnamed_addr constant [18 x i8] c"hello, world! %d\0A\00", align 1 ; [#uses=1 type=[18 x i8]*] + +@othername = internal alias void (i32), void (i32)* @doit +@othername2 = internal alias void (i32), void (i32)* @othername +@othername3 = internal alias void (i32), void (i32)* @othername2 +@othername4 = internal alias void (), bitcast (void (i32)* @othername2 to void ()*) + +@list = global i32 ptrtoint (void ()* @othername4 to i32) +@list2 = global <{ i32, i32, i32, i32, i32 }> <{ i32 ptrtoint (void (i32)* @doit to i32), i32 ptrtoint (void (i32)* @othername to i32), i32 ptrtoint (void (i32)* @othername2 to i32), i32 ptrtoint (void (i32)* @othername3 to i32), i32 ptrtoint (void ()* @othername4 to i32) }> + + +@value = global i32 17 +@value2 = alias i32, i32* @value +@value3 = alias i32, i32* @value + +define internal void @doit(i32 %x) { + %call = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i32 0, i32 0), i32 %x) ; [#uses=0 type=i32] + ret void +} + +;;; we just check for compilation to succeed here, specifically of @list and @list2 +; CHECK: function _main() { +; CHECK: } + +define i32 @main() { +entry: + call void () @othername4() + %fp = ptrtoint void ()* @othername4 to i32 + %fp1 = add i32 %fp, 0 + %pf = inttoptr i32 %fp1 to void (i32)* + %x = load i32, i32* @value3 + call void (i32) %pf(i32 %x) + %x1 = load i32, i32* @value2 + call void (i32) @othername3(i32 %x1) + %x2 = load i32, i32* @value + call void (i32) @othername2(i32 %x2) + store i32 18, i32* @value + %x3 = load i32, i32* @value + call void (i32) @othername(i32 %x3) + store i32 19, i32* @value3 + %x4 = load i32, i32* @value3 + call void (i32) @doit(i32 %x4) + ret i32 1 +} + +declare i32 @printf(i8*, ...) + diff --git a/test/CodeGen/JS/alloca-contradiction.ll b/test/CodeGen/JS/alloca-contradiction.ll new file mode 100644 index 000000000000..82b1bf87c9fe --- /dev/null +++ b/test/CodeGen/JS/alloca-contradiction.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s + +; In theory, the @llvm.lifetime intrinsics shouldn't contradict each other, but +; in practice they apparently do sometimes. When they do, we should probably be +; conservative. + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; Don't merge these two allocas, even though lifetime markers may initially +; appear to indicate that it's safe, because they also indicate that it's +; unsafe. + +; CHECK: foo +; CHECK: HEAP8[$p] = 0; +; CHECK: HEAP8[$q] = 1; +define void @foo() nounwind { +entry: + %p = alloca i8 + %q = alloca i8 + br label %loop + +loop: + call void @llvm.lifetime.end(i64 1, i8* %q) + store volatile i8 0, i8* %p + store volatile i8 1, i8* %q + call void @llvm.lifetime.start(i64 1, i8* %p) + br i1 undef, label %loop, label %end + +end: ; preds = %red + ret void +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind diff --git a/test/CodeGen/JS/allocamanager-phis.ll b/test/CodeGen/JS/allocamanager-phis.ll new file mode 100644 index 000000000000..c04a21245ef4 --- /dev/null +++ b/test/CodeGen/JS/allocamanager-phis.ll @@ -0,0 +1,142 @@ +; RUN: llc < %s | FileCheck %s + +; Lifetime intrinsics are typically just referencing a single alloca, but +; sometimes PRE decides to totally optimize a redundant bitcast and insert +; phis. We need to look through the phis. In the code below, l_1565.i has +; an overlapping lifetime with l_766.i which is only visible if we can +; see through phis. 
+ +; CHECK: $vararg_buffer3 = sp; +; CHECK: $l_1565$i = sp + 16|0; +; CHECK: $l_766$i = sp + 12|0; + +target datalayout = "e-p:32:32-i64:64-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +@g_15 = external hidden global [4 x i8], align 4 +@g_285 = external hidden global [4 x i8], align 4 +@g_423 = external hidden global i32, align 4 +@g_779 = external hidden global [4 x i8], align 4 +@g_784 = external hidden global [4 x i8], align 4 +@.str = external hidden unnamed_addr constant [25 x i8], align 1 +@.str1 = external hidden unnamed_addr constant [25 x i8], align 1 +@.str2 = external hidden unnamed_addr constant [15 x i8], align 1 +@.str3 = external hidden unnamed_addr constant [8 x i8], align 1 +@__func__._Z6func_6v = external hidden unnamed_addr constant [7 x i8], align 1 + +; Function Attrs: nounwind +declare i32 @printf(i8* nocapture readonly, i8* noalias) #0 + +; Function Attrs: noreturn +declare void @__assert_fail(i8*, i8*, i32, i8*) #1 + +define void @test() { +entry: + %vararg_buffer3 = alloca <{ i32*, i32**, i32* }>, align 8 + %vararg_lifetime_bitcast4 = bitcast <{ i32*, i32**, i32* }>* %vararg_buffer3 to i8* + %vararg_buffer = alloca <{ i32*, i32**, i32* }>, align 8 + %vararg_lifetime_bitcast = bitcast <{ i32*, i32**, i32* }>* %vararg_buffer to i8* + %l_767.i.i = alloca i32, align 4 + %l_1565.i = alloca i32*, align 4 + %l_767.i = alloca i32, align 4 + %l_766.i = alloca [1 x i16*], align 4 + %0 = load i32, i32* bitcast ([4 x i8]* @g_15 to i32*), align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %if.then, label %entry.if.end_crit_edge + +entry.if.end_crit_edge: ; preds = %entry + %.pre = bitcast [1 x i16*]* %l_766.i to i8* + %.pre1 = getelementptr inbounds [1 x i16*], [1 x i16*]* %l_766.i, i32 0, i32 0 + br label %if.end + +if.then: ; preds = %entry + %1 = bitcast i32* %l_767.i to i8* + call void @llvm.lifetime.start(i64 4, i8* %1) + %2 = bitcast [1 x i16*]* %l_766.i to i8* + call void @llvm.lifetime.start(i64 4, i8* %2) + store i32 -1407759351, i32* %l_767.i, align 4 + %3 = getelementptr inbounds [1 x i16*], [1 x i16*]* %l_766.i, i32 0, i32 0 + store i16* null, i16** %3, align 4 + br label %for.body.i + +for.body.i: ; preds = %for.body.i, %if.then + %l_82.02.i = phi i32 [ 0, %if.then ], [ %inc.i, %for.body.i ] + %4 = load i32**, i32*** bitcast (i32* @g_423 to i32***), align 4 + store i32* %l_767.i, i32** %4, align 4 + store i16** %3, i16*** bitcast ([4 x i8]* @g_779 to i16***), align 4 + %inc.i = add i32 %l_82.02.i, 1 + %exitcond.i = icmp eq i32 %inc.i, 27 + br i1 %exitcond.i, label %_Z7func_34v.exit, label %for.body.i + +_Z7func_34v.exit: ; preds = %for.body.i + call void @llvm.lifetime.end(i64 4, i8* %1) + call void @llvm.lifetime.end(i64 4, i8* %2) + %5 = load i32**, i32*** bitcast (i32* @g_423 to i32***), align 4 + store i32* bitcast ([4 x i8]* @g_285 to i32*), i32** %5, align 4 + br label %if.end + +if.end: ; preds = %_Z7func_34v.exit, %entry.if.end_crit_edge + %.pre-phi2 = phi i16** [ %.pre1, %entry.if.end_crit_edge ], [ %3, %_Z7func_34v.exit ] + %.pre-phi = phi i8* [ %.pre, %entry.if.end_crit_edge ], [ %2, %_Z7func_34v.exit ] + %6 = bitcast i32** %l_1565.i to i8* + call void @llvm.lifetime.start(i64 4, i8* %6) + store i32* bitcast ([4 x i8]* @g_784 to i32*), i32** %l_1565.i, align 4 + call void @llvm.lifetime.start(i64 12, i8* %vararg_lifetime_bitcast) + %vararg_ptr = getelementptr <{ i32*, i32**, i32* }>, <{ i32*, i32**, i32* }>* %vararg_buffer, i32 0, i32 0 + store i32* bitcast ([4 x i8]* @g_784 to i32*), i32** %vararg_ptr, align 4 + 
%vararg_ptr1 = getelementptr <{ i32*, i32**, i32* }>, <{ i32*, i32**, i32* }>* %vararg_buffer, i32 0, i32 1 + store i32** %l_1565.i, i32*** %vararg_ptr1, align 4 + %vararg_ptr2 = getelementptr <{ i32*, i32**, i32* }>, <{ i32*, i32**, i32* }>* %vararg_buffer, i32 0, i32 2 + store i32* bitcast ([4 x i8]* @g_784 to i32*), i32** %vararg_ptr2, align 4 + %call.i = call i32 bitcast (i32 (i8*, i8*)* @printf to i32 (i8*, <{ i32*, i32**, i32* }>*)*)(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str, i32 0, i32 0), <{ i32*, i32**, i32* }>* %vararg_buffer) + call void @llvm.lifetime.end(i64 12, i8* %vararg_lifetime_bitcast) + %7 = bitcast i32* %l_767.i.i to i8* + call void @llvm.lifetime.start(i64 4, i8* %7) + call void @llvm.lifetime.start(i64 4, i8* %.pre-phi) + store i32 -1407759351, i32* %l_767.i.i, align 4 + store i16* null, i16** %.pre-phi2, align 4 + br label %for.body.i.i + +for.body.i.i: ; preds = %for.body.i.i, %if.end + %l_82.02.i.i = phi i32 [ 0, %if.end ], [ %inc.i.i, %for.body.i.i ] + %8 = load i32**, i32*** bitcast (i32* @g_423 to i32***), align 4 + store i32* %l_767.i.i, i32** %8, align 4 + store i16** %.pre-phi2, i16*** bitcast ([4 x i8]* @g_779 to i16***), align 4 + %inc.i.i = add i32 %l_82.02.i.i, 1 + %exitcond.i.i = icmp eq i32 %inc.i.i, 27 + br i1 %exitcond.i.i, label %_Z7func_34v.exit.i, label %for.body.i.i + +_Z7func_34v.exit.i: ; preds = %for.body.i.i + call void @llvm.lifetime.end(i64 4, i8* %7) + call void @llvm.lifetime.end(i64 4, i8* %.pre-phi) + %9 = load i32*, i32** %l_1565.i, align 4 + call void @llvm.lifetime.start(i64 12, i8* %vararg_lifetime_bitcast4) + %vararg_ptr5 = getelementptr <{ i32*, i32**, i32* }>, <{ i32*, i32**, i32* }>* %vararg_buffer3, i32 0, i32 0 + store i32* %9, i32** %vararg_ptr5, align 4 + %vararg_ptr6 = getelementptr <{ i32*, i32**, i32* }>, <{ i32*, i32**, i32* }>* %vararg_buffer3, i32 0, i32 1 + store i32** %l_1565.i, i32*** %vararg_ptr6, align 4 + %vararg_ptr7 = getelementptr <{ i32*, i32**, i32* }>, <{ i32*, i32**, i32* }>* %vararg_buffer3, i32 0, i32 2 + store i32* bitcast ([4 x i8]* @g_784 to i32*), i32** %vararg_ptr7, align 4 + %call1.i = call i32 bitcast (i32 (i8*, i8*)* @printf to i32 (i8*, <{ i32*, i32**, i32* }>*)*)(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str1, i32 0, i32 0), <{ i32*, i32**, i32* }>* %vararg_buffer3) + call void @llvm.lifetime.end(i64 12, i8* %vararg_lifetime_bitcast4) + %10 = load i32*, i32** %l_1565.i, align 4 + %cmp.i = icmp eq i32* %10, bitcast ([4 x i8]* @g_784 to i32*) + br i1 %cmp.i, label %_Z6func_6v.exit, label %lor.rhs.i + +lor.rhs.i: ; preds = %_Z7func_34v.exit.i + call void @__assert_fail(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str3, i32 0, i32 0), i32 33, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @__func__._Z6func_6v, i32 0, i32 0)) #1 + unreachable + +_Z6func_6v.exit: ; preds = %_Z7func_34v.exit.i + call void @llvm.lifetime.end(i64 4, i8* %6) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +attributes #0 = { nounwind } +attributes #1 = { noreturn } diff --git a/test/CodeGen/JS/allocamanager.ll b/test/CodeGen/JS/allocamanager.ll new file mode 100644 index 000000000000..19f1ca7949f3 --- /dev/null +++ b/test/CodeGen/JS/allocamanager.ll @@ -0,0 +1,165 @@ +; RUN: llc < %s | FileCheck %s + +; Basic AllocaManager feature test. 
Eliminate user variable cupcake in favor of +; user variable muffin, and combine all the vararg buffers. And align the stack +; pointer. + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +%struct._IO_FILE = type opaque + +@stderr = external constant [4 x i8], align 4 +@.str = private unnamed_addr constant [26 x i8] c"hello from %s; argc is %d\00", align 1 +@.str1 = private unnamed_addr constant [33 x i8] c"message from the program: \22%s\22!\0A\00", align 1 +@.str2 = private unnamed_addr constant [38 x i8] c"with argc %d, I, %s, must say goodbye\00", align 1 +@.str3 = private unnamed_addr constant [43 x i8] c"another message from the program: \22%s\22...\0A\00", align 1 + +; CHECK: function _foo($argc,$argv) { +; CHECK-NOT: cupcake +; CHECK: STACKTOP = STACKTOP + 128|0; +; CHECK-NEXT: vararg_buffer0 = +; CHECK-NEXT: $muffin = +; CHECK-NOT: cupcake +; CHECK: } + +; Function Attrs: nounwind +define void @foo(i32 %argc, i8** %argv) #0 { +entry: + %vararg_buffer0 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast10 = bitcast <{ i8* }>* %vararg_buffer0 to i8* + %vararg_buffer5 = alloca <{ i32, i8* }>, align 8 + %vararg_lifetime_bitcast6 = bitcast <{ i32, i8* }>* %vararg_buffer5 to i8* + %vararg_buffer2 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast3 = bitcast <{ i8* }>* %vararg_buffer2 to i8* + %vararg_buffer1 = alloca <{ i8*, i32 }>, align 8 + %vararg_lifetime_bitcast = bitcast <{ i8*, i32 }>* %vararg_buffer1 to i8* + %muffin = alloca [117 x i8], align 1 + %cupcake = alloca [119 x i8], align 1 + %tmp = getelementptr [117 x i8], [117 x i8]* %muffin, i32 0, i32 0 + call void @llvm.lifetime.start(i64 117, i8* %tmp) #0 + %tmp1 = load i8*, i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast) + %vararg_ptr = getelementptr <{ i8*, i32 }>, <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 0 + store i8* %tmp1, i8** %vararg_ptr, align 4 + %vararg_ptr1 = getelementptr <{ i8*, i32 }>, <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 1 + store i32 %argc, i32* %vararg_ptr1, align 4 + %call = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i8*, i32 }>*)*)(i8* %tmp, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str, i32 0, i32 0), <{ i8*, i32 }>* %vararg_buffer1) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) + %tmp2 = load %struct._IO_FILE*, %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast3) + %vararg_ptr4 = getelementptr <{ i8* }>, <{ i8* }>* %vararg_buffer2, i32 0, i32 0 + store i8* %tmp, i8** %vararg_ptr4, align 4 + %call2 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp2, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str1, i32 0, i32 0), <{ i8* }>* %vararg_buffer2) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast3) + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp3 = getelementptr [119 x i8], [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp3) #0 + %tmp4 = load i8*, i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast6) + %vararg_ptr7 = getelementptr <{ i32, i8* }>, <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 0 + store i32 %argc, i32* %vararg_ptr7, align 4 + %vararg_ptr8 = getelementptr <{ i32, i8* }>, <{ i32, i8* 
}>* %vararg_buffer5, i32 0, i32 1 + store i8* %tmp4, i8** %vararg_ptr8, align 4 + %call5 = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i32, i8* }>*)*)(i8* %tmp3, i8* getelementptr inbounds ([38 x i8], [38 x i8]* @.str2, i32 0, i32 0), <{ i32, i8* }>* %vararg_buffer5) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast6) + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast10) + %vararg_ptr11 = getelementptr <{ i8* }>, <{ i8* }>* %vararg_buffer0, i32 0, i32 0 + store i8* %tmp3, i8** %vararg_ptr11, align 4 + %call7 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp2, i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str3, i32 0, i32 0), <{ i8* }>* %vararg_buffer0) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast10) + call void @llvm.lifetime.end(i64 119, i8* %tmp3) #0 + ret void +} + +; CHECK: function _bar($argc,$argv) { +; CHECK-NOT: cupcake +; CHECK: STACKTOP = STACKTOP + 128|0; +; CHECK-NEXT: vararg_buffer0 = +; CHECK-NEXT: $muffin = +; CHECK-NOT: cupcake +; CHECK: } + +; Function Attrs: nounwind +define void @bar(i32 %argc, i8** %argv) #0 { +entry: + %vararg_buffer0 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast10 = bitcast <{ i8* }>* %vararg_buffer0 to i8* + %vararg_buffer5 = alloca <{ i32, i8* }>, align 8 + %vararg_lifetime_bitcast6 = bitcast <{ i32, i8* }>* %vararg_buffer5 to i8* + %vararg_buffer2 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast3 = bitcast <{ i8* }>* %vararg_buffer2 to i8* + %vararg_buffer1 = alloca <{ i8*, i32 }>, align 8 + %vararg_lifetime_bitcast = bitcast <{ i8*, i32 }>* %vararg_buffer1 to i8* + %muffin = alloca [117 x i8], align 1 + %cupcake = alloca [119 x i8], align 1 + %tmp = getelementptr [117 x i8], [117 x i8]* %muffin, i32 0, i32 0 + call void @llvm.lifetime.start(i64 117, i8* %tmp) #0 + %cmp = icmp eq i32 %argc, 39 + br i1 %cmp, label %if.end.thread, label %if.end + +if.end.thread: ; preds = %entry + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp1 = getelementptr [119 x i8], [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp1) #0 + %.pre = load %struct._IO_FILE*, %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + br label %if.then4 + +if.end: ; preds = %entry + %tmp2 = load i8*, i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast) + %vararg_ptr = getelementptr <{ i8*, i32 }>, <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 0 + store i8* %tmp2, i8** %vararg_ptr, align 4 + %vararg_ptr1 = getelementptr <{ i8*, i32 }>, <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 1 + store i32 %argc, i32* %vararg_ptr1, align 4 + %call = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i8*, i32 }>*)*)(i8* %tmp, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str, i32 0, i32 0), <{ i8*, i32 }>* %vararg_buffer1) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) + %tmp3 = load %struct._IO_FILE*, %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast3) + %vararg_ptr4 = getelementptr <{ i8* }>, <{ i8* }>* %vararg_buffer2, i32 0, i32 0 + store i8* %tmp, i8** %vararg_ptr4, align 4 + %call2 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp3, i8* getelementptr inbounds ([33 x i8], [33 x i8]* 
@.str1, i32 0, i32 0), <{ i8* }>* %vararg_buffer2) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast3) + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp4 = getelementptr [119 x i8], [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp4) #0 + %cmp3 = icmp eq i32 %argc, 45 + br i1 %cmp3, label %if.end10, label %if.then4 + +if.then4: ; preds = %if.end, %if.end.thread + %tmp5 = phi %struct._IO_FILE* [ %.pre, %if.end.thread ], [ %tmp3, %if.end ] + %tmp6 = phi i8* [ %tmp1, %if.end.thread ], [ %tmp4, %if.end ] + %tmp7 = load i8*, i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast6) + %vararg_ptr7 = getelementptr <{ i32, i8* }>, <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 0 + store i32 %argc, i32* %vararg_ptr7, align 4 + %vararg_ptr8 = getelementptr <{ i32, i8* }>, <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 1 + store i8* %tmp7, i8** %vararg_ptr8, align 4 + %call7 = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i32, i8* }>*)*)(i8* %tmp6, i8* getelementptr inbounds ([38 x i8], [38 x i8]* @.str2, i32 0, i32 0), <{ i32, i8* }>* %vararg_buffer5) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast6) + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast10) + %vararg_ptr11 = getelementptr <{ i8* }>, <{ i8* }>* %vararg_buffer0, i32 0, i32 0 + store i8* %tmp6, i8** %vararg_ptr11, align 4 + %call9 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp5, i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str3, i32 0, i32 0), <{ i8* }>* %vararg_buffer0) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast10) + br label %if.end10 + +if.end10: ; preds = %if.then4, %if.end + %tmp8 = phi i8* [ %tmp4, %if.end ], [ %tmp6, %if.then4 ] + call void @llvm.lifetime.end(i64 119, i8* %tmp8) #0 + ret void +} + +; Function Attrs: nounwind +declare i32 @sprintf(i8*, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @fprintf(%struct._IO_FILE*, i8*, i8*) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/JS/asm.ll b/test/CodeGen/JS/asm.ll new file mode 100644 index 000000000000..c3099e3239ab --- /dev/null +++ b/test/CodeGen/JS/asm.ll @@ -0,0 +1,16 @@ +; RUN: not llc < %s + +; Inline asm isn't supported (yet?). llc should report an error when it +; encounters inline asm. +; +; We could support the special case of an empty inline asm string without much +; work, but code that uses such things most likely isn't portable anyway, and +; there are usually much better alternatives. 
+ +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +define void @foo() { + call void asm "", ""() + ret void +} diff --git a/test/CodeGen/JS/basics.ll b/test/CodeGen/JS/basics.ll new file mode 100644 index 000000000000..573680f810ee --- /dev/null +++ b/test/CodeGen/JS/basics.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _simple_integer_math( +; CHECK: [[VAL_A:\$[a-z]+]] = [[VAL_A]]|0; +; CHECK: [[VAL_B:\$[a-z]+]] = [[VAL_B]]|0; +; CHECK: [[VAL_C:\$[a-z]+]] = (([[VAL_A]]) + ([[VAL_B]]))|0; +; CHECK: [[VAL_D:\$[a-z]+]] = ([[VAL_C]]*20)|0; +; CHECK: [[VAL_E:\$[a-z]+]] = (([[VAL_D]]|0) / ([[VAL_A]]|0))&-1; +; CHECK: [[VAL_F:\$[a-z]+]] = (([[VAL_E]]) - 3)|0; +; CHECK: return ([[VAL_F]]|0); +define i32 @simple_integer_math(i32 %a, i32 %b) nounwind { + %c = add i32 %a, %b + %d = mul i32 %c, 20 + %e = sdiv i32 %d, %a + %f = sub i32 %e, 3 + ret i32 %f +} + +; CHECK: function _fneg( +; CHECK: [[VAL_D:\$[a-z]+]] = +[[VAL_D]] +; CHECK: [[VAL_F:\$[a-z]+]] = +0 +; CHECK: [[VAL_F]] = -[[VAL_D]] +; CHECK: return (+[[VAL_F]]); +define double @fneg(double %d) nounwind { + %f = fsub double -0.0, %d + ret double %f +} + +; CHECK: function _flt_rounds( +; CHECK: t = 1; +declare i32 @llvm.flt.rounds() +define i32 @flt_rounds() { + %t = call i32 @llvm.flt.rounds() + ret i32 %t +} diff --git a/test/CodeGen/JS/blockchanges.ll b/test/CodeGen/JS/blockchanges.ll new file mode 100644 index 000000000000..fb79af14d735 --- /dev/null +++ b/test/CodeGen/JS/blockchanges.ll @@ -0,0 +1,400 @@ +; RUN: llc < %s + +; regression check for emscripten #3088 - we were not clearing BlockChanges in i64 lowering + +; ModuleID = 'waka.bc' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +%"class.ZenLib::uint128" = type <{ i64, i64 }> + +@.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00", align 1 + +@.str368164 = external hidden unnamed_addr constant [10 x i8], align 1 +@.str398167 = external hidden unnamed_addr constant [6 x i8], align 1 +@.str718199 = external hidden unnamed_addr constant [9 x i8], align 1 +@.str738201 = external hidden unnamed_addr constant [21 x i8], align 1 +@.str748202 = external hidden unnamed_addr constant [26 x i8], align 1 +@.str758203 = external hidden unnamed_addr constant [21 x i8], align 1 +@.str768204 = external hidden unnamed_addr constant [8 x i8], align 1 +@.str778205 = external hidden unnamed_addr constant [14 x i8], align 1 +@.str788206 = external hidden unnamed_addr constant [22 x i8], align 1 +@.str798207 = external hidden unnamed_addr constant [25 x i8], align 1 +@.str808208 = external hidden unnamed_addr constant [24 x i8], align 1 +@.str818209 = external hidden unnamed_addr constant [20 x i8], align 1 +@.str828210 = external hidden unnamed_addr constant [34 x i8], align 1 +@.str838211 = external hidden unnamed_addr constant [31 x i8], align 1 +@.str848212 = external hidden unnamed_addr constant [29 x i8], align 1 +@.str858213 = external hidden unnamed_addr constant [44 x i8], align 1 +@.str868214 = external hidden unnamed_addr constant [12 x i8], align 1 +@.str908218 = external hidden unnamed_addr constant [21 x i8], align 1 +@.str918219 = external hidden 
unnamed_addr constant [8 x i8], align 1 +@.str928220 = external hidden unnamed_addr constant [6 x i8], align 1 +@.str9210864 = external hidden unnamed_addr constant [5 x i8], align 1 +@.str514367 = external hidden unnamed_addr constant [5 x i8], align 1 +@.str214409 = external hidden unnamed_addr constant [4 x i8], align 1 +@.str20216493 = external hidden unnamed_addr constant [3 x i8], align 1 +@.str2017231 = external hidden unnamed_addr constant [11 x i8], align 1 +@.str2317234 = external hidden unnamed_addr constant [14 x i8], align 1 +@.str2417235 = external hidden unnamed_addr constant [4 x i8], align 1 +@.str2717238 = external hidden unnamed_addr constant [5 x i8], align 1 +@.str3217243 = external hidden unnamed_addr constant [4 x i8], align 1 +@.str1717689 = external hidden unnamed_addr constant [5 x i8], align 1 +@.str2104 = external hidden unnamed_addr constant [1 x i8], align 1 + +; Function Attrs: nounwind readonly +define hidden i8* @_ZN12MediaInfoLib22Mxf_EssenceCompressionEN6ZenLib7uint128E(%"class.ZenLib::uint128"* nocapture readonly %EssenceCompression) #0 { +entry: + %hi = getelementptr inbounds %"class.ZenLib::uint128", %"class.ZenLib::uint128"* %EssenceCompression, i32 0, i32 1 + %0 = load i64, i64* %hi, align 1 + %and = and i64 %0, -256 + %cmp = icmp eq i64 %and, 436333716306985216 + br i1 %cmp, label %lor.lhs.false, label %return + +lor.lhs.false: ; preds = %entry + %lo = getelementptr inbounds %"class.ZenLib::uint128", %"class.ZenLib::uint128"* %EssenceCompression, i32 0, i32 0 + %1 = load i64, i64* %lo, align 1 + %and1 = and i64 %1, -72057594037927936 + switch i64 %and1, label %return [ + i64 288230376151711744, label %if.end + i64 1008806316530991104, label %if.end + ] + +if.end: ; preds = %lor.lhs.false, %lor.lhs.false + %shr = lshr i64 %1, 56 + %conv = trunc i64 %shr to i32 + %and10 = lshr i64 %1, 48 + %and14 = lshr i64 %1, 40 + %and18 = lshr i64 %1, 32 + %conv20 = trunc i64 %and18 to i32 + %and22 = lshr i64 %1, 24 + %and26 = lshr i64 %1, 16 + %conv28 = trunc i64 %and26 to i32 + %and30 = lshr i64 %1, 8 + %conv32 = trunc i64 %and30 to i32 + switch i32 %conv, label %return [ + i32 4, label %sw.bb + i32 14, label %sw.bb112 + ] + +sw.bb: ; preds = %if.end + %conv12 = trunc i64 %and10 to i32 + %conv34 = and i32 %conv12, 255 + switch i32 %conv34, label %return [ + i32 1, label %sw.bb35 + i32 2, label %sw.bb64 + ] + +sw.bb35: ; preds = %sw.bb + %conv36 = and i64 %and14, 255 + %cond12 = icmp eq i64 %conv36, 2 + br i1 %cond12, label %sw.bb37, label %return + +sw.bb37: ; preds = %sw.bb35 + %conv38 = and i32 %conv20, 255 + switch i32 %conv38, label %return [ + i32 1, label %sw.bb39 + i32 2, label %sw.bb42 + ] + +sw.bb39: ; preds = %sw.bb37 + %conv40 = and i64 %and22, 255 + %cond14 = icmp eq i64 %conv40, 1 + %. 
= select i1 %cond14, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str214409, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0) + br label %return + +sw.bb42: ; preds = %sw.bb37 + %2 = trunc i64 %and22 to i32 + %conv43 = and i32 %2, 255 + switch i32 %conv43, label %sw.default61 [ + i32 1, label %sw.bb44 + i32 2, label %return + i32 3, label %sw.bb56 + i32 113, label %sw.bb60 + ] + +sw.bb44: ; preds = %sw.bb42 + %conv45 = and i32 %conv28, 255 + switch i32 %conv45, label %sw.default54 [ + i32 0, label %return + i32 1, label %return + i32 2, label %return + i32 3, label %return + i32 4, label %return + i32 17, label %return + i32 32, label %sw.bb52 + i32 48, label %sw.bb53 + i32 49, label %sw.bb53 + i32 50, label %sw.bb53 + i32 51, label %sw.bb53 + i32 52, label %sw.bb53 + i32 53, label %sw.bb53 + i32 54, label %sw.bb53 + i32 55, label %sw.bb53 + i32 56, label %sw.bb53 + i32 57, label %sw.bb53 + i32 58, label %sw.bb53 + i32 59, label %sw.bb53 + i32 60, label %sw.bb53 + i32 61, label %sw.bb53 + i32 62, label %sw.bb53 + i32 63, label %sw.bb53 + ] + +sw.bb52: ; preds = %sw.bb44 + br label %return + +sw.bb53: ; preds = %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44 + br label %return + +sw.default54: ; preds = %sw.bb44 + br label %return + +sw.bb56: ; preds = %sw.bb42 + %conv57 = and i64 %and26, 255 + %cond13 = icmp eq i64 %conv57, 1 + %.35 = select i1 %cond13, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str368164, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0) + br label %return + +sw.bb60: ; preds = %sw.bb42 + br label %return + +sw.default61: ; preds = %sw.bb42 + br label %return + +sw.bb64: ; preds = %sw.bb + %conv65 = and i64 %and14, 255 + %cond9 = icmp eq i64 %conv65, 2 + br i1 %cond9, label %sw.bb66, label %return + +sw.bb66: ; preds = %sw.bb64 + %conv67 = and i32 %conv20, 255 + switch i32 %conv67, label %return [ + i32 1, label %sw.bb68 + i32 2, label %sw.bb75 + ] + +sw.bb68: ; preds = %sw.bb66 + %3 = trunc i64 %and22 to i32 + %conv69 = and i32 %3, 255 + switch i32 %conv69, label %sw.default74 [ + i32 0, label %return + i32 1, label %return + i32 126, label %return + i32 127, label %return + ] + +sw.default74: ; preds = %sw.bb68 + br label %return + +sw.bb75: ; preds = %sw.bb66 + %conv76 = and i64 %and22, 255 + %cond10 = icmp eq i64 %conv76, 3 + br i1 %cond10, label %sw.bb77, label %return + +sw.bb77: ; preds = %sw.bb75 + %conv78 = and i32 %conv28, 255 + switch i32 %conv78, label %return [ + i32 1, label %sw.bb79 + i32 2, label %sw.bb84 + i32 3, label %sw.bb92 + i32 4, label %sw.bb96 + ] + +sw.bb79: ; preds = %sw.bb77 + %conv80 = and i32 %conv32, 255 + switch i32 %conv80, label %sw.default83 [ + i32 1, label %return + i32 16, label %sw.bb82 + ] + +sw.bb82: ; preds = %sw.bb79 + br label %return + +sw.default83: ; preds = %sw.bb79 + br label %return + +sw.bb84: ; preds = %sw.bb77 + %conv85 = and i32 %conv32, 255 + switch i32 %conv85, label %sw.default91 [ + i32 1, label %return + i32 4, label %sw.bb87 + i32 5, label %sw.bb88 + i32 6, label %sw.bb89 + i32 28, label %sw.bb90 + ] + +sw.bb87: ; preds = %sw.bb84 + br label %return + +sw.bb88: ; preds = %sw.bb84 + br label %return + +sw.bb89: ; preds = %sw.bb84 + br label %return + +sw.bb90: ; preds = %sw.bb84 + br label %return + +sw.default91: ; preds = %sw.bb84 + br label %return + +sw.bb92: ; preds = %sw.bb77 + %conv93 = and i64 %and30, 255 + 
%cond11 = icmp eq i64 %conv93, 1 + %.36 = select i1 %cond11, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str778205, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0) + br label %return + +sw.bb96: ; preds = %sw.bb77 + %conv97 = and i32 %conv32, 255 + switch i32 %conv97, label %sw.default106 [ + i32 1, label %return + i32 2, label %sw.bb99 + i32 3, label %sw.bb100 + i32 4, label %sw.bb101 + i32 5, label %sw.bb102 + i32 6, label %sw.bb103 + i32 7, label %sw.bb104 + i32 8, label %sw.bb105 + ] + +sw.bb99: ; preds = %sw.bb96 + br label %return + +sw.bb100: ; preds = %sw.bb96 + br label %return + +sw.bb101: ; preds = %sw.bb96 + br label %return + +sw.bb102: ; preds = %sw.bb96 + br label %return + +sw.bb103: ; preds = %sw.bb96 + br label %return + +sw.bb104: ; preds = %sw.bb96 + br label %return + +sw.bb105: ; preds = %sw.bb96 + br label %return + +sw.default106: ; preds = %sw.bb96 + br label %return + +sw.bb112: ; preds = %if.end + %4 = trunc i64 %and10 to i32 + %conv113 = and i32 %4, 255 + switch i32 %conv113, label %return [ + i32 4, label %sw.bb114 + i32 6, label %sw.bb127 + ] + +sw.bb114: ; preds = %sw.bb112 + %conv115 = and i64 %and14, 255 + %cond5 = icmp eq i64 %conv115, 2 + %conv117 = and i64 %and18, 255 + %cond6 = icmp eq i64 %conv117, 1 + %or.cond = and i1 %cond5, %cond6 + %conv119 = and i64 %and22, 255 + %cond7 = icmp eq i64 %conv119, 2 + %or.cond39 = and i1 %or.cond, %cond7 + br i1 %or.cond39, label %sw.bb120, label %return + +sw.bb120: ; preds = %sw.bb114 + %conv121 = and i64 %and26, 255 + %cond8 = icmp eq i64 %conv121, 4 + %.37 = select i1 %cond8, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str514367, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0) + br label %return + +sw.bb127: ; preds = %sw.bb112 + %conv128 = and i64 %and14, 255 + %cond = icmp eq i64 %conv128, 4 + %conv130 = and i64 %and18, 255 + %cond1 = icmp eq i64 %conv130, 1 + %or.cond40 = and i1 %cond, %cond1 + %conv132 = and i64 %and22, 255 + %cond2 = icmp eq i64 %conv132, 2 + %or.cond41 = and i1 %or.cond40, %cond2 + %conv134 = and i64 %and26, 255 + %cond3 = icmp eq i64 %conv134, 4 + %or.cond42 = and i1 %or.cond41, %cond3 + br i1 %or.cond42, label %sw.bb135, label %return + +sw.bb135: ; preds = %sw.bb127 + %conv136 = and i64 %and30, 255 + %cond4 = icmp eq i64 %conv136, 2 + %.38 = select i1 %cond4, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str868214, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0) + br label %return + +return: ; preds = %sw.bb135, %sw.bb127, %sw.bb120, %sw.bb114, %sw.bb112, %sw.default106, %sw.bb105, %sw.bb104, %sw.bb103, %sw.bb102, %sw.bb101, %sw.bb100, %sw.bb99, %sw.bb96, %sw.bb92, %sw.default91, %sw.bb90, %sw.bb89, %sw.bb88, %sw.bb87, %sw.bb84, %sw.default83, %sw.bb82, %sw.bb79, %sw.bb77, %sw.bb75, %sw.default74, %sw.bb68, %sw.bb68, %sw.bb68, %sw.bb68, %sw.bb66, %sw.bb64, %sw.default61, %sw.bb60, %sw.bb56, %sw.default54, %sw.bb53, %sw.bb52, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb42, %sw.bb39, %sw.bb37, %sw.bb35, %sw.bb, %if.end, %lor.lhs.false, %entry + %retval.0 = phi i8* [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.default106 ], [ getelementptr inbounds ([44 x i8], [44 x i8]* @.str858213, i32 0, i32 0), %sw.bb105 ], [ getelementptr inbounds ([29 x i8], [29 x i8]* @.str848212, i32 0, i32 0), %sw.bb104 ], [ getelementptr inbounds ([31 x i8], [31 x i8]* @.str838211, i32 0, i32 0), %sw.bb103 ], [ getelementptr 
inbounds ([34 x i8], [34 x i8]* @.str828210, i32 0, i32 0), %sw.bb102 ], [ getelementptr inbounds ([20 x i8], [20 x i8]* @.str818209, i32 0, i32 0), %sw.bb101 ], [ getelementptr inbounds ([24 x i8], [24 x i8]* @.str808208, i32 0, i32 0), %sw.bb100 ], [ getelementptr inbounds ([25 x i8], [25 x i8]* @.str798207, i32 0, i32 0), %sw.bb99 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.default91 ], [ getelementptr inbounds ([8 x i8], [8 x i8]* @.str768204, i32 0, i32 0), %sw.bb90 ], [ getelementptr inbounds ([21 x i8], [21 x i8]* @.str758203, i32 0, i32 0), %sw.bb89 ], [ getelementptr inbounds ([26 x i8], [26 x i8]* @.str748202, i32 0, i32 0), %sw.bb88 ], [ getelementptr inbounds ([21 x i8], [21 x i8]* @.str738201, i32 0, i32 0), %sw.bb87 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.default83 ], [ getelementptr inbounds ([9 x i8], [9 x i8]* @.str718199, i32 0, i32 0), %sw.bb82 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.default74 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.default61 ], [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str514367, i32 0, i32 0), %sw.bb60 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.default54 ], [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str2417235, i32 0, i32 0), %sw.bb53 ], [ getelementptr inbounds ([14 x i8], [14 x i8]* @.str2317234, i32 0, i32 0), %sw.bb52 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %lor.lhs.false ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %entry ], [ %., %sw.bb39 ], [ getelementptr inbounds ([11 x i8], [11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8], [11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8], [11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8], [11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8], [11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8], [11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([3 x i8], [3 x i8]* @.str20216493, i32 0, i32 0), %sw.bb42 ], [ %.35, %sw.bb56 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.bb37 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.bb35 ], [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([6 x i8], [6 x i8]* @.str398167, i32 0, i32 0), %sw.bb79 ], [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str2717238, i32 0, i32 0), %sw.bb84 ], [ %.36, %sw.bb92 ], [ getelementptr inbounds ([22 x i8], [22 x i8]* @.str788206, i32 0, i32 0), %sw.bb96 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.bb77 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.bb75 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.bb66 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.bb64 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.bb ], [ %.37, %sw.bb120 ], [ 
getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.bb114 ], [ %.38, %sw.bb135 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.bb127 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.bb112 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %if.end ] + ret i8* %retval.0 +} + +; Function Attrs: nounwind readonly +define hidden i8* @_ZN12MediaInfoLib27Mxf_Sequence_DataDefinitionEN6ZenLib7uint128E(%"class.ZenLib::uint128"* nocapture readonly %DataDefinition) #0 { +entry: + %lo = getelementptr inbounds %"class.ZenLib::uint128", %"class.ZenLib::uint128"* %DataDefinition, i32 0, i32 0 + %0 = load i64, i64* %lo, align 1 + %and = lshr i64 %0, 32 + %conv = trunc i64 %and to i32 + %and2 = lshr i64 %0, 24 + %conv5 = and i32 %conv, 255 + switch i32 %conv5, label %return [ + i32 1, label %sw.bb + i32 2, label %sw.bb9 + ] + +sw.bb: ; preds = %entry + %conv4 = trunc i64 %and2 to i32 + %conv6 = and i32 %conv4, 255 + switch i32 %conv6, label %sw.default [ + i32 1, label %return + i32 2, label %return + i32 3, label %return + i32 16, label %sw.bb8 + ] + +sw.bb8: ; preds = %sw.bb + br label %return + +sw.default: ; preds = %sw.bb + br label %return + +sw.bb9: ; preds = %entry + %1 = trunc i64 %and2 to i32 + %conv10 = and i32 %1, 255 + switch i32 %conv10, label %sw.default14 [ + i32 1, label %return + i32 2, label %sw.bb12 + i32 3, label %sw.bb13 + ] + +sw.bb12: ; preds = %sw.bb9 + br label %return + +sw.bb13: ; preds = %sw.bb9 + br label %return + +sw.default14: ; preds = %sw.bb9 + br label %return + +return: ; preds = %sw.default14, %sw.bb13, %sw.bb12, %sw.bb9, %sw.default, %sw.bb8, %sw.bb, %sw.bb, %sw.bb, %entry + %retval.0 = phi i8* [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.default14 ], [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str1717689, i32 0, i32 0), %sw.bb13 ], [ getelementptr inbounds ([6 x i8], [6 x i8]* @.str928220, i32 0, i32 0), %sw.bb12 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %sw.default ], [ getelementptr inbounds ([21 x i8], [21 x i8]* @.str908218, i32 0, i32 0), %sw.bb8 ], [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([5 x i8], [5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([8 x i8], [8 x i8]* @.str918219, i32 0, i32 0), %sw.bb9 ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str2104, i32 0, i32 0), %entry ] + ret i8* %retval.0 +} + + +define i32 @main() { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i32 0, i32 0)) + ret i32 0 +} + +declare i32 @printf(i8*, ...) + +attributes #0 = { nounwind readonly } + diff --git a/test/CodeGen/JS/dead-prototypes.ll b/test/CodeGen/JS/dead-prototypes.ll new file mode 100644 index 000000000000..6d57b5a791b2 --- /dev/null +++ b/test/CodeGen/JS/dead-prototypes.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s | not grep printf + +; llc shouldn't emit any code or bookkeeping for unused declarations. + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +define void @foo() { + ret void +} + +declare i32 @printf(i8* nocapture, ...) 
diff --git a/test/CodeGen/JS/expand-i64.ll b/test/CodeGen/JS/expand-i64.ll new file mode 100644 index 000000000000..30971c5ba3dd --- /dev/null +++ b/test/CodeGen/JS/expand-i64.ll @@ -0,0 +1,271 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _add($0,$1,$2,$3) { +; CHECK: $4 = (_i64Add(($0|0),($1|0),($2|0),($3|0))|0); +; CHECK: $5 = tempRet0; +; CHECK: } +define i64 @add(i64 %a, i64 %b) { + %c = add i64 %a, %b + ret i64 %c +} + +; CHECK: function _sub($0,$1,$2,$3) { +; CHECK: $4 = (_i64Subtract(($0|0),($1|0),($2|0),($3|0))|0); +; CHECK: $5 = tempRet0; +; CHECK: } +define i64 @sub(i64 %a, i64 %b) { + %c = sub i64 %a, %b + ret i64 %c +} + +; CHECK: function _mul($0,$1,$2,$3) { +; CHECK: $4 = (___muldi3(($0|0),($1|0),($2|0),($3|0))|0); +; CHECK: $5 = tempRet0; +; CHECK: } +define i64 @mul(i64 %a, i64 %b) { + %c = mul i64 %a, %b + ret i64 %c +} + +; CHECK: function _sdiv($0,$1,$2,$3) { +; CHECK: $4 = (___divdi3(($0|0),($1|0),($2|0),($3|0))|0); +; CHECK: $5 = tempRet0; +; CHECK: } +define i64 @sdiv(i64 %a, i64 %b) { + %c = sdiv i64 %a, %b + ret i64 %c +} + +; CHECK: function _udiv($0,$1,$2,$3) { +; CHECK: $4 = (___udivdi3(($0|0),($1|0),($2|0),($3|0))|0); +; CHECK: $5 = tempRet0; +; CHECK: } +define i64 @udiv(i64 %a, i64 %b) { + %c = udiv i64 %a, %b + ret i64 %c +} + +; CHECK: function _srem($0,$1,$2,$3) { +; CHECK: $4 = (___remdi3(($0|0),($1|0),($2|0),($3|0))|0); +; CHECK: $5 = tempRet0; +; CHECK: } +define i64 @srem(i64 %a, i64 %b) { + %c = srem i64 %a, %b + ret i64 %c +} + +; CHECK: function _urem($0,$1,$2,$3) { +; CHECK: $4 = (___uremdi3(($0|0),($1|0),($2|0),($3|0))|0); +; CHECK: $5 = tempRet0; +; CHECK: } +define i64 @urem(i64 %a, i64 %b) { + %c = urem i64 %a, %b + ret i64 %c +} + +; CHECK: function _and($0,$1,$2,$3) { +; CHECK: $4 = $0 & $2; +; CHECK: $5 = $1 & $3; +; CHECK: } +define i64 @and(i64 %a, i64 %b) { + %c = and i64 %a, %b + ret i64 %c +} + +; CHECK: function _or($0,$1,$2,$3) { +; CHECK: $4 = $0 | $2; +; CHECK: $5 = $1 | $3; +; CHECK: } +define i64 @or(i64 %a, i64 %b) { + %c = or i64 %a, %b + ret i64 %c +} + +; CHECK: function _xor($0,$1,$2,$3) { +; CHECK: $4 = $0 ^ $2; +; CHECK: $5 = $1 ^ $3; +; CHECK: } +define i64 @xor(i64 %a, i64 %b) { + %c = xor i64 %a, %b + ret i64 %c +} + +; CHECK: function _lshr($0,$1,$2,$3) { +; CHECK: $4 = (_bitshift64Lshr(($0|0),($1|0),($2|0))|0); +; CHECK: $5 = tempRet0; +; CHECK: } +define i64 @lshr(i64 %a, i64 %b) { + %c = lshr i64 %a, %b + ret i64 %c +} + +; CHECK: function _ashr($0,$1,$2,$3) { +; CHECK: $4 = (_bitshift64Ashr(($0|0),($1|0),($2|0))|0); +; CHECK: $5 = tempRet0; +; CHECK: } +define i64 @ashr(i64 %a, i64 %b) { + %c = ashr i64 %a, %b + ret i64 %c +} + +; CHECK: function _shl($0,$1,$2,$3) { +; CHECK: $4 = (_bitshift64Shl(($0|0),($1|0),($2|0))|0); +; CHECK: $5 = tempRet0; +; CHECK: } +define i64 @shl(i64 %a, i64 %b) { + %c = shl i64 %a, %b + ret i64 %c +} + +; CHECK: function _icmp_eq($0,$1,$2,$3) { +; CHECK: $4 = ($0|0)==($2|0); +; CHECK: $5 = ($1|0)==($3|0); +; CHECK: $6 = $4 & $5; +; CHECK: } +define i32 @icmp_eq(i64 %a, i64 %b) { + %c = icmp eq i64 %a, %b + %d = zext i1 %c to i32 + ret i32 %d +} + +; CHECK: function _icmp_ne($0,$1,$2,$3) { +; CHECK: $4 = ($0|0)!=($2|0); +; CHECK: $5 = ($1|0)!=($3|0); +; CHECK: $6 = $4 | $5; +; CHECK: } +define i32 @icmp_ne(i64 %a, i64 %b) { + %c = icmp ne i64 %a, %b + %d = zext i1 %c to i32 + ret i32 %d +} + +; CHECK: function 
_icmp_slt($0,$1,$2,$3) { +; CHECK: $4 = ($1|0)<($3|0); +; CHECK: $5 = ($0>>>0)<($2>>>0); +; CHECK: $6 = ($1|0)==($3|0); +; CHECK: $7 = $6 & $5; +; CHECK: $8 = $4 | $7; +; CHECK: } +define i32 @icmp_slt(i64 %a, i64 %b) { + %c = icmp slt i64 %a, %b + %d = zext i1 %c to i32 + ret i32 %d +} + +; CHECK: function _icmp_ult($0,$1,$2,$3) { +; CHECK: $4 = ($1>>>0)<($3>>>0); +; CHECK: $5 = ($0>>>0)<($2>>>0); +; CHECK: $6 = ($1|0)==($3|0); +; CHECK: $7 = $6 & $5; +; CHECK: $8 = $4 | $7; +; CHECK: } +define i32 @icmp_ult(i64 %a, i64 %b) { + %c = icmp ult i64 %a, %b + %d = zext i1 %c to i32 + ret i32 %d +} + +; CHECK: function _load($a) { +; CHECK: $0 = $a; +; CHECK: $1 = $0; +; CHECK: $2 = HEAP32[$1>>2]|0; +; CHECK: $3 = (($0) + 4)|0; +; CHECK: $4 = $3; +; CHECK: $5 = HEAP32[$4>>2]|0; +; CHECK: } +define i64 @load(i64 *%a) { + %c = load i64, i64* %a + ret i64 %c +} + +; CHECK: function _aligned_load($a) { +; CHECK: $0 = $a; +; CHECK: $1 = $0; +; CHECK: $2 = HEAP32[$1>>2]|0; +; CHECK: $3 = (($0) + 4)|0; +; CHECK: $4 = $3; +; CHECK: $5 = HEAP32[$4>>2]|0; +; CHECK: } +define i64 @aligned_load(i64 *%a) { + %c = load i64, i64* %a, align 16 + ret i64 %c +} + +; CHECK: function _store($a,$0,$1) { +; CHECK: $2 = $a; +; CHECK: $3 = $2; +; CHECK: HEAP32[$3>>2] = $0; +; CHECK: $4 = (($2) + 4)|0; +; CHECK: $5 = $4; +; CHECK: HEAP32[$5>>2] = $1; +; CHECK: } +define void @store(i64 *%a, i64 %b) { + store i64 %b, i64* %a + ret void +} + +; CHECK: function _aligned_store($a,$0,$1) { +; CHECK: $2 = $a; +; CHECK: $3 = $2; +; CHECK: HEAP32[$3>>2] = $0; +; CHECK: $4 = (($2) + 4)|0; +; CHECK: $5 = $4; +; CHECK: HEAP32[$5>>2] = $1; +; CHECK: } +define void @aligned_store(i64 *%a, i64 %b) { + store i64 %b, i64* %a, align 16 + ret void +} + +; CHECK: function _call($0,$1) { +; CHECK: $2 = (_foo(($0|0),($1|0))|0); +; CHECK: } +declare i64 @foo(i64 %arg) +define i64 @call(i64 %arg) { + %ret = call i64 @foo(i64 %arg) + ret i64 %ret +} + +; CHECK: function _trunc($0,$1) { +; CHECK: return ($0|0); +; CHECK: } +define i32 @trunc(i64 %x) { + %y = trunc i64 %x to i32 + ret i32 %y +} + +; CHECK: function _zext($x) { +; CHECK: tempRet0 = (0); +; CHECL: return ($x|0); +; CHECK: } +define i64 @zext(i32 %x) { + %y = zext i32 %x to i64 + ret i64 %y +} + +; CHECK: function _sext($x) { +; CHECK: $0 = ($x|0)<(0); +; CHECK: $1 = $0 << 31 >> 31; +; CHECK: tempRet0 = ($1); +; CHECK: return ($x|0); +; CHECK: } +define i64 @sext(i32 %x) { + %y = sext i32 %x to i64 + ret i64 %y +} + +; CHECK: function _unreachable_blocks($p) { +; CHECK: } +define void @unreachable_blocks(i64* %p) { + ret void + +dead: + %t = load i64, i64* %p + %s = add i64 %t, 1 + store i64 %s, i64* %p + ret void +} + diff --git a/test/CodeGen/JS/expand-insertextract.ll b/test/CodeGen/JS/expand-insertextract.ll new file mode 100644 index 000000000000..984da571bde1 --- /dev/null +++ b/test/CodeGen/JS/expand-insertextract.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: sp = STACKTOP; +; CHECK: STACKTOP = STACKTOP + 16|0; +; CHECK: $0 = sp; +; CHECK: SIMD_Float32x4_store(HEAPU8, temp_Float32x4_ptr, $p); +; CHECK: $1 = (($0) + ($i<<2)|0); +; CHECK: $2 = +HEAPF32[$1>>2]; +; CHECK: STACKTOP = sp;return (+$2); +; CHECK: } +define float @ext(<4 x float> %p, i32 %i) { + %f = extractelement <4 x float> %p, i32 %i + ret float %f +} + +; CHECK: sp = STACKTOP; +; CHECK: STACKTOP = STACKTOP + 16|0; +; CHECK: $0 = 
sp; +; CHECK: SIMD_Float32x4_store(HEAPU8, temp_Float32x4_ptr, $p); +; CHECK: $1 = (($0) + ($i<<2)|0); +; CHECK: HEAPF32[$1>>2] = $f; +; CHECK: $2 = SIMD_Float32x4_load(HEAPU8, $0); +; CHECK: STACKTOP = sp;return (SIMD_Float32x4_check($2)); +; CHECK: } +define <4 x float> @ins(<4 x float> %p, float %f, i32 %i) { + %v = insertelement <4 x float> %p, float %f, i32 %i + ret <4 x float> %v +} diff --git a/test/CodeGen/JS/expect-intrinsics.ll b/test/CodeGen/JS/expect-intrinsics.ll new file mode 100644 index 000000000000..6d2cba459260 --- /dev/null +++ b/test/CodeGen/JS/expect-intrinsics.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s | FileCheck %s + +; Handle the llvm.expect intrinsic. + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: $expval = $x; +; CHECK: $tobool = ($expval|0)!=(0); + +define void @foo(i32 %x) { +entry: + %expval = call i32 @llvm.expect.i32(i32 %x, i32 0) + %tobool = icmp ne i32 %expval, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: + call void @callee() + br label %if.end + +if.end: + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.expect.i32(i32, i32) #0 + +declare void @callee() + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/JS/ffis-f32.ll b/test/CodeGen/JS/ffis-f32.ll new file mode 100644 index 000000000000..63b8e9a5ffd1 --- /dev/null +++ b/test/CodeGen/JS/ffis-f32.ll @@ -0,0 +1,81 @@ +; RUN: llc -emscripten-precise-f32 < %s | FileCheck %s + +; Use proper types to ffi calls, with float32 + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: (+Math_sqrt(+1)); +; CHECK-NEXT: (Math_fround(Math_sqrt(Math_fround(+1)))); +; CHECK-NEXT: (+Math_sqrt((+$d))); +; CHECK-NEXT: (Math_fround(Math_sqrt((Math_fround($f))))); +; CHECK-NEXT: (+Math_ceil(+1)); +; CHECK-NEXT: (Math_fround(Math_ceil(Math_fround(+1)))); +; CHECK-NEXT: (+Math_floor(+1)); +; CHECK-NEXT: (Math_fround(Math_floor(Math_fround(+1)))); +; CHECK-NEXT: (+_min(+1,+1)); +; CHECK-NEXT: (Math_fround(+(_fmin(+1,+1)))); +; CHECK-NEXT: (+_max(+1,+1)); +; CHECK-NEXT: (Math_fround(+(_fmax(+1,+1)))); +; CHECK-NEXT: (+Math_abs(+1)); +; CHECK-NEXT: (Math_fround(+(_absf(+1)))); +; CHECK-NEXT: (+Math_sin(+1)); +; CHECK-NEXT: (Math_fround(+(Math_sin(+1)))); +define void @foo(i32 %x) { +entry: + %f = fadd float 1.0, 2.0 + %d = fadd double 1.0, 2.0 + + %sqrtd = call double @sqrt(double 1.0) + %sqrtf = call float @sqrtf(float 1.0) + %sqrtdv = call double @sqrt(double %d) ; check vars too + %sqrtfv = call float @sqrtf(float %f) + + %ceild = call double @ceil(double 1.0) + %ceilf = call float @ceilf(float 1.0) + + %floord = call double @floor(double 1.0) + %floorf = call float @floorf(float 1.0) + + ; these could be optimized in theory + + %mind = call double @min(double 1.0, double 1.0) + %minf = call float @fmin(float 1.0, float 1.0) + + %maxd = call double @max(double 1.0, double 1.0) + %maxf = call float @fmax(float 1.0, float 1.0) + + %absd = call double @abs(double 1.0) + %absf = call float @absf(float 1.0) + + ; sin is NOT optimizable with floats + + %sind = call double @sin(double 1.0) + %sinf = call float @sinf(float 1.0) + + ret void +} + +declare double @sqrt(double %x) +declare float @sqrtf(float %x) + +declare double @ceil(double %x) +declare float @ceilf(float %x) + +declare double @floor(double %x) +declare float @floorf(float 
%x) + +declare double @min(double %x, double %y) +declare float @fmin(float %x, float %y) + +declare double @max(double %x, double %y) +declare float @fmax(float %x, float %y) + +declare double @abs(double %x) +declare float @absf(float %x) + +declare double @sin(double %x) +declare float @sinf(float %x) + +attributes #0 = { nounwind readnone } + diff --git a/test/CodeGen/JS/ffis.ll b/test/CodeGen/JS/ffis.ll new file mode 100644 index 000000000000..d5b8e334e7a8 --- /dev/null +++ b/test/CodeGen/JS/ffis.ll @@ -0,0 +1,81 @@ +; RUN: llc < %s | FileCheck %s + +; Use proper types to ffi calls, no float32 + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: (+Math_sqrt(+1)); +; CHECK-NEXT: (+Math_sqrt(+1)); +; CHECK-NEXT: (+Math_sqrt((+$d))); +; CHECK-NEXT: (+Math_sqrt((+$f))); +; CHECK-NEXT: (+Math_ceil(+1)); +; CHECK-NEXT: (+Math_ceil(+1)); +; CHECK-NEXT: (+Math_floor(+1)); +; CHECK-NEXT: (+Math_floor(+1)); +; CHECK-NEXT: (+_min(+1,+1)); +; CHECK-NEXT: (+_fmin(+1,+1)); +; CHECK-NEXT: (+_max(+1,+1)); +; CHECK-NEXT: (+_fmax(+1,+1)); +; CHECK-NEXT: (+Math_abs(+1)); +; CHECK-NEXT: (+_absf(+1)); +; CHECK-NEXT: (+Math_sin(+1)); +; CHECK-NEXT: (+Math_sin(+1)); +define void @foo(i32 %x) { +entry: + %f = fadd float 1.0, 2.0 + %d = fadd double 1.0, 2.0 + + %sqrtd = call double @sqrt(double 1.0) + %sqrtf = call float @sqrtf(float 1.0) + %sqrtdv = call double @sqrt(double %d) ; check vars too + %sqrtfv = call float @sqrtf(float %f) + + %ceild = call double @ceil(double 1.0) + %ceilf = call float @ceilf(float 1.0) + + %floord = call double @floor(double 1.0) + %floorf = call float @floorf(float 1.0) + + ; these could be optimized in theory + + %mind = call double @min(double 1.0, double 1.0) + %minf = call float @fmin(float 1.0, float 1.0) + + %maxd = call double @max(double 1.0, double 1.0) + %maxf = call float @fmax(float 1.0, float 1.0) + + %absd = call double @abs(double 1.0) + %absf = call float @absf(float 1.0) + + ; sin is NOT optimizable with floats + + %sind = call double @sin(double 1.0) + %sinf = call float @sinf(float 1.0) + + ret void +} + +declare double @sqrt(double %x) +declare float @sqrtf(float %x) + +declare double @ceil(double %x) +declare float @ceilf(float %x) + +declare double @floor(double %x) +declare float @floorf(float %x) + +declare double @min(double %x, double %y) +declare float @fmin(float %x, float %y) + +declare double @max(double %x, double %y) +declare float @fmax(float %x, float %y) + +declare double @abs(double %x) +declare float @absf(float %x) + +declare double @sin(double %x) +declare float @sinf(float %x) + +attributes #0 = { nounwind readnone } + diff --git a/test/CodeGen/JS/getelementptr.ll b/test/CodeGen/JS/getelementptr.ll new file mode 100644 index 000000000000..1b1e15cf3f83 --- /dev/null +++ b/test/CodeGen/JS/getelementptr.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s | FileCheck %s + +; Test simple getelementptr codegen. + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; Test that trailing indices are folded. 
+ +; CHECK: function _getelementptr([[VAL_P:\$[a-z_]+]]) { +; CHECK: [[GEP:\$[a-z_]+]] = ((([[GEPINT:\$[a-z_]+]])) + 588|0); +define i32* @getelementptr([10 x [12 x i32] ]* %p) { + %t = getelementptr [10 x [12 x i32]], [10 x [12 x i32]]* %p, i32 1, i32 2, i32 3 + ret i32* %t +} + +%struct.A = type { i32, [34 x i16] } + +@global = global [72 x i8] zeroinitializer, align 4 + +; Fold globals into getelementptr addressing. + +; CHECK: function _fold_global($i) { +; CHECK: $add = (($i) + 34)|0; +; CHECK: $arrayidx = (12 + ($add<<1)|0); +; CHECK: $t0 = HEAP16[$arrayidx>>1]|0; +define i16 @fold_global(i32 %i) { + %add = add i32 %i, 34 + %arrayidx = getelementptr %struct.A, %struct.A* bitcast ([72 x i8]* @global to %struct.A*), i32 0, i32 1, i32 %add + %t0 = load volatile i16, i16* %arrayidx, align 2 + ret i16 %t0 +} + +; Don't reassociate the indices of a getelementptr, which would increase +; the chances of creating out-of-bounds intermediate values. + +; CHECK: function _no_reassociate($p,$i) { +; CHECK: $add = (($i) + 34)|0; +; CHECK: $arrayidx = (((($p)) + 4|0) + ($add<<1)|0); +; CHECK: $t0 = HEAP16[$arrayidx>>1]|0; +define i16 @no_reassociate(%struct.A* %p, i32 %i) { + %add = add i32 %i, 34 + %arrayidx = getelementptr %struct.A, %struct.A* %p, i32 0, i32 1, i32 %add + %t0 = load volatile i16, i16* %arrayidx, align 2 + ret i16 %t0 +} + diff --git a/test/CodeGen/JS/global-alias.ll b/test/CodeGen/JS/global-alias.ll new file mode 100644 index 000000000000..3049216196f4 --- /dev/null +++ b/test/CodeGen/JS/global-alias.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s | FileCheck %s + +; Handle global aliases of various kinds. + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +@pri = internal global [60 x i8] zeroinitializer +@pub = global [60 x i8] zeroinitializer + +@pri_int = internal alias [60 x i8], [60 x i8]* @pri +@pri_wea = weak alias [60 x i8], [60 x i8]* @pri +@pri_nor = alias [60 x i8], [60 x i8]* @pri + +@pub_int = internal alias [60 x i8], [60 x i8]* @pub +@pub_wea = weak alias [60 x i8], [60 x i8]* @pub +@pub_nor = alias [60 x i8], [60 x i8]* @pub + +; CHECK: test0( +; CHECK: return ([[PRI:[0-9]+]]|0); +define [60 x i8]* @test0() { + ret [60 x i8]* @pri +} +; CHECK: test1( +; CHECK: return ([[PRI]]|0); +define [60 x i8]* @test1() { + ret [60 x i8]* @pri_int +} +; CHECK: test2( +; CHECK: return ([[PRI]]|0); +define [60 x i8]* @test2() { + ret [60 x i8]* @pri_wea +} +; CHECK: test3( +; CHECK: return ([[PRI]]|0); +define [60 x i8]* @test3() { + ret [60 x i8]* @pri_nor +} + +; CHECK: test4( +; CHECK: return ([[PUB:[0-9]+]]|0); +define [60 x i8]* @test4() { + ret [60 x i8]* @pub +} +; CHECK: test5( +; CHECK: return ([[PUB]]|0); +define [60 x i8]* @test5() { + ret [60 x i8]* @pub_int +} +; CHECK: test6( +; CHECK: return ([[PUB]]|0); +define [60 x i8]* @test6() { + ret [60 x i8]* @pub_wea +} +; CHECK: test7( +; CHECK: return ([[PUB]]|0); +define [60 x i8]* @test7() { + ret [60 x i8]* @pub_nor +} diff --git a/test/CodeGen/JS/globals.ll b/test/CodeGen/JS/globals.ll new file mode 100644 index 000000000000..42e57457722f --- /dev/null +++ b/test/CodeGen/JS/globals.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s | FileCheck %s + +; Test simple global variable codegen. 
+ +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _loads() { +; CHECK: [[VAR_t:\$[a-z]+]] = HEAP32[4]|0; +; CHECK: [[VAR_s:\$[a-z]+]] = +HEAPF64[1]; +; CHECK: [[VAR_u:\$[a-z]+]] = HEAP8[20]|0; +; CHECK: [[VAR_a:\$[a-z]+]] = (~~(([[VAR_s:\$[a-z]+]]))>>>0); +; CHECK: [[VAR_b:\$[a-z]+]] = [[VAR_u:\$[a-z]+]] << 24 >> 24; +; CHECK: [[VAR_c:\$[a-z]+]] = (([[VAR_t:\$[a-z]+]]) + ([[VAR_a:\$[a-z]+]]))|0; +; CHECK: [[VAR_d:\$[a-z]+]] = (([[VAR_c:\$[a-z]+]]) + ([[VAR_b:\$[a-z]+]]))|0; +; CHECK: return ([[VAR_d:\$[a-z]+]]|0); +define i32 @loads() { + %t = load i32, i32* @A + %s = load double, double* @B + %u = load i8, i8* @C + %a = fptoui double %s to i32 + %b = sext i8 %u to i32 + %c = add i32 %t, %a + %d = add i32 %c, %b + ret i32 %d +} + +; CHECK: function _stores([[VAR_m:\$[a-z]+]],[[VAR_n:\$[a-z]+]],[[VAR_o:\$[a-z]+]]) { +; CHECK: [[VAR_m:\$[a-z]+]] = [[VAR_m:\$[a-z]+]]|0; +; CHECK: [[VAR_n:\$[a-z]+]] = [[VAR_n:\$[a-z]+]]|0; +; CHECK: [[VAR_o:\$[a-z]+]] = +[[VAR_o:\$[a-z]+]]; +; CHECK: HEAP32[4] = [[VAR_n:\$[a-z]+]]; +; CHECK: HEAPF64[1] = [[VAR_o:\$[a-z]+]]; +; CHECK: HEAP8[20] = [[VAR_m:\$[a-z]+]]; +define void @stores(i8 %m, i32 %n, double %o) { + store i32 %n, i32* @A + store double %o, double* @B + store i8 %m, i8* @C + ret void +} + +; CHECK: allocate([205,204,204,204,204,76,55,64,133,26,0,0,2], "i8", ALLOC_NONE, Runtime.GLOBAL_BASE); +@A = global i32 6789 +@B = global double 23.3 +@C = global i8 2 diff --git a/test/CodeGen/JS/insertelement-chains.ll b/test/CodeGen/JS/insertelement-chains.ll new file mode 100644 index 000000000000..e28bfb49399d --- /dev/null +++ b/test/CodeGen/JS/insertelement-chains.ll @@ -0,0 +1,99 @@ +; RUN: llc -emscripten-precise-f32 < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; Basic constructor. + +; CHECK: function _test0($x,$y,$z,$w) { +; CHECK: $d = SIMD_Float32x4($x, $y, $z, $w) +; CHECK: } +define <4 x float> @test0(float %x, float %y, float %z, float %w) { + %a = insertelement <4 x float> undef, float %x, i32 0 + %b = insertelement <4 x float> %a, float %y, i32 1 + %c = insertelement <4 x float> %b, float %z, i32 2 + %d = insertelement <4 x float> %c, float %w, i32 3 + ret <4 x float> %d +} + +; Same as test0 but elements inserted in a different order. + +; CHECK: function _test1($x,$y,$z,$w) { +; CHECK: $d = SIMD_Float32x4($x, $y, $z, $w) +; CHECK: } +define <4 x float> @test1(float %x, float %y, float %z, float %w) { + %a = insertelement <4 x float> undef, float %w, i32 3 + %b = insertelement <4 x float> %a, float %y, i32 1 + %c = insertelement <4 x float> %b, float %z, i32 2 + %d = insertelement <4 x float> %c, float %x, i32 0 + ret <4 x float> %d +} + +; Overwriting elements. 
+ +; CHECK: function _test2($x,$y,$z,$w) { +; CHECK: $h = SIMD_Float32x4($x, $y, $z, $w) +; CHECK: } +define <4 x float> @test2(float %x, float %y, float %z, float %w) { + %a = insertelement <4 x float> undef, float %z, i32 0 + %b = insertelement <4 x float> %a, float %x, i32 0 + %c = insertelement <4 x float> %b, float %w, i32 1 + %d = insertelement <4 x float> %c, float %y, i32 1 + %e = insertelement <4 x float> %d, float %x, i32 2 + %f = insertelement <4 x float> %e, float %z, i32 2 + %g = insertelement <4 x float> %f, float %y, i32 3 + %h = insertelement <4 x float> %g, float %w, i32 3 + ret <4 x float> %h +} + +; Basic splat testcase. + +; CHECK: function _test3($x) { +; CHECK: $d = SIMD_Float32x4_splat($x) +; CHECK: } +define <4 x float> @test3(float %x) { + %a = insertelement <4 x float> undef, float %x, i32 0 + %b = insertelement <4 x float> %a, float %x, i32 1 + %c = insertelement <4 x float> %b, float %x, i32 2 + %d = insertelement <4 x float> %c, float %x, i32 3 + ret <4 x float> %d +} + +; Same as test3 but elements inserted in a different order. + +; CHECK: function _test4($x) { +; CHECK: $d = SIMD_Float32x4_splat($x) +; CHECK: } +define <4 x float> @test4(float %x) { + %a = insertelement <4 x float> undef, float %x, i32 3 + %b = insertelement <4 x float> %a, float %x, i32 1 + %c = insertelement <4 x float> %b, float %x, i32 2 + %d = insertelement <4 x float> %c, float %x, i32 0 + ret <4 x float> %d +} + +; Insert chain. + +; CHECK: function _test5($x,$y,$z,$w) { +; CHECK: $f = SIMD_Float32x4_replaceLane(SIMD_Float32x4_replaceLane(SIMD_Float32x4_replaceLane(SIMD_Float32x4_splat(Math_fround(0)),0,$x),1,$y),2,$z) +; CHECK: } +define <4 x float> @test5(float %x, float %y, float %z, float %w) { + %a = insertelement <4 x float> undef, float %z, i32 0 + %b = insertelement <4 x float> %a, float %x, i32 0 + %c = insertelement <4 x float> %b, float %w, i32 1 + %d = insertelement <4 x float> %c, float %y, i32 1 + %e = insertelement <4 x float> %d, float %x, i32 2 + %f = insertelement <4 x float> %e, float %z, i32 2 + ret <4 x float> %f +} + +; Splat via insert+shuffle. + +; CHECK: function _test6($x) { +; CHECK: $b = SIMD_Float32x4_splat($x) +; CHECK: } +define <4 x float> @test6(float %x) { + %a = insertelement <4 x float> undef, float %x, i32 0 + %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %b +} diff --git a/test/CodeGen/JS/invariant-intrinsics.ll b/test/CodeGen/JS/invariant-intrinsics.ll new file mode 100644 index 000000000000..b64355f83b60 --- /dev/null +++ b/test/CodeGen/JS/invariant-intrinsics.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s | not grep invariant + +; llc currently emits no code or bookkeeping for invariant intrinsic calls +; or declarations. 
+ +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +declare void @bar(i8*) + +define void @foo() { + %p = alloca i8 + %i = call {}* @llvm.invariant.start(i64 1, i8* %p) + call void @bar(i8* %p) + call void @llvm.invariant.end({}* %i, i64 1, i8* %p) + ret void +} + +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) diff --git a/test/CodeGen/JS/lifetime-intrinsics.ll b/test/CodeGen/JS/lifetime-intrinsics.ll new file mode 100644 index 000000000000..46f613bfa3d9 --- /dev/null +++ b/test/CodeGen/JS/lifetime-intrinsics.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s | not grep lifetime + +; llc currently emits no code or bookkeeping for lifetime intrinsic calls +; or declarations. + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +declare void @bar(i8*) + +define void @foo() { + %p = alloca i8 + call void @llvm.lifetime.start(i64 1, i8* %p) + call void @bar(i8* %p) + call void @llvm.lifetime.end(i64 1, i8* %p) + ret void +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.end(i64, i8* nocapture) diff --git a/test/CodeGen/JS/lit.local.cfg b/test/CodeGen/JS/lit.local.cfg new file mode 100644 index 000000000000..ee9b61f930fe --- /dev/null +++ b/test/CodeGen/JS/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'JSBackend' in targets: + config.unsupported = True + diff --git a/test/CodeGen/JS/mem-intrinsics.ll b/test/CodeGen/JS/mem-intrinsics.ll new file mode 100644 index 000000000000..f0e21fc78d45 --- /dev/null +++ b/test/CodeGen/JS/mem-intrinsics.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s | FileCheck %s + +; llc should emit small aligned memcpy and memset inline. 
+ +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: test_unrolled_memcpy +; CHECK: HEAP32[$d>>2]=HEAP32[$s>>2]|0;HEAP32[$d+4>>2]=HEAP32[$s+4>>2]|0;HEAP32[$d+8>>2]=HEAP32[$s+8>>2]|0;HEAP32[$d+12>>2]=HEAP32[$s+12>>2]|0;HEAP32[$d+16>>2]=HEAP32[$s+16>>2]|0;HEAP32[$d+20>>2]=HEAP32[$s+20>>2]|0;HEAP32[$d+24>>2]=HEAP32[$s+24>>2]|0;HEAP32[$d+28>>2]=HEAP32[$s+28>>2]|0; +define void @test_unrolled_memcpy(i8* %d, i8* %s) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 32, i32 4, i1 false) + ret void +} + +; CHECK: test_loop_memcpy +; CHECK: dest=$d; src=$s; stop=dest+64|0; do { HEAP32[dest>>2]=HEAP32[src>>2]|0; dest=dest+4|0; src=src+4|0; } while ((dest|0) < (stop|0)) +define void @test_loop_memcpy(i8* %d, i8* %s) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 64, i32 4, i1 false) + ret void +} + +; CHECK: test_call_memcpy +; CHECK: memcpy(($d|0),($s|0),65536) +define void @test_call_memcpy(i8* %d, i8* %s) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 65536, i32 4, i1 false) + ret void +} + +; CHECK: test_unrolled_memset +; CHECK: HEAP32[$d>>2]=0|0;HEAP32[$d+4>>2]=0|0;HEAP32[$d+8>>2]=0|0;HEAP32[$d+12>>2]=0|0;HEAP32[$d+16>>2]=0|0;HEAP32[$d+20>>2]=0|0;HEAP32[$d+24>>2]=0|0;HEAP32[$d+28>>2]=0|0; +define void @test_unrolled_memset(i8* %d, i8* %s) { + call void @llvm.memset.p0i8.i32(i8* %d, i8 0, i32 32, i32 4, i1 false) + ret void +} + +; CHECK: test_loop_memset +; CHECK: dest=$d; stop=dest+64|0; do { HEAP32[dest>>2]=0|0; dest=dest+4|0; } while ((dest|0) < (stop|0)); +define void @test_loop_memset(i8* %d, i8* %s) { + call void @llvm.memset.p0i8.i32(i8* %d, i8 0, i32 64, i32 4, i1 false) + ret void +} + +; CHECK: test_call_memset +; CHECK: memset(($d|0),0,65536) +define void @test_call_memset(i8* %d, i8* %s) { + call void @llvm.memset.p0i8.i32(i8* %d, i8 0, i32 65536, i32 4, i1 false) + ret void +} + +; Also, don't emit declarations for the intrinsic functions. +; CHECK-NOT: p0i8 + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/JS/phi.ll b/test/CodeGen/JS/phi.ll new file mode 100644 index 000000000000..cef01ce2f4d2 --- /dev/null +++ b/test/CodeGen/JS/phi.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s | FileCheck %s + +; Phi lowering should check for dependency cycles, including looking through +; bitcasts, and emit extra copies as needed. 
+ +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: while(1) { +; CHECK: $k$phi = $j;$j$phi = $k;$k = $k$phi;$j = $j$phi; +; CHECK: } +define void @foo(float* nocapture %p, i32* %j.init, i32* %k.init) { +entry: + br label %for.body + +for.body: + %j = phi i32* [ %j.init, %entry ], [ %k.cast, %more ] + %k = phi i32* [ %k.init, %entry ], [ %j.cast, %more ] + br label %more + +more: + %j.cast = bitcast i32* %j to i32* + %k.cast = bitcast i32* %k to i32* + br label %for.body +} diff --git a/test/CodeGen/JS/simd-fcmp.ll b/test/CodeGen/JS/simd-fcmp.ll new file mode 100644 index 000000000000..f6df20acd2ab --- /dev/null +++ b/test/CodeGen/JS/simd-fcmp.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _test_ueq($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: $c = SIMD_Int32x4_notEqual(SIMD_Bool32x4_or(SIMD_Bool32x4_or(SIMD_Int32x4_select(SIMD_Float32x4_notEqual($a,$a), SIMD_Int32x4_splat(-1), SIMD_Int32x4_splat(0)),SIMD_Int32x4_select(SIMD_Float32x4_notEqual($b,$b), SIMD_Int32x4_splat(-1), SIMD_Int32x4_splat(0)),SIMD_Int32x4_select(SIMD_Float32x4_equal($a,$b), SIMD_Int32x4_splat(-1), SIMD_Int32x4_splat(0))), SIMD_Int32x4_splat(0)); +; CHECK: return (SIMD_Bool32x4_check($c)); +; CHECK:} +define <4 x i1> @test_ueq(<4 x float> %a, <4 x float> %b) { + %c = fcmp ueq <4 x float> %a, %b + ret <4 x i1> %c +} + +; CHECK: function _test_ord($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: $c = SIMD_Int32x4_notEqual(SIMD_Bool32x4_or(SIMD_Bool32x4_or(SIMD_Int32x4_select(SIMD_Float32x4_notEqual($a,$a), SIMD_Int32x4_splat(-1), SIMD_Int32x4_splat(0)),SIMD_Int32x4_select(SIMD_Float32x4_notEqual($b,$b), SIMD_Int32x4_splat(-1), SIMD_Int32x4_splat(0)),SIMD_Int32x4_select(SIMD_Float32x4_equal($a,$b), SIMD_Int32x4_splat(-1), SIMD_Int32x4_splat(0))), SIMD_Int32x4_splat(0)); +; CHECK: return (SIMD_Bool32x4_check($c)); +; CHECK:} +define <4 x i1> @test_ord(<4 x float> %a, <4 x float> %b) { + %c = fcmp ueq <4 x float> %a, %b + ret <4 x i1> %c +} + +; CHECK:function _test_uno($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: $c = SIMD_Int32x4_notEqual(SIMD_Bool32x4_or(SIMD_Bool32x4_or(SIMD_Int32x4_select(SIMD_Float32x4_notEqual($a,$a), SIMD_Int32x4_splat(-1), SIMD_Int32x4_splat(0)),SIMD_Int32x4_select(SIMD_Float32x4_notEqual($b,$b), SIMD_Int32x4_splat(-1), SIMD_Int32x4_splat(0)),SIMD_Int32x4_select(SIMD_Float32x4_equal($a,$b), SIMD_Int32x4_splat(-1), SIMD_Int32x4_splat(0))), SIMD_Int32x4_splat(0)); +; CHECK: return (SIMD_Bool32x4_check($c)); +; CHECK:} +define <4 x i1> @test_uno(<4 x float> %a, <4 x float> %b) { + %c = fcmp ueq <4 x float> %a, %b + ret <4 x i1> %c +} diff --git a/test/CodeGen/JS/simd-loadstore.ll b/test/CodeGen/JS/simd-loadstore.ll new file mode 100644 index 000000000000..6955d7ec6a82 --- /dev/null +++ b/test/CodeGen/JS/simd-loadstore.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _fx1($p) { +; CHECK: $p = $p|0; +; CHECK: var $s = 
SIMD_Float32x4(0,0,0,0), $t = SIMD_Float32x4(0,0,0,0), label = 0, sp = 0, temp_Float32x4_ptr = 0;
+; CHECK: $t = SIMD_Float32x4_load1(HEAPU8, $p);
+; CHECK: $s = SIMD_Float32x4_add($t,SIMD_Float32x4_splat(Math_fround(+0.5)));
+; CHECK: temp_Float32x4_ptr = $p;SIMD_Float32x4_store1(HEAPU8, temp_Float32x4_ptr, $s);
+; CHECK: return;
+; CHECK: }
+define void @fx1(i8* %p) {
+ %q = bitcast i8* %p to <1 x float>*
+ %t = load <1 x float>, <1 x float>* %q
+ %s = fadd <1 x float> %t, <float 0.5>
+ store <1 x float> %s, <1 x float>* %q
+ ret void
+}
+
+; CHECK: function _fx2($p) {
+; CHECK: $p = $p|0;
+; CHECK: $s = SIMD_Float32x4(0,0,0,0), $t = SIMD_Float32x4(0,0,0,0), label = 0, sp = 0, temp_Float32x4_ptr = 0;
+; CHECK: $t = SIMD_Float32x4_load2(HEAPU8, $p);
+; CHECK: $s = SIMD_Float32x4_add($t,SIMD_Float32x4(Math_fround(+3.5),Math_fround(+7.5),Math_fround(+0),Math_fround(+0)));
+; CHECK: temp_Float32x4_ptr = $p;SIMD_Float32x4_store2(HEAPU8, temp_Float32x4_ptr, $s);
+; CHECK: return;
+; CHECK: }
+define void @fx2(i8* %p) {
+ %q = bitcast i8* %p to <2 x float>*
+ %t = load <2 x float>, <2 x float>* %q
+ %s = fadd <2 x float> %t, <float 3.5, float 7.5>
+ store <2 x float> %s, <2 x float>* %q
+ ret void
+}
+
+; CHECK: function _fx3($p) {
+; CHECK: $p = $p|0;
+; CHECK: var $s = SIMD_Float32x4(0,0,0,0), $t = SIMD_Float32x4(0,0,0,0), label = 0, sp = 0, temp_Float32x4_ptr = 0;
+; CHECK: $t = SIMD_Float32x4_load3(HEAPU8, $p);
+; CHECK: $s = SIMD_Float32x4_add($t,SIMD_Float32x4(Math_fround(+1.5),Math_fround(+4.5),Math_fround(+6.5),Math_fround(+0)));
+; CHECK: temp_Float32x4_ptr = $p;SIMD_Float32x4_store3(HEAPU8, temp_Float32x4_ptr, $s);
+; CHECK: return;
+; CHECK: }
+define void @fx3(i8* %p) {
+ %q = bitcast i8* %p to <3 x float>*
+ %t = load <3 x float>, <3 x float>* %q
+ %s = fadd <3 x float> %t, <float 1.5, float 4.5, float 6.5>
+ store <3 x float> %s, <3 x float>* %q
+ ret void
+}
+
+; CHECK: function _fx4($p) {
+; CHECK: $p = $p|0;
+; CHECK: var $s = SIMD_Float32x4(0,0,0,0), $t = SIMD_Float32x4(0,0,0,0), label = 0, sp = 0, temp_Float32x4_ptr = 0;
+; CHECK: $t = SIMD_Float32x4_load(HEAPU8, $p);
+; CHECK: $s = SIMD_Float32x4_add($t,SIMD_Float32x4(Math_fround(+9.5),Math_fround(+5.5),Math_fround(+1.5),Math_fround(+-3.5)));
+; CHECK: temp_Float32x4_ptr = $p;SIMD_Float32x4_store(HEAPU8, temp_Float32x4_ptr, $s);
+; CHECK: return;
+; CHECK: }
+define void @fx4(i8* %p) {
+ %q = bitcast i8* %p to <4 x float>*
+ %t = load <4 x float>, <4 x float>* %q
+ %s = fadd <4 x float> %t, <float 9.5, float 5.5, float 1.5, float -3.5>
+ store <4 x float> %s, <4 x float>* %q
+ ret void
+}
diff --git a/test/CodeGen/JS/simd-misc.ll b/test/CodeGen/JS/simd-misc.ll
new file mode 100644
index 000000000000..99a47131c661
--- /dev/null
+++ b/test/CodeGen/JS/simd-misc.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+declare <4 x float> @emscripten_float32x4_reciprocalApproximation(<4 x float>)
+declare <4 x float> @emscripten_float32x4_reciprocalSqrtApproximation(<4 x float>)
+
+; CHECK: function _test_rcp($a) {
+; CHECK: $a = SIMD_Float32x4_check($a);
+; CHECK: SIMD_Float32x4_reciprocalApproximation
+; CHECK:}
+define <4 x float> @test_rcp(<4 x float> %a) {
+ %c = call <4 x float> @emscripten_float32x4_reciprocalApproximation(<4 x float> %a)
+ ret <4 x float> %c
+}
+
+; CHECK: function _test_rsqrt($a) {
+; CHECK: $a = SIMD_Float32x4_check($a);
+; CHECK: SIMD_Float32x4_reciprocalSqrtApproximation
+; CHECK:}
+define <4 x float> @test_rsqrt(<4 x float> %a) {
+ %c = call <4 x
float> @emscripten_float32x4_reciprocalSqrtApproximation(<4 x float> %a) + ret <4 x float> %c +} + +; CHECK: function _sext_vec($a) { +; CHECK: $b = SIMD_Int32x4_select($a, SIMD_Int32x4_splat(-1), SIMD_Int32x4_splat(0)); +; CHECK: } +define <4 x i32> @sext_vec(<4 x i1> %a) { + %b = sext <4 x i1> %a to <4 x i32> + ret <4 x i32> %b +} + +; CHECK: function _zext_vec($a) { +; CHECK: $b = SIMD_Int32x4_select($a, SIMD_Int32x4_splat(1), SIMD_Int32x4_splat(0)); +; CHECK: } +define <4 x i32> @zext_vec(<4 x i1> %a) { + %b = zext <4 x i1> %a to <4 x i32> + ret <4 x i32> %b +} diff --git a/test/CodeGen/JS/simd-select.ll b/test/CodeGen/JS/simd-select.ll new file mode 100644 index 000000000000..d3f133428792 --- /dev/null +++ b/test/CodeGen/JS/simd-select.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _test0($a,$b,$cond) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: $cond = SIMD_Bool32x4_check($cond); +; CHECK: $cmp = SIMD_Int32x4_select($cond,$a,$b); +; CHECK: return (SIMD_Int32x4_check($cmp)); +; CHECK: } +define <4 x i32> @test0(<4 x i32> %a, <4 x i32> %b, <4 x i1> %cond) nounwind { +entry: + %cmp = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %cmp +} + +; CHECK: function _test1($a,$b,$cond) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: $cond = SIMD_Bool32x4_check($cond); +; CHECK: $cmp = SIMD_Float32x4_select($cond,$a,$b); +; CHECK: return (SIMD_Float32x4_check($cmp)); +; CHECK: } +define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %cond) nounwind { +entry: + %cmp = select <4 x i1> %cond, <4 x float> %a, <4 x float> %b + ret <4 x float> %cmp +} + +; CHECK: function _test2($a,$b,$cond) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: $cond = $cond|0; +; CHECK: $cmp = $cond ? $a : $b; +; CHECK: return (SIMD_Int32x4_check($cmp)); +; CHECK: } +define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b, i1 %cond) nounwind { +entry: + %cmp = select i1 %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %cmp +} + +; CHECK: function _test3($a,$b,$cond) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: $cond = $cond|0; +; CHECK: $cmp = $cond ? 
$a : $b; +; CHECK: return (SIMD_Float32x4_check($cmp)); +; CHECK: } +define <4 x float> @test3(<4 x float> %a, <4 x float> %b, i1 %cond) nounwind { +entry: + %cmp = select i1 %cond, <4 x float> %a, <4 x float> %b + ret <4 x float> %cmp +} diff --git a/test/CodeGen/JS/simd-shift.ll b/test/CodeGen/JS/simd-shift.ll new file mode 100644 index 000000000000..7b1d9809c770 --- /dev/null +++ b/test/CodeGen/JS/simd-shift.ll @@ -0,0 +1,142 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _test0($a) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $shl = SIMD_Int32x4_shiftLeftByScalar($a, 3); +; CHECK: return (SIMD_Int32x4_check($shl)); +; CHECK: } +define <4 x i32> @test0(<4 x i32> %a) { +entry: + %shl = shl <4 x i32> %a, + ret <4 x i32> %shl +} + +; CHECK: function _test1($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = $b|0; +; CHECK: SIMD_Int32x4_shiftLeftByScalar($a, $b); +; CHECK: return (SIMD_Int32x4_check($shl)); +; CHECK: } +define <4 x i32> @test1(<4 x i32> %a, i32 %b) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3 + %shl = shl <4 x i32> %a, %vecinit3 + ret <4 x i32> %shl +} + +; CHECK: function _test2($a,$b,$c) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = $b|0; +; CHECK: $c = $c|0; +; CHECK: var $shl = SIMD_Int32x4(0,0,0,0), $vecinit3 = SIMD_Int32x4(0,0,0,0), label = 0, sp = 0; +; CHECK: $vecinit3 = SIMD_Int32x4($b, $b, $c, $b); +; CHECK: $shl = SIMD_Int32x4((SIMD_Int32x4_extractLane($a,0)|0) << (SIMD_Int32x4_extractLane($vecinit3,0)|0)|0, (SIMD_Int32x4_extractLane($a,1)|0) << (SIMD_Int32x4_extractLane($vecinit3,1)|0)|0, (SIMD_Int32x4_extractLane($a,2)|0) << (SIMD_Int32x4_extractLane($vecinit3,2)|0)|0, (SIMD_Int32x4_extractLane($a,3)|0) << (SIMD_Int32x4_extractLane($vecinit3,3)|0)|0); +; CHECK: return (SIMD_Int32x4_check($shl)); +; CHECK: } +define <4 x i32> @test2(<4 x i32> %a, i32 %b, i32 %c) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3 + %shl = shl <4 x i32> %a, %vecinit3 + ret <4 x i32> %shl +} + +; CHECK: function _test3($a) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: SIMD_Int32x4_shiftRightArithmeticByScalar($a, 3); +; CHECK: return (SIMD_Int32x4_check($shr)); +; CHECK: } +define <4 x i32> @test3(<4 x i32> %a) { +entry: + %shr = ashr <4 x i32> %a, + ret <4 x i32> %shr +} + +; CHECK: function _test4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = $b|0; +; CHECK: SIMD_Int32x4_shiftRightArithmeticByScalar($a, $b); +; CHECK: return (SIMD_Int32x4_check($shr)); +; CHECK: } +define <4 x i32> @test4(<4 x i32> %a, i32 %b) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3 + %shr = ashr <4 x i32> %a, %vecinit3 + ret <4 x i32> %shr +} + +; CHECK: function _test5($a,$b,$c) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = $b|0; +; CHECK: $c = $c|0; +; CHECK: 
var $shr = SIMD_Int32x4(0,0,0,0), $vecinit3 = SIMD_Int32x4(0,0,0,0), label = 0, sp = 0; +; CHECK: $vecinit3 = SIMD_Int32x4($b, $c, $b, $b); +; CHECK: $shr = SIMD_Int32x4((SIMD_Int32x4_extractLane($a,0)|0) >> (SIMD_Int32x4_extractLane($vecinit3,0)|0)|0, (SIMD_Int32x4_extractLane($a,1)|0) >> (SIMD_Int32x4_extractLane($vecinit3,1)|0)|0, (SIMD_Int32x4_extractLane($a,2)|0) >> (SIMD_Int32x4_extractLane($vecinit3,2)|0)|0, (SIMD_Int32x4_extractLane($a,3)|0) >> (SIMD_Int32x4_extractLane($vecinit3,3)|0)|0); +; CHECK: return (SIMD_Int32x4_check($shr)); +; CHECK: } +define <4 x i32> @test5(<4 x i32> %a, i32 %b, i32 %c) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %c, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3 + %shr = ashr <4 x i32> %a, %vecinit3 + ret <4 x i32> %shr +} + +; CHECK: function _test6($a) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: SIMD_Int32x4_shiftRightLogicalByScalar($a, 3); +; CHECK: return (SIMD_Int32x4_check($lshr)); +; CHECK: } +define <4 x i32> @test6(<4 x i32> %a) { +entry: + %lshr = lshr <4 x i32> %a, + ret <4 x i32> %lshr +} + +; CHECK: function _test7($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = $b|0; +; CHECK: $lshr = SIMD_Int32x4_shiftRightLogicalByScalar($a, $b); +; CHECK: return (SIMD_Int32x4_check($lshr)); +; CHECK: } +define <4 x i32> @test7(<4 x i32> %a, i32 %b) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3 + %lshr = lshr <4 x i32> %a, %vecinit3 + ret <4 x i32> %lshr +} + +; CHECK: function _test8($a,$b,$c) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = $b|0; +; CHECK: $c = $c|0; +; CHECK: var $lshr = SIMD_Int32x4(0,0,0,0), $vecinit3 = SIMD_Int32x4(0,0,0,0), label = 0, sp = 0; +; CHECK: $vecinit3 = SIMD_Int32x4($b, $b, $b, $c); +; CHECK: $lshr = SIMD_Int32x4((SIMD_Int32x4_extractLane($a,0)|0) >>> (SIMD_Int32x4_extractLane($vecinit3,0)|0)|0, (SIMD_Int32x4_extractLane($a,1)|0) >>> (SIMD_Int32x4_extractLane($vecinit3,1)|0)|0, (SIMD_Int32x4_extractLane($a,2)|0) >>> (SIMD_Int32x4_extractLane($vecinit3,2)|0)|0, (SIMD_Int32x4_extractLane($a,3)|0) >>> (SIMD_Int32x4_extractLane($vecinit3,3)|0)|0); +; CHECK: return (SIMD_Int32x4_check($lshr)); +; CHECK: } +define <4 x i32> @test8(<4 x i32> %a, i32 %b, i32 %c) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %c, i32 3 + %lshr = lshr <4 x i32> %a, %vecinit3 + ret <4 x i32> %lshr +} diff --git a/test/CodeGen/JS/simd-shuffle.ll b/test/CodeGen/JS/simd-shuffle.ll new file mode 100644 index 000000000000..88a7aa63bfb0 --- /dev/null +++ b/test/CodeGen/JS/simd-shuffle.ll @@ -0,0 +1,524 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _splat_int32x4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); 
+; CHECK: } +define <4 x i32> @splat_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _swizzle_int32x4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($a, 0, 3, 1, 2); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <4 x i32> @swizzle_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _swizzlehi_int32x4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($b, 2, 1, 3, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <4 x i32> @swizzlehi_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _shuffleXY_float32x4to3($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_shuffle($a, $b, 7, 0, 0, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <3 x float> @shuffleXY_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _shuffle_int32x4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_shuffle($a, $b, 7, 0, 5, 3); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <4 x i32> @shuffle_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _shuffleXY_int32x4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_shuffle($a, $b, 7, 0, 0, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <4 x i32> @shuffleXY_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _splat_int32x3($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <3 x i32> @splat_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _swizzle_int32x3($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($a, 0, 2, 1, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <3 x i32> @swizzle_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _swizzlehi_int32x3($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = 
SIMD_Int32x4_swizzle($b, 0, 2, 1, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <3 x i32> @swizzlehi_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _shuffle_int32x3($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_shuffle($a, $b, 6, 0, 5, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <3 x i32> @shuffle_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _shuffleXY_int32x3($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_shuffle($a, $b, 6, 0, 0, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <3 x i32> @shuffleXY_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _splat_int32x3to4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <4 x i32> @splat_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _swizzle_int32x3to4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($a, 0, 2, 1, 2); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <4 x i32> @swizzle_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _swizzlehi_int32x3to4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($b, 2, 1, 0, 2); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <4 x i32> @swizzlehi_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _shuffle_int32x3to4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_shuffle($a, $b, 6, 0, 5, 2); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <4 x i32> @shuffle_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _shuffleXY_int32x3to4($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_shuffle($a, $b, 6, 0, 0, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <4 x i32> @shuffleXY_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _splat_int32x4to3($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = 
SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <3 x i32> @splat_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _swizzle_int32x4to3($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($a, 0, 3, 1, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <3 x i32> @swizzle_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _swizzlehi_int32x4to3($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_swizzle($b, 2, 1, 3, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <3 x i32> @swizzlehi_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _shuffle_int32x4to3($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_shuffle($a, $b, 7, 0, 5, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <3 x i32> @shuffle_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _shuffleXY_int32x4to3($a,$b) { +; CHECK: $a = SIMD_Int32x4_check($a); +; CHECK: $b = SIMD_Int32x4_check($b); +; CHECK: var $sel = SIMD_Int32x4(0,0,0,0) +; CHECK: $sel = SIMD_Int32x4_shuffle($a, $b, 7, 0, 0, 0); +; CHECK: return (SIMD_Int32x4_check($sel)); +; CHECK: } +define <3 x i32> @shuffleXY_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _splat_float32x4($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <4 x float> @splat_float32x4(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _swizzle_float32x4($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($a, 0, 3, 1, 2); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <4 x float> @swizzle_float32x4(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _swizzlehi_float32x4($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($b, 2, 1, 3, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <4 x float> @swizzlehi_float32x4(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> 
%b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _shuffle_float32x4($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_shuffle($a, $b, 7, 0, 5, 3); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <4 x float> @shuffle_float32x4(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _shuffleXY_float32x4($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_shuffle($a, $b, 7, 0, 0, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <4 x float> @shuffleXY_float32x4(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _splat_float32x3($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <3 x float> @splat_float32x3(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _swizzle_float32x3($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($a, 0, 2, 1, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <3 x float> @swizzle_float32x3(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _swizzlehi_float32x3($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($b, 0, 2, 1, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <3 x float> @swizzlehi_float32x3(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _shuffle_float32x3($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_shuffle($a, $b, 6, 0, 5, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <3 x float> @shuffle_float32x3(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _shuffleXY_float32x3($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_shuffle($a, $b, 6, 0, 0, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <3 x float> @shuffleXY_float32x3(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _splat_float32x3to4($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; 
CHECK: $sel = SIMD_Float32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <4 x float> @splat_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _swizzle_float32x3to4($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($a, 0, 2, 1, 2); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <4 x float> @swizzle_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _swizzlehi_float32x3to4($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($b, 2, 1, 0, 2); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <4 x float> @swizzlehi_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _shuffle_float32x3to4($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_shuffle($a, $b, 6, 0, 5, 2); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <4 x float> @shuffle_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _shuffleXY_float32x3to4($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_shuffle($a, $b, 6, 0, 0, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <4 x float> @shuffleXY_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _splat_float32x4to3($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <3 x float> @splat_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _swizzle_float32x4to3($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($a, 0, 3, 1, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <3 x float> @swizzle_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _swizzlehi_float32x4to3($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_swizzle($b, 2, 1, 3, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <3 x float> @swizzlehi_float32x4to3(<4 x float> %a, <4 x float> %b) 
nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _shuffle_float32x4to3($a,$b) { +; CHECK: $a = SIMD_Float32x4_check($a); +; CHECK: $b = SIMD_Float32x4_check($b); +; CHECK: var $sel = SIMD_Float32x4(0,0,0,0) +; CHECK: $sel = SIMD_Float32x4_shuffle($a, $b, 7, 0, 5, 0); +; CHECK: return (SIMD_Float32x4_check($sel)); +; CHECK: } +define <3 x float> @shuffle_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32> + ret <3 x float> %sel +} diff --git a/test/CodeGen/JS/splat-precise-f32.ll b/test/CodeGen/JS/splat-precise-f32.ll new file mode 100644 index 000000000000..d3f5e08ff186 --- /dev/null +++ b/test/CodeGen/JS/splat-precise-f32.ll @@ -0,0 +1,46 @@ +; RUN: llc -emscripten-precise-f32=false < %s | FileCheck %s +; RUN: llc -emscripten-precise-f32=true < %s | FileCheck --check-prefix=CHECK-PRECISE_F32 %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; SIMD_Float32x4_splat needs a float32 input even if we're not in precise-f32 mode. + +; CHECK: test( +; CHECK: $d = SIMD_Float32x4_splat(Math_fround($f)); +; CHECK-PRECISE_F32: test( +; CHECK-PRECISE_F32: $f = Math_fround($f); +; CHECK-PRECISE_F32: $d = SIMD_Float32x4_splat($f); +define <4 x float> @test(float %f) { + %a = insertelement <4 x float> undef, float %f, i32 0 + %b = insertelement <4 x float> %a, float %f, i32 1 + %c = insertelement <4 x float> %b, float %f, i32 2 + %d = insertelement <4 x float> %c, float %f, i32 3 + ret <4 x float> %d +} + +; CHECK: test_insert( +; CHECK: $a = SIMD_Float32x4_replaceLane($v,0,Math_fround($g)); +; CHECK-PRECISE_F32: test_insert( +; CHECK-PRECISE_F32: $g = Math_fround($g); +; CHECK-PRECISE_F32: $a = SIMD_Float32x4_replaceLane($v,0,$g); +define <4 x float> @test_insert(<4 x float> %v, float %g) { + %a = insertelement <4 x float> %v, float %g, i32 0 + ret <4 x float> %a +} + +; CHECK: test_ctor( +; CHECK: $d = SIMD_Float32x4(Math_fround($x), Math_fround($y), Math_fround($z), Math_fround($w)); +; CHECK-PRECISE_F32: test_ctor( +; CHECK-PRECISE_F32: $x = Math_fround($x); +; CHECK-PRECISE_F32: $y = Math_fround($y); +; CHECK-PRECISE_F32: $z = Math_fround($z); +; CHECK-PRECISE_F32: $w = Math_fround($w); +; CHECK-PRECISE_F32: $d = SIMD_Float32x4($x, $y, $z, $w); +define <4 x float> @test_ctor(<4 x float> %v, float %x, float %y, float %z, float %w) { + %a = insertelement <4 x float> undef, float %x, i32 0 + %b = insertelement <4 x float> %a, float %y, i32 1 + %c = insertelement <4 x float> %b, float %z, i32 2 + %d = insertelement <4 x float> %c, float %w, i32 3 + ret <4 x float> %d +} diff --git a/test/CodeGen/JS/unrolled-simd.ll b/test/CodeGen/JS/unrolled-simd.ll new file mode 100644 index 000000000000..1d169a4a76ab --- /dev/null +++ b/test/CodeGen/JS/unrolled-simd.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: SIMD_Int32x4((SIMD_Int32x4_extractLane($a,0)|0) / (SIMD_Int32x4_extractLane($b,0)|0)|0, (SIMD_Int32x4_extractLane($a,1)|0) / (SIMD_Int32x4_extractLane($b,1)|0)|0, (SIMD_Int32x4_extractLane($a,2)|0) / (SIMD_Int32x4_extractLane($b,2)|0)|0, (SIMD_Int32x4_extractLane($a,3)|0) / (SIMD_Int32x4_extractLane($b,3)|0)|0); +define <4 x i32> @signed_div(<4 x i32> 
%a, <4 x i32> %b) { + %c = sdiv <4 x i32> %a, %b + ret <4 x i32> %c +} + +; CHECK: SIMD_Int32x4((SIMD_Int32x4_extractLane($a,0)>>>0) / (SIMD_Int32x4_extractLane($b,0)>>>0)>>>0, (SIMD_Int32x4_extractLane($a,1)>>>0) / (SIMD_Int32x4_extractLane($b,1)>>>0)>>>0, (SIMD_Int32x4_extractLane($a,2)>>>0) / (SIMD_Int32x4_extractLane($b,2)>>>0)>>>0, (SIMD_Int32x4_extractLane($a,3)>>>0) / (SIMD_Int32x4_extractLane($b,3)>>>0)>>>0); +define <4 x i32> @un_div(<4 x i32> %a, <4 x i32> %b) { + %c = udiv <4 x i32> %a, %b + ret <4 x i32> %c +} + +; CHECK: SIMD_Int32x4((SIMD_Int32x4_extractLane($a,0)|0) % (SIMD_Int32x4_extractLane($b,0)|0)|0, (SIMD_Int32x4_extractLane($a,1)|0) % (SIMD_Int32x4_extractLane($b,1)|0)|0, (SIMD_Int32x4_extractLane($a,2)|0) % (SIMD_Int32x4_extractLane($b,2)|0)|0, (SIMD_Int32x4_extractLane($a,3)|0) % (SIMD_Int32x4_extractLane($b,3)|0)|0); +define <4 x i32> @signed_rem(<4 x i32> %a, <4 x i32> %b) { + %c = srem <4 x i32> %a, %b + ret <4 x i32> %c +} + +; CHECK: SIMD_Int32x4((SIMD_Int32x4_extractLane($a,0)>>>0) % (SIMD_Int32x4_extractLane($b,0)>>>0)>>>0, (SIMD_Int32x4_extractLane($a,1)>>>0) % (SIMD_Int32x4_extractLane($b,1)>>>0)>>>0, (SIMD_Int32x4_extractLane($a,2)>>>0) % (SIMD_Int32x4_extractLane($b,2)>>>0)>>>0, (SIMD_Int32x4_extractLane($a,3)>>>0) % (SIMD_Int32x4_extractLane($b,3)>>>0)>>>0); +define <4 x i32> @un_rem(<4 x i32> %a, <4 x i32> %b) { + %c = urem <4 x i32> %a, %b + ret <4 x i32> %c +} diff --git a/test/Transforms/GlobalOpt/integer-bool.ll b/test/Transforms/GlobalOpt/integer-bool.ll index 617febdc0166..3bdf92273b77 100644 --- a/test/Transforms/GlobalOpt/integer-bool.ll +++ b/test/Transforms/GlobalOpt/integer-bool.ll @@ -4,17 +4,17 @@ @G = internal addrspace(1) global i32 0 ; CHECK: @G ; CHECK: addrspace(1) -; CHECK: global i1 false +; CHECK: global i32 0 define void @set1() { store i32 0, i32 addrspace(1)* @G -; CHECK: store i1 false +; CHECK: store i32 0 ret void } define void @set2() { store i32 1, i32 addrspace(1)* @G -; CHECK: store i1 true +; CHECK: store i32 1 ret void } diff --git a/test/Transforms/NaCl/add-pnacl-external-decls.ll b/test/Transforms/NaCl/add-pnacl-external-decls.ll new file mode 100644 index 000000000000..1f525a9268cd --- /dev/null +++ b/test/Transforms/NaCl/add-pnacl-external-decls.ll @@ -0,0 +1,6 @@ +; RUN: opt < %s -add-pnacl-external-decls -S | FileCheck %s + +declare void @foobar(i32) + +; CHECK: declare i32 @setjmp(i8*) +; CHECK: declare void @longjmp(i8*, i32) diff --git a/test/Transforms/NaCl/atomic/atomic-seq-cst-only.ll b/test/Transforms/NaCl/atomic/atomic-seq-cst-only.ll new file mode 100644 index 000000000000..4c620a6d3bbc --- /dev/null +++ b/test/Transforms/NaCl/atomic/atomic-seq-cst-only.ll @@ -0,0 +1,51 @@ +; RUN: opt -nacl-rewrite-atomics -pnacl-memory-order-seq-cst-only=true -S < %s | FileCheck %s +; +; Verify that -pnacl-memory-order-seq-cst-only=true ensures all atomic memory +; orderings become seq_cst (enum value 6). +; +; Note that monotonic doesn't exist in C11/C++11, and consume isn't implemented +; in LLVM yet. 
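Illustrative aside (not part of the patch): the flag tested above matters for C11/C++11 code that requests weaker orderings explicitly. A minimal C sketch, assuming a Clang/PNaCl-style toolchain that runs -nacl-rewrite-atomics with -pnacl-memory-order-seq-cst-only=true; the function names are illustrative only:

#include <stdatomic.h>

/* Relaxed at the source level; with the seq-cst-only flag the rewrite is
   expected to emit llvm.nacl.atomic.load/store with ordering value 6. */
int load_relaxed(atomic_int *p) {
  return atomic_load_explicit(p, memory_order_relaxed);
}

void store_relaxed(atomic_int *p, int v) {
  atomic_store_explicit(p, v, memory_order_relaxed);
}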
+ +target datalayout = "p:32:32:32" + +; CHECK-LABEL: @test_atomic_store_monotonic_i32 +define void @test_atomic_store_monotonic_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + store atomic i32 %value, i32* %ptr monotonic, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_atomic_store_unordered_i32 +define void @test_atomic_store_unordered_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + store atomic i32 %value, i32* %ptr unordered, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_atomic_load_acquire_i32 +define i32 @test_atomic_load_acquire_i32(i32* %ptr) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6) + %res = load atomic i32, i32* %ptr acquire, align 4 + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_atomic_store_release_i32 +define void @test_atomic_store_release_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + store atomic i32 %value, i32* %ptr release, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_fetch_and_add_i32 +define i32 @test_fetch_and_add_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %value, i32 6) + %res = atomicrmw add i32* %ptr, i32 %value acq_rel + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_atomic_store_seq_cst_i32 +define void @test_atomic_store_seq_cst_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + store atomic i32 %value, i32* %ptr seq_cst, align 4 + ret void ; CHECK-NEXT: ret void +} diff --git a/test/Transforms/NaCl/atomic/atomic_others.ll b/test/Transforms/NaCl/atomic/atomic_others.ll new file mode 100644 index 000000000000..ae2498340fdf --- /dev/null +++ b/test/Transforms/NaCl/atomic/atomic_others.ll @@ -0,0 +1,130 @@ +; RUN: opt -nacl-rewrite-atomics -S < %s | FileCheck %s +; +; Validate that atomic non-{acquire/release/acq_rel/seq_cst} loads/stores get +; rewritten into NaCl atomic builtins with sequentially consistent memory +; ordering (enum value 6), and that acquire/release/acq_rel remain as-is (enum +; values 3/4/5). +; +; Note that monotonic doesn't exist in C11/C++11, and consume isn't implemented +; in LLVM yet. 
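For orientation (an aside, not part of the patch): the orderings exercised below correspond to ordinary C11 atomics, with acquire, release, acq_rel and seq_cst mapping to the enum values 3, 4, 5 and 6 checked in the tests, and relaxed/unordered upgraded to 6. A hedged C sketch of source code that would produce these forms; names are illustrative:

#include <stdatomic.h>

int load_acquire(atomic_int *p) {             /* expected ordering value 3 */
  return atomic_load_explicit(p, memory_order_acquire);
}

void store_release(atomic_int *p, int v) {    /* expected ordering value 4 */
  atomic_store_explicit(p, v, memory_order_release);
}

int fetch_add_acq_rel(atomic_int *p, int v) { /* rmw add, expected ordering 5 */
  return atomic_fetch_add_explicit(p, v, memory_order_acq_rel);
}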
+ +target datalayout = "p:32:32:32" + +; CHECK-LABEL: @test_atomic_load_monotonic_i32 +define i32 @test_atomic_load_monotonic_i32(i32* %ptr) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6) + %res = load atomic i32, i32* %ptr monotonic, align 4 + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_atomic_store_monotonic_i32 +define void @test_atomic_store_monotonic_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + store atomic i32 %value, i32* %ptr monotonic, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_atomic_load_unordered_i32 +define i32 @test_atomic_load_unordered_i32(i32* %ptr) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6) + %res = load atomic i32, i32* %ptr unordered, align 4 + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_atomic_store_unordered_i32 +define void @test_atomic_store_unordered_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + store atomic i32 %value, i32* %ptr unordered, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_atomic_load_acquire_i32 +define i32 @test_atomic_load_acquire_i32(i32* %ptr) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 3) + %res = load atomic i32, i32* %ptr acquire, align 4 + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_atomic_store_release_i32 +define void @test_atomic_store_release_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 4) + store atomic i32 %value, i32* %ptr release, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_fetch_and_add_i32 +define i32 @test_fetch_and_add_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %value, i32 5) + %res = atomicrmw add i32* %ptr, i32 %value acq_rel + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; Test all the valid cmpxchg orderings for success and failure. + +; CHECK-LABEL: @test_cmpxchg_seqcst_seqcst +define { i32, i1 } @test_cmpxchg_seqcst_seqcst(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 0, i32 %value, i32 6, i32 6) + %res = cmpxchg i32* %ptr, i32 0, i32 %value seq_cst seq_cst + ret { i32, i1 } %res +} + +; CHECK-LABEL: @test_cmpxchg_seqcst_acquire +define { i32, i1 } @test_cmpxchg_seqcst_acquire(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 0, i32 %value, i32 6, i32 3) + %res = cmpxchg i32* %ptr, i32 0, i32 %value seq_cst acquire + ret { i32, i1 } %res +} + +; CHECK-LABEL: @test_cmpxchg_seqcst_relaxed +define { i32, i1 } @test_cmpxchg_seqcst_relaxed(i32* %ptr, i32 %value) { + ; Failure ordering is upgraded. 
+ ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 0, i32 %value, i32 6, i32 6) + %res = cmpxchg i32* %ptr, i32 0, i32 %value seq_cst monotonic + ret { i32, i1 } %res +} + +; CHECK-LABEL: @test_cmpxchg_acqrel_acquire +define { i32, i1 } @test_cmpxchg_acqrel_acquire(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 0, i32 %value, i32 5, i32 3) + %res = cmpxchg i32* %ptr, i32 0, i32 %value acq_rel acquire + ret { i32, i1 } %res +} + +; CHECK-LABEL: @test_cmpxchg_acqrel_relaxed +define { i32, i1 } @test_cmpxchg_acqrel_relaxed(i32* %ptr, i32 %value) { + ; Success and failure ordering are upgraded. + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 0, i32 %value, i32 6, i32 6) + %res = cmpxchg i32* %ptr, i32 0, i32 %value acq_rel monotonic + ret { i32, i1 } %res +} + +; CHECK-LABEL: @test_cmpxchg_release_relaxed +define { i32, i1 } @test_cmpxchg_release_relaxed(i32* %ptr, i32 %value) { + ; Success and failure ordering are upgraded. + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 0, i32 %value, i32 6, i32 6) + %res = cmpxchg i32* %ptr, i32 0, i32 %value release monotonic + ret { i32, i1 } %res +} + +; CHECK-LABEL: @test_cmpxchg_acquire_acquire +define { i32, i1 } @test_cmpxchg_acquire_acquire(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 0, i32 %value, i32 3, i32 3) + %res = cmpxchg i32* %ptr, i32 0, i32 %value acquire acquire + ret { i32, i1 } %res +} + +; CHECK-LABEL: @test_cmpxchg_acquire_relaxed +define { i32, i1 } @test_cmpxchg_acquire_relaxed(i32* %ptr, i32 %value) { + ; Failure ordering is upgraded. + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 0, i32 %value, i32 3, i32 3) + %res = cmpxchg i32* %ptr, i32 0, i32 %value acquire monotonic + ret { i32, i1 } %res +} + +; CHECK-LABEL: @test_cmpxchg_relaxed_relaxed +define { i32, i1 } @test_cmpxchg_relaxed_relaxed(i32* %ptr, i32 %value) { + ; Failure ordering is upgraded. + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 0, i32 %value, i32 6, i32 6) + %res = cmpxchg i32* %ptr, i32 0, i32 %value monotonic monotonic + ret { i32, i1 } %res +} diff --git a/test/Transforms/NaCl/atomic/atomic_seq_cst.ll b/test/Transforms/NaCl/atomic/atomic_seq_cst.ll new file mode 100644 index 000000000000..a6125a4da685 --- /dev/null +++ b/test/Transforms/NaCl/atomic/atomic_seq_cst.ll @@ -0,0 +1,99 @@ +; RUN: opt -nacl-rewrite-atomics -S < %s | FileCheck %s +; +; Validate that sequentially consistent atomic loads/stores get rewritten into +; NaCl atomic builtins with sequentially-consistent memory ordering (enum value +; 6). 
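Aside (not part of the patch): plain C11 atomic_load/atomic_store default to sequentially consistent ordering, and pointer-typed atomics are why the tests below also cover the bitcast/ptrtoint/inttoptr path. A small hedged sketch; names are illustrative:

#include <stdatomic.h>

long long load64(_Atomic long long *p) { /* i64 load, ordering value 6 */
  return atomic_load(p);
}

int *load_ptr(_Atomic(int *) *p) {       /* pointer round-trips through i32 */
  return atomic_load(p);
}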
+ +target datalayout = "p:32:32:32" + +; CHECK-LABEL: @test_atomic_load_i8 +define zeroext i8 @test_atomic_load_i8(i8* %ptr) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.load.i8(i8* %ptr, i32 6) + %res = load atomic i8, i8* %ptr seq_cst, align 1 + ret i8 %res ; CHECK-NEXT: ret i8 %res +} + +; CHECK-LABEL: @test_atomic_store_i8 +define void @test_atomic_store_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i8(i8 %value, i8* %ptr, i32 6) + store atomic i8 %value, i8* %ptr seq_cst, align 1 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_atomic_load_i16 +define zeroext i16 @test_atomic_load_i16(i16* %ptr) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.load.i16(i16* %ptr, i32 6) + %res = load atomic i16, i16* %ptr seq_cst, align 2 + ret i16 %res ; CHECK-NEXT: ret i16 %res +} + +; CHECK-LABEL: @test_atomic_store_i16 +define void @test_atomic_store_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i16(i16 %value, i16* %ptr, i32 6) + store atomic i16 %value, i16* %ptr seq_cst, align 2 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_atomic_load_i32 +define i32 @test_atomic_load_i32(i32* %ptr) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6) + %res = load atomic i32, i32* %ptr seq_cst, align 4 + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_atomic_store_i32 +define void @test_atomic_store_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + store atomic i32 %value, i32* %ptr seq_cst, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_atomic_load_i64 +define i64 @test_atomic_load_i64(i64* %ptr) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr, i32 6) + %res = load atomic i64, i64* %ptr seq_cst, align 8 + ret i64 %res ; CHECK-NEXT: ret i64 %res +} + +; CHECK-LABEL: @test_atomic_store_i64 +define void @test_atomic_store_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i64(i64 %value, i64* %ptr, i32 6) + store atomic i64 %value, i64* %ptr seq_cst, align 8 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_atomic_load_i32_pointer +define i32* @test_atomic_load_i32_pointer(i32** %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast i32** %ptr to i32* + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = inttoptr i32 %res to i32* + %res = load atomic i32*, i32** %ptr seq_cst, align 4 + ret i32* %res ; CHECK-NEXT: ret i32* %res.cast +} + +; CHECK-LABEL: @test_atomic_store_i32_pointer +define void @test_atomic_store_i32_pointer(i32** %ptr, i32* %value) { + ; CHECK-NEXT: %ptr.cast = bitcast i32** %ptr to i32* + ; CHECK-NEXT: %value.cast = ptrtoint i32* %value to i32 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value.cast, i32* %ptr.cast, i32 6) + store atomic i32* %value, i32** %ptr seq_cst, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_atomic_load_double_pointer +define double* @test_atomic_load_double_pointer(double** %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast double** %ptr to i32* + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = inttoptr i32 %res to double* + %res = load atomic double*, double** %ptr seq_cst, align 4 + ret double* %res ; CHECK-NEXT: ret double* %res.cast +} + +; CHECK-LABEL: @test_atomic_store_double_pointer +define void 
@test_atomic_store_double_pointer(double** %ptr, double* %value) { + ; CHECK-NEXT: %ptr.cast = bitcast double** %ptr to i32* + ; CHECK-NEXT: %value.cast = ptrtoint double* %value to i32 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value.cast, i32* %ptr.cast, i32 6) + store atomic double* %value, double** %ptr seq_cst, align 4 + ret void ; CHECK-NEXT: ret void +} diff --git a/test/Transforms/NaCl/atomic/fetch_and_.ll b/test/Transforms/NaCl/atomic/fetch_and_.ll new file mode 100644 index 000000000000..62f7d8c10d19 --- /dev/null +++ b/test/Transforms/NaCl/atomic/fetch_and_.ll @@ -0,0 +1,154 @@ +; RUN: opt -nacl-rewrite-atomics -S < %s | FileCheck %s + +; Each of these tests validates that the corresponding legacy GCC-style builtins +; are properly rewritten to NaCl atomic builtins. Only the GCC-style builtins +; that have corresponding primitives in C11/C++11 and which emit different code +; are tested. These legacy GCC-builtins only support sequential-consistency +; (enum value 6). +; +; test_* tests the corresponding __sync_* builtin. See: +; http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/_005f_005fsync-Builtins.html + +target datalayout = "p:32:32:32" + +; CHECK-LABEL: @test_fetch_and_add_i8 +define zeroext i8 @test_fetch_and_add_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 1, i8* %ptr, i8 %value, i32 6) + %res = atomicrmw add i8* %ptr, i8 %value seq_cst + ret i8 %res ; CHECK-NEXT: ret i8 %res +} + +; CHECK-LABEL: @test_fetch_and_add_i16 +define zeroext i16 @test_fetch_and_add_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr, i16 %value, i32 6) + %res = atomicrmw add i16* %ptr, i16 %value seq_cst + ret i16 %res ; CHECK-NEXT: ret i16 %res +} + +; CHECK-LABEL: @test_fetch_and_add_i32 +define i32 @test_fetch_and_add_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %value, i32 6) + %res = atomicrmw add i32* %ptr, i32 %value seq_cst + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_fetch_and_add_i64 +define i64 @test_fetch_and_add_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %value, i32 6) + %res = atomicrmw add i64* %ptr, i64 %value seq_cst + ret i64 %res ; CHECK-NEXT: ret i64 %res +} + +; CHECK-LABEL: @test_fetch_and_sub_i8 +define zeroext i8 @test_fetch_and_sub_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 2, i8* %ptr, i8 %value, i32 6) + %res = atomicrmw sub i8* %ptr, i8 %value seq_cst + ret i8 %res ; CHECK-NEXT: ret i8 %res +} + +; CHECK-LABEL: @test_fetch_and_sub_i16 +define zeroext i16 @test_fetch_and_sub_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i16* %ptr, i16 %value, i32 6) + %res = atomicrmw sub i16* %ptr, i16 %value seq_cst + ret i16 %res ; CHECK-NEXT: ret i16 %res +} + +; CHECK-LABEL: @test_fetch_and_sub_i32 +define i32 @test_fetch_and_sub_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %value, i32 6) + %res = atomicrmw sub i32* %ptr, i32 %value seq_cst + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_fetch_and_sub_i64 +define i64 @test_fetch_and_sub_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 2, i64* %ptr, i64 %value, i32 6) + %res = atomicrmw sub i64* %ptr, i64 %value seq_cst + ret i64 
%res ; CHECK-NEXT: ret i64 %res +} + +; CHECK-LABEL: @test_fetch_and_or_i8 +define zeroext i8 @test_fetch_and_or_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %value, i32 6) + %res = atomicrmw or i8* %ptr, i8 %value seq_cst + ret i8 %res ; CHECK-NEXT: ret i8 %res +} + +; CHECK-LABEL: @test_fetch_and_or_i16 +define zeroext i16 @test_fetch_and_or_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %value, i32 6) + %res = atomicrmw or i16* %ptr, i16 %value seq_cst + ret i16 %res ; CHECK-NEXT: ret i16 %res +} + +; CHECK-LABEL: @test_fetch_and_or_i32 +define i32 @test_fetch_and_or_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %value, i32 6) + %res = atomicrmw or i32* %ptr, i32 %value seq_cst + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_fetch_and_or_i64 +define i64 @test_fetch_and_or_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 3, i64* %ptr, i64 %value, i32 6) + %res = atomicrmw or i64* %ptr, i64 %value seq_cst + ret i64 %res ; CHECK-NEXT: ret i64 %res +} + +; CHECK-LABEL: @test_fetch_and_and_i8 +define zeroext i8 @test_fetch_and_and_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 4, i8* %ptr, i8 %value, i32 6) + %res = atomicrmw and i8* %ptr, i8 %value seq_cst + ret i8 %res ; CHECK-NEXT: ret i8 %res +} + +; CHECK-LABEL: @test_fetch_and_and_i16 +define zeroext i16 @test_fetch_and_and_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i16* %ptr, i16 %value, i32 6) + %res = atomicrmw and i16* %ptr, i16 %value seq_cst + ret i16 %res ; CHECK-NEXT: ret i16 %res +} + +; CHECK-LABEL: @test_fetch_and_and_i32 +define i32 @test_fetch_and_and_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %value, i32 6) + %res = atomicrmw and i32* %ptr, i32 %value seq_cst + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_fetch_and_and_i64 +define i64 @test_fetch_and_and_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 4, i64* %ptr, i64 %value, i32 6) + %res = atomicrmw and i64* %ptr, i64 %value seq_cst + ret i64 %res ; CHECK-NEXT: ret i64 %res + +} + +; CHECK-LABEL: @test_fetch_and_xor_i8 +define zeroext i8 @test_fetch_and_xor_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 5, i8* %ptr, i8 %value, i32 6) + %res = atomicrmw xor i8* %ptr, i8 %value seq_cst + ret i8 %res ; CHECK-NEXT: ret i8 %res + +} + +; CHECK-LABEL: @test_fetch_and_xor_i16 +define zeroext i16 @test_fetch_and_xor_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i16* %ptr, i16 %value, i32 6) + %res = atomicrmw xor i16* %ptr, i16 %value seq_cst + ret i16 %res ; CHECK-NEXT: ret i16 %res +} + +; CHECK-LABEL: @test_fetch_and_xor_i32 +define i32 @test_fetch_and_xor_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %value, i32 6) + %res = atomicrmw xor i32* %ptr, i32 %value seq_cst + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_fetch_and_xor_i64 +define i64 @test_fetch_and_xor_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 5, i64* %ptr, i64 %value, i32 6) + %res = 
atomicrmw xor i64* %ptr, i64 %value seq_cst + ret i64 %res ; CHECK-NEXT: ret i64 %res +} diff --git a/test/Transforms/NaCl/atomic/lock_.ll b/test/Transforms/NaCl/atomic/lock_.ll new file mode 100644 index 000000000000..6f66f6706122 --- /dev/null +++ b/test/Transforms/NaCl/atomic/lock_.ll @@ -0,0 +1,68 @@ +; RUN: opt -nacl-rewrite-atomics -S < %s | FileCheck %s + +; Each of these tests validates that the corresponding legacy GCC-style builtins +; are properly rewritten to NaCl atomic builtins. Only the GCC-style builtins +; that have corresponding primitives in C11/C++11 and which emit different code +; are tested. These legacy GCC-builtins only support sequential-consistency +; (enum value 6). +; +; test_* tests the corresponding __sync_* builtin. See: +; http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/_005f_005fsync-Builtins.html + +target datalayout = "p:32:32:32" + +; CHECK-LABEL: @test_lock_test_and_set_i8 +define zeroext i8 @test_lock_test_and_set_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 6, i8* %ptr, i8 %value, i32 6) + %res = atomicrmw xchg i8* %ptr, i8 %value seq_cst + ret i8 %res ; CHECK-NEXT: ret i8 %res +} + +; CHECK-LABEL: @test_lock_release_i8 +define void @test_lock_release_i8(i8* %ptr) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i8(i8 0, i8* %ptr, i32 4) + store atomic i8 0, i8* %ptr release, align 1 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_lock_test_and_set_i16 +define zeroext i16 @test_lock_test_and_set_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 6, i16* %ptr, i16 %value, i32 6) + %res = atomicrmw xchg i16* %ptr, i16 %value seq_cst + ret i16 %res ; CHECK-NEXT: ret i16 %res +} + +; CHECK-LABEL: @test_lock_release_i16 +define void @test_lock_release_i16(i16* %ptr) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i16(i16 0, i16* %ptr, i32 4) + store atomic i16 0, i16* %ptr release, align 2 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_lock_test_and_set_i32 +define i32 @test_lock_test_and_set_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %value, i32 6) + %res = atomicrmw xchg i32* %ptr, i32 %value seq_cst + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_lock_release_i32 +define void @test_lock_release_i32(i32* %ptr) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 0, i32* %ptr, i32 4) + store atomic i32 0, i32* %ptr release, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_lock_test_and_set_i64 +define i64 @test_lock_test_and_set_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 6, i64* %ptr, i64 %value, i32 6) + %res = atomicrmw xchg i64* %ptr, i64 %value seq_cst + ret i64 %res ; CHECK-NEXT: ret i64 %res +} + +; CHECK-LABEL: @test_lock_release_i64 +define void @test_lock_release_i64(i64* %ptr) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i64(i64 0, i64* %ptr, i32 4) + store atomic i64 0, i64* %ptr release, align 8 + ret void ; CHECK-NEXT: ret void +} diff --git a/test/Transforms/NaCl/atomic/sync_synchronize.ll b/test/Transforms/NaCl/atomic/sync_synchronize.ll new file mode 100644 index 000000000000..c9ef9029d3f6 --- /dev/null +++ b/test/Transforms/NaCl/atomic/sync_synchronize.ll @@ -0,0 +1,51 @@ +; RUN: opt -nacl-rewrite-atomics -remove-asm-memory -S < %s | FileCheck %s + +; Each of these tests validates that the corresponding legacy GCC-style builtins +; are properly 
rewritten to NaCl atomic builtins. Only the GCC-style builtins +; that have corresponding primitives in C11/C++11 and which emit different code +; are tested. These legacy GCC-builtins only support sequential-consistency +; (enum value 6). +; +; test_* tests the corresponding __sync_* builtin. See: +; http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/_005f_005fsync-Builtins.html + +target datalayout = "p:32:32:32" + +; This pattern gets emitted by C11/C++11 atomic thread fences. +; +; CHECK-LABEL: @test_c11_fence +define void @test_c11_fence() { + ; CHECK-NEXT: call void @llvm.nacl.atomic.fence(i32 6) + fence seq_cst + ret void ; CHECK-NEXT: ret void +} + +; This pattern gets emitted for ``__sync_synchronize`` and +; ``asm("":::"memory")`` when Clang is configured for NaCl. +; +; CHECK-LABEL: @test_synchronize +define void @test_synchronize() { + ; CHECK-NEXT: call void @llvm.nacl.atomic.fence.all() + call void asm sideeffect "", "~{memory}"() + fence seq_cst + call void asm sideeffect "", "~{memory}"() + ret void ; CHECK-NEXT: ret void +} + +; Make sure the above pattern is respected and not partially-matched. +; +; CHECK-LABEL: @test_synchronize_bad1 +define void @test_synchronize_bad1() { + ; CHECK-NOT: call void @llvm.nacl.atomic.fence.all() + call void asm sideeffect "", "~{memory}"() + fence seq_cst + ret void +} + +; CHECK-LABEL: @test_synchronize_bad2 +define void @test_synchronize_bad2() { + ; CHECK-NOT: call void @llvm.nacl.atomic.fence.all() + fence seq_cst + call void asm sideeffect "", "~{memory}"() + ret void +} diff --git a/test/Transforms/NaCl/atomic/val_compare_and_swap.ll b/test/Transforms/NaCl/atomic/val_compare_and_swap.ll new file mode 100644 index 000000000000..d30eba2ebdd0 --- /dev/null +++ b/test/Transforms/NaCl/atomic/val_compare_and_swap.ll @@ -0,0 +1,112 @@ +; RUN: opt -nacl-rewrite-atomics -S < %s | FileCheck %s + +; Each of these tests validates that the corresponding legacy GCC-style builtins +; are properly rewritten to NaCl atomic builtins. Only the GCC-style builtins +; that have corresponding primitives in C11/C++11 and which emit different code +; are tested. These legacy GCC-builtins only support sequential-consistency +; (enum value 6). +; +; test_* tests the corresponding __sync_* builtin. 
See: +; http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/_005f_005fsync-Builtins.html + +target datalayout = "p:32:32:32" + +; __sync_val_compare_and_swap + +; CHECK-LABEL: @test_val_compare_and_swap_i8 +define zeroext i8 @test_val_compare_and_swap_i8(i8* %ptr, i8 zeroext %oldval, i8 zeroext %newval) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.cmpxchg.i8(i8* %ptr, i8 %oldval, i8 %newval, i32 6, i32 6) + ; CHECK-NEXT: %success = icmp eq i8 %res, %oldval + ; CHECK-NEXT: %res.insert.value = insertvalue { i8, i1 } undef, i8 %res, 0 + ; CHECK-NEXT: %res.insert.success = insertvalue { i8, i1 } %res.insert.value, i1 %success, 1 + ; CHECK-NEXT: %val = extractvalue { i8, i1 } %res.insert.success, 0 + %res = cmpxchg i8* %ptr, i8 %oldval, i8 %newval seq_cst seq_cst + %val = extractvalue { i8, i1 } %res, 0 + ret i8 %val ; CHECK-NEXT: ret i8 %val +} + +; CHECK-LABEL: @test_val_compare_and_swap_i16 +define zeroext i16 @test_val_compare_and_swap_i16(i16* %ptr, i16 zeroext %oldval, i16 zeroext %newval) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %oldval, i16 %newval, i32 6, i32 6) + ; CHECK-NEXT: %success = icmp eq i16 %res, %oldval + ; CHECK-NEXT: %res.insert.value = insertvalue { i16, i1 } undef, i16 %res, 0 + ; CHECK-NEXT: %res.insert.success = insertvalue { i16, i1 } %res.insert.value, i1 %success, 1 + ; CHECK-NEXT: %val = extractvalue { i16, i1 } %res.insert.success, 0 + %res = cmpxchg i16* %ptr, i16 %oldval, i16 %newval seq_cst seq_cst + %val = extractvalue { i16, i1 } %res, 0 + ret i16 %val ; CHECK-NEXT: ret i16 %val +} + +; CHECK-LABEL: @test_val_compare_and_swap_i32 +define i32 @test_val_compare_and_swap_i32(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + ; CHECK-NEXT: %success = icmp eq i32 %res, %oldval + ; CHECK-NEXT: %res.insert.value = insertvalue { i32, i1 } undef, i32 %res, 0 + ; CHECK-NEXT: %res.insert.success = insertvalue { i32, i1 } %res.insert.value, i1 %success, 1 + ; CHECK-NEXT: %val = extractvalue { i32, i1 } %res.insert.success, 0 + %res = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst seq_cst + %val = extractvalue { i32, i1 } %res, 0 + ret i32 %val ; CHECK-NEXT: ret i32 %val +} + +; CHECK-LABEL: @test_val_compare_and_swap_i64 +define i64 @test_val_compare_and_swap_i64(i64* %ptr, i64 %oldval, i64 %newval) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %oldval, i64 %newval, i32 6, i32 6) + ; CHECK-NEXT: %success = icmp eq i64 %res, %oldval + ; CHECK-NEXT: %res.insert.value = insertvalue { i64, i1 } undef, i64 %res, 0 + ; CHECK-NEXT: %res.insert.success = insertvalue { i64, i1 } %res.insert.value, i1 %success, 1 + ; CHECK-NEXT: %val = extractvalue { i64, i1 } %res.insert.success, 0 + %res = cmpxchg i64* %ptr, i64 %oldval, i64 %newval seq_cst seq_cst + %val = extractvalue { i64, i1 } %res, 0 + ret i64 %val ; CHECK-NEXT: ret i64 %val +} + +; __sync_bool_compare_and_swap + +; CHECK-LABEL: @test_bool_compare_and_swap_i8 +define zeroext i1 @test_bool_compare_and_swap_i8(i8* %ptr, i8 zeroext %oldval, i8 zeroext %newval) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.cmpxchg.i8(i8* %ptr, i8 %oldval, i8 %newval, i32 6, i32 6) + ; CHECK-NEXT: %success = icmp eq i8 %res, %oldval + ; CHECK-NEXT: %res.insert.value = insertvalue { i8, i1 } undef, i8 %res, 0 + ; CHECK-NEXT: %res.insert.success = insertvalue { i8, i1 } %res.insert.value, i1 %success, 1 + ; CHECK-NEXT: %suc = extractvalue { i8, i1 } 
%res.insert.success, 1 + %res = cmpxchg i8* %ptr, i8 %oldval, i8 %newval seq_cst seq_cst + %suc = extractvalue { i8, i1 } %res, 1 + ret i1 %suc ; CHECK-NEXT: ret i1 %suc +} + +; CHECK-LABEL: @test_bool_compare_and_swap_i16 +define zeroext i1 @test_bool_compare_and_swap_i16(i16* %ptr, i16 zeroext %oldval, i16 zeroext %newval) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %oldval, i16 %newval, i32 6, i32 6) + ; CHECK-NEXT: %success = icmp eq i16 %res, %oldval + ; CHECK-NEXT: %res.insert.value = insertvalue { i16, i1 } undef, i16 %res, 0 + ; CHECK-NEXT: %res.insert.success = insertvalue { i16, i1 } %res.insert.value, i1 %success, 1 + ; CHECK-NEXT: %suc = extractvalue { i16, i1 } %res.insert.success, 1 + %res = cmpxchg i16* %ptr, i16 %oldval, i16 %newval seq_cst seq_cst + %suc = extractvalue { i16, i1 } %res, 1 + ret i1 %suc ; CHECK-NEXT: ret i1 %suc +} + +; CHECK-LABEL: @test_bool_compare_and_swap_i32 +define i1 @test_bool_compare_and_swap_i32(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + ; CHECK-NEXT: %success = icmp eq i32 %res, %oldval + ; CHECK-NEXT: %res.insert.value = insertvalue { i32, i1 } undef, i32 %res, 0 + ; CHECK-NEXT: %res.insert.success = insertvalue { i32, i1 } %res.insert.value, i1 %success, 1 + ; CHECK-NEXT: %suc = extractvalue { i32, i1 } %res.insert.success, 1 + %res = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst seq_cst + %suc = extractvalue { i32, i1 } %res, 1 + ret i1 %suc ; CHECK-NEXT: ret i1 %suc +} + +; CHECK-LABEL: @test_bool_compare_and_swap_i64 +define i1 @test_bool_compare_and_swap_i64(i64* %ptr, i64 %oldval, i64 %newval) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %oldval, i64 %newval, i32 6, i32 6) + ; CHECK-NEXT: %success = icmp eq i64 %res, %oldval + ; CHECK-NEXT: %res.insert.value = insertvalue { i64, i1 } undef, i64 %res, 0 + ; CHECK-NEXT: %res.insert.success = insertvalue { i64, i1 } %res.insert.value, i1 %success, 1 + ; CHECK-NEXT: %suc = extractvalue { i64, i1 } %res.insert.success, 1 + %res = cmpxchg i64* %ptr, i64 %oldval, i64 %newval seq_cst seq_cst + %suc = extractvalue { i64, i1 } %res, 1 + ret i1 %suc ; CHECK-NEXT: ret i1 %suc +} diff --git a/test/Transforms/NaCl/atomic/volatile.ll b/test/Transforms/NaCl/atomic/volatile.ll new file mode 100644 index 000000000000..58a3d9aecfe3 --- /dev/null +++ b/test/Transforms/NaCl/atomic/volatile.ll @@ -0,0 +1,171 @@ +; RUN: opt -nacl-rewrite-atomics -S < %s | FileCheck %s +; +; Validate that volatile loads/stores get rewritten into NaCl atomic builtins. +; The memory ordering for volatile loads/stores could technically be constrained +; to sequential consistency (enum value 6), or left as relaxed. 
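Aside (not part of the patch): the IR below is what ordinary C volatile accesses lower to; a hedged sketch of such source, with names chosen for illustration:

/* Each access is expected to become an llvm.nacl.atomic.load/store call
   with ordering value 6; float/double values travel through a bitcast. */
int read_status(volatile int *status) {
  return *status;
}

void write_sample(volatile double *out, double v) {
  *out = v;
}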
+ +target datalayout = "p:32:32:32" + +; CHECK-LABEL: @test_volatile_load_i8 +define zeroext i8 @test_volatile_load_i8(i8* %ptr) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.load.i8(i8* %ptr, i32 6) + %res = load volatile i8, i8* %ptr, align 1 + ret i8 %res ; CHECK-NEXT: ret i8 %res +} + +; CHECK-LABEL: @test_volatile_store_i8 +define void @test_volatile_store_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i8(i8 %value, i8* %ptr, i32 6) + store volatile i8 %value, i8* %ptr, align 1 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_volatile_load_i16 +define zeroext i16 @test_volatile_load_i16(i16* %ptr) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.load.i16(i16* %ptr, i32 6) + %res = load volatile i16, i16* %ptr, align 2 + ret i16 %res ; CHECK-NEXT: ret i16 %res +} + +; CHECK-LABEL: @test_volatile_store_i16 +define void @test_volatile_store_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i16(i16 %value, i16* %ptr, i32 6) + store volatile i16 %value, i16* %ptr, align 2 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_volatile_load_i32 +define i32 @test_volatile_load_i32(i32* %ptr) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6) + %res = load volatile i32, i32* %ptr, align 4 + ret i32 %res ; CHECK-NEXT: ret i32 %res +} + +; CHECK-LABEL: @test_volatile_store_i32 +define void @test_volatile_store_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + store volatile i32 %value, i32* %ptr, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_volatile_load_i64 +define i64 @test_volatile_load_i64(i64* %ptr) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr, i32 6) + %res = load volatile i64, i64* %ptr, align 8 + ret i64 %res ; CHECK-NEXT: ret i64 %res +} + +; CHECK-LABEL: @test_volatile_store_i64 +define void @test_volatile_store_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i64(i64 %value, i64* %ptr, i32 6) + store volatile i64 %value, i64* %ptr, align 8 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_volatile_load_float +define float @test_volatile_load_float(float* %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast float* %ptr to i32* + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = bitcast i32 %res to float + %res = load volatile float, float* %ptr, align 4 + ret float %res ; CHECK-NEXT: ret float %res.cast +} + +; CHECK-LABEL: @test_volatile_store_float +define void @test_volatile_store_float(float* %ptr, float %value) { + ; CHECK-NEXT: %ptr.cast = bitcast float* %ptr to i32* + ; CHECK-NEXT: %value.cast = bitcast float %value to i32 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value.cast, i32* %ptr.cast, i32 6) + store volatile float %value, float* %ptr, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_volatile_load_double +define double @test_volatile_load_double(double* %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast double* %ptr to i64* + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = bitcast i64 %res to double + %res = load volatile double, double* %ptr, align 8 + ret double %res ; CHECK-NEXT: ret double %res.cast +} + +; CHECK-LABEL: @test_volatile_store_double +define void @test_volatile_store_double(double* %ptr, double %value) { + ; CHECK-NEXT: %ptr.cast = bitcast 
double* %ptr to i64* + ; CHECK-NEXT: %value.cast = bitcast double %value to i64 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i64(i64 %value.cast, i64* %ptr.cast, i32 6) + store volatile double %value, double* %ptr, align 8 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_volatile_load_i32_pointer +define i32* @test_volatile_load_i32_pointer(i32** %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast i32** %ptr to i32* + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = inttoptr i32 %res to i32* + %res = load volatile i32*, i32** %ptr, align 4 + ret i32* %res ; CHECK-NEXT: ret i32* %res.cast +} + +; CHECK-LABEL: @test_volatile_store_i32_pointer +define void @test_volatile_store_i32_pointer(i32** %ptr, i32* %value) { + ; CHECK-NEXT: %ptr.cast = bitcast i32** %ptr to i32* + ; CHECK-NEXT: %value.cast = ptrtoint i32* %value to i32 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value.cast, i32* %ptr.cast, i32 6) + store volatile i32* %value, i32** %ptr, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_volatile_load_double_pointer +define double* @test_volatile_load_double_pointer(double** %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast double** %ptr to i32* + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = inttoptr i32 %res to double* + %res = load volatile double*, double** %ptr, align 4 + ret double* %res ; CHECK-NEXT: ret double* %res.cast +} + +; CHECK-LABEL: @test_volatile_store_double_pointer +define void @test_volatile_store_double_pointer(double** %ptr, double* %value) { + ; CHECK-NEXT: %ptr.cast = bitcast double** %ptr to i32* + ; CHECK-NEXT: %value.cast = ptrtoint double* %value to i32 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value.cast, i32* %ptr.cast, i32 6) + store volatile double* %value, double** %ptr, align 4 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_volatile_load_v4i8 +define <4 x i8> @test_volatile_load_v4i8(<4 x i8>* %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast <4 x i8>* %ptr to i32* + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = bitcast i32 %res to <4 x i8> + %res = load volatile <4 x i8>, <4 x i8>* %ptr, align 8 + ret <4 x i8> %res ; CHECK-NEXT: ret <4 x i8> %res.cast +} + +; CHECK-LABEL: @test_volatile_store_v4i8 +define void @test_volatile_store_v4i8(<4 x i8>* %ptr, <4 x i8> %value) { + ; CHECK-NEXT: %ptr.cast = bitcast <4 x i8>* %ptr to i32* + ; CHECK-NEXT: %value.cast = bitcast <4 x i8> %value to i32 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value.cast, i32* %ptr.cast, i32 6) + store volatile <4 x i8> %value, <4 x i8>* %ptr, align 8 + ret void ; CHECK-NEXT: ret void +} + +; CHECK-LABEL: @test_volatile_load_v4i16 +define <4 x i16> @test_volatile_load_v4i16(<4 x i16>* %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast <4 x i16>* %ptr to i64* + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = bitcast i64 %res to <4 x i16> + %res = load volatile <4 x i16>, <4 x i16>* %ptr, align 8 + ret <4 x i16> %res ; CHECK-NEXT: ret <4 x i16> %res.cast +} + +; CHECK-LABEL: @test_volatile_store_v4i16 +define void @test_volatile_store_v4i16(<4 x i16>* %ptr, <4 x i16> %value) { + ; CHECK-NEXT: %ptr.cast = bitcast <4 x i16>* %ptr to i64* + ; CHECK-NEXT: %value.cast = bitcast <4 x i16> %value to i64 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i64(i64 %value.cast, i64* 
%ptr.cast, i32 6) + store volatile <4 x i16> %value, <4 x i16>* %ptr, align 8 + ret void ; CHECK-NEXT: ret void +} diff --git a/test/Transforms/NaCl/canonicalize-mem-intrinsics.ll b/test/Transforms/NaCl/canonicalize-mem-intrinsics.ll new file mode 100644 index 000000000000..9c263fd15e71 --- /dev/null +++ b/test/Transforms/NaCl/canonicalize-mem-intrinsics.ll @@ -0,0 +1,45 @@ +; RUN: opt %s -canonicalize-mem-intrinsics -S | FileCheck %s +; RUN: opt %s -canonicalize-mem-intrinsics -S \ +; RUN: | FileCheck %s -check-prefix=CLEANED + +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +; CLEANED-NOT: @llvm.mem{{.*}}i64 + + +define void @memset_caller(i8* %dest, i8 %char, i64 %size) { + call void @llvm.memset.p0i8.i64(i8* %dest, i8 %char, i64 %size, i32 1, i1 0) + ret void +} +; CHECK: define void @memset_caller +; CHECK-NEXT: %mem_len_truncate = trunc i64 %size to i32 +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %char, i32 %mem_len_truncate, i32 1, i1 false) + + +define void @memcpy_caller(i8* %dest, i8* %src, i64 %size) { + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %size, i32 1, i1 0) + ret void +} +; CHECK: define void @memcpy_caller +; CHECK-NEXT: %mem_len_truncate = trunc i64 %size to i32 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %mem_len_truncate, i32 1, i1 false) + + +define void @memmove_caller(i8* %dest, i8* %src, i64 %size) { + call void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %size, i32 1, i1 0) + ret void +} +; CHECK: define void @memmove_caller +; CHECK-NEXT: %mem_len_truncate = trunc i64 %size to i32 +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %mem_len_truncate, i32 1, i1 false) + + +; Check that constant sizes remain as constants. + +define void @memset_caller_const(i8* %dest, i8 %char) { + call void @llvm.memset.p0i8.i64(i8* %dest, i8 %char, i64 123, i32 1, i1 0) + ret void +} +; CHECK: define void @memset_caller +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %char, i32 123, i32 1, i1 false) diff --git a/test/Transforms/NaCl/cleanup-used-globals-metadata.ll b/test/Transforms/NaCl/cleanup-used-globals-metadata.ll new file mode 100644 index 000000000000..4c9d13ba6b0e --- /dev/null +++ b/test/Transforms/NaCl/cleanup-used-globals-metadata.ll @@ -0,0 +1,15 @@ +; RUN: opt %s -cleanup-used-globals-metadata -S | FileCheck %s + +target datalayout = "e-p:32:32-i64:64" +target triple = "le32-unknown-nacl" + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata" +; The used list is removed. +; CHECK-NOT: @llvm.used + + +define internal void @foo() #0 { + ret void +} +; The global (@foo) is still present. +; CHECK-LABEL: define internal void @foo diff --git a/test/Transforms/NaCl/combine-shuffle-vector.ll b/test/Transforms/NaCl/combine-shuffle-vector.ll new file mode 100644 index 000000000000..70cc17efc118 --- /dev/null +++ b/test/Transforms/NaCl/combine-shuffle-vector.ll @@ -0,0 +1,69 @@ +; RUN: opt -expand-shufflevector %s -S | \ +; RUN: opt -backend-canonicalize -S | FileCheck %s + +; Test that shufflevector is re-created after having been expanded to +; insertelement / extractelement: shufflevector isn't part of the stable +; PNaCl ABI but insertelement / extractelement are. Re-creating +; shufflevector allows the backend to generate more efficient code. 
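Side note (not part of the patch): shuffles like the ones reconstituted below commonly originate from Clang's __builtin_shufflevector on vector-extension types; a hedged C sketch of such a source-level shuffle, with names chosen for illustration:

typedef int v4i32 __attribute__((vector_size(16)));

/* Interleave the low halves of a and b. Clang emits one shufflevector for
   this; -expand-shufflevector splits it into insert/extractelement and
   -backend-canonicalize is expected to fold it back into a shufflevector. */
v4i32 interleave_lo(v4i32 a, v4i32 b) {
  return __builtin_shufflevector(a, b, 0, 4, 1, 5);
}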
+; +; TODO(jfb) Narrow and widen aren't tested since the underlying types +; are currently not supported by the PNaCl ABI. + +; The datalayout is needed to fold global constants. +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" + +define <4 x i32> @test_splat_lo_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_splat_lo_4xi32 + ; CHECK-NEXT: %[[R:[0-9]+]] = shufflevector <4 x i32> %lhs, <4 x i32> undef, <4 x i32> zeroinitializer + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %[[R]] + ret <4 x i32> %res +} + +define <4 x i32> @test_splat_hi_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_splat_hi_4xi32 + ; CHECK-NEXT: %[[R:[0-9]+]] = shufflevector <4 x i32> %rhs, <4 x i32> undef, <4 x i32> zeroinitializer + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %[[R]] + ret <4 x i32> %res +} + +define <4 x i32> @test_id_lo_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_id_lo_4xi32 + ; CHECK-NEXT: %[[R:[0-9]+]] = shufflevector <4 x i32> %lhs, <4 x i32> undef, <4 x i32> + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %[[R]] + ret <4 x i32> %res +} + +define <4 x i32> @test_id_hi_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_id_hi_4xi32 + ; CHECK-NEXT: %[[R:[0-9]+]] = shufflevector <4 x i32> %rhs, <4 x i32> undef, <4 x i32> + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %[[R]] + ret <4 x i32> %res +} + +define <4 x i32> @test_interleave_lo_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_interleave_lo_4xi32 + ; CHECK-NEXT: %[[R:[0-9]+]] = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %[[R]] + ret <4 x i32> %res +} + +define <4 x i32> @test_interleave_hi_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_interleave_hi_4xi32 + ; CHECK-NEXT: %[[R:[0-9]+]] = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %[[R]] + ret <4 x i32> %res +} + +define <4 x i32> @test_with_constant(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_with_constant + ; CHECK-NEXT: %[[R:[0-9]+]] = shufflevector <4 x i32> %lhs, <4 x i32> , <4 x i32> + %res = shufflevector <4 x i32> %lhs, <4 x i32> , <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %[[R]] + ret <4 x i32> %res +} diff --git a/test/Transforms/NaCl/constant-insert-extract-element-index.ll b/test/Transforms/NaCl/constant-insert-extract-element-index.ll new file mode 100644 index 000000000000..4c53afa50940 --- /dev/null +++ b/test/Transforms/NaCl/constant-insert-extract-element-index.ll @@ -0,0 +1,425 @@ +; RUN: opt -constant-insert-extract-element-index %s -S | FileCheck %s + +; The datalayout is needed to determine the alignment of the load/stores. +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" + + +; The following insert/extract elements are all indexed with an in-range +; constant, they should remain unchanged. 
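For orientation (an aside, not part of the patch): in source terms, a constant vector subscript keeps a plain insert/extractelement, while a variable subscript is what this pass rewrites through a stack slot. A hedged sketch using Clang/GCC vector extensions; names are illustrative:

typedef int v4i32 __attribute__((vector_size(16)));

int lane2(v4i32 v) {         /* constant index: left unchanged by the pass */
  return v[2];
}

int lane_n(v4i32 v, int n) { /* variable index: expected to be lowered via an
                                alloca, a store and a scalar load */
  return v[n];
}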
+ +define void @test_16xi1_unchanged(<16 x i1> %in) { + ; CHECK-LABEL: test_16xi1_unchanged + ; CHECK-NOT: alloca + ; CHECK: extractelement + %e.0 = extractelement <16 x i1> %in, i32 0 + %e.1 = extractelement <16 x i1> %in, i32 1 + %e.2 = extractelement <16 x i1> %in, i32 2 + %e.3 = extractelement <16 x i1> %in, i32 3 + %e.4 = extractelement <16 x i1> %in, i32 4 + %e.5 = extractelement <16 x i1> %in, i32 5 + %e.6 = extractelement <16 x i1> %in, i32 6 + %e.7 = extractelement <16 x i1> %in, i32 7 + %e.8 = extractelement <16 x i1> %in, i32 8 + %e.9 = extractelement <16 x i1> %in, i32 9 + %e.10 = extractelement <16 x i1> %in, i32 10 + %e.11 = extractelement <16 x i1> %in, i32 11 + %e.12 = extractelement <16 x i1> %in, i32 12 + %e.13 = extractelement <16 x i1> %in, i32 13 + %e.14 = extractelement <16 x i1> %in, i32 14 + %e.15 = extractelement <16 x i1> %in, i32 15 + ; CHECK-NOT: alloca + ; CHECK: insertelement + %i.0 = insertelement <16 x i1> %in, i1 true, i32 0 + %i.1 = insertelement <16 x i1> %in, i1 true, i32 1 + %i.2 = insertelement <16 x i1> %in, i1 true, i32 2 + %i.3 = insertelement <16 x i1> %in, i1 true, i32 3 + %i.4 = insertelement <16 x i1> %in, i1 true, i32 4 + %i.5 = insertelement <16 x i1> %in, i1 true, i32 5 + %i.6 = insertelement <16 x i1> %in, i1 true, i32 6 + %i.7 = insertelement <16 x i1> %in, i1 true, i32 7 + %i.8 = insertelement <16 x i1> %in, i1 true, i32 8 + %i.9 = insertelement <16 x i1> %in, i1 true, i32 9 + %i.10 = insertelement <16 x i1> %in, i1 true, i32 10 + %i.11 = insertelement <16 x i1> %in, i1 true, i32 11 + %i.12 = insertelement <16 x i1> %in, i1 true, i32 12 + %i.13 = insertelement <16 x i1> %in, i1 true, i32 13 + %i.14 = insertelement <16 x i1> %in, i1 true, i32 14 + %i.15 = insertelement <16 x i1> %in, i1 true, i32 15 + ; CHECK-NOT: alloca + ret void +} + +define void @test_8xi1_unchanged(<8 x i1> %in) { + ; CHECK-LABEL: test_8xi1_unchanged + ; CHECK-NOT: alloca + ; CHECK: extractelement + %e.0 = extractelement <8 x i1> %in, i32 0 + %e.1 = extractelement <8 x i1> %in, i32 1 + %e.2 = extractelement <8 x i1> %in, i32 2 + %e.3 = extractelement <8 x i1> %in, i32 3 + %e.4 = extractelement <8 x i1> %in, i32 4 + %e.5 = extractelement <8 x i1> %in, i32 5 + %e.6 = extractelement <8 x i1> %in, i32 6 + %e.7 = extractelement <8 x i1> %in, i32 7 + ; CHECK-NOT: alloca + ; CHECK: insertelement + %i.0 = insertelement <8 x i1> %in, i1 true, i32 0 + %i.1 = insertelement <8 x i1> %in, i1 true, i32 1 + %i.2 = insertelement <8 x i1> %in, i1 true, i32 2 + %i.3 = insertelement <8 x i1> %in, i1 true, i32 3 + %i.4 = insertelement <8 x i1> %in, i1 true, i32 4 + %i.5 = insertelement <8 x i1> %in, i1 true, i32 5 + %i.6 = insertelement <8 x i1> %in, i1 true, i32 6 + %i.7 = insertelement <8 x i1> %in, i1 true, i32 7 + ; CHECK-NOT: alloca + ret void +} + +define void @test_4xi1_unchanged(<4 x i1> %in) { + ; CHECK-LABEL: test_4xi1_unchanged + ; CHECK-NOT: alloca + ; CHECK: extractelement + %e.0 = extractelement <4 x i1> %in, i32 0 + %e.1 = extractelement <4 x i1> %in, i32 1 + %e.2 = extractelement <4 x i1> %in, i32 2 + %e.3 = extractelement <4 x i1> %in, i32 3 + ; CHECK-NOT: alloca + ; CHECK: insertelement + %i.0 = insertelement <4 x i1> %in, i1 true, i32 0 + %i.1 = insertelement <4 x i1> %in, i1 true, i32 1 + %i.2 = insertelement <4 x i1> %in, i1 true, i32 2 + %i.3 = insertelement <4 x i1> %in, i1 true, i32 3 + ; CHECK-NOT: alloca + ret void +} + +define void @test_16xi8_unchanged(<16 x i8> %in) { + ; CHECK-LABEL: test_16xi8_unchanged + ; CHECK-NOT: alloca + ; CHECK: extractelement + %e.0 
= extractelement <16 x i8> %in, i32 0 + %e.1 = extractelement <16 x i8> %in, i32 1 + %e.2 = extractelement <16 x i8> %in, i32 2 + %e.3 = extractelement <16 x i8> %in, i32 3 + %e.4 = extractelement <16 x i8> %in, i32 4 + %e.5 = extractelement <16 x i8> %in, i32 5 + %e.6 = extractelement <16 x i8> %in, i32 6 + %e.7 = extractelement <16 x i8> %in, i32 7 + %e.8 = extractelement <16 x i8> %in, i32 8 + %e.9 = extractelement <16 x i8> %in, i32 9 + %e.10 = extractelement <16 x i8> %in, i32 10 + %e.11 = extractelement <16 x i8> %in, i32 11 + %e.12 = extractelement <16 x i8> %in, i32 12 + %e.13 = extractelement <16 x i8> %in, i32 13 + %e.14 = extractelement <16 x i8> %in, i32 14 + %e.15 = extractelement <16 x i8> %in, i32 15 + ; CHECK-NOT: alloca + ; CHECK: insertelement + %i.0 = insertelement <16 x i8> %in, i8 42, i32 0 + %i.1 = insertelement <16 x i8> %in, i8 42, i32 1 + %i.2 = insertelement <16 x i8> %in, i8 42, i32 2 + %i.3 = insertelement <16 x i8> %in, i8 42, i32 3 + %i.4 = insertelement <16 x i8> %in, i8 42, i32 4 + %i.5 = insertelement <16 x i8> %in, i8 42, i32 5 + %i.6 = insertelement <16 x i8> %in, i8 42, i32 6 + %i.7 = insertelement <16 x i8> %in, i8 42, i32 7 + %i.8 = insertelement <16 x i8> %in, i8 42, i32 8 + %i.9 = insertelement <16 x i8> %in, i8 42, i32 9 + %i.10 = insertelement <16 x i8> %in, i8 42, i32 10 + %i.11 = insertelement <16 x i8> %in, i8 42, i32 11 + %i.12 = insertelement <16 x i8> %in, i8 42, i32 12 + %i.13 = insertelement <16 x i8> %in, i8 42, i32 13 + %i.14 = insertelement <16 x i8> %in, i8 42, i32 14 + %i.15 = insertelement <16 x i8> %in, i8 42, i32 15 + ; CHECK-NOT: alloca + ret void +} + +define void @test_8xi16_unchanged(<8 x i16> %in) { + ; CHECK-LABEL: test_8xi16_unchanged + ; CHECK-NOT: alloca + ; CHECK: extractelement + %e.0 = extractelement <8 x i16> %in, i32 0 + %e.1 = extractelement <8 x i16> %in, i32 1 + %e.2 = extractelement <8 x i16> %in, i32 2 + %e.3 = extractelement <8 x i16> %in, i32 3 + %e.4 = extractelement <8 x i16> %in, i32 4 + %e.5 = extractelement <8 x i16> %in, i32 5 + %e.6 = extractelement <8 x i16> %in, i32 6 + %e.7 = extractelement <8 x i16> %in, i32 7 + ; CHECK-NOT: alloca + ; CHECK: insertelement + %i.0 = insertelement <8 x i16> %in, i16 42, i32 0 + %i.1 = insertelement <8 x i16> %in, i16 42, i32 1 + %i.2 = insertelement <8 x i16> %in, i16 42, i32 2 + %i.3 = insertelement <8 x i16> %in, i16 42, i32 3 + %i.4 = insertelement <8 x i16> %in, i16 42, i32 4 + %i.5 = insertelement <8 x i16> %in, i16 42, i32 5 + %i.6 = insertelement <8 x i16> %in, i16 42, i32 6 + %i.7 = insertelement <8 x i16> %in, i16 42, i32 7 + ; CHECK-NOT: alloca + ret void +} + +define void @test_4xi32_unchanged(<4 x i32> %in) { + ; CHECK-LABEL: test_4xi32_unchanged + ; CHECK-NOT: alloca + ; CHECK: extractelement + %e.0 = extractelement <4 x i32> %in, i32 0 + %e.1 = extractelement <4 x i32> %in, i32 1 + %e.2 = extractelement <4 x i32> %in, i32 2 + %e.3 = extractelement <4 x i32> %in, i32 3 + ; CHECK-NOT: alloca + ; CHECK: insertelement + %i.0 = insertelement <4 x i32> %in, i32 42, i32 0 + %i.1 = insertelement <4 x i32> %in, i32 42, i32 1 + %i.2 = insertelement <4 x i32> %in, i32 42, i32 2 + %i.3 = insertelement <4 x i32> %in, i32 42, i32 3 + ; CHECK-NOT: alloca + ret void +} + +define void @test_4xfloat_unchanged(<4 x float> %in) { + ; CHECK-LABEL: test_4xfloat_unchanged + ; CHECK-NOT: alloca + ; CHECK: extractelement + %e.0 = extractelement <4 x float> %in, i32 0 + %e.1 = extractelement <4 x float> %in, i32 1 + %e.2 = extractelement <4 x float> %in, i32 2 + %e.3 = 
extractelement <4 x float> %in, i32 3 + ; CHECK-NOT: alloca + ; CHECK: insertelement + %i.0 = insertelement <4 x float> %in, float 42.0, i32 0 + %i.1 = insertelement <4 x float> %in, float 42.0, i32 1 + %i.2 = insertelement <4 x float> %in, float 42.0, i32 2 + %i.3 = insertelement <4 x float> %in, float 42.0, i32 3 + ; CHECK-NOT: alloca + ret void +} + + +; The following insert/extract elements are all indexed with an +; out-of-range constant, they should get modified so that the constant +; is now in-range. + +define <16 x i1> @test_16xi1_out_of_range(<16 x i1> %in) { + ; CHECK-LABEL: test_16xi1_out_of_range + ; CHECK-NEXT: extractelement <16 x i1> %in, i32 0 + %e.16 = extractelement <16 x i1> %in, i32 16 + ; CHECK-NEXT: %i.16 = insertelement <16 x i1> %in, i1 %e.16, i32 0 + %i.16 = insertelement <16 x i1> %in, i1 %e.16, i32 16 + ; CHECK-NEXT: ret <16 x i1> %i.16 + ret <16 x i1> %i.16 +} + +define <8 x i1> @test_8xi1_out_of_range(<8 x i1> %in) { + ; CHECK-LABEL: test_8xi1_out_of_range + ; CHECK-NEXT: %e.8 = extractelement <8 x i1> %in, i32 0 + %e.8 = extractelement <8 x i1> %in, i32 8 + ; CHECK-NEXT: %i.8 = insertelement <8 x i1> %in, i1 %e.8, i32 0 + %i.8 = insertelement <8 x i1> %in, i1 %e.8, i32 8 + ; CHECK-NEXT: ret <8 x i1> %i.8 + ret <8 x i1> %i.8 +} + +define <4 x i1> @test_4xi1_out_of_range(<4 x i1> %in) { + ; CHECK-LABEL: test_4xi1_out_of_range + ; CHECK-NEXT: %e.4 = extractelement <4 x i1> %in, i32 0 + %e.4 = extractelement <4 x i1> %in, i32 4 + ; CHECK-NEXT: %i.4 = insertelement <4 x i1> %in, i1 %e.4, i32 0 + %i.4 = insertelement <4 x i1> %in, i1 %e.4, i32 4 + ; CHECK-NEXT: ret <4 x i1> %i.4 + ret <4 x i1> %i.4 +} + +define <16 x i8> @test_16xi8_out_of_range(<16 x i8> %in) { + ; CHECK-LABEL: test_16xi8_out_of_range + ; CHECK-NEXT: %e.16 = extractelement <16 x i8> %in, i32 0 + %e.16 = extractelement <16 x i8> %in, i32 16 + ; CHECK-NEXT: %i.16 = insertelement <16 x i8> %in, i8 %e.16, i32 0 + %i.16 = insertelement <16 x i8> %in, i8 %e.16, i32 16 + ; CHECK-NEXT: ret <16 x i8> %i.16 + ret <16 x i8> %i.16 +} + +define <8 x i16> @test_8xi16_out_of_range(<8 x i16> %in) { + ; CHECK-LABEL: test_8xi16_out_of_range + ; CHECK-NEXT: %e.8 = extractelement <8 x i16> %in, i32 0 + %e.8 = extractelement <8 x i16> %in, i32 8 + ; CHECK-NEXT: %i.8 = insertelement <8 x i16> %in, i16 %e.8, i32 0 + %i.8 = insertelement <8 x i16> %in, i16 %e.8, i32 8 + ; CHECK-NEXT: ret <8 x i16> %i.8 + ret <8 x i16> %i.8 +} + +define <4 x i32> @test_4xi32_out_of_range(<4 x i32> %in) { + ; CHECK-LABEL: test_4xi32_out_of_range + ; CHECK-NEXT: %e.4 = extractelement <4 x i32> %in, i32 0 + %e.4 = extractelement <4 x i32> %in, i32 4 + ; CHECK-NEXT: %i.4 = insertelement <4 x i32> %in, i32 %e.4, i32 0 + %i.4 = insertelement <4 x i32> %in, i32 %e.4, i32 4 + ; CHECK-NEXT: ret <4 x i32> %i.4 + ret <4 x i32> %i.4 +} + +define <4 x float> @test_4xfloat_out_of_range(<4 x float> %in) { + ; CHECK-LABEL: test_4xfloat_out_of_range + ; CHECK-NEXT: %e.4 = extractelement <4 x float> %in, i32 0 + %e.4 = extractelement <4 x float> %in, i32 4 + ; CHECK-NEXT: %i.4 = insertelement <4 x float> %in, float %e.4, i32 0 + %i.4 = insertelement <4 x float> %in, float %e.4, i32 4 + ; CHECK-NEXT: ret <4 x float> %i.4 + ret <4 x float> %i.4 +} + +define <4 x i32> @test_4xi32_out_of_range_urem(<4 x i32> %in) { + ; CHECK-LABEL: test_4xi32_out_of_range_urem + %e.4 = extractelement <4 x i32> %in, i32 4 ; CHECK-NEXT: {{.*}} extractelement {{.*}} i32 0 + %e.5 = extractelement <4 x i32> %in, i32 5 ; CHECK-NEXT: {{.*}} extractelement {{.*}} i32 1 + %e.6 = 
extractelement <4 x i32> %in, i32 6 ; CHECK-NEXT: {{.*}} extractelement {{.*}} i32 2 + %e.7 = extractelement <4 x i32> %in, i32 7 ; CHECK-NEXT: {{.*}} extractelement {{.*}} i32 3 + %e.8 = extractelement <4 x i32> %in, i32 8 ; CHECK-NEXT: {{.*}} extractelement {{.*}} i32 0 + %i.4 = insertelement <4 x i32> %in, i32 %e.4, i32 4 ; CHECK-NEXT: {{.*}} insertelement {{.*}} i32 0 + %i.5 = insertelement <4 x i32> %in, i32 %e.5, i32 5 ; CHECK-NEXT: {{.*}} insertelement {{.*}} i32 1 + %i.6 = insertelement <4 x i32> %in, i32 %e.6, i32 6 ; CHECK-NEXT: {{.*}} insertelement {{.*}} i32 2 + %i.7 = insertelement <4 x i32> %in, i32 %e.7, i32 7 ; CHECK-NEXT: {{.*}} insertelement {{.*}} i32 3 + %i.8 = insertelement <4 x i32> %in, i32 %e.8, i32 8 ; CHECK-NEXT: {{.*}} insertelement {{.*}} i32 0 + ; CHECK-NEXT: ret <4 x i32> %i.4 + ret <4 x i32> %i.4 +} + +; The following insert/extract elements are all indexed with a variable, +; they should get modified. + +define <16 x i1> @test_16xi1_variable(<16 x i1> %in, i32 %idx) { + ; CHECK-LABEL: test_16xi1_variable + ; CHECK-NEXT: %[[EALLOCA:[0-9]+]] = alloca i1, i32 16, align 16 + ; CHECK-NEXT: %[[ECAST:[0-9]+]] = bitcast i1* %[[EALLOCA]] to <16 x i1>* + ; CHECK-NEXT: store <16 x i1> %in, <16 x i1>* %[[ECAST]], align 16 + ; CHECK-NEXT: %[[EGEP:[0-9]+]] = getelementptr i1, i1* %[[EALLOCA]], i32 %idx + ; CHECK-NEXT: %[[ELOAD:[0-9]+]] = load i1, i1* %[[EGEP]], align 1 + %e.16 = extractelement <16 x i1> %in, i32 %idx + ; CHECK-NEXT: %[[IALLOCA:[0-9]+]] = alloca i1, i32 16, align 16 + ; CHECK-NEXT: %[[ICAST:[0-9]+]] = bitcast i1* %[[IALLOCA]] to <16 x i1>* + ; CHECK-NEXT: store <16 x i1> %in, <16 x i1>* %[[ICAST]], align 16 + ; CHECK-NEXT: %[[IGEP:[0-9]+]] = getelementptr i1, i1* %[[IALLOCA]], i32 %idx + ; CHECK-NEXT: store i1 %[[ELOAD]], i1* %[[IGEP]], align 1 + ; CHECK-NEXT: %[[ILOAD:[0-9]+]] = load <16 x i1>, <16 x i1>* %[[ICAST]], align 16 + %i.16 = insertelement <16 x i1> %in, i1 %e.16, i32 %idx + ; CHECK-NEXT: ret <16 x i1> %[[ILOAD]] + ret <16 x i1> %i.16 +} + +define <8 x i1> @test_8xi1_variable(<8 x i1> %in, i32 %idx) { + ; CHECK-LABEL: test_8xi1_variable + ; CHECK-NEXT: %[[EALLOCA:[0-9]+]] = alloca i1, i32 8, align 8 + ; CHECK-NEXT: %[[ECAST:[0-9]+]] = bitcast i1* %[[EALLOCA]] to <8 x i1>* + ; CHECK-NEXT: store <8 x i1> %in, <8 x i1>* %[[ECAST]], align 8 + ; CHECK-NEXT: %[[EGEP:[0-9]+]] = getelementptr i1, i1* %[[EALLOCA]], i32 %idx + ; CHECK-NEXT: %[[ELOAD:[0-9]+]] = load i1, i1* %[[EGEP]], align 1 + %e.8 = extractelement <8 x i1> %in, i32 %idx + ; CHECK-NEXT: %[[IALLOCA:[0-9]+]] = alloca i1, i32 8, align 8 + ; CHECK-NEXT: %[[ICAST:[0-9]+]] = bitcast i1* %[[IALLOCA]] to <8 x i1>* + ; CHECK-NEXT: store <8 x i1> %in, <8 x i1>* %[[ICAST]], align 8 + ; CHECK-NEXT: %[[IGEP:[0-9]+]] = getelementptr i1, i1* %[[IALLOCA]], i32 %idx + ; CHECK-NEXT: store i1 %[[ELOAD]], i1* %[[IGEP]], align 1 + ; CHECK-NEXT: %[[ILOAD:[0-9]+]] = load <8 x i1>, <8 x i1>* %[[ICAST]], align 8 + %i.8 = insertelement <8 x i1> %in, i1 %e.8, i32 %idx + ; CHECK-NEXT: ret <8 x i1> %[[ILOAD]] + ret <8 x i1> %i.8 +} + +define <4 x i1> @test_4xi1_variable(<4 x i1> %in, i32 %idx) { + ; CHECK-LABEL: test_4xi1_variable + ; CHECK-NEXT: %[[EALLOCA:[0-9]+]] = alloca i1, i32 4, align 4 + ; CHECK-NEXT: %[[ECAST:[0-9]+]] = bitcast i1* %[[EALLOCA]] to <4 x i1>* + ; CHECK-NEXT: store <4 x i1> %in, <4 x i1>* %[[ECAST]], align 4 + ; CHECK-NEXT: %[[EGEP:[0-9]+]] = getelementptr i1, i1* %[[EALLOCA]], i32 %idx + ; CHECK-NEXT: %[[ELOAD:[0-9]+]] = load i1, i1* %[[EGEP]], align 1 + %e.4 = extractelement <4 x i1> %in, i32 
%idx + ; CHECK-NEXT: %[[IALLOCA:[0-9]+]] = alloca i1, i32 4, align 4 + ; CHECK-NEXT: %[[ICAST:[0-9]+]] = bitcast i1* %[[IALLOCA]] to <4 x i1>* + ; CHECK-NEXT: store <4 x i1> %in, <4 x i1>* %[[ICAST]], align 4 + ; CHECK-NEXT: %[[IGEP:[0-9]+]] = getelementptr i1, i1* %[[IALLOCA]], i32 %idx + ; CHECK-NEXT: store i1 %[[ELOAD]], i1* %[[IGEP]], align 1 + ; CHECK-NEXT: %[[ILOAD:[0-9]+]] = load <4 x i1>, <4 x i1>* %[[ICAST]], align 4 + %i.4 = insertelement <4 x i1> %in, i1 %e.4, i32 %idx + ; CHECK-NEXT: ret <4 x i1> %[[ILOAD]] + ret <4 x i1> %i.4 +} + +define <16 x i8> @test_16xi8_variable(<16 x i8> %in, i32 %idx) { + ; CHECK-LABEL: test_16xi8_variable + ; CHECK-NEXT: %[[EALLOCA:[0-9]+]] = alloca i8, i32 16, align 4 + ; CHECK-NEXT: %[[ECAST:[0-9]+]] = bitcast i8* %[[EALLOCA]] to <16 x i8>* + ; CHECK-NEXT: store <16 x i8> %in, <16 x i8>* %[[ECAST]], align 4 + ; CHECK-NEXT: %[[EGEP:[0-9]+]] = getelementptr i8, i8* %[[EALLOCA]], i32 %idx + ; CHECK-NEXT: %[[ELOAD:[0-9]+]] = load i8, i8* %[[EGEP]], align 1 + %e.16 = extractelement <16 x i8> %in, i32 %idx + ; CHECK-NEXT: %[[IALLOCA:[0-9]+]] = alloca i8, i32 16, align 4 + ; CHECK-NEXT: %[[ICAST:[0-9]+]] = bitcast i8* %[[IALLOCA]] to <16 x i8>* + ; CHECK-NEXT: store <16 x i8> %in, <16 x i8>* %[[ICAST]], align 4 + ; CHECK-NEXT: %[[IGEP:[0-9]+]] = getelementptr i8, i8* %[[IALLOCA]], i32 %idx + ; CHECK-NEXT: store i8 %[[ELOAD]], i8* %[[IGEP]], align 1 + ; CHECK-NEXT: %[[ILOAD:[0-9]+]] = load <16 x i8>, <16 x i8>* %[[ICAST]], align 4 + %i.16 = insertelement <16 x i8> %in, i8 %e.16, i32 %idx + ; CHECK-NEXT: ret <16 x i8> %[[ILOAD]] + ret <16 x i8> %i.16 +} + +define <8 x i16> @test_8xi16_variable(<8 x i16> %in, i32 %idx) { + ; CHECK-LABEL: test_8xi16_variable + ; CHECK-NEXT: %[[EALLOCA:[0-9]+]] = alloca i16, i32 8, align 4 + ; CHECK-NEXT: %[[ECAST:[0-9]+]] = bitcast i16* %[[EALLOCA]] to <8 x i16>* + ; CHECK-NEXT: store <8 x i16> %in, <8 x i16>* %[[ECAST]], align 4 + ; CHECK-NEXT: %[[EGEP:[0-9]+]] = getelementptr i16, i16* %[[EALLOCA]], i32 %idx + ; CHECK-NEXT: %[[ELOAD:[0-9]+]] = load i16, i16* %[[EGEP]], align 2 + %e.8 = extractelement <8 x i16> %in, i32 %idx + ; CHECK-NEXT: %[[IALLOCA:[0-9]+]] = alloca i16, i32 8, align 4 + ; CHECK-NEXT: %[[ICAST:[0-9]+]] = bitcast i16* %[[IALLOCA]] to <8 x i16>* + ; CHECK-NEXT: store <8 x i16> %in, <8 x i16>* %[[ICAST]], align 4 + ; CHECK-NEXT: %[[IGEP:[0-9]+]] = getelementptr i16, i16* %[[IALLOCA]], i32 %idx + ; CHECK-NEXT: store i16 %[[ELOAD]], i16* %[[IGEP]], align 2 + ; CHECK-NEXT: %[[ILOAD:[0-9]+]] = load <8 x i16>, <8 x i16>* %[[ICAST]], align 4 + %i.8 = insertelement <8 x i16> %in, i16 %e.8, i32 %idx + ; CHECK-NEXT: ret <8 x i16> %[[ILOAD]] + ret <8 x i16> %i.8 +} + +define <4 x i32> @test_4xi32_variable(<4 x i32> %in, i32 %idx) { + ; CHECK-LABEL: test_4xi32_variable + ; CHECK-NEXT: %[[EALLOCA:[0-9]+]] = alloca i32, i32 4, align 4 + ; CHECK-NEXT: %[[ECAST:[0-9]+]] = bitcast i32* %[[EALLOCA]] to <4 x i32>* + ; CHECK-NEXT: store <4 x i32> %in, <4 x i32>* %[[ECAST]], align 4 + ; CHECK-NEXT: %[[EGEP:[0-9]+]] = getelementptr i32, i32* %[[EALLOCA]], i32 %idx + ; CHECK-NEXT: %[[ELOAD:[0-9]+]] = load i32, i32* %[[EGEP]], align 4 + %e.4 = extractelement <4 x i32> %in, i32 %idx + ; CHECK-NEXT: %[[IALLOCA:[0-9]+]] = alloca i32, i32 4, align 4 + ; CHECK-NEXT: %[[ICAST:[0-9]+]] = bitcast i32* %[[IALLOCA]] to <4 x i32>* + ; CHECK-NEXT: store <4 x i32> %in, <4 x i32>* %[[ICAST]], align 4 + ; CHECK-NEXT: %[[IGEP:[0-9]+]] = getelementptr i32, i32* %[[IALLOCA]], i32 %idx + ; CHECK-NEXT: store i32 %[[ELOAD]], i32* %[[IGEP]], align 4 + 
; CHECK-NEXT: %[[ILOAD:[0-9]+]] = load <4 x i32>, <4 x i32>* %[[ICAST]], align 4 + %i.4 = insertelement <4 x i32> %in, i32 %e.4, i32 %idx + ; CHECK-NEXT: ret <4 x i32> %[[ILOAD]] + ret <4 x i32> %i.4 +} + +define <4 x float> @test_4xfloat_variable(<4 x float> %in, i32 %idx) { + ; CHECK-LABEL: test_4xfloat_variable + ; CHECK-NEXT: %[[EALLOCA:[0-9]+]] = alloca float, i32 4, align 4 + ; CHECK-NEXT: %[[ECAST:[0-9]+]] = bitcast float* %[[EALLOCA]] to <4 x float>* + ; CHECK-NEXT: store <4 x float> %in, <4 x float>* %[[ECAST]], align 4 + ; CHECK-NEXT: %[[EGEP:[0-9]+]] = getelementptr float, float* %[[EALLOCA]], i32 %idx + ; CHECK-NEXT: %[[ELOAD:[0-9]+]] = load float, float* %[[EGEP]], align 4 + %e.4 = extractelement <4 x float> %in, i32 %idx + ; CHECK-NEXT: %[[IALLOCA:[0-9]+]] = alloca float, i32 4, align 4 + ; CHECK-NEXT: %[[ICAST:[0-9]+]] = bitcast float* %[[IALLOCA]] to <4 x float>* + ; CHECK-NEXT: store <4 x float> %in, <4 x float>* %[[ICAST]], align 4 + ; CHECK-NEXT: %[[IGEP:[0-9]+]] = getelementptr float, float* %[[IALLOCA]], i32 %idx + ; CHECK-NEXT: store float %[[ELOAD]], float* %[[IGEP]], align 4 + ; CHECK-NEXT: %[[ILOAD:[0-9]+]] = load <4 x float>, <4 x float>* %[[ICAST]], align 4 + %i.4 = insertelement <4 x float> %in, float %e.4, i32 %idx + ; CHECK-NEXT: ret <4 x float> %[[ILOAD]] + ret <4 x float> %i.4 +} diff --git a/test/Transforms/NaCl/constant-vector-rematerialization.ll b/test/Transforms/NaCl/constant-vector-rematerialization.ll new file mode 100644 index 000000000000..625c8c5817e5 --- /dev/null +++ b/test/Transforms/NaCl/constant-vector-rematerialization.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -backend-canonicalize -S | FileCheck %s + +; Test that constant vectors that were globalized get rematerialized properly. + +; The datalayout is needed to determine the alignment of the globals. 
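The @veci8 case below relies on the little-endian layout: four consecutive i8 initializer bytes form one i32 lane, low byte first (for instance, bytes 255, 255, 255, 0 give lane value 0x00FFFFFF). The helper below is a hand-worked illustration of that reinterpretation, not output taken from the pass.

    #include <cstdint>
    #include <cstring>

    // Reinterpret a 16-byte constant as <4 x i32> lanes on a little-endian
    // target; lane i is bytes 4*i .. 4*i+3, least significant byte first.
    uint32_t lane_of(const uint8_t (&bytes)[16], int i) {
      uint32_t v;
      std::memcpy(&v, &bytes[4 * i], sizeof v);
      return v;
    }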
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" + +@veci32 = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4] +@veci32zero = internal constant [4 x i32] zeroinitializer + +@veci8 = internal constant [16 x i8] [i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 0, i8 255, i8 255, i8 0, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255] +@veci8zero = internal constant [16 x i8] zeroinitializer + +define <4 x i32> @test_vec_i32() { + %bc = bitcast [4 x i32]* @veci32 to <4 x i32>* + %v = load <4 x i32>, <4 x i32>* %bc + ret <4 x i32> %v +} +; CHECK-LABEL: @test_vec_i32( +; CHECK-NEXT: ret <4 x i32> + +define <4 x i32> @test_vec_i32_zero() { + %bc = bitcast [4 x i32]* @veci32zero to <4 x i32>* + %v = load <4 x i32>, <4 x i32>* %bc + ret <4 x i32> %v +} +; CHECK-LABEL: @test_vec_i32_zero( +; CHECK-NEXT: ret <4 x i32> zeroinitializer + +define <4 x i32> @test_vec_i8() { + %bc = bitcast [16 x i8]* @veci8 to <4 x i32>* + %v = load <4 x i32>, <4 x i32>* %bc + ret <4 x i32> %v +} +; CHECK-LABEL: @test_vec_i8( +; CHECK-NEXT: ret <4 x i32> + +define <4 x i32> @test_vec_i8_zero() { + %bc = bitcast [16 x i8]* @veci8zero to <4 x i32>* + %v = load <4 x i32>, <4 x i32>* %bc + ret <4 x i32> %v +} +; CHECK-LABEL: @test_vec_i8_zero( +; CHECK-NEXT: ret <4 x i32> zeroinitializer diff --git a/test/Transforms/NaCl/expand-arith-with-overflow.ll b/test/Transforms/NaCl/expand-arith-with-overflow.ll new file mode 100644 index 000000000000..228e6646da9a --- /dev/null +++ b/test/Transforms/NaCl/expand-arith-with-overflow.ll @@ -0,0 +1,299 @@ +; RUN: opt %s -expand-arith-with-overflow -expand-struct-regs -S | FileCheck %s +; RUN: opt %s -expand-arith-with-overflow -expand-struct-regs -S | \ +; RUN: FileCheck %s -check-prefix=CLEANUP + +declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) +declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8) +declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) +declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16) +declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) +declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) +declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) + +; CLEANUP-NOT: with.overflow +; CLEANUP-NOT: extractvalue +; CLEANUP-NOT: insertvalue + + +define void @umul32_by_zero(i32 %x, i32* %result_val, i1* %result_overflow) { + %pair = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 0) + %val = extractvalue {i32, i1} %pair, 0 + %overflow = extractvalue {i32, i1} %pair, 1 + + store i32 %val, i32* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; Make sure it doesn't segfault because of a division by zero. 
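The division mentioned above comes from how the expansion recovers the overflow bit when the multiplier is not a constant: it multiplies, divides the product back by the multiplier, and compares with the original operand, forcing the divisor to 1 when the multiplier is zero so the expansion itself never divides by zero (constant multipliers instead use a precomputed bound, as the later tests check). A scalar model of the variable case, with illustrative names:

    #include <cstdint>

    struct UMulResult { uint64_t val; bool overflow; };

    // Model of the expansion of {i64, i1} @llvm.umul.with.overflow.i64 for a
    // variable multiplier, mirroring %pair.arith / %pair.iszero / %pair.denom
    // / %pair.div / %pair.same / %pair.overflow in @umul64_by_var below.
    UMulResult umul64_with_overflow(uint64_t x, uint64_t y) {
      uint64_t product = x * y;               // wraps on overflow
      bool is_zero = (y == 0);
      uint64_t denom = is_zero ? 1 : y;       // avoid dividing by zero
      bool differs = (product / denom) != x;  // wrapped iff the quotient moved
      return { product, is_zero ? false : differs };
    }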
+; CHECK: define void @umul32_by_zero( +; CHECK-NEXT: %pair.arith = mul i32 %x, 0 +; CHECK-NEXT: store i32 %pair.arith, i32* %result_val +; CHECK-NEXT: store i1 false, i1* %result_overflow + + +define void @umul32_by_const(i32 %x, i32* %result_val, i1* %result_overflow) { + %pair = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 256) + %val = extractvalue {i32, i1} %pair, 0 + %overflow = extractvalue {i32, i1} %pair, 1 + + store i32 %val, i32* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; The bound is 16777215 == 0xffffff == ((1 << 32) - 1) / 256 +; CHECK: define void @umul32_by_const( +; CHECK-NEXT: %pair.arith = mul i32 %x, 256 +; CHECK-NEXT: %pair.overflow = icmp ugt i32 %x, 16777215 +; CHECK-NEXT: store i32 %pair.arith, i32* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +; Check that the pass can expand multiple uses of the same intrinsic. +define void @umul32_by_const2(i32 %x, i32* %result_val, i1* %result_overflow) { + %pair = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 65536) + %val = extractvalue {i32, i1} %pair, 0 + ; Check that the pass can expand multiple uses of %pair. + %overflow1 = extractvalue {i32, i1} %pair, 1 + %overflow2 = extractvalue {i32, i1} %pair, 1 + + store i32 %val, i32* %result_val + store i1 %overflow1, i1* %result_overflow + store i1 %overflow2, i1* %result_overflow + ret void +} +; CHECK: define void @umul32_by_const2( +; CHECK-NEXT: %pair.arith = mul i32 %x, 65536 +; CHECK-NEXT: %pair.overflow = icmp ugt i32 %x, 65535 +; CHECK-NEXT: store i32 %pair.arith, i32* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +define void @umul64_by_const(i64 %x, i64* %result_val, i1* %result_overflow) { + ; Multiply by 1 << 55. 
+ %pair = call {i64, i1} @llvm.umul.with.overflow.i64(i64 36028797018963968, i64 %x) + %val = extractvalue {i64, i1} %pair, 0 + %overflow = extractvalue {i64, i1} %pair, 1 + + store i64 %val, i64* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @umul64_by_const( +; CHECK-NEXT: %pair.arith = mul i64 36028797018963968, %x +; CHECK-NEXT: %pair.overflow = icmp ugt i64 %x, 511 +; CHECK-NEXT: store i64 %pair.arith, i64* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +define void @umul64_by_var(i64 %x, i64 %y, i64* %result_val, i1* %result_overflow) { + %pair = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %x, i64 %y) + %val = extractvalue {i64, i1} %pair, 0 + %overflow = extractvalue {i64, i1} %pair, 1 + + store i64 %val, i64* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @umul64_by_var( +; CHECK-NEXT: %pair.arith = mul i64 %x, %y +; CHECK-NEXT: %pair.iszero = icmp eq i64 %y, 0 +; CHECK-NEXT: %pair.denom = select i1 %pair.iszero, i64 1, i64 %y +; CHECK-NEXT: %pair.div = udiv i64 %pair.arith, %pair.denom +; CHECK-NEXT: %pair.same = icmp ne i64 %pair.div, %x +; CHECK-NEXT: %pair.overflow = select i1 %pair.iszero, i1 false, i1 %pair.same +; CHECK-NEXT: store i64 %pair.arith, i64* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +define void @smul64_by_var(i64 %x, i64 %y, i64* %result_val, i1* %result_overflow) { + %pair = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %x, i64 %y) + %val = extractvalue {i64, i1} %pair, 0 + %overflow = extractvalue {i64, i1} %pair, 1 + + store i64 %val, i64* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @smul64_by_var( +; CHECK-NEXT: %pair.arith = mul i64 %x, %y +; CHECK-NEXT: %pair.iszero = icmp eq i64 %y, 0 +; CHECK-NEXT: %pair.denom = select i1 %pair.iszero, i64 1, i64 %y +; CHECK-NEXT: %pair.div = sdiv i64 %pair.arith, %pair.denom +; CHECK-NEXT: %pair.same = icmp ne i64 %pair.div, %x +; CHECK-NEXT: %pair.overflow = select i1 %pair.iszero, i1 false, i1 %pair.same +; CHECK-NEXT: store i64 %pair.arith, i64* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +define void @uadd16_with_const(i16 %x, i16* %result_val, i1* %result_overflow) { + %pair = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 35) + %val = extractvalue {i16, i1} %pair, 0 + %overflow = extractvalue {i16, i1} %pair, 1 + + store i16 %val, i16* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @uadd16_with_const( +; CHECK-NEXT: %pair.arith = add i16 %x, 35 +; CHECK-NEXT: %pair.overflow = icmp ugt i16 %x, -36 +; CHECK-NEXT: store i16 %pair.arith, i16* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +define void @uadd16_with_var(i16 %x, i16 %y, i16* %result_val, i1* %result_overflow) { + %pair = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y) + %val = extractvalue {i16, i1} %pair, 0 + %overflow = extractvalue {i16, i1} %pair, 1 + + store i16 %val, i16* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @uadd16_with_var( +; CHECK-NEXT: %pair.arith = add i16 %x, %y +; CHECK-NEXT: %pair.overflow = icmp ult i16 %pair.arith, %x +; CHECK-NEXT: store i16 %pair.arith, i16* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + +define void @usub16_with_const(i16 %x, i16* %result_val, i1* %result_overflow) { + %pair = call {i16, i1} 
@llvm.usub.with.overflow.i16(i16 %x, i16 35) + %val = extractvalue {i16, i1} %pair, 0 + %overflow = extractvalue {i16, i1} %pair, 1 + + store i16 %val, i16* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @usub16_with_const( +; CHECK-NEXT: %pair.arith = sub i16 %x, 35 +; CHECK-NEXT: %pair.overflow = icmp ult i16 %x, 35 +; CHECK-NEXT: store i16 %pair.arith, i16* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +define void @usub16_with_var(i16 %x, i16 %y, i16* %result_val, i1* %result_overflow) { + %pair = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %x, i16 %y) + %val = extractvalue {i16, i1} %pair, 0 + %overflow = extractvalue {i16, i1} %pair, 1 + + store i16 %val, i16* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @usub16_with_var( +; CHECK-NEXT: %pair.arith = sub i16 %x, %y +; CHECK-NEXT: %pair.overflow = icmp ult i16 %x, %y +; CHECK-NEXT: store i16 %pair.arith, i16* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + +define void @sadd8_with_const(i8 %x, i8* %result_val, i1* %result_overflow) { + %pair = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 35) + %val = extractvalue {i8, i1} %pair, 0 + %overflow = extractvalue {i8, i1} %pair, 1 + + store i8 %val, i8* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @sadd8_with_const( +; CHECK-NEXT: %pair.arith = add i8 %x, 35 +; CHECK-NEXT: %pair.postemp = add i8 %x, -128 +; CHECK-NEXT: %pair.negtemp = add i8 %x, 127 +; CHECK-NEXT: %pair.poscheck = icmp slt i8 %pair.arith, %pair.postemp +; CHECK-NEXT: %pair.negcheck = icmp sgt i8 %pair.arith, %pair.negtemp +; CHECK-NEXT: %pair.ispos = icmp sge i8 %x, 0 +; CHECK-NEXT: %pair.select = select i1 %pair.ispos, i1 %pair.poscheck, i1 %pair.negcheck +; CHECK-NEXT: store i8 %pair.arith, i8* %result_val +; CHECK-NEXT: store i1 %pair.select, i1* %result_overflow + + +define void @sadd8_with_const_min(i8* %result_val, i1* %result_overflow) { + %pair = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 0, i8 -128) + %val = extractvalue {i8, i1} %pair, 0 + %overflow = extractvalue {i8, i1} %pair, 1 + + store i8 %val, i8* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @sadd8_with_const_min( +; CHECK-NEXT: store i8 -128, i8* %result_val +; CHECK-NEXT: store i1 false, i1* %result_overflow + + +define void @sadd8_with_var(i8 %x, i8 %y, i8* %result_val, i1* %result_overflow) { + %pair = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) + %val = extractvalue {i8, i1} %pair, 0 + %overflow = extractvalue {i8, i1} %pair, 1 + + store i8 %val, i8* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @sadd8_with_var( +; CHECK-NEXT: %pair.arith = add i8 %x, %y +; CHECK-NEXT: %pair.postemp = add i8 %x, -128 +; CHECK-NEXT: %pair.negtemp = add i8 %x, 127 +; CHECK-NEXT: %pair.poscheck = icmp slt i8 %pair.arith, %pair.postemp +; CHECK-NEXT: %pair.negcheck = icmp sgt i8 %pair.arith, %pair.negtemp +; CHECK-NEXT: %pair.ispos = icmp sge i8 %x, 0 +; CHECK-NEXT: %pair.select = select i1 %pair.ispos, i1 %pair.poscheck, i1 %pair.negcheck +; CHECK-NEXT: store i8 %pair.arith, i8* %result_val +; CHECK-NEXT: store i1 %pair.select, i1* %result_overflow + + +define void @ssub8_with_const(i8 %x, i8* %result_val, i1* %result_overflow) { + %pair = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %x, i8 35) + %val = extractvalue {i8, i1} %pair, 0 + %overflow = extractvalue {i8, 
i1} %pair, 1 + + store i8 %val, i8* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @ssub8_with_const( +; CHECK-NEXT: %pair.arith = sub i8 %x, 35 +; CHECK-NEXT: %pair.postemp = add i8 %x, -127 +; CHECK-NEXT: %pair.negtemp = add i8 %x, -128 +; CHECK-NEXT: %pair.poscheck = icmp slt i8 %pair.arith, %pair.postemp +; CHECK-NEXT: %pair.negcheck = icmp sgt i8 %pair.arith, %pair.negtemp +; CHECK-NEXT: %pair.ispos = icmp sge i8 %x, 0 +; CHECK-NEXT: %pair.select = select i1 %pair.ispos, i1 %pair.poscheck, i1 %pair.negcheck +; CHECK-NEXT: store i8 %pair.arith, i8* %result_val +; CHECK-NEXT: store i1 %pair.select, i1* %result_overflow + + +define void @ssub8_with_const_min(i8* %result_val, i1* %result_overflow) { + %pair = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 0, i8 -128) + %val = extractvalue {i8, i1} %pair, 0 + %overflow = extractvalue {i8, i1} %pair, 1 + + store i8 %val, i8* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @ssub8_with_const_min( +; CHECK: store i1 true, i1* %result_overflow + + +define void @ssub8_with_var(i8 %x, i8 %y, i8* %result_val, i1* %result_overflow) { + %pair = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) + %val = extractvalue {i8, i1} %pair, 0 + %overflow = extractvalue {i8, i1} %pair, 1 + + store i8 %val, i8* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @ssub8_with_var( +; CHECK-NEXT: %pair.arith = sub i8 %x, %y +; CHECK-NEXT: %pair.postemp = add i8 %x, -127 +; CHECK-NEXT: %pair.negtemp = add i8 %x, -128 +; CHECK-NEXT: %pair.poscheck = icmp slt i8 %pair.arith, %pair.postemp +; CHECK-NEXT: %pair.negcheck = icmp sgt i8 %pair.arith, %pair.negtemp +; CHECK-NEXT: %pair.ispos = icmp sge i8 %x, 0 +; CHECK-NEXT: %pair.select = select i1 %pair.ispos, i1 %pair.poscheck, i1 %pair.negcheck +; CHECK-NEXT: store i8 %pair.arith, i8* %result_val +; CHECK-NEXT: store i1 %pair.select, i1* %result_overflow diff --git a/test/Transforms/NaCl/expand-byval.ll b/test/Transforms/NaCl/expand-byval.ll new file mode 100644 index 000000000000..a526173ef0c3 --- /dev/null +++ b/test/Transforms/NaCl/expand-byval.ll @@ -0,0 +1,122 @@ +; RUN: opt -expand-byval %s -S | FileCheck %s + +target datalayout = "p:32:32:32" + +%MyStruct = type { i32, i8, i32 } +%AlignedStruct = type { double, double } + + +; Removal of "byval" attribute for passing structs arguments by value + +declare void @ext_func(%MyStruct*) + +define void @byval_receiver(%MyStruct* byval align 32 %ptr) { + call void @ext_func(%MyStruct* %ptr) + ret void +} +; Strip the "byval" and "align" attributes. 
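In rough terms the pass turns byval argument passing into an explicit caller-side copy: the callee keeps taking a pointer (now marked noalias), and each call site allocates a suitably aligned temporary, memcpys the struct into it, and passes that copy, bracketed by lifetime markers. Below is a C++ analogue of the rewritten @byval_caller further down, with illustrative names and assuming the usual 12-byte, 4-byte-aligned layout of %MyStruct.

    #include <cstring>

    struct MyStruct { int a; char b; int c; };  // i32, i8, i32: 12 bytes, align 4

    void ext_byval_func(MyStruct *arg);         // byval/align attributes dropped

    void byval_caller(MyStruct *ptr) {
      MyStruct byval_copy;                      // the alloca added at the call site
      std::memcpy(&byval_copy, ptr, sizeof byval_copy);  // llvm.memcpy of 12 bytes
      ext_byval_func(&byval_copy);              // callee sees a noalias pointer
    }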
+; CHECK: define void @byval_receiver(%MyStruct* noalias %ptr) { +; CHECK-NEXT: call void @ext_func(%MyStruct* %ptr) + + +declare void @ext_byval_func(%MyStruct* byval) +; CHECK: declare void @ext_byval_func(%MyStruct* noalias) + +define void @byval_caller(%MyStruct* %ptr) { + call void @ext_byval_func(%MyStruct* byval %ptr) + ret void +} +; CHECK: define void @byval_caller(%MyStruct* %ptr) { +; CHECK-NEXT: %ptr.byval_copy = alloca %MyStruct, align 4 +; CHECK: call void @llvm.lifetime.start(i64 12, i8* %{{.*}}) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 12, i32 4, i1 false) +; CHECK-NEXT: call void @ext_byval_func(%MyStruct* noalias %ptr.byval_copy) + + +define void @byval_tail_caller(%MyStruct* %ptr) { + tail call void @ext_byval_func(%MyStruct* byval %ptr) + ret void +} +; CHECK: define void @byval_tail_caller(%MyStruct* %ptr) { +; CHECK: {{^}} call void @ext_byval_func(%MyStruct* noalias %ptr.byval_copy) + + +define void @byval_invoke(%MyStruct* %ptr) { + invoke void @ext_byval_func(%MyStruct* byval align 32 %ptr) + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret void +} +; CHECK: define void @byval_invoke(%MyStruct* %ptr) { +; CHECK: %ptr.byval_copy = alloca %MyStruct, align 32 +; CHECK: call void @llvm.lifetime.start(i64 12, i8* %{{.*}}) +; CHECK: invoke void @ext_byval_func(%MyStruct* noalias %ptr.byval_copy) +; CHECK: cont: +; CHECK: call void @llvm.lifetime.end(i64 12, i8* %{{.*}}) +; CHECK: lpad: +; CHECK: call void @llvm.lifetime.end(i64 12, i8* %{{.*}}) + + +; Check handling of alignment + +; Check that "align" is stripped for declarations too. +declare void @ext_byval_func_align(%MyStruct* byval align 32) +; CHECK: declare void @ext_byval_func_align(%MyStruct* noalias) + +define void @byval_caller_align_via_attr(%MyStruct* %ptr) { + call void @ext_byval_func(%MyStruct* byval align 32 %ptr) + ret void +} +; CHECK: define void @byval_caller_align_via_attr(%MyStruct* %ptr) { +; CHECK-NEXT: %ptr.byval_copy = alloca %MyStruct, align 32 +; The memcpy may assume that %ptr is 32-byte-aligned. +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 12, i32 32, i1 false) + +declare void @ext_byval_func_align_via_type(%AlignedStruct* byval) + +; %AlignedStruct contains a double so requires an alignment of 8 bytes. 
+; Looking at the alignment of %AlignedStruct is a workaround for a bug +; in pnacl-clang: +; https://code.google.com/p/nativeclient/issues/detail?id=3403 +define void @byval_caller_align_via_type(%AlignedStruct* %ptr) { + call void @ext_byval_func_align_via_type(%AlignedStruct* byval %ptr) + ret void +} +; CHECK: define void @byval_caller_align_via_type(%AlignedStruct* %ptr) { +; CHECK-NEXT: %ptr.byval_copy = alloca %AlignedStruct, align 8 +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 16, i32 8, i1 false) + + +; Removal of "sret" attribute for returning structs by value + +declare void @ext_sret_func(%MyStruct* sret align 32) +; CHECK: declare void @ext_sret_func(%MyStruct*) + +define void @sret_func(%MyStruct* sret align 32 %buf) { + ret void +} +; CHECK: define void @sret_func(%MyStruct* %buf) { + +define void @sret_caller(%MyStruct* %buf) { + call void @ext_sret_func(%MyStruct* sret align 32 %buf) + ret void +} +; CHECK: define void @sret_caller(%MyStruct* %buf) { +; CHECK-NEXT: call void @ext_sret_func(%MyStruct* %buf) + + +; Check that other attributes are preserved + +define void @inreg_attr(%MyStruct* inreg %ptr) { + ret void +} +; CHECK: define void @inreg_attr(%MyStruct* inreg %ptr) { + +declare void @func_attrs() #0 +; CHECK: declare void @func_attrs() #0 + +attributes #0 = { noreturn nounwind } +; CHECK: attributes #0 = { noreturn nounwind } diff --git a/test/Transforms/NaCl/expand-constantexpr.ll b/test/Transforms/NaCl/expand-constantexpr.ll new file mode 100644 index 000000000000..e8786d4cac7a --- /dev/null +++ b/test/Transforms/NaCl/expand-constantexpr.ll @@ -0,0 +1,109 @@ +; RUN: opt < %s -expand-constant-expr -S | FileCheck %s + +@global_var1 = global i32 123 +@global_var2 = global i32 123 + + +define i8* @constantexpr_bitcast() { + ret i8* bitcast (i32* @global_var1 to i8*) +} +; CHECK: @constantexpr_bitcast +; CHECK: %expanded = bitcast i32* @global_var1 to i8* +; CHECK: ret i8* %expanded + + +define i32 @constantexpr_nested() { + ret i32 add (i32 ptrtoint (i32* @global_var1 to i32), + i32 ptrtoint (i32* @global_var2 to i32)) +} +; CHECK: @constantexpr_nested +; CHECK: %expanded1 = ptrtoint i32* @global_var1 to i32 +; CHECK: %expanded2 = ptrtoint i32* @global_var2 to i32 +; CHECK: %expanded = add i32 %expanded1, %expanded2 +; CHECK: ret i32 %expanded + + +define i32 @constantexpr_nested2() { + ret i32 mul (i32 add (i32 ptrtoint (i32* @global_var1 to i32), + i32 ptrtoint (i32* @global_var2 to i32)), i32 2) +} +; CHECK: @constantexpr_nested2 +; CHECK: %expanded2 = ptrtoint i32* @global_var1 to i32 +; CHECK: %expanded3 = ptrtoint i32* @global_var2 to i32 +; CHECK: %expanded1 = add i32 %expanded2, %expanded3 +; CHECK: %expanded = mul i32 %expanded1, 2 +; CHECK: ret i32 %expanded + + +define i32 @constantexpr_phi() { +entry: + br label %label +label: + %result = phi i32 [ ptrtoint (i32* @global_var1 to i32), %entry ] + ret i32 %result +} +; CHECK: @constantexpr_phi +; CHECK: entry: +; CHECK: %expanded = ptrtoint i32* @global_var1 to i32 +; CHECK: br label %label +; CHECK: label: +; CHECK: %result = phi i32 [ %expanded, %entry ] + + +; This tests that ExpandConstantExpr correctly handles a PHI node that +; contains the same ConstantExpr twice. +; Using replaceAllUsesWith() is not correct on a PHI node when the +; new instruction has to be added to an incoming block. 
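The point of the comment above is that a PHI's expanded operand has to live in the corresponding incoming block, and identical ConstantExprs reaching the PHI from the same block must share one expansion, so a blanket replaceAllUsesWith() cannot be used. The following is only a rough sketch of that shape using the LLVM C++ API, not the pass's actual source:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include <map>
    #include <utility>
    using namespace llvm;

    static void expandPhiOperands(PHINode *Phi) {
      // Reuse one expanded instruction per (incoming block, constant) pair so
      // repeated entries from the same predecessor stay identical.
      std::map<std::pair<BasicBlock *, Constant *>, Instruction *> Expanded;
      for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
        auto *CE = dyn_cast<ConstantExpr>(Phi->getIncomingValue(I));
        if (!CE)
          continue;
        BasicBlock *Pred = Phi->getIncomingBlock(I);
        Instruction *&Slot = Expanded[{Pred, CE}];
        if (!Slot) {
          Slot = CE->getAsInstruction();
          Slot->insertBefore(Pred->getTerminator());  // land in the incoming block
        }
        Phi->setIncomingValue(I, Slot);   // update only this slot, no RAUW
      }
    }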
+define i32 @constantexpr_phi_twice(i1 %arg) { + br i1 %arg, label %iftrue, label %iffalse +iftrue: + br label %exit +iffalse: + br label %exit +exit: + %result = phi i32 [ ptrtoint (i32* @global_var1 to i32), %iftrue ], + [ ptrtoint (i32* @global_var1 to i32), %iffalse ] + ret i32 %result +} +; CHECK: @constantexpr_phi_twice +; CHECK: iftrue: +; CHECK: %expanded = ptrtoint i32* @global_var1 to i32 +; CHECK: iffalse: +; CHECK: %expanded1 = ptrtoint i32* @global_var1 to i32 +; CHECK: exit: + + +define i32 @constantexpr_phi_multiple_entry(i1 %arg) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i32 [ ptrtoint (i32* @global_var1 to i32), %entry ], + [ ptrtoint (i32* @global_var1 to i32), %entry ] + ret i32 %result +} +; CHECK: @constantexpr_phi_multiple_entry +; CHECK: entry: +; CHECK: %expanded = ptrtoint i32* @global_var1 to i32 +; CHECK: br i1 %arg, label %done, label %done +; CHECK: done: +; CHECK: %result = phi i32 [ %expanded, %entry ], [ %expanded, %entry ] + + + +declare void @external_func() +declare void @personality_func() + +define void @test_landingpad() { + invoke void @external_func() to label %ok unwind label %onerror +ok: + ret void +onerror: + %lp = landingpad i32 + personality i8* bitcast (void ()* @personality_func to i8*) + catch i32* null + ret void +} +; landingpad can only accept a ConstantExpr, so this should remain +; unmodified. +; CHECK: @test_landingpad +; CHECK: personality i8* bitcast (void ()* @personality_func to i8*) diff --git a/test/Transforms/NaCl/expand-ctors-empty.ll b/test/Transforms/NaCl/expand-ctors-empty.ll new file mode 100644 index 000000000000..f0788a0873e4 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-empty.ll @@ -0,0 +1,12 @@ +; Currently we do not define __{init,fini}_array_end as named aliases. +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS +; NO_CTORS-NOT: __init_array_end +; NO_CTORS-NOT: __fini_array_end + +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s + +; If llvm.global_ctors is not present, it is treated as if it is an +; empty array, and __{init,fini}_array_start are defined anyway. + +; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer diff --git a/test/Transforms/NaCl/expand-ctors-emptylist.ll b/test/Transforms/NaCl/expand-ctors-emptylist.ll new file mode 100644 index 000000000000..6ab68852b9d3 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-emptylist.ll @@ -0,0 +1,13 @@ +; RUN: opt %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS +; NO_CTORS-NOT: __init_array_end +; NO_CTORS-NOT: __fini_array_end +; NO_CTORS-NOT: llvm.global_ctors + +; RUN: opt %s -nacl-expand-ctors -S | FileCheck %s + +; Check that the pass works when the initializer is "[]", which gets +; converted into "undef" by the reader. +@llvm.global_ctors = appending global [0 x { i32, void ()* }] [] + +; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer diff --git a/test/Transforms/NaCl/expand-ctors-zeroinit.ll b/test/Transforms/NaCl/expand-ctors-zeroinit.ll new file mode 100644 index 000000000000..824b2b23b72d --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-zeroinit.ll @@ -0,0 +1,17 @@ +; Currently we do not define __{init,fini}_array_end as named aliases. 
+; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS +; NO_CTORS-NOT: __init_array_end +; NO_CTORS-NOT: __fini_array_end + +; We expect this symbol to be removed: +; RUN: opt < %s -nacl-expand-ctors -S | not grep llvm.global_ctors + +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s + +; If llvm.global_ctors is zeroinitializer, it should be treated the +; same as an empty array. + +@llvm.global_ctors = appending global [0 x { i32, void ()* }] zeroinitializer + +; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer diff --git a/test/Transforms/NaCl/expand-ctors.ll b/test/Transforms/NaCl/expand-ctors.ll new file mode 100644 index 000000000000..89aeda00a663 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors.ll @@ -0,0 +1,37 @@ +; We expect these symbol names to be removed: +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS +; NO_CTORS-NOT: llvm.global.ctors +; NO_CTORS-NOT: __init_array_end +; NO_CTORS-NOT: __fini_array_end + +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s + +@llvm.global_ctors = appending global [3 x { i32, void ()* }] + [{ i32, void ()* } { i32 300, void ()* @init_func_A }, + { i32, void ()* } { i32 100, void ()* @init_func_B }, + { i32, void ()* } { i32 200, void ()* @init_func_C }] + +@__init_array_start = extern_weak global [0 x void ()*] +@__init_array_end = extern_weak global [0 x void ()*] + +; CHECK: @__init_array_start = internal constant [3 x void ()*] [void ()* @init_func_B, void ()* @init_func_C, void ()* @init_func_A] +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer + +define void @init_func_A() { ret void } +define void @init_func_B() { ret void } +define void @init_func_C() { ret void } + +define [0 x void ()*]* @get_array_start() { + ret [0 x void ()*]* @__init_array_start; +} +; CHECK: @get_array_start() +; CHECK: ret {{.*}} @__init_array_start + +define [0 x void ()*]* @get_array_end() { + ret [0 x void ()*]* @__init_array_end; +} + +; @get_array_end() is converted to use a GetElementPtr that returns +; the end of the generated array: +; CHECK: @get_array_end() +; CHECK: ret {{.*}} bitcast ([3 x void ()*]* getelementptr inbounds ([3 x void ()*], [3 x void ()*]* @__init_array_start, i32 1) diff --git a/test/Transforms/NaCl/expand-getelementptr.ll b/test/Transforms/NaCl/expand-getelementptr.ll new file mode 100644 index 000000000000..cb849f8e0144 --- /dev/null +++ b/test/Transforms/NaCl/expand-getelementptr.ll @@ -0,0 +1,123 @@ +; RUN: opt < %s -expand-getelementptr -S | FileCheck %s + +target datalayout = "p:32:32:32" + +%MyStruct = type { i8, i32, i8 } +%MyArray = type { [100 x i64] } +%MyArrayOneByte = type { [100 x i8] } + + +; Test indexing struct field +define i8* @test_struct_field(%MyStruct* %ptr) { + %addr = getelementptr %MyStruct, %MyStruct* %ptr, i32 0, i32 2 + ret i8* %addr +} +; CHECK: @test_struct_field +; CHECK-NEXT: %gep_int = ptrtoint %MyStruct* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 8 +; CHECK-NEXT: %addr = inttoptr i32 %gep to i8* +; CHECK-NEXT: ret i8* %addr + + +; Test non-constant index into an array +define i64* @test_array_index(%MyArray* %ptr, i32 %index) { + %addr = getelementptr %MyArray, %MyArray* %ptr, i32 0, i32 0, i32 %index + ret i64* %addr +} +; CHECK: @test_array_index +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep_array = mul i32 %index, 8 +; CHECK-NEXT: %gep = add i32 %gep_int, 
%gep_array +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test constant index into an array (as a pointer) +define %MyStruct* @test_ptr_add(%MyStruct* %ptr) { + %addr = getelementptr %MyStruct, %MyStruct* %ptr, i32 2 + ret %MyStruct* %addr +} +; CHECK: @test_ptr_add +; CHECK-NEXT: %gep_int = ptrtoint %MyStruct* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 24 +; CHECK-NEXT: %addr = inttoptr i32 %gep to %MyStruct* +; CHECK-NEXT: ret %MyStruct* %addr + + +; Test that additions and multiplications are combined properly +define i64* @test_add_and_index(%MyArray* %ptr, i32 %index) { + %addr = getelementptr %MyArray, %MyArray* %ptr, i32 1, i32 0, i32 %index + ret i64* %addr +} +; CHECK: @test_add_and_index +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 800 +; CHECK-NEXT: %gep_array = mul i32 %index, 8 +; CHECK-NEXT: %gep1 = add i32 %gep, %gep_array +; CHECK-NEXT: %addr = inttoptr i32 %gep1 to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test that we don't multiply by 1 unnecessarily +define i8* @test_add_and_index_one_byte(%MyArrayOneByte* %ptr, i32 %index) { + %addr = getelementptr %MyArrayOneByte, %MyArrayOneByte* %ptr, i32 1, i32 0, i32 %index + ret i8* %addr +} +; CHECK: @test_add_and_index +; CHECK-NEXT: %gep_int = ptrtoint %MyArrayOneByte* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 100 +; CHECK-NEXT: %gep1 = add i32 %gep, %index +; CHECK-NEXT: %addr = inttoptr i32 %gep1 to i8* +; CHECK-NEXT: ret i8* %addr + + +; Test >32-bit array index +define i64* @test_array_index64(%MyArray* %ptr, i64 %index) { + %addr = getelementptr %MyArray, %MyArray* %ptr, i32 0, i32 0, i64 %index + ret i64* %addr +} +; CHECK: @test_array_index64 +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep_trunc = trunc i64 %index to i32 +; CHECK-NEXT: %gep_array = mul i32 %gep_trunc, 8 +; CHECK-NEXT: %gep = add i32 %gep_int, %gep_array +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test <32-bit array index +define i64* @test_array_index16(%MyArray* %ptr, i16 %index) { + %addr = getelementptr %MyArray, %MyArray* %ptr, i32 0, i32 0, i16 %index + ret i64* %addr +} +; CHECK: @test_array_index16 +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep_sext = sext i16 %index to i32 +; CHECK-NEXT: %gep_array = mul i32 %gep_sext, 8 +; CHECK-NEXT: %gep = add i32 %gep_int, %gep_array +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test >32-bit constant array index +define i64* @test_array_index64_const(%MyArray* %ptr) { + %addr = getelementptr %MyArray, %MyArray* %ptr, i32 0, i32 0, i64 100 + ret i64* %addr +} +; CHECK: @test_array_index64_const +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 800 +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test <32-bit constant array index -- test sign extension +define i64* @test_array_index16_const(%MyArray* %ptr) { + %addr = getelementptr %MyArray, %MyArray* %ptr, i32 0, i32 0, i16 -100 + ret i64* %addr +} +; CHECK: @test_array_index16_const +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, -800 +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr diff --git a/test/Transforms/NaCl/expand-indirectbr.ll b/test/Transforms/NaCl/expand-indirectbr.ll new file mode 100644 index 000000000000..5ca53371700a --- /dev/null +++ 
b/test/Transforms/NaCl/expand-indirectbr.ll @@ -0,0 +1,62 @@ +; RUN: opt %s -expand-indirectbr -S | FileCheck %s + + +@addresses = global [2 x i8*] + [i8* blockaddress(@indirectbr_example, %label1), + i8* blockaddress(@indirectbr_example, %label2)] +; CHECK: @addresses = global [2 x i8*] [i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*)] + + +define i32 @indirectbr_example(i8* %addr) { + indirectbr i8* %addr, [label %label1, label %label2] +label1: + ret i32 100 +label2: + ret i32 200 +} +; CHECK: define i32 @indirectbr_example +; CHECK-NEXT: %indirectbr_cast = ptrtoint i8* %addr to i32 +; CHECK-NEXT: switch i32 %indirectbr_cast, label %indirectbr_default [ +; CHECK-NEXT: i32 1, label %label1 +; CHECK-NEXT: i32 2, label %label2 +; CHECK-NEXT: ] +; CHECK: indirectbr_default: +; CHECK-NEXT: unreachable + + +define i32 @label_appears_twice(i8* %addr) { +entry: + indirectbr i8* %addr, [label %label, label %label] +label: + %val = phi i32 [ 123, %entry ], [ 123, %entry ] + ret i32 %val +} +; CHECK: define i32 @label_appears_twice +; CHECK: switch i32 %indirectbr_cast, label %indirectbr_default [ +; CHECK-NEXT: i32 1, label %label +; CHECK-NEXT: ] +; CHECK: %val = phi i32 [ 123, %entry ] + + +define i8* @unused_blockaddress() { + ret i8* blockaddress (@unused_blockaddress, %dead_label) +dead_label: + ret i8* null +} +; CHECK: define i8* @unused_blockaddress +; CHECK-NEXT: ret i8* inttoptr (i32 -1 to i8*) + + +; Check that the label is given a consistent switch value across all +; indirectbr expansions. +define i32 @multiple_indirectbr(i8* %addr) { + indirectbr i8* %addr, [label %label] + indirectbr i8* %addr, [label %label] +label: + ret i32 100 +} +; CHECK: define i32 @multiple_indirectbr +; CHECK: switch i32 %indirectbr_cast{{[0-9]*}}, label %indirectbr_default [ +; CHECK-NEXT: i32 1, label %label +; CHECK: switch i32 %indirectbr_cast{{[0-9]*}}, label %indirectbr_default [ +; CHECK-NEXT: i32 1, label %label diff --git a/test/Transforms/NaCl/expand-integers.ll b/test/Transforms/NaCl/expand-integers.ll new file mode 100644 index 000000000000..d08483a3b35f --- /dev/null +++ b/test/Transforms/NaCl/expand-integers.ll @@ -0,0 +1,618 @@ +; RUN: opt < %s -nacl-expand-ints -S | FileCheck %s +; Test large integer expansion for operations required for large packed +; bitfields. 
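The expansion represents an illegal wide integer as an i64 low part plus a (possibly still illegal, recursively split) high part, and rewrites each load or store into one access per part through a bitcast and a getelementptr, as the CHECK lines below spell out. A scalar model of the i96 case, assuming the little-endian layout these tests target and using made-up names:

    #include <cstdint>
    #include <cstring>

    struct I96 { uint64_t lo; uint32_t hi; };   // 64-bit low part, 32-bit high part

    I96 load_i96(const void *p) {
      I96 v;
      std::memcpy(&v.lo, p, 8);                                  // %load.lo
      std::memcpy(&v.hi, static_cast<const char *>(p) + 8, 4);   // %load.hi
      return v;
    }

    void store_i96(void *p, I96 v) {
      std::memcpy(p, &v.lo, 8);                                  // store the low i64
      std::memcpy(static_cast<char *>(p) + 8, &v.hi, 4);         // store the high i32
    }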
+ +; CHECK-LABEL: @simpleload +define void @simpleload(i32* %a) { +; CHECK: %a96.loty = bitcast i96* %a96 to i64* +; CHECK-NEXT: %load.lo = load i64, i64* %a96.loty +; CHECK-NEXT: %a96.hi.gep = getelementptr i64, i64* %a96.loty, i32 1 +; CHECK-NEXT: %a96.hity = bitcast i64* %a96.hi.gep to i32* +; CHECK-NEXT: %load.hi = load i32, i32* %a96.hity + %a96 = bitcast i32* %a to i96* + %load = load i96, i96* %a96 + +; CHECK: %a128.loty = bitcast i128* %a128 to i64* +; CHECK-NEXT: %load128.lo = load i64, i64* %a128.loty +; CHECK-NEXT: %a128.hi.gep = getelementptr i64, i64* %a128.loty, i32 1 +; CHECK-NEXT: %load128.hi = load i64, i64* %a128.hi.gep + %a128 = bitcast i32* %a to i128* + %load128 = load i128, i128* %a128 + +; CHECK: %a256.loty = bitcast i256* %a256 to i64* +; CHECK-NEXT: %load256.lo = load i64, i64* %a256.loty +; CHECK-NEXT: %a256.hi.gep = getelementptr i64, i64* %a256.loty, i32 1 +; CHECK-NEXT: %a256.hity = bitcast i64* %a256.hi.gep to i192* +; intermediate expansion: %load256.hi = load i192, i192* %a256.hity +; CHECK-NEXT: %a256.hity.loty = bitcast i192* %a256.hity to i64* +; CHECK-NEXT: %load256.hi.lo = load i64, i64* %a256.hity.loty +; CHECK-NEXT: %a256.hity.hi.gep = getelementptr i64, i64* %a256.hity.loty, i32 1 +; CHECK-NEXT: %a256.hity.hity = bitcast i64* %a256.hity.hi.gep to i128* +; intermediate expansion: %load256.hi.hi = load i128, i128* %a256.hity.hity +; CHECK-NEXT: %a256.hity.hity.loty = bitcast i128* %a256.hity.hity to i64* +; CHECK-NEXT: %load256.hi.hi.lo = load i64, i64* %a256.hity.hity.loty +; CHECK-NEXT: %a256.hity.hity.hi.gep = getelementptr i64, i64* %a256.hity.hity.loty, i32 1 +; CHECK-NEXT: %load256.hi.hi.hi = load i64, i64* %a256.hity.hity.hi.gep + %a256 = bitcast i32* %a to i256* + %load256 = load i256, i256* %a256 + ret void +} + +; CHECK-LABEL: @loadalign +define void @loadalign(i32* %a) { + %a96 = bitcast i32* %a to i96* + +; CHECK: %load.lo = load{{.*}}, align 16 +; CHECK: %load.hi = load{{.*}}, align 8 + %load = load i96, i96* %a96, align 16 + +; CHECK: %loadnoalign.lo = load{{.*}}, align 8 +; CHECK: %loadnoalign.hi = load{{.*}}, align 8 + %loadnoalign = load i96, i96* %a96 + +; CHECK: %load4.lo = load{{.*}}, align 4 +; CHECK: %load4.hi = load{{.*}}, align 4 + %load4 = load i96, i96* %a96, align 4 + + %a256 = bitcast i32* %a to i256* +; CHECK: %load256.lo = load{{.*}}, align 16 +; CHECK: %load256.hi.lo = load{{.*}}, align 8 +; CHECK: %load256.hi.hi.lo = load{{.*}}, align 8 +; CHECK: %load256.hi.hi.hi = load{{.*}}, align 8 + %load256 = load i256, i256* %a256, align 16 + ret void +} + +; CHECK-LABEL: @simplestore +define void @simplestore(i32* %a, i32* %b) { + %a96 = bitcast i32* %a to i96* + %b96 = bitcast i32* %b to i96* + %load96 = load i96, i96* %a96 +; CHECK: %b96.loty = bitcast i96* %b96 to i64* +; CHECK-NEXT: store i64 %load96.lo, i64* %b96.loty +; CHECK-NEXT: %b96.hi.gep = getelementptr i64, i64* %b96.loty, i32 1 +; CHECK-NEXT: %b96.hity = bitcast i64* %b96.hi.gep to i32* +; CHECK-NEXT: store i32 %load96.hi, i32* %b96.hity + store i96 %load96, i96* %b96 + + %a128 = bitcast i32* %a to i128* + %b128 = bitcast i32* %b to i128* + %load128 = load i128, i128* %a128 +; CHECK: %b128.loty = bitcast i128* %b128 to i64* +; CHECK-NEXT: store i64 %load128.lo, i64* %b128.loty +; CHECK-NEXT: %b128.hi.gep = getelementptr i64, i64* %b128.loty, i32 1 +; CHECK-NEXT: store i64 %load128.hi, i64* %b128.hi.gep + store i128 %load128, i128* %b128 + + %a256 = bitcast i32* %a to i256* + %b256 = bitcast i32* %b to i256* + %load256 = load i256, i256* %a256 + +; CHECK: %b256.loty 
= bitcast i256* %b256 to i64* +; CHECK-NEXT: store i64 %load256.lo, i64* %b256.loty +; CHECK-NEXT: %b256.hi.gep = getelementptr i64, i64* %b256.loty, i32 1 +; CHECK-NEXT: %b256.hity = bitcast i64* %b256.hi.gep to i192* +; CHECK-NEXT: %b256.hity.loty = bitcast i192* %b256.hity to i64* +; CHECK-NEXT: store i64 %load256.hi.lo, i64* %b256.hity.loty +; CHECK-NEXT: %b256.hity.hi.gep = getelementptr i64, i64* %b256.hity.loty, i32 1 +; CHECK-NEXT: %b256.hity.hity = bitcast i64* %b256.hity.hi.gep to i128* +; CHECK-NEXT: %b256.hity.hity.loty = bitcast i128* %b256.hity.hity to i64* +; CHECK-NEXT: store i64 %load256.hi.hi.lo, i64* %b256.hity.hity.loty +; CHECK-NEXT: %b256.hity.hity.hi.gep = getelementptr i64, i64* %b256.hity.hity.loty, i32 1 +; CHECK-NEXT: store i64 %load256.hi.hi.hi, i64* %b256.hity.hity.hi.gep + store i256 %load256, i256* %b256 + ret void +} + +; CHECK-LABEL: @storealign +define void @storealign(i32* %a, i32* %b) { + %a96 = bitcast i32* %a to i96* + %b96 = bitcast i32* %b to i96* + %load96 = load i96, i96* %a96 + +; CHECK: store i64 %load96.lo{{.*}}, align 16 +; CHECK: store i32 %load96.hi{{.*}}, align 8 + store i96 %load96, i96* %b96, align 16 + +; CHECK: store i64 %load96.lo{{.*}}, align 8 +; CHECK: store i32 %load96.hi{{.*}}, align 8 + store i96 %load96, i96* %b96 + +; CHECK: store i64 %load96.lo{{.*}}, align 4 +; CHECK: store i32 %load96.hi{{.*}}, align 4 + store i96 %load96, i96* %b96, align 4 + + %a256 = bitcast i32* %a to i256* + %b256 = bitcast i32* %b to i256* + %load256 = load i256, i256* %a256 +; CHECK: store i64 %load256.lo{{.*}}, align 16 +; CHECK: store i64 %load256.hi.lo{{.*}}, align 8 +; CHECK: store i64 %load256.hi.hi.lo{{.*}}, align 8 +; CHECK: store i64 %load256.hi.hi.hi{{.*}}, align 8 + store i256 %load256, i256* %b256, align 16 + ret void +} + + +; Check that forward references are handled. +; CHECK-LABEL: @fwdref +define void @fwdref(i32* %a, i32* %b) { +entry: + br label %block1 +block2: + %b96 = bitcast i32* %b to i96* +; CHECK: store i64 %load96.lo +; CHECK: store i32 %load96.hi + store i96 %load96, i96* %b96 + ret void +block1: + %a96 = bitcast i32* %a to i96* +; CHECK: load i64, i64* %a96.loty +; CHECK: load i32, i32* %a96.hity + %load96 = load i96, i96* %a96 + br label %block2 +} + +; The subsequent tests use loads and stores to produce and consume the expanded +; values from the opcodes under test. 
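For the shift checks near the end of this file (@shls), the two parts are recombined by hand: the low part is shifted, and the bits it sheds are truncated and OR'd into the shifted high part. The model below covers shift amounts smaller than the width of the 32-bit high part (like the shl-by-5 case); larger shifts drop the old high bits entirely, as the shl-by-35 checks show. Names are illustrative.

    #include <cstdint>

    struct I96 { uint64_t lo; uint32_t hi; };

    // Shift a split i96 left by n, for 0 < n < 32: shift the low part, pull
    // the bits it sheds into the high part (the trunc + or in the CHECKs).
    I96 shl_i96(I96 v, unsigned n) {
      I96 r;
      r.lo = v.lo << n;                                // %b96.lo
      uint32_t carry = (uint32_t)(v.lo >> (64 - n));   // %b96.lo.shr + trunc
      r.hi = (v.hi << n) | carry;                      // %b96.hi.shl, %b96.or
      return r;
    }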
+; CHECK-LABEL: @zext +define void @zext(i32 %a, i64 %b, i8* %p) { + %p96 = bitcast i8* %p to i96* + %a96 = zext i32 %a to i96 +; CHECK: %a96.lo = zext i32 %a to i64 + store i96 %a96, i96* %p96 +; CHECK: store i64 %a96.lo, i64* %p96.loty +; CHECK: store i32 0, i32* %p96.hity + + %b96 = zext i64 %b to i96 +; CHECK: store i64 %b, i64* %p96.loty +; CHECK: store i32 0, i32* %p96.hity + store i96 %b96, i96* %p96 + + %p128 = bitcast i8* %p to i128* + %c96 = load i96, i96* %p96 +; CHECK: %a128.hi = zext i32 %c96.hi to i64 + %a128 = zext i96 %c96 to i128 +; CHECK: store i64 %c96.lo, i64* %p128.loty +; CHECK: store i64 %a128.hi, i64* %p128.hi.gep + store i128 %a128, i128* %p128 + + %p256 = bitcast i8* %p to i256* + +; CHECK: %b256.lo = zext i32 %a to i64 + %b256 = zext i32 %a to i256 +; CHECK: store i64 %b256.lo, i64* %p256.loty +; CHECK: store i64 0, i64* %p256.hity.loty +; CHECK: store i64 0, i64* %p256.hity.hity.loty +; CHECK: store i64 0, i64* %p256.hity.hity.hi.gep + store i256 %b256, i256* %p256 + +; CHECK: %c256.hi.lo = zext i32 %c96.hi to i64 + %c256 = zext i96 %c96 to i256 +; CHECK: store i64 %c96.lo, i64* %p256.loty +; CHECK: store i64 %c256.hi.lo, i64* %p256.hity9.loty +; CHECK: store i64 0, i64* %p256.hity9.hity.loty +; CHECK: store i64 0, i64* %p256.hity9.hity.hi.gep + store i256 %c256, i256* %p256 + ret void +} + + +; CHECK-LABEL: @bitwise +define void @bitwise(i32* %a) { + %a96p = bitcast i32* %a to i96* + %a96 = load i96, i96* %a96p + %b96 = load i96, i96* %a96p + +; CHECK: %c96.lo = and i64 %a96.lo, %b96.lo +; CHECK: %c96.hi = and i32 %a96.hi, %b96.hi + %c96 = and i96 %a96, %b96 +; CHECK: %d96.lo = or i64 %a96.lo, %c96.lo +; CHECK: %d96.hi = or i32 %a96.hi, %c96.hi + %d96 = or i96 %a96, %c96 + +; CHECK: %x96.lo = xor i64 %a96.lo, %c96.lo +; CHECK: %x96.hi = xor i32 %a96.hi, %c96.hi + %x96 = xor i96 %a96, %c96 + ret void +} + +; CHECK-LABEL: @truncs +define void @truncs(i32* %p) { + %p96 = bitcast i32* %p to i96* + %a96 = load i96, i96* %p96 + +; CHECK: %t32 = trunc i64 %a96.lo to i32 + %t32 = trunc i96 %a96 to i32 + + %b96 = load i96, i96* %p96 +; Check that t64 refers directly to the low loaded value from %p96 +; CHECK: %t64 = load i64, i64* %p96.loty + %t64 = trunc i96 %b96 to i64 + + %c96 = load i96, i96* %p96 +; Use the and to get a use of %t90.lo and check that it refers directly to +; %c96.lo +; CHECK: %t90.hi = trunc i32 %c96.hi to i26 +; CHECK: %a90.lo = and i64 %c96.lo, %c96.lo + %t90 = trunc i96 %c96 to i90 + %t90_2 = trunc i96 %c96 to i90 + %a90 = and i90 %t90, %t90_2 + ret void +} + +; CHECK-LABEL: @shls +define void @shls(i32* %p) { + %p96 = bitcast i32* %p to i96* + %a96 = load i96, i96* %p96 + %p128 = bitcast i32* %p to i128* + %a128 = load i128, i128* %p128 + %p192 = bitcast i32* %p to i192* + %a192 = load i192, i192* %p192 + +; CHECK: %b96.lo = shl i64 %a96.lo, 5 +; CHECK-NEXT: %b96.lo.shr = lshr i64 %a96.lo, 59 +; CHECK-NEXT: %b96.lo.ext = trunc i64 %b96.lo.shr to i32 +; CHECK-NEXT: %b96.hi.shl = shl i32 %a96.hi, 5 +; CHECK-NEXT: %b96.or = or i32 %b96.lo.ext, %b96.hi.shl + %b96 = shl i96 %a96, 5 + +; CHECK: %d96.lo = shl i64 %a96.lo, 35 +; CHECK-NEXT: %d96.lo.shr = lshr i64 %a96.lo, 29 +; CHECK-NEXT: %d96.lo.ext = trunc i64 %d96.lo.shr to i32 +; CHECK: store i64 %d96.lo, i64* %p96.loty1 +; CHECK: store i32 %d96.lo.ext, i32* %p96.hity + %d96 = shl i96 %a96, 35 + store i96 %d96, i96* %p96 + +; CHECK: %b128.lo = shl i64 %a128.lo, 35 +; CHECK-NEXT: %b128.lo.shr = lshr i64 %a128.lo, 29 +; CHECK-NEXT: %b128.hi.shl = shl i64 %a128.hi, 35 +; CHECK-NEXT: %b128.or = or i64 
%b128.lo.shr, %b128.hi.shl + %b128 = shl i128 %a128, 35 + +; CHECK: %c96.lo.ext = trunc i64 %a96.lo to i32 +; CHECK-NEXT: %c96.lo.shl = shl i32 %c96.lo.ext, 8 +; CHECK: store i64 0, i64* %p96.loty + %c96 = shl i96 %a96, 72 + store i96 %c96, i96* %p96 + +; CHECK: %c128.lo.shl = shl i64 %a128.lo, 36 +; CHECK: store i64 0, i64* %p128.loty + %c128 = shl i128 %a128, 100 + store i128 %c128, i128* %p128 + +; %b192.lo = shl i64 %a192.lo, 35 +; %b192.lo.shr = lshr i64 %a192.lo, 29 +; %b192.hi.shl.lo = shl i64 %a192.hi.lo, 35 +; %b192.hi.shl.lo.shr = lshr i64 %a192.hi.lo, 29 +; %b192.hi.shl.hi.shl = shl i64 %a192.hi.hi, 35 +; %b192.hi.shl.or = or i64 %b192.hi.shl.lo.shr, %b192.hi.shl.hi.shl +; %b192.or.lo = or i64 %b192.lo.shr, %b192.hi.shl.lo +; %b192.or.hi = or i64 0, %b192.hi.shl.or + %b192 = shl i192 %a192, 35 + store i192 %b192, i192* %p192 + +; %c192.lo.shl.lo = shl i64 %a192.lo, 36 +; %c192.lo.shl.lo.shr = lshr i64 %a192.lo, 28 +; %c192.hi.shl.lo.shl = shl i64 %a192.hi.lo, 36 +; %c192.or.lo = or i64 %c192.lo.shl.lo, 0 +; %c192.or.hi = or i64 %c192.lo.shl.lo.shr, %c192.hi.shl.lo.shl + %c192 = shl i192 %a192, 100 + store i192 %c192, i192* %p192 + + ret void +} + +; CHECK-LABEL: @lshrs +define void @lshrs(i32* %p) { + %p96 = bitcast i32* %p to i96* + %a96 = load i96, i96* %p96 + %p128 = bitcast i32* %p to i128* + %a128 = load i128, i128* %p128 + %p192 = bitcast i32* %p to i192* + %a192 = load i192, i192* %p192 + +; CHECK: %b96.hi.shr = lshr i32 %a96.hi, 3 +; CHECK-NEXT: %b96.lo.ext = zext i32 %b96.hi.shr to i64 +; CHECK: store i32 0, i32* %p96.hity + %b96 = lshr i96 %a96, 67 + store i96 %b96, i96* %p96 + +; CHECK: %c96.hi.ext = zext i32 %a96.hi to i64 +; CHECK-NEXT: %c96.hi.shl = shl i64 %c96.hi.ext, 19 +; CHECK-NEXT: %c96.lo.shr = lshr i64 %a96.lo, 45 +; CHECK-NEXT: %c96.lo = or i64 %c96.hi.shl, %c96.lo.shr +; CHECK: store i32 0, i32* %p96.hity + %c96 = lshr i96 %a96, 45 + store i96 %c96, i96* %p96 + +; CHECK: %b128.hi.shr = lshr i64 %a128.hi, 3 +; CHECK: store i64 0, i64* %p128.hi.gep + %b128 = lshr i128 %a128, 67 + store i128 %b128, i128* %p128 + +; CHECK: %d96.hi.ext = zext i32 %a96.hi to i64 +; CHECK-NEXT: %d96.hi.shl = shl i64 %d96.hi.ext, 47 +; CHECK-NEXT: %d96.lo.shr = lshr i64 %a96.lo, 17 +; CHECK-NEXT: %d96.lo = or i64 %d96.hi.shl, %d96.lo.shr +; CHECK-NEXT: %d96.hi = lshr i32 %a96.hi, 17 + %d96 = lshr i96 %a96, 17 + store i96 %d96, i96* %p96 + +; CHECK: %c128.hi.shl = shl i64 %a128.hi, 21 +; CHECK-NEXT: %c128.lo.shr = lshr i64 %a128.lo, 43 +; CHECK-NEXT: %c128.lo = or i64 %c128.hi.shl, %c128.lo.shr +; CHECK-NEXT: %c128.hi = lshr i64 %a128.hi, 43 + %c128 = lshr i128 %a128, 43 + store i128 %c128, i128* %p128 + + %b192 = lshr i192 %a192, 100 + store i192 %b192, i192* %p192 + + ret void +} + +; Make sure that the following doesn't assert out: it generates intermediate +; `trunc` instructions which get progressively smaller and smaller as the +; instructions are cut down. The final bitcode doesn't contain a `trunc` +; instruction. 
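+; (Roughly speaking, 161 = 2 * 64 + 33, so once the i536 has been cut all the
+; way down to i64 words, each result word is just an `or` of one source word
+; shifted right by 33 with its neighbour shifted left by 31, and none of the
+; intermediate `trunc`s of illegal types needs to survive into the output.)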
+; +; CHECK-LABEL: @lshr_big +define void @lshr_big(i32* %a) { + %p536 = bitcast i32* %a to i536* + %loaded = load i536, i536* %p536, align 4 + %shifted = lshr i536 %loaded, 161 + store i536 %shifted, i536* %p536 + ret void +} + +; CHECK-LABEL: @ashrs +define void @ashrs(i32* %p) { + %p96 = bitcast i32* %p to i96* + %a96 = load i96, i96* %p96 + %p128 = bitcast i32* %p to i128* + %a128 = load i128, i128* %p128 + +; CHECK: %b96.hi.shr = ashr i32 %a96.hi, 3 +; CHECK-NEXT: %b96.lo.ext = sext i32 %b96.hi.shr to i64 +; CHECK-NEXT: %b96.hi = ashr i32 %a96.hi, 31 + %b96 = ashr i96 %a96, 67 + store i96 %b96, i96* %p96 + +; CHECK: %c96.hi.ext = sext i32 %a96.hi to i64 +; CHECK-NEXT: %c96.hi.shl = shl i64 %c96.hi.ext, 19 +; CHECK-NEXT: %c96.lo.shr = lshr i64 %a96.lo, 45 +; CHECK-NEXT: %c96.lo = or i64 %c96.hi.shl, %c96.lo.shr +; CHECK-NEXT: %c96.hi = ashr i32 %a96.hi, 31 + %c96 = ashr i96 %a96, 45 + store i96 %c96, i96* %p96 + +; CHECK: %b128.hi.shr = ashr i64 %a128.hi, 3 +; CHECK-NEXT: %b128.hi = ashr i64 %a128.hi, 63 +; CHECK: store i64 %b128.hi, i64* %p128.hi.gep + %b128 = ashr i128 %a128, 67 + store i128 %b128, i128* %p128 + +; CHECK: %d96.hi.ext = sext i32 %a96.hi to i64 +; CHECK-NEXT: %d96.hi.shl = shl i64 %d96.hi.ext, 47 +; CHECK-NEXT: %d96.lo.shr = lshr i64 %a96.lo, 17 +; CHECK-NEXT: %d96.lo = or i64 %d96.hi.shl, %d96.lo.shr +; CHECK-NEXT: %d96.hi = ashr i32 %a96.hi, 17 + %d96 = ashr i96 %a96, 17 + store i96 %d96, i96* %p96 + +; CHECK: %c128.hi.shl = shl i64 %a128.hi, 21 +; CHECK-NEXT: %c128.lo.shr = lshr i64 %a128.lo, 43 +; CHECK-NEXT: %c128.lo = or i64 %c128.hi.shl, %c128.lo.shr +; CHECK-NEXT: %c128.hi = ashr i64 %a128.hi, 43 + %c128 = ashr i128 %a128, 43 + store i128 %c128, i128* %p128 + + ret void +} + +; CHECK-LABEL: @adds +define void @adds(i32 *%dest, i32* %lhs, i32* %rhs) { + %d = bitcast i32* %dest to i96* + %lp = bitcast i32* %lhs to i96* + %lv = load i96, i96* %lp + %rp = bitcast i32* %rhs to i96* + %rv = load i96, i96* %rp + +; CHECK: %result.lo = add i64 %lv.lo, %rv.lo +; CHECK-NEXT: %result.cmp = icmp ult i64 %lv.lo, %rv.lo +; CHECK-NEXT: %result.limit = select i1 %result.cmp, i64 %rv.lo, i64 %lv.lo +; CHECK-NEXT: %result.overflowed = icmp ult i64 %result.lo, %result.limit +; CHECK-NEXT: %result.carry = zext i1 %result.overflowed to i32 +; CHECK-NEXT: %result.hi = add i32 %lv.hi, %rv.hi +; CHECK-NEXT: %result.carried = add i32 %result.hi, %result.carry + %result = add i96 %lv, %rv + store i96 %result, i96* %d + ret void +} + +; CHECK-LABEL: @subs +define void @subs(i32 *%dest, i32* %lhs, i32* %rhs) { + %d = bitcast i32* %dest to i96* + %lp = bitcast i32* %lhs to i96* + %lv = load i96, i96* %lp + %rp = bitcast i32* %rhs to i96* + %rv = load i96, i96* %rp + +; CHECK: %result.borrow = icmp ult i64 %lv.lo, %rv.lo +; CHECK-NEXT: %result.borrowing = sext i1 %result.borrow to i32 +; CHECK-NEXT: %result.lo = sub i64 %lv.lo, %rv.lo +; CHECK-NEXT: %result.hi = sub i32 %lv.hi, %rv.hi +; CHECK-NEXT: %result.borrowed = add i32 %result.hi, %result.borrowing + %result = sub i96 %lv, %rv + store i96 %result, i96* %d + ret void +} + +; CHECK-LABEL: @icmp_equality +define void @icmp_equality(i32* %p) { + %p96 = bitcast i32* %p to i96* + %a96 = load i96, i96* %p96 + %b96 = load i96, i96* %p96 + +; CHECK: %eq.lo = icmp eq i64 %a96.lo, %b96.lo +; CHECK-NEXT: %eq.hi = icmp eq i32 %a96.hi, %b96.hi +; CHECK-NEXT: %eq = and i1 %eq.lo, %eq.hi + %eq = icmp eq i96 %a96, %b96 + +; CHECK: %ne.lo = icmp ne i64 %a96.lo, %b96.lo +; CHECK-NEXT: %ne.hi = icmp ne i32 %a96.hi, %b96.hi +; CHECK-NEXT: %ne = and i1 
%ne.lo, %ne.hi + %ne = icmp ne i96 %a96, %b96 + ret void +} + +; CHECK-LABEL: @icmp_uge +define void @icmp_uge(i32* %p) { + %p96 = bitcast i32* %p to i96* + %lv = load i96, i96* %p96 + %rv = load i96, i96* %p96 +; Do an add. +; CHECK: %uge.lo = add i64 %lv.lo, %rv.lo +; CHECK-NEXT: %uge.cmp = icmp ult i64 %lv.lo, %rv.lo +; CHECK-NEXT: %uge.limit = select i1 %uge.cmp, i64 %rv.lo, i64 %lv.lo +; CHECK-NEXT: %uge.overflowed = icmp ult i64 %uge.lo, %uge.limit +; CHECK-NEXT: %uge.carry = zext i1 %uge.overflowed to i32 +; CHECK-NEXT: %uge.hi = add i32 %lv.hi, %rv.hi +; CHECK-NEXT: %uge.carried = add i32 %uge.hi, %uge.carry +; Do the hi carry. +; CHECK-NEXT: %uge.cmp4 = icmp ult i32 %lv.hi, %rv.hi +; CHECK-NEXT: %uge.limit5 = select i1 %uge.cmp4, i32 %rv.hi, i32 %lv.hi +; CHECK-NEXT: %uge = icmp ult i32 %uge.carried, %uge.limit5 + %uge = icmp uge i96 %lv, %rv + ret void +} + +; CHECK-LABEL: @icmp_ule +define void @icmp_ule(i32* %p) { + %p96 = bitcast i32* %p to i96* + %lv = load i96, i96* %p96 + %rv = load i96, i96* %p96 +; Do an add. +; CHECK: %ule.lo = add i64 %lv.lo, %rv.lo +; CHECK-NEXT: %ule.cmp = icmp ult i64 %lv.lo, %rv.lo +; CHECK-NEXT: %ule.limit = select i1 %ule.cmp, i64 %rv.lo, i64 %lv.lo +; CHECK-NEXT: %ule.overflowed = icmp ult i64 %ule.lo, %ule.limit +; CHECK-NEXT: %ule.carry = zext i1 %ule.overflowed to i32 +; CHECK-NEXT: %ule.hi = add i32 %lv.hi, %rv.hi +; CHECK-NEXT: %ule.carried = add i32 %ule.hi, %ule.carry +; Do the hi carry. +; CHECK-NEXT: %ule.cmp4 = icmp ult i32 %lv.hi, %rv.hi +; CHECK-NEXT: %ule.limit5 = select i1 %ule.cmp4, i32 %rv.hi, i32 %lv.hi +; CHECK-NEXT: %ule.overflowed6 = icmp ult i32 %ule.carried, %ule.limit5 +; Invert the carry result. +; CHECK-NEXT: %ule = xor i1 %ule.overflowed6, true + %ule = icmp ule i96 %lv, %rv + ret void +} + +; CHECK-LABEL: @icmp_ugt +define void @icmp_ugt(i32* %p) { + %p96 = bitcast i32* %p to i96* + %lv = load i96, i96* %p96 + %rv = load i96, i96* %p96 +; Do an add. +; CHECK: %ugt.lo = add i64 %lv.lo, %rv.lo +; CHECK-NEXT: %ugt.cmp = icmp ult i64 %lv.lo, %rv.lo +; CHECK-NEXT: %ugt.limit = select i1 %ugt.cmp, i64 %rv.lo, i64 %lv.lo +; CHECK-NEXT: %ugt.overflowed = icmp ult i64 %ugt.lo, %ugt.limit +; CHECK-NEXT: %ugt.carry = zext i1 %ugt.overflowed to i32 +; CHECK-NEXT: %ugt.hi = add i32 %lv.hi, %rv.hi +; CHECK-NEXT: %ugt.carried = add i32 %ugt.hi, %ugt.carry +; Do the hi carry. +; CHECK-NEXT: %ugt.cmp4 = icmp ult i32 %lv.hi, %rv.hi +; CHECK-NEXT: %ugt.limit5 = select i1 %ugt.cmp4, i32 %rv.hi, i32 %lv.hi +; CHECK-NEXT: %ugt.overflowed6 = icmp ult i32 %ugt.carried, %ugt.limit5 +; Equality comparison. +; CHECK-NEXT: %ugt.lo7 = icmp eq i64 %lv.lo, %rv.lo +; CHECK-NEXT: %ugt.hi8 = icmp eq i32 %lv.hi, %rv.hi +; CHECK-NEXT: %ugt.eq = and i1 %ugt.lo7, %ugt.hi8 +; Merge the hi carry and equality comparison results. +; CHECK-NEXT: %ugt = and i1 %ugt.overflowed6, %ugt.eq + %ugt = icmp ugt i96 %lv, %rv + ret void +} + +; CHECK-LABEL: @icmp_ult +define void @icmp_ult(i32* %p) { + %p96 = bitcast i32* %p to i96* + %lv = load i96, i96* %p96 + %rv = load i96, i96* %p96 +; Do an add. +; CHECK: %ult.lo = add i64 %lv.lo, %rv.lo +; CHECK-NEXT: %ult.cmp = icmp ult i64 %lv.lo, %rv.lo +; CHECK-NEXT: %ult.limit = select i1 %ult.cmp, i64 %rv.lo, i64 %lv.lo +; CHECK-NEXT: %ult.overflowed = icmp ult i64 %ult.lo, %ult.limit +; CHECK-NEXT: %ult.carry = zext i1 %ult.overflowed to i32 +; CHECK-NEXT: %ult.hi = add i32 %lv.hi, %rv.hi +; CHECK-NEXT: %ult.carried = add i32 %ult.hi, %ult.carry +; Do the hi carry. 
+; CHECK-NEXT: %ult.cmp4 = icmp ult i32 %lv.hi, %rv.hi +; CHECK-NEXT: %ult.limit5 = select i1 %ult.cmp4, i32 %rv.hi, i32 %lv.hi +; CHECK-NEXT: %ult.overflowed6 = icmp ult i32 %ult.carried, %ult.limit5 +; Invert the carry result. +; CHECK-NEXT: %ult7 = xor i1 %ult.overflowed6, true +; Equality comparison. +; CHECK-NEXT: %ult.lo8 = icmp eq i64 %lv.lo, %rv.lo +; CHECK-NEXT: %ult.hi9 = icmp eq i32 %lv.hi, %rv.hi +; CHECK-NEXT: %ult.eq = and i1 %ult.lo8, %ult.hi9 +; Merge the hi carry and equality comparison results. +; CHECK-NEXT: %ult = and i1 %ult7, %ult.eq + %ult = icmp ult i96 %lv, %rv + ret void +} + +; CHECK-LABEL: @selects +define void @selects(i1 %c, i32* %pl, i32* %pr) { + %pl96 = bitcast i32* %pl to i96* + %pr96 = bitcast i32* %pr to i96* + %l = load i96, i96* %pl96 + %r = load i96, i96* %pr96 + +; CHECK: %result.lo = select i1 %c, i64 %l.lo, i64 %r.lo +; CHECK-NEXT: %result.hi = select i1 %c, i32 %l.hi, i32 %r.hi + %result = select i1 %c, i96 %l, i96 %r + ret void +} + +; CHECK-LABEL: @phis1 +define void @phis1() { +entry: + br label %label1 +label1: + br i1 undef, label %label2, label %end +label2: + br label %end +end: +; CHECK: %foo.lo = phi i64 [ undef, %label1 ], [ undef, %label2 ] +; CHECK-NEXT: %foo.hi = phi i8 [ undef, %label1 ], [ undef, %label2 ] +; CHECK-NEXT: %bar.lo = and i64 %foo.lo, 137438953472 +; CHECK-NEXT: %bar.hi = and i8 %foo.hi, 0 + %foo = phi i72 [ undef, %label1 ], [ undef, %label2 ] + %bar = and i72 %foo, 137438953472 + br i1 undef, label %label1, label %label2 +} + +; CHECK-LABEL: @phis2 +define void @phis2() { +entry: + br label %label1 +label1: +; CHECK: %foo.lo = phi i64 [ %bar.lo, %label2 ], [ undef, %entry ] +; CHECK-NEXT: %foo.hi = phi i8 [ %bar.hi, %label2 ], [ undef, %entry ] + %foo = phi i72 [ %bar, %label2 ], [ undef, %entry ] + br i1 undef, label %label2, label %end +label2: +; CHECK: %bar.lo = load i64, i64* undef, align 4 +; CHECK-NEXT: %bar.hi = load i8, i8* undef, align 4 + %bar = load i72, i72* undef, align 4 + br label %label1 +end: + ret void +} diff --git a/test/Transforms/NaCl/expand-shuffle-vector.ll b/test/Transforms/NaCl/expand-shuffle-vector.ll new file mode 100644 index 000000000000..3c274979abd9 --- /dev/null +++ b/test/Transforms/NaCl/expand-shuffle-vector.ll @@ -0,0 +1,138 @@ +; RUN: opt -expand-shufflevector %s -S | FileCheck %s + +; Test that shufflevector is expanded to insertelement / extractelement. 
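+;
+; The general shape (an illustrative sketch, not checked directly) is: each
+; mask index becomes an extractelement -- from %lhs for indices below the
+; input width, from %rhs for indices at or above it -- and the extracted
+; values are reassembled with a chain of insertelement instructions starting
+; from undef. For the two-element mask <i32 0, i32 4> on <4 x i32> inputs
+; (see @test_narrow_4xi32 below) this is roughly:
+;
+;   %e0 = extractelement <4 x i32> %lhs, i32 0
+;   %e1 = extractelement <4 x i32> %rhs, i32 0
+;   %v0 = insertelement <2 x i32> undef, i32 %e0, i32 0
+;   %v1 = insertelement <2 x i32> %v0, i32 %e1, i32 1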
+ +define <4 x i32> @test_splat_lo_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_splat_lo_4xi32 + ; CHECK-NEXT: %1 = extractelement <4 x i32> %lhs, i32 0 + ; CHECK-NEXT: %2 = extractelement <4 x i32> %lhs, i32 0 + ; CHECK-NEXT: %3 = extractelement <4 x i32> %lhs, i32 0 + ; CHECK-NEXT: %4 = extractelement <4 x i32> %lhs, i32 0 + ; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %1, i32 0 + ; CHECK-NEXT: %6 = insertelement <4 x i32> %5, i32 %2, i32 1 + ; CHECK-NEXT: %7 = insertelement <4 x i32> %6, i32 %3, i32 2 + ; CHECK-NEXT: %8 = insertelement <4 x i32> %7, i32 %4, i32 3 + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %8 + ret <4 x i32> %res +} + +define <4 x i32> @test_splat_hi_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_splat_hi_4xi32 + ; CHECK-NEXT: %1 = extractelement <4 x i32> %rhs, i32 0 + ; CHECK-NEXT: %2 = extractelement <4 x i32> %rhs, i32 0 + ; CHECK-NEXT: %3 = extractelement <4 x i32> %rhs, i32 0 + ; CHECK-NEXT: %4 = extractelement <4 x i32> %rhs, i32 0 + ; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %1, i32 0 + ; CHECK-NEXT: %6 = insertelement <4 x i32> %5, i32 %2, i32 1 + ; CHECK-NEXT: %7 = insertelement <4 x i32> %6, i32 %3, i32 2 + ; CHECK-NEXT: %8 = insertelement <4 x i32> %7, i32 %4, i32 3 + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %8 + ret <4 x i32> %res +} + +define <4 x i32> @test_id_lo_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_id_lo_4xi32 + ; CHECK-NEXT: %1 = extractelement <4 x i32> %lhs, i32 0 + ; CHECK-NEXT: %2 = extractelement <4 x i32> %lhs, i32 1 + ; CHECK-NEXT: %3 = extractelement <4 x i32> %lhs, i32 2 + ; CHECK-NEXT: %4 = extractelement <4 x i32> %lhs, i32 3 + ; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %1, i32 0 + ; CHECK-NEXT: %6 = insertelement <4 x i32> %5, i32 %2, i32 1 + ; CHECK-NEXT: %7 = insertelement <4 x i32> %6, i32 %3, i32 2 + ; CHECK-NEXT: %8 = insertelement <4 x i32> %7, i32 %4, i32 3 + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %8 + ret <4 x i32> %res +} + +define <4 x i32> @test_id_hi_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_id_hi_4xi32 + ; CHECK-NEXT: %1 = extractelement <4 x i32> %rhs, i32 0 + ; CHECK-NEXT: %2 = extractelement <4 x i32> %rhs, i32 1 + ; CHECK-NEXT: %3 = extractelement <4 x i32> %rhs, i32 2 + ; CHECK-NEXT: %4 = extractelement <4 x i32> %rhs, i32 3 + ; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %1, i32 0 + ; CHECK-NEXT: %6 = insertelement <4 x i32> %5, i32 %2, i32 1 + ; CHECK-NEXT: %7 = insertelement <4 x i32> %6, i32 %3, i32 2 + ; CHECK-NEXT: %8 = insertelement <4 x i32> %7, i32 %4, i32 3 + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %8 + ret <4 x i32> %res +} + +define <4 x i32> @test_interleave_lo_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_interleave_lo_4xi32 + ; CHECK-NEXT: %1 = extractelement <4 x i32> %lhs, i32 0 + ; CHECK-NEXT: %2 = extractelement <4 x i32> %rhs, i32 0 + ; CHECK-NEXT: %3 = extractelement <4 x i32> %lhs, i32 1 + ; CHECK-NEXT: %4 = extractelement <4 x i32> %rhs, i32 1 + ; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %1, i32 0 + ; CHECK-NEXT: %6 = insertelement <4 x i32> %5, i32 %2, i32 1 + ; CHECK-NEXT: %7 = insertelement <4 x i32> %6, i32 %3, i32 2 + ; CHECK-NEXT: %8 = insertelement <4 x i32> %7, i32 %4, i32 3 + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; 
CHECK-NEXT: ret <4 x i32> %8 + ret <4 x i32> %res +} + +define <4 x i32> @test_interleave_hi_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_interleave_hi_4xi32 + ; CHECK-NEXT: %1 = extractelement <4 x i32> %lhs, i32 1 + ; CHECK-NEXT: %2 = extractelement <4 x i32> %rhs, i32 1 + ; CHECK-NEXT: %3 = extractelement <4 x i32> %lhs, i32 3 + ; CHECK-NEXT: %4 = extractelement <4 x i32> %rhs, i32 3 + ; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %1, i32 0 + ; CHECK-NEXT: %6 = insertelement <4 x i32> %5, i32 %2, i32 1 + ; CHECK-NEXT: %7 = insertelement <4 x i32> %6, i32 %3, i32 2 + ; CHECK-NEXT: %8 = insertelement <4 x i32> %7, i32 %4, i32 3 + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> + ; CHECK-NEXT: ret <4 x i32> %8 + ret <4 x i32> %res +} + +define <4 x i32> @test_undef_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_undef_4xi32 + ; CHECK-NEXT: %1 = insertelement <4 x i32> undef, i32 undef, i32 0 + ; CHECK-NEXT: %2 = insertelement <4 x i32> %1, i32 undef, i32 1 + ; CHECK-NEXT: %3 = insertelement <4 x i32> %2, i32 undef, i32 2 + ; CHECK-NEXT: %4 = insertelement <4 x i32> %3, i32 undef, i32 3 + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> undef + ; CHECK-NEXT: ret <4 x i32> %4 + ret <4 x i32> %res +} + +define <2 x i32> @test_narrow_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_narrow_4xi32 + ; CHECK-NEXT: %1 = extractelement <4 x i32> %lhs, i32 0 + ; CHECK-NEXT: %2 = extractelement <4 x i32> %rhs, i32 0 + ; CHECK-NEXT: %3 = insertelement <2 x i32> undef, i32 %1, i32 0 + ; CHECK-NEXT: %4 = insertelement <2 x i32> %3, i32 %2, i32 1 + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <2 x i32> + ; CHECK-NEXT: ret <2 x i32> %4 + ret <2 x i32> %res +} + +define <8 x i32> @test_widen_4xi32(<4 x i32> %lhs, <4 x i32> %rhs) { + ; CHECK-LABEL: test_widen_4xi32 + ; CHECK-NEXT: %1 = extractelement <4 x i32> %rhs, i32 3 + ; CHECK-NEXT: %2 = extractelement <4 x i32> %rhs, i32 2 + ; CHECK-NEXT: %3 = extractelement <4 x i32> %rhs, i32 1 + ; CHECK-NEXT: %4 = extractelement <4 x i32> %rhs, i32 0 + ; CHECK-NEXT: %5 = extractelement <4 x i32> %lhs, i32 3 + ; CHECK-NEXT: %6 = extractelement <4 x i32> %lhs, i32 2 + ; CHECK-NEXT: %7 = extractelement <4 x i32> %lhs, i32 1 + ; CHECK-NEXT: %8 = extractelement <4 x i32> %lhs, i32 0 + ; CHECK-NEXT: %9 = insertelement <8 x i32> undef, i32 %1, i32 0 + ; CHECK-NEXT: %10 = insertelement <8 x i32> %9, i32 %2, i32 1 + ; CHECK-NEXT: %11 = insertelement <8 x i32> %10, i32 %3, i32 2 + ; CHECK-NEXT: %12 = insertelement <8 x i32> %11, i32 %4, i32 3 + ; CHECK-NEXT: %13 = insertelement <8 x i32> %12, i32 %5, i32 4 + ; CHECK-NEXT: %14 = insertelement <8 x i32> %13, i32 %6, i32 5 + ; CHECK-NEXT: %15 = insertelement <8 x i32> %14, i32 %7, i32 6 + ; CHECK-NEXT: %16 = insertelement <8 x i32> %15, i32 %8, i32 7 + %res = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <8 x i32> + ; CHECK-NEXT: ret <8 x i32> %16 + ret <8 x i32> %res +} diff --git a/test/Transforms/NaCl/expand-small-arguments.ll b/test/Transforms/NaCl/expand-small-arguments.ll new file mode 100644 index 000000000000..e9d4f05b1c37 --- /dev/null +++ b/test/Transforms/NaCl/expand-small-arguments.ll @@ -0,0 +1,216 @@ +; RUN: opt %s -expand-small-arguments -S | FileCheck %s + +@var = global i8 0 + + +define void @small_arg(i8 %val) { + store i8 %val, i8* @var + ret void +} +; CHECK: define void @small_arg(i32 %val) { +; CHECK-NEXT: %val.arg_trunc = trunc i32 %val to i8 +; CHECK-NEXT: store i8 %val.arg_trunc, i8* @var + + +define i8 
@small_result() { + %val = load i8, i8* @var + ret i8 %val +} +; CHECK: define i32 @small_result() { +; CHECK-NEXT: %val = load i8, i8* @var +; CHECK-NEXT: %val.ret_ext = zext i8 %val to i32 +; CHECK-NEXT: ret i32 %val.ret_ext + +define signext i8 @small_result_signext() { + %val = load i8, i8* @var + ret i8 %val +} +; CHECK: define signext i32 @small_result_signext() { +; CHECK-NEXT: %val = load i8, i8* @var +; CHECK-NEXT: %val.ret_ext = sext i8 %val to i32 +; CHECK-NEXT: ret i32 %val.ret_ext + + +define void @call_small_arg() { + call void @small_arg(i8 100) + ret void +} +; CHECK: define void @call_small_arg() { +; CHECK-NEXT: %arg_ext = zext i8 100 to i32 +; CHECK-NEXT: %.arg_cast = bitcast {{.*}} @small_arg +; CHECK-NEXT: call void %.arg_cast(i32 %arg_ext) + +define void @call_small_arg_signext() { + call void @small_arg(i8 signext 100) + ret void +} +; CHECK: define void @call_small_arg_signext() { +; CHECK-NEXT: %arg_ext = sext i8 100 to i32 +; CHECK-NEXT: %.arg_cast = bitcast {{.*}} @small_arg +; CHECK-NEXT: call void %.arg_cast(i32 signext %arg_ext) + + +define void @call_small_result() { + %r = call i8 @small_result() + store i8 %r, i8* @var + ret void +} +; CHECK: define void @call_small_result() { +; CHECK-NEXT: %r.arg_cast = bitcast {{.*}} @small_result +; CHECK-NEXT: %r = call i32 %r.arg_cast() +; CHECK-NEXT: %r.ret_trunc = trunc i32 %r to i8 +; CHECK-NEXT: store i8 %r.ret_trunc, i8* @var + + +; Check that various attributes are preserved. +define i1 @attributes(i8 %arg) nounwind { + %r = tail call fastcc i1 @attributes(i8 %arg) nounwind + ret i1 %r +} +; CHECK: define i32 @attributes(i32 %arg) [[NOUNWIND:#[0-9]+]] { +; CHECK: tail call fastcc i32 {{.*}} [[NOUNWIND]] + + +; These arguments and results should be left alone. +define i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) { + %r = call i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) + ret i64 %r +} +; CHECK: define i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) { +; CHECK-NEXT: %r = call i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) +; CHECK-NEXT: ret i64 %r + + +; Intrinsics must be left alone since the pass cannot change their types. 
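+; For example, @llvm.memset.p0i8.i32 is declared to take its fill value as an
+; i8, so widening that operand to i32 would produce a call that no longer
+; matches the intrinsic's fixed signature. Calls such as
+;
+;   call void @llvm.memset.p0i8.i32(i8* %ptr, i8 99, i32 256, i32 1, i1 0)
+;
+; are therefore skipped, as @intrinsic_call below checks.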
+ +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) +; CHECK: declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) + +define void @intrinsic_call(i8* %ptr) { + call void @llvm.memset.p0i8.i32(i8* %ptr, i8 99, i32 256, i32 1, i1 0) + ret void +} +; CHECK: define void @intrinsic_call +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %ptr, i8 99, + +define void @invoking_small_arg(i8) { + invoke void @small_arg(i8 %0) + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret void +} +; CHECK-LABEL: define void @invoking_small_arg(i32) +; CHECK-NEXT: %.arg_trunc = trunc i32 %0 to i8 +; CHECK-NEXT: %arg_ext = zext i8 %.arg_trunc to i32 +; CHECK-NEXT: %.arg_cast = bitcast void (i8)* bitcast (void (i32)* @small_arg to void (i8)*) to void (i32)* +; CHECK-NEXT: invoke void %.arg_cast(i32 %arg_ext) +; CHECK-NEXT: to label %cont unwind label %lpad + +; CHECK: cont: +; CHECK-NEXT: ret void + +; CHECK: lpad: +; CHECK-NEXT: %lp = landingpad { i8*, i32 } personality i8* null +; CHECK-NEXT: cleanup +; CHECK-NEXT: ret void + +define fastcc void @invoking_cc() { + invoke fastcc void @invoking_cc() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret void +} +; CHECK-LABEL: define fastcc void @invoking_cc() +; CHECK-NEXT: invoke fastcc void @invoking_cc() + +define void @invoking_attrs() noinline { + invoke void @invoking_attrs() noinline + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret void +} +; CHECK: define void @invoking_attrs() [[NOINLINE:#[0-9]+]] +; CHECK: invoke void @invoking_attrs() [[NOINLINE]] + +define void @invoking_critical_edge() { +entry: + %a = invoke i8 @small_result() + to label %loop unwind label %lpad +loop: + %b = phi i8 [ %a, %entry ], [ %c, %loop ] + %c = add i8 1, %b + %d = icmp eq i8 %c, 5 + br i1 %d, label %exit, label %loop + +exit: + %aa = phi i8 [ 0, %lpad ], [ %c, %loop ] + ret void + +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + br label %exit +} +; CHECK-LABEL: define void @invoking_critical_edge() +; CHECK: entry: +; CHECK-NEXT: %a.arg_cast = bitcast i8 ()* bitcast (i32 ()* @small_result to i8 ()*) to i32 ()* +; CHECK-NEXT: %a = invoke i32 %a.arg_cast() +; CHECK-NEXT: to label %entry.loop_crit_edge unwind label %lpad + +; CHECK: entry.loop_crit_edge: +; CHECK-NEXT: %a.ret_trunc = trunc i32 %a to i8 +; CHECK-NEXT: br label %loop + +; CHECK: loop: +; CHECK-NEXT: %b = phi i8 [ %a.ret_trunc, %entry.loop_crit_edge ], [ %c, %loop ] +; CHECK-NEXT: %c = add i8 1, %b +; CHECK-NEXT: %d = icmp eq i8 %c, 5 +; CHECK-NEXT: br i1 %d, label %exit, label %loop + +; CHECK: exit: +; CHECK-NEXT: %aa = phi i8 [ 0, %lpad ], [ %c, %loop ] +; CHECK-NEXT: ret void + +; CHECK: lpad: +; CHECK-NEXT: %lp = landingpad { i8*, i32 } personality i8* null +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label %exit + +define i8 @invoking_small_result() { +entry: + %a = invoke i8 @small_result() + to label %cont unwind label %lpad +cont: + ret i8 %a +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i8 123 +} +; CHECK-LABEL: define i32 @invoking_small_result() +; CHECK: entry: +; CHECK-NEXT: %a.arg_cast = bitcast i8 ()* bitcast (i32 ()* @small_result to i8 ()*) to i32 ()* +; CHECK-NEXT: %a = invoke i32 %a.arg_cast() +; CHECK-NEXT: to label %cont unwind label %lpad + +; CHECK: cont: +; CHECK-NEXT: %a.ret_trunc = 
trunc i32 %a to i8 +; CHECK-NEXT: %a.ret_trunc.ret_ext = zext i8 %a.ret_trunc to i32 +; CHECK-NEXT: ret i32 %a.ret_trunc.ret_ext + +; CHECK: lpad: +; CHECK-NEXT: %lp = landingpad { i8*, i32 } personality i8* null +; CHECK-NEXT: cleanup +; CHECK-NEXT: %.ret_ext = zext i8 123 to i32 +; CHECK-NEXT: ret i32 %.ret_ext + + +; CHECK: attributes [[NOUNWIND]] = { nounwind } +; CHECK: attributes [[NOINLINE]] = { noinline } diff --git a/test/Transforms/NaCl/expand-struct-regs.ll b/test/Transforms/NaCl/expand-struct-regs.ll new file mode 100644 index 000000000000..ae8e263cb134 --- /dev/null +++ b/test/Transforms/NaCl/expand-struct-regs.ll @@ -0,0 +1,228 @@ +; RUN: opt %s -expand-struct-regs -S | FileCheck %s +; RUN: opt %s -expand-struct-regs -S | FileCheck %s -check-prefix=CLEANUP + +; These two instructions should not appear in the output: +; CLEANUP-NOT: extractvalue +; CLEANUP-NOT: insertvalue + +target datalayout = "p:32:32:32" + +%struct = type { i8, i32 } + + +define void @struct_load(%struct* %p, i8* %out0, i32* %out1) { + %val = load %struct, %struct* %p + %field0 = extractvalue %struct %val, 0 + %field1 = extractvalue %struct %val, 1 + store i8 %field0, i8* %out0 + store i32 %field1, i32* %out1 + ret void +} +; CHECK: define void @struct_load +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct, %struct* %p, i32 0, i32 0 +; CHECK-NEXT: %val.field{{.*}} = load i8, i8* %val.index{{.*}} +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct, %struct* %p, i32 0, i32 1 +; CHECK-NEXT: %val.field{{.*}} = load i32, i32* %val.index{{.*}} +; CHECK-NEXT: store i8 %val.field{{.*}}, i8* %out0 +; CHECK-NEXT: store i32 %val.field{{.*}}, i32* %out1 + + +define void @struct_store(%struct* %in_ptr, %struct* %out_ptr) { + %val = load %struct, %struct* %in_ptr + store %struct %val, %struct* %out_ptr + ret void +} +; CHECK: define void @struct_store +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct, %struct* %in_ptr, i32 0, i32 0 +; CHECK-NEXT: %val.field{{.*}} = load i8, i8* %val.index{{.*}} +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct, %struct* %in_ptr, i32 0, i32 1 +; CHECK-NEXT: %val.field{{.*}} = load i32, i32* %val.index{{.*}} +; CHECK-NEXT: %out_ptr.index{{.*}} = getelementptr %struct, %struct* %out_ptr, i32 0, i32 0 +; CHECK-NEXT: store i8 %val.field{{.*}}, i8* %out_ptr.index{{.*}} +; CHECK-NEXT: %out_ptr.index{{.*}} = getelementptr %struct, %struct* %out_ptr, i32 0, i32 1 +; CHECK-NEXT: store i32 %val.field{{.*}}, i32* %out_ptr.index{{.*}} + + +; Ensure that the pass works correctly across basic blocks. 
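+; That is, the per-field loads stay in the first block and the per-field
+; stores in %bb refer back to those values, so no struct-typed SSA value has
+; to cross the block boundary. A sketch of the expected shape (names are
+; illustrative; the CHECKs below only match the load/store pattern):
+;
+;   %val.field0 = load i8, i8* %val.index0
+;   %val.field1 = load i32, i32* %val.index1
+;   br label %bb
+; bb:
+;   store i8 %val.field0, i8* %out.index0
+;   store i32 %val.field1, i32* %out.index1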
+define void @across_basic_block(%struct* %in_ptr, %struct* %out_ptr) { + %val = load %struct, %struct* %in_ptr + br label %bb +bb: + store %struct %val, %struct* %out_ptr + ret void +} +; CHECK: define void @across_basic_block +; CHECK: load +; CHECK: load +; CHECK: bb: +; CHECK: store +; CHECK: store + + +define void @const_struct_store(%struct* %ptr) { + store %struct { i8 99, i32 1234 }, %struct* %ptr + ret void +} +; CHECK: define void @const_struct_store +; CHECK: store i8 99 +; CHECK: store i32 1234 + + +define void @struct_phi_node(%struct* %ptr) { +entry: + %val = load %struct, %struct* %ptr + br label %bb +bb: + %phi = phi %struct [ %val, %entry ] + ret void +} +; CHECK: bb: +; CHECK-NEXT: %phi.index{{.*}} = phi i8 [ %val.field{{.*}}, %entry ] +; CHECK-NEXT: %phi.index{{.*}} = phi i32 [ %val.field{{.*}}, %entry ] + + +define void @struct_phi_node_multiple_entry(i1 %arg, %struct* %ptr) { +entry: + %val = load %struct, %struct* %ptr + br i1 %arg, label %bb, label %bb +bb: + %phi = phi %struct [ %val, %entry ], [ %val, %entry ] + ret void +} +; CHECK: bb: +; CHECK-NEXT: %phi.index{{.*}} = phi i8 [ %val.field{{.*}}, %entry ], [ %val.field{{.*}}, %entry ] +; CHECK-NEXT: %phi.index{{.*}} = phi i32 [ %val.field{{.*}}, %entry ], [ %val.field{{.*}}, %entry ] + + +define void @struct_select_inst(i1 %cond, %struct* %ptr1, %struct* %ptr2) { + %val1 = load %struct, %struct* %ptr1 + %val2 = load %struct, %struct* %ptr2 + %select = select i1 %cond, %struct %val1, %struct %val2 + ret void +} +; CHECK: define void @struct_select_inst +; CHECK: %select.index{{.*}} = select i1 %cond, i8 %val1.field{{.*}}, i8 %val2.field{{.*}} +; CHECK-NEXT: %select.index{{.*}} = select i1 %cond, i32 %val1.field{{.*}}, i32 %val2.field{{.*}} + + +define void @insert_and_extract(i8* %out0, i32* %out1) { + %temp = insertvalue %struct undef, i8 100, 0 + %sval = insertvalue %struct %temp, i32 200, 1 + %field0 = extractvalue %struct %sval, 0 + %field1 = extractvalue %struct %sval, 1 + store i8 %field0, i8* %out0 + store i32 %field1, i32* %out1 + ret void +} +; CHECK: define void @insert_and_extract(i8* %out0, i32* %out1) { +; CHECK-NEXT: store i8 100, i8* %out0 +; CHECK-NEXT: store i32 200, i32* %out1 +; CHECK-NEXT: ret void + + +define i32 @extract_from_constant() { + %ev = extractvalue %struct { i8 99, i32 888 }, 1 + ret i32 %ev +} +; CHECK: define i32 @extract_from_constant() { +; CHECK-NEXT: ret i32 888 + +define void @nested_structs() { + %a1 = alloca i64 + %a2 = alloca i32 + %a3 = alloca { { i32, i64 } } + %a = insertvalue { i32, i64 } undef, i32 5, 0 + %b = insertvalue { i32, i64 } %a, i64 6, 1 + %c = insertvalue { { i32, i64 } } undef, { i32, i64 } %b, 0 + %d = insertvalue { { { i32, i64 } }, i64 } undef, { { i32, i64 } } %c, 0 + %e = insertvalue { { { i32, i64 } }, i64 } undef, { i32, i64 } %b, 0, 0 + + %f = extractvalue { { { i32, i64 } }, i64 } %d, 0, 0, 1 + %g = extractvalue { { { i32, i64 } }, i64 } %e, 0, 0, 0 + %h = extractvalue { { { i32, i64 } }, i64 } %e, 0 + store i64 %f, i64* %a1 + store i32 %g, i32* %a2 + store { { i32, i64 } } %h, { { i32, i64 } }* %a3 + ret void +} +; CHECK-LABEL: define void @nested_structs() +; CHECK-NEXT: %a1 = alloca i64 +; CHECK-NEXT: %a2 = alloca i32 +; CHECK-NEXT: %a3 = alloca { { i32, i64 } } +; CHECK-NEXT: store i64 6, i64* %a1 +; CHECK-NEXT: store i32 5, i32* %a2 +; CHECK-NEXT: %a3.index = getelementptr { { i32, i64 } }, { { i32, i64 } }* %a3, i32 0, i32 0 +; CHECK-NEXT: %a3.index.index = getelementptr { i32, i64 }, { i32, i64 }* %a3.index, i32 0, i32 0 +; CHECK-NEXT: 
store i32 5, i32* %a3.index.index +; CHECK-NEXT: %a3.index.index1 = getelementptr { i32, i64 }, { i32, i64 }* %a3.index, i32 0, i32 1 +; CHECK-NEXT: store i64 6, i64* %a3.index.index1 + +define void @load_another_pass() { + %a = alloca { { i8, i64 } } + %b = load { { i8, i64 } }, { { i8, i64 } }* %a + %c = load { { i8, i64 } }, { { i8, i64 } }* %a, align 16 + ret void +} +; CHECK-LABEL: define void @load_another_pass() +; CHECK: %b.field.field = load i8, i8* %b.field.index +; CHECK: %b.field.field{{.*}} = load i64, i64* %b.field.index{{.*}} +; CHECK: %c.field.field = load i8, i8* %c.field.index, align 16 +; CHECK: %c.field.field{{.*}} = load i64, i64* %c.field.index{{.*}}, align 4 + +define void @store_another_pass() { + %a = alloca { { i16, i64 } } + store { { i16, i64 } } undef, { { i16, i64 } }* %a + store { { i16, i64 } } undef, { { i16, i64 } }* %a, align 16 + ret void +} +; CHECK-LABEL: define void @store_another_pass() +; CHECK: store i16 undef, i16* %a.index.index +; CHECK: store i64 undef, i64* %a.index.index{{.*}} +; CHECK: store i16 undef, i16* %a.index1.index, align 16 +; CHECK: store i64 undef, i64* %a.index1.index{{.*}}, align 4 + +define void @select_another_pass() { + %a = load { { i8, i64 } }, { { i8, i64 } }* null + %b = load { { i8, i64 } }, { { i8, i64 } }* null + %c = select i1 undef, { { i8, i64 } } %a, { { i8, i64 } } %b + store { { i8, i64 } } %c, { { i8, i64 } }* null + ret void +} +; CHECK-LABEL: define void @select_another_pass() +; CHECK-NEXT: %a.index = getelementptr { { i8, i64 } }, { { i8, i64 } }* null, i32 0, i32 0 +; CHECK-NEXT: %a.field.index = getelementptr { i8, i64 }, { i8, i64 }* %a.index, i32 0, i32 0 +; CHECK-NEXT: %a.field.field = load i8, i8* %a.field.index +; CHECK-NEXT: %a.field.index2 = getelementptr { i8, i64 }, { i8, i64 }* %a.index, i32 0, i32 1 +; CHECK-NEXT: %a.field.field3 = load i64, i64* %a.field.index2 +; CHECK-NEXT: %b.index = getelementptr { { i8, i64 } }, { { i8, i64 } }* null, i32 0, i32 0 +; CHECK-NEXT: %b.field.index = getelementptr { i8, i64 }, { i8, i64 }* %b.index, i32 0, i32 0 +; CHECK-NEXT: %b.field.field = load i8, i8* %b.field.index +; CHECK-NEXT: %b.field.index5 = getelementptr { i8, i64 }, { i8, i64 }* %b.index, i32 0, i32 1 +; CHECK-NEXT: %b.field.field6 = load i64, i64* %b.field.index5 +; CHECK-NEXT: %c.index.index = select i1 undef, i8 %a.field.field, i8 %b.field.field +; CHECK-NEXT: %c.index.index11 = select i1 undef, i64 %a.field.field3, i64 %b.field.field6 +; CHECK-NEXT: %.index = getelementptr { { i8, i64 } }, { { i8, i64 } }* null, i32 0, i32 0 +; CHECK-NEXT: %.index.index = getelementptr { i8, i64 }, { i8, i64 }* %.index, i32 0, i32 0 +; CHECK-NEXT: store i8 %c.index.index, i8* %.index.index +; CHECK-NEXT: %.index.index13 = getelementptr { i8, i64 }, { i8, i64 }* %.index, i32 0, i32 1 +; CHECK-NEXT: store i64 %c.index.index11, i64* %.index.index13 +; CHECK-NEXT: ret void + +define void @phi_another_pass() { +entry: + br i1 false, label %next, label %not_next + +not_next: + %a = alloca { { i64, i16 }, i8* } + %b = load { { i64, i16 }, i8* }, { { i64, i16 }, i8* }* %a + br label %next + +next: + %c = phi { { i64, i16 }, i8* } [ undef, %entry ], [ %b, %not_next ] + store { { i64, i16 }, i8* } %c, { { i64, i16 }, i8* }* null + ret void +} +; CHECK-LABEL: define void @phi_another_pass() +; CHECK: %c.index.index = phi i64 [ undef, %entry ], [ %b.field.field, %not_next ] +; CHECK: %c.index.index{{.*}} = phi i16 [ undef, %entry ], [ %b.field.field{{.*}}, %not_next ] +; CHECK: %c.index{{.*}} = phi i8* [ undef, %entry ], 
[ %b.field{{.*}}, %not_next ] diff --git a/test/Transforms/NaCl/expand-tls-aligned.ll b/test/Transforms/NaCl/expand-tls-aligned.ll new file mode 100644 index 000000000000..75f03ba306ff --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-aligned.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +target datalayout = "p:32:32:32" + + +@var = global i32 123 + +; Put this first to check that the pass handles BSS variables last. +@bss_tvar_aligned = thread_local global i32 0, align 64 + +@tvar1 = thread_local global i16 234 +; Test a pointer to check we are getting the right pointer size. +@tvar2 = thread_local global i32* @var +@tvar_aligned = thread_local global i8 99, align 32 + + +; CHECK: %tls_init_template = type <{ i16, [2 x i8], i32*, [24 x i8], i8 }> +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> + +; This struct type must be "packed" because the 31 byte padding here +; is followed by an i32. +; CHECK: %tls_bss_template = type <{ [31 x i8], i32, [60 x i8] }> + +; CHECK: @__tls_template_start = internal constant %tls_init_template <{ i16 234, [2 x i8] zeroinitializer, i32* @var, [24 x i8] zeroinitializer, i8 99 }> + +; CHECK: @__tls_template_alignment = internal constant i32 64 + + +; Create references to __tls_template_* to keep these live, otherwise +; the definition of %tls_struct (which we check for above) is removed +; from the output. + +@__tls_template_tdata_end = external global i8 +@__tls_template_end = external global i8 + +define i8* @get_tls_template_tdata_end() { + ret i8* @__tls_template_tdata_end +} + +define i8* @get_tls_template_end() { + ret i8* @__tls_template_end +} diff --git a/test/Transforms/NaCl/expand-tls-bss.ll b/test/Transforms/NaCl/expand-tls-bss.ll new file mode 100644 index 000000000000..82e7e41fef96 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-bss.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + + +@tvar_bss1 = thread_local global i64 0 +@tvar_bss2 = thread_local global i32 0 + + +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> +; CHECK: %tls_bss_template = type <{ i64, i32, [4 x i8] }> + + +define i64* @get_tvar_bss1() { + ret i64* @tvar_bss1 +} +; CHECK: define i64* @get_tvar_bss1() +; CHECK: %field = getelementptr %tls_struct, %tls_struct* %tls_struct, i32 -1, i32 1, i32 0 +; CHECK: ret i64* %field diff --git a/test/Transforms/NaCl/expand-tls-constexpr-alias.ll b/test/Transforms/NaCl/expand-tls-constexpr-alias.ll new file mode 100644 index 000000000000..2b3d7546d877 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-constexpr-alias.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -nacl-expand-tls-constant-expr -S | FileCheck %s + +@real_tvar = thread_local global i32 123 +@tvar_alias = alias i32* @real_tvar +@tvar_alias2 = alias i32* getelementptr (i32, i32* @real_tvar, i32 100) + + +define i32* @get_tvar() { + ret i32* @tvar_alias +} +; CHECK: define i32* @get_tvar() +; CHECK: ret i32* @real_tvar + + +define i32* @get_tvar2() { + ret i32* @tvar_alias2 +} +; CHECK: define i32* @get_tvar2() +; CHECK: %expanded = getelementptr i32, i32* @real_tvar, i32 100 +; CHECK: ret i32* %expanded + + +define i32* @get_tvar3() { + ret i32* getelementptr (i32, i32* @tvar_alias2, i32 100) +} +; CHECK: define i32* @get_tvar3() +; CHECK: %expanded = getelementptr i32, i32* @real_tvar, i32 200 +; CHECK: ret i32* %expanded diff --git a/test/Transforms/NaCl/expand-tls-constexpr.ll b/test/Transforms/NaCl/expand-tls-constexpr.ll new file mode 100644 index 000000000000..fc441d354c3f 
--- /dev/null +++ b/test/Transforms/NaCl/expand-tls-constexpr.ll @@ -0,0 +1,152 @@ +; RUN: opt < %s -nacl-expand-tls-constant-expr -S | FileCheck %s + +@tvar = thread_local global i32 0 + + +define i32 @test_converting_ptrtoint() { + ret i32 ptrtoint (i32* @tvar to i32) +} +; CHECK: define i32 @test_converting_ptrtoint() +; CHECK: %expanded = ptrtoint i32* @tvar to i32 +; CHECK: ret i32 %expanded + + +define i32 @test_converting_add() { + ret i32 add (i32 ptrtoint (i32* @tvar to i32), i32 4) +} +; CHECK: define i32 @test_converting_add() +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %expanded = add i32 %expanded1, 4 +; CHECK: ret i32 %expanded + + +define i32 @test_converting_multiple_operands() { + ret i32 add (i32 ptrtoint (i32* @tvar to i32), + i32 ptrtoint (i32* @tvar to i32)) +} +; CHECK: define i32 @test_converting_multiple_operands() +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %expanded = add i32 %expanded1, %expanded1 +; CHECK: ret i32 %expanded + + +define i32 @test_allocating_new_var_name(i32 %expanded) { + %result = add i32 %expanded, ptrtoint (i32* @tvar to i32) + ret i32 %result +} +; CHECK: define i32 @test_allocating_new_var_name(i32 %expanded) +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %result = add i32 %expanded, %expanded1 +; CHECK: ret i32 %result + + +define i8* @test_converting_bitcast() { + ret i8* bitcast (i32* @tvar to i8*) +} +; CHECK: define i8* @test_converting_bitcast() +; CHECK: %expanded = bitcast i32* @tvar to i8* +; CHECK: ret i8* %expanded + + +define i32* @test_converting_getelementptr() { + ; Use an index >1 to ensure that "inbounds" is not added automatically. + ret i32* getelementptr (i32, i32* @tvar, i32 2) +} +; CHECK: define i32* @test_converting_getelementptr() +; CHECK: %expanded = getelementptr i32, i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +; This is identical to @test_converting_getelementptr(). +; We need to check that both copies of getelementptr are fixed. +define i32* @test_converting_getelementptr_copy() { + ret i32* getelementptr (i32, i32* @tvar, i32 2) +} +; CHECK: define i32* @test_converting_getelementptr_copy() +; CHECK: %expanded = getelementptr i32, i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +define i32* @test_converting_getelementptr_inbounds() { + ret i32* getelementptr inbounds (i32, i32* @tvar, i32 2) +} +; CHECK: define i32* @test_converting_getelementptr_inbounds() +; CHECK: %expanded = getelementptr inbounds i32, i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +define i32* @test_converting_phi(i1 %cmp) { +entry: + br i1 %cmp, label %return, label %else + +else: + br label %return + +return: + %result = phi i32* [ getelementptr (i32, i32* @tvar, i32 1), %entry ], [ null, %else ] + ret i32* %result +} +; The converted ConstantExprs get pushed back into the PHI node's +; incoming block, which might be suboptimal but works in all cases. 
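+; Concretely, for the incoming value from %entry the pass emits an ordinary
+; instruction at the end of %entry, before its terminator, roughly:
+;
+;   entry:
+;     %expanded = getelementptr inbounds i32, i32* @tvar, i32 1
+;     br i1 %cmp, label %return, label %else
+;
+; and the PHI's incoming value for that edge is rewritten to %expanded, as the
+; CHECK lines below verify.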
+; CHECK: define i32* @test_converting_phi(i1 %cmp) +; CHECK: entry: +; CHECK: %expanded = getelementptr inbounds i32, i32* @tvar, i32 1 +; CHECK: else: +; CHECK: return: +; CHECK: %result = phi i32* [ %expanded, %entry ], [ null, %else ] + + +@addr1 = global i8* blockaddress(@test_converting_phi_with_indirectbr, %return) +@addr2 = global i8* blockaddress(@test_converting_phi_with_indirectbr, %else) +define i32* @test_converting_phi_with_indirectbr(i8* %addr) { +entry: + indirectbr i8* %addr, [ label %return, label %else ] + +else: + br label %return + +return: + %result = phi i32* [ getelementptr (i32, i32* @tvar, i32 1), %entry ], [ null, %else ] + ret i32* %result +} +; CHECK: define i32* @test_converting_phi_with_indirectbr(i8* %addr) +; CHECK: entry: +; CHECK: %expanded = getelementptr inbounds i32, i32* @tvar, i32 1 +; CHECK: return: +; CHECK: %result = phi i32* [ %expanded, %entry ], [ null, %else ] + + +; This tests that ExpandTlsConstantExpr correctly handles a PHI node +; that contains the same ConstantExpr twice. Using +; replaceAllUsesWith() is not correct on a PHI node when the new +; instruction has to be added to an incoming block. +define i32 @test_converting_phi_twice(i1 %arg) { + br i1 %arg, label %iftrue, label %iffalse +iftrue: + br label %exit +iffalse: + br label %exit +exit: + %result = phi i32 [ ptrtoint (i32* @tvar to i32), %iftrue ], + [ ptrtoint (i32* @tvar to i32), %iffalse ] + ret i32 %result +} +; CHECK: define i32 @test_converting_phi_twice(i1 %arg) +; CHECK: iftrue: +; CHECK: %expanded{{.*}} = ptrtoint i32* @tvar to i32 +; CHECK: iffalse: +; CHECK: %expanded{{.*}} = ptrtoint i32* @tvar to i32 +; CHECK: exit: +; CHECK: %result = phi i32 [ %expanded1, %iftrue ], [ %expanded, %iffalse ] + + +define i32 @test_converting_phi_multiple_entry(i1 %arg) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i32 [ ptrtoint (i32* @tvar to i32), %entry ], + [ ptrtoint (i32* @tvar to i32), %entry ] + ret i32 %result +} +; CHECK: define i32 @test_converting_phi_multiple_entry(i1 %arg) +; CHECK: %result = phi i32 [ %expanded, %entry ], [ %expanded, %entry ] diff --git a/test/Transforms/NaCl/expand-tls-constexpr2.ll b/test/Transforms/NaCl/expand-tls-constexpr2.ll new file mode 100644 index 000000000000..9f1bbe88cb48 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-constexpr2.ll @@ -0,0 +1,12 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +@tvar = thread_local global i32 0 + +define i32 @get_tvar() { + ret i32 ptrtoint (i32* @tvar to i32) +} +; CHECK: %tls_raw = call i8* @llvm.nacl.read.tp() +; CHECK: %tls_struct = bitcast i8* %tls_raw to %tls_struct* +; CHECK: %field = getelementptr %tls_struct, %tls_struct* %tls_struct, i32 -1, i32 1, i32 0 +; CHECK: %expanded = ptrtoint i32* %field to i32 +; CHECK: ret i32 %expanded diff --git a/test/Transforms/NaCl/expand-tls-phi.ll b/test/Transforms/NaCl/expand-tls-phi.ll new file mode 100644 index 000000000000..6c2715b8b965 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-phi.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + + +@tvar = thread_local global i32 123 + +define i32* @get_tvar(i1 %cmp) { +entry: + br i1 %cmp, label %return, label %else + +else: + br label %return + +return: + %result = phi i32* [ @tvar, %entry ], [ null, %else ] + ret i32* %result +} +; The TLS access gets pushed back into the PHI node's incoming block, +; which might be suboptimal but works in all cases. 
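+; Concretely, the @tvar use incoming from %entry is replaced by the usual TLS
+; expansion sequence, emitted in %entry itself (roughly, using the names the
+; CHECK lines elsewhere in these tests use):
+;
+;   %tls_raw = call i8* @llvm.nacl.read.tp()
+;   %tls_struct = bitcast i8* %tls_raw to %tls_struct*
+;   %field = getelementptr %tls_struct, %tls_struct* %tls_struct, i32 -1, i32 0, i32 0
+;
+; and the PHI then takes %field for that edge.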
+; CHECK: define i32* @get_tvar(i1 %cmp) { +; CHECK: entry: +; CHECK: %field = getelementptr %tls_struct, %tls_struct* %tls_struct, i32 -1, i32 0, i32 0 +; CHECK: else: +; CHECK: return: +; CHECK: %result = phi i32* [ %field, %entry ], [ null, %else ] + + +; This tests that ExpandTls correctly handles a PHI node that contains +; the same TLS variable twice. Using replaceAllUsesWith() is not +; correct on a PHI node when the new instruction has to be added to an +; incoming block. +define i32* @tls_phi_twice(i1 %arg) { + br i1 %arg, label %iftrue, label %iffalse +iftrue: + br label %exit +iffalse: + br label %exit +exit: + %result = phi i32* [ @tvar, %iftrue ], [ @tvar, %iffalse ] + ret i32* %result +} +; CHECK: define i32* @tls_phi_twice(i1 %arg) { +; CHECK: iftrue: +; CHECK: %field{{.*}} = getelementptr %tls_struct, %tls_struct* %tls_struct{{.*}}, i32 -1, i32 0, i32 0 +; CHECK: iffalse: +; CHECK: %field{{.*}} = getelementptr %tls_struct, %tls_struct* %tls_struct{{.*}}, i32 -1, i32 0, i32 0 +; CHECK: exit: +; CHECK: %result = phi i32* [ %field{{.*}}, %iftrue ], [ %field{{.*}}, %iffalse ] + + +; In this corner case, ExpandTls must expand out @tvar only once, +; otherwise it will produce invalid IR. +define i32* @tls_phi_multiple_entry(i1 %arg) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i32* [ @tvar, %entry ], [ @tvar, %entry ] + ret i32* %result +} +; CHECK: define i32* @tls_phi_multiple_entry(i1 %arg) { +; CHECK: %result = phi i32* [ %field, %entry ], [ %field, %entry ] diff --git a/test/Transforms/NaCl/expand-tls.ll b/test/Transforms/NaCl/expand-tls.ll new file mode 100644 index 000000000000..b1159729544b --- /dev/null +++ b/test/Transforms/NaCl/expand-tls.ll @@ -0,0 +1,86 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +; All thread-local variables should be removed +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s -check-prefix=NO_TLS + +; NO_TLS-NOT: thread_local + +@tvar1 = thread_local global i64 123 +@tvar2 = thread_local global i32 456 + + +; CHECK: %tls_init_template = type <{ i64, i32 }> +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> +; CHECK: %tls_bss_template = type <{ [4 x i8] }> + + +; CHECK: @__tls_template_start = internal constant %tls_init_template <{ i64 123, i32 456 }> + +; CHECK: @__tls_template_alignment = internal constant i32 8 + + +define i64* @get_tvar1() { + ret i64* @tvar1 +} +; CHECK: define i64* @get_tvar1() +; CHECK: %tls_raw = call i8* @llvm.nacl.read.tp() +; CHECK: %tls_struct = bitcast i8* %tls_raw to %tls_struct* +; CHECK: %field = getelementptr %tls_struct, %tls_struct* %tls_struct, i32 -1, i32 0, i32 0 +; CHECK: ret i64* %field + + +define i32* @get_tvar2() { + ret i32* @tvar2 +} +; Much the same as for get_tvar1. 
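+; The only difference is the field index: @tvar2 is the second member of
+; %tls_init_template, so the final getelementptr index is 1 rather than 0.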
+; CHECK: define i32* @get_tvar2() +; CHECK: %field = getelementptr %tls_struct, %tls_struct* %tls_struct, i32 -1, i32 0, i32 1 + + +; Check that we define global variables for TLS templates + +@__tls_template_start = external global i8 +@__tls_template_tdata_end = external global i8 +@__tls_template_end = external global i8 + +define i8* @get_tls_template_start() { + ret i8* @__tls_template_start +} +; CHECK: define i8* @get_tls_template_start() +; CHECK: ret i8* bitcast (%tls_init_template* @__tls_template_start to i8*) + +define i8* @get_tls_template_tdata_end() { + ret i8* @__tls_template_tdata_end +} +; CHECK: define i8* @get_tls_template_tdata_end() +; CHECK: ret i8* bitcast (%tls_init_template* getelementptr inbounds (%tls_init_template, %tls_init_template* @__tls_template_start, i32 1) to i8*) + +define i8* @get_tls_template_end() { + ret i8* @__tls_template_end +} +; CHECK: define i8* @get_tls_template_end() +; CHECK: ret i8* bitcast (%tls_struct* getelementptr (%tls_struct, %tls_struct* bitcast (%tls_init_template* @__tls_template_start to %tls_struct*), i32 1) to i8*) + + +; Check that we define the TLS layout functions + +declare i32 @__nacl_tp_tls_offset(i32) +declare i32 @__nacl_tp_tdb_offset(i32) + +define i32 @test_get_tp_tls_offset(i32 %tls_size) { + %offset = call i32 @__nacl_tp_tls_offset(i32 %tls_size) + ret i32 %offset +} +; Uses of the intrinsic are replaced with uses of a regular function. +; CHECK: define i32 @test_get_tp_tls_offset +; CHECK: call i32 @nacl_tp_tls_offset +; NO_TLS-NOT: __nacl_tp_tls_offset + +define i32 @test_get_tp_tdb_offset(i32 %tdb_size) { + %offset = call i32 @__nacl_tp_tdb_offset(i32 %tdb_size) + ret i32 %offset +} +; Uses of the intrinsic are replaced with uses of a regular function. +; CHECK: define i32 @test_get_tp_tdb_offset +; CHECK: call i32 @nacl_tp_tdb_offset +; NO_TLS-NOT: __nacl_tp_tdb_offset diff --git a/test/Transforms/NaCl/expand-varargs-attrs.ll b/test/Transforms/NaCl/expand-varargs-attrs.ll new file mode 100644 index 000000000000..17061abd3cef --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs-attrs.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -expand-varargs -S | FileCheck %s + +declare i32 @varargs_func(i32 %arg, ...) + + +; Check that attributes such as "byval" are preserved on fixed arguments. + +%MyStruct = type { i64, i64 } + +define void @func_with_arg_attrs(%MyStruct* byval, ...) { + ret void +} +; CHECK-LABEL: define void @func_with_arg_attrs(%MyStruct* byval, i8* noalias %varargs) { + + +declare void @take_struct_arg(%MyStruct* byval %s, ...) + +define void @call_with_arg_attrs(%MyStruct* %s) { + call void (%MyStruct*, ...) @take_struct_arg(%MyStruct* byval %s) + ret void +} +; CHECK-LABEL: @call_with_arg_attrs( +; CHECK: call void bitcast (void (%MyStruct*, i8*)* @take_struct_arg to void (%MyStruct*, { i32 }*)*)(%MyStruct* byval %s, { i32 }* %vararg_buffer) + + +; The "byval" attribute here should be dropped. +define i32 @pass_struct_via_vararg1(%MyStruct* %s) { + %result = call i32 (i32, ...) @varargs_func(i32 111, %MyStruct* byval %s) + ret i32 %result +} +; CHECK-LABEL: @pass_struct_via_vararg1( +; CHECK: %result = call i32 bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, { %MyStruct }*)*)(i32 111, { %MyStruct }* %vararg_buffer) + + +; The "byval" attribute here should be dropped. +define i32 @pass_struct_via_vararg2(%MyStruct* %s) { + %result = call i32 (i32, ...) 
@varargs_func(i32 111, i32 2, %MyStruct* byval %s) + ret i32 %result +} +; CHECK-LABEL: @pass_struct_via_vararg2( +; CHECK: %result = call i32 bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, { i32, %MyStruct }*)*)(i32 111, { i32, %MyStruct }* %vararg_buffer) + + +; Check that return attributes such as "signext" are preserved. +define i32 @call_with_return_attr() { + %result = call signext i32 (i32, ...) @varargs_func(i32 111, i64 222) + ret i32 %result +} +; CHECK-LABEL: @call_with_return_attr( +; CHECK: %result = call signext i32 bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, { i64 }*)*)(i32 111, { i64 }* %vararg_buffer) + + +; Check that the "readonly" function attribute is preserved. +define i32 @call_readonly() { + %result = call i32 (i32, ...) @varargs_func(i32 111, i64 222) readonly + ret i32 %result +} +; CHECK-LABEL: @call_readonly( +; CHECK: %result = call i32 bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, { i64 }*)*)(i32 111, { i64 }* %vararg_buffer) #1 + + +; Check that the "tail" attribute gets removed, because the callee +; reads space alloca'd by the caller. +define i32 @tail_call() { + %result = tail call i32 (i32, ...) @varargs_func(i32 111, i64 222) + ret i32 %result +} +; CHECK-LABEL: @tail_call( +; CHECK: %result = call i32 bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, { i64 }*)*)(i32 111, { i64 }* %vararg_buffer) + + +; CHECK: attributes #1 = { readonly } diff --git a/test/Transforms/NaCl/expand-varargs-emscripten.ll b/test/Transforms/NaCl/expand-varargs-emscripten.ll new file mode 100644 index 000000000000..ae19c519f11f --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs-emscripten.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -mtriple=asmjs-unknown-emscripten -expand-varargs -S | FileCheck %s + +target datalayout = "p:32:32:32" + +%va_list = type i8* + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @llvm.va_copy(i8*, i8*) + +declare void @emscripten_asm_const_int(...) +declare void @emscripten_asm_const_double(...) +declare void @emscripten_landingpad(...) +declare void @emscripten_resume(...) + +define void @test(i32 %arg) { + call void (...) @emscripten_asm_const_int(i32 %arg) + call void (...) @emscripten_asm_const_double(i32 %arg) + call void (...) @emscripten_landingpad(i32 %arg) + call void (...) @emscripten_resume(i32 %arg) + ret void +} +; CHECK-LABEL: define void @test( +; CHECK-NEXT: call void (...) @emscripten_asm_const_int(i32 %arg) +; CHECK-NEXT: call void (...) @emscripten_asm_const_double(i32 %arg) +; CHECK-NEXT: call void (...) @emscripten_landingpad(i32 %arg) +; CHECK-NEXT: call void (...) @emscripten_resume(i32 %arg) +; CHECK-NEXT: ret void diff --git a/test/Transforms/NaCl/expand-varargs-struct.ll b/test/Transforms/NaCl/expand-varargs-struct.ll new file mode 100644 index 000000000000..755c9e81021f --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs-struct.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -expand-varargs -S | FileCheck %s + +declare i32 @varargs_func(i32 %arg, ...) + + +%MyStruct = type { i64, i64 } + +; Test passing a struct by value. +define i32 @varargs_call_struct(%MyStruct* %ptr) { + %result = call i32 (i32, ...) 
@varargs_func(i32 111, i64 222, %MyStruct* byval %ptr) + ret i32 %result +} +; CHECK-LABEL: @varargs_call_struct( +; CHECK: %vararg_ptr1 = getelementptr inbounds { i64, %MyStruct }, { i64, %MyStruct }* %vararg_buffer, i32 0, i32 1 +; CHECK: %1 = bitcast %MyStruct* %vararg_ptr1 to i8* +; CHECK: %2 = bitcast %MyStruct* %ptr to i8* +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 16, i32 1, i1 false) diff --git a/test/Transforms/NaCl/expand-varargs.ll b/test/Transforms/NaCl/expand-varargs.ll new file mode 100644 index 000000000000..814e13a5857c --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs.ll @@ -0,0 +1,250 @@ +; RUN: opt < %s -expand-varargs -S | FileCheck %s + +target datalayout = "p:32:32:32" + +%va_list = type i8* + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @llvm.va_copy(i8*, i8*) + +declare i32 @outside_func(i32 %arg, %va_list* %args) + +define i32 @varargs_func(i32 %arg, ...) { + %arglist_alloc = alloca %va_list + %arglist = bitcast %va_list* %arglist_alloc to i8* + + call void @llvm.va_start(i8* %arglist) + %result = call i32 @outside_func(i32 %arg, %va_list* %arglist_alloc) + call void @llvm.va_end(i8* %arglist) + ret i32 %result +} +; CHECK-LABEL: define i32 @varargs_func(i32 %arg, i8* noalias %varargs) { +; CHECK-NEXT: %arglist_alloc = alloca i8* +; CHECK-NEXT: %arglist = bitcast i8** %arglist_alloc to i8* +; CHECK-NEXT: %arglist1 = bitcast i8* %arglist to i8** +; CHECK-NEXT: store i8* %varargs, i8** %arglist1 +; CHECK-NEXT: %result = call i32 @outside_func(i32 %arg, i8** %arglist_alloc) +; CHECK-NEXT: ret i32 %result + + +; Obtain each argument in the va_list according to its type (known from fmt). +; This function ensures that each argument is loaded with the same alignment as +; if it were inside a struct: this is how the caller passed the arguments. +; +; Note that alignof is represented as a GEP off of nullptr to the second element +; of a struct with { i1, types_whose_alignment_is_desired }. +define void @varargs_func_2(i8* nocapture %o8, i8* nocapture readonly %fmt, ...) 
{ +; CHECK-LABEL: @varargs_func_2( +entry: + %o16 = bitcast i8* %o8 to i16* + %o32 = bitcast i8* %o8 to i32* + %o64 = bitcast i8* %o8 to i64* + %ofloat = bitcast i8* %o8 to float* + %odouble = bitcast i8* %o8 to double* + + %arglist_alloc = alloca [4 x i32], align 4 + %arglist = getelementptr inbounds [4 x i32], [4 x i32]* %arglist_alloc, i32 0, i32 0 + %arglist.i8 = bitcast [4 x i32]* %arglist_alloc to i8* + call void @llvm.va_start(i8* %arglist.i8) + br label %start + +start: + %idx = phi i32 [ 0, %entry ], [ %inc, %next ] + %fmt.gep = getelementptr inbounds i8, i8* %fmt, i32 %idx + %arg.type = load i8, i8* %fmt.gep + switch i8 %arg.type, label %next [ + i8 0, label %done + i8 1, label %type.i8 + i8 2, label %type.i16 + i8 3, label %type.i32 + i8 4, label %type.i64 + i8 5, label %type.float + i8 6, label %type.double + ] + +type.i8: ; CHECK: type.i8: + %i8 = va_arg i32* %arglist, i8 + store i8 %i8, i8* %o8 + br label %next +; CHECK-NEXT: %arglist1 = bitcast i32* %arglist to i8** +; CHECK-NEXT: %arglist_current = load i8*, i8** %arglist1 +; CHECK-NEXT: %[[P2I:[0-9]+]] = ptrtoint i8* %arglist_current to i32 +; %A8 = (uintptr_t)Addr + Alignment - 1 +; CHECK-NEXT: %[[A8:[0-9]+]] = add nuw i32 %[[P2I]], sub nuw (i32 ptrtoint (i8* getelementptr ({ i1, i8 }, { i1, i8 }* null, i64 0, i32 1) to i32), i32 1) +; %B8 = %1 & ~(uintptr_t)(Alignment - 1) +; CHECK-NEXT: %[[B8:[0-9]+]] = and i32 %[[A8]], xor (i32 sub nuw (i32 ptrtoint (i8* getelementptr ({ i1, i8 }, { i1, i8 }* null, i64 0, i32 1) to i32), i32 1), i32 -1) +; CHECK-NEXT: %[[C8:[0-9]+]] = inttoptr i32 %[[B8]] to i8* +; CHECK-NEXT: %i8 = load i8, i8* %[[C8]] +; CHECK-NEXT: %arglist_next = getelementptr inbounds i8, i8* %[[C8]], i32 1 +; CHECK-NEXT: store i8* %arglist_next, i8** %arglist1 +; CHECK-NEXT: store i8 %i8, i8* %o8 +; CHECK-NEXT: br label %next + +type.i16: ; CHECK: type.i16: + %i16 = va_arg i32* %arglist, i16 + store i16 %i16, i16* %o16 + br label %next +; CHECK: %[[A16:[0-9]+]] = add nuw i32 %4, sub nuw (i32 ptrtoint (i16* getelementptr ({ i1, i16 }, { i1, i16 }* null, i64 0, i32 1) to i32), i32 1) +; CHECK-NEXT: %[[B16:[0-9]+]] = and i32 %[[A16]], xor (i32 sub nuw (i32 ptrtoint (i16* getelementptr ({ i1, i16 }, { i1, i16 }* null, i64 0, i32 1) to i32), i32 1), i32 -1) +; CHECK-NEXT: %[[C16:[0-9]+]] = inttoptr i32 %[[B16]] to i16* +; CHECK-NEXT: %i16 = load i16, i16* %[[C16]] + +type.i32: ; CHECK: type.i32: + %i32 = va_arg i32* %arglist, i32 + store i32 %i32, i32* %o32 + br label %next +; CHECK: %[[A32:[0-9]+]] = add nuw i32 %8, sub nuw (i32 ptrtoint (i32* getelementptr ({ i1, i32 }, { i1, i32 }* null, i64 0, i32 1) to i32), i32 1) +; CHECK-NEXT: %[[B32:[0-9]+]] = and i32 %[[A32]], xor (i32 sub nuw (i32 ptrtoint (i32* getelementptr ({ i1, i32 }, { i1, i32 }* null, i64 0, i32 1) to i32), i32 1), i32 -1) +; CHECK-NEXT: %[[C32:[0-9]+]] = inttoptr i32 %[[B32]] to i32* +; CHECK-NEXT: %i32 = load i32, i32* %[[C32]] + +type.i64: ; CHECK: type.i64: + %i64 = va_arg i32* %arglist, i64 + store i64 %i64, i64* %o64 + br label %next +; CHECK: %[[A64:[0-9]+]] = add nuw i32 %12, sub nuw (i32 ptrtoint (i64* getelementptr ({ i1, i64 }, { i1, i64 }* null, i64 0, i32 1) to i32), i32 1) +; CHECK-NEXT: %[[B64:[0-9]+]] = and i32 %[[A64]], xor (i32 sub nuw (i32 ptrtoint (i64* getelementptr ({ i1, i64 }, { i1, i64 }* null, i64 0, i32 1) to i32), i32 1), i32 -1) +; CHECK-NEXT: %[[C64:[0-9]+]] = inttoptr i32 %[[B64]] to i64* +; CHECK-NEXT: %i64 = load i64, i64* %[[C64]] + +type.float: ; CHECK: type.float: + %float = va_arg i32* %arglist, float + store 
float %float, float* %ofloat + br label %next +; CHECK: %[[AF:[0-9]+]] = add nuw i32 %16, sub nuw (i32 ptrtoint (float* getelementptr ({ i1, float }, { i1, float }* null, i64 0, i32 1) to i32), i32 1) +; CHECK-NEXT: %[[BF:[0-9]+]] = and i32 %[[AF]], xor (i32 sub nuw (i32 ptrtoint (float* getelementptr ({ i1, float }, { i1, float }* null, i64 0, i32 1) to i32), i32 1), i32 -1) +; CHECK-NEXT: %[[CF:[0-9]+]] = inttoptr i32 %[[BF]] to float* +; CHECK-NEXT: %float = load float, float* %[[CF]] + +type.double: ; CHECK: type.double: + %double = va_arg i32* %arglist, double + store double %double, double* %odouble + br label %next +; CHECK: %[[AD:[0-9]+]] = add nuw i32 %20, sub nuw (i32 ptrtoint (double* getelementptr ({ i1, double }, { i1, double }* null, i64 0, i32 1) to i32), i32 1) +; CHECK-NEXT: %[[BD:[0-9]+]] = and i32 %[[AD]], xor (i32 sub nuw (i32 ptrtoint (double* getelementptr ({ i1, double }, { i1, double }* null, i64 0, i32 1) to i32), i32 1), i32 -1) +; CHECK-NEXT: %[[CD:[0-9]+]] = inttoptr i32 %[[BD]] to double* +; CHECK-NEXT: %double = load double, double* %[[CD]] + +next: + %inc = add i32 %idx, 1 + br label %start + +done: + call void @llvm.va_end(i8* %arglist.i8) + ret void +} + + +define i32 @varargs_call1() { + %result = call i32 (i32, ...) @varargs_func(i32 111, i64 222, i32 333, double 4.0) + ret i32 %result +} +; CHECK-LABEL: @varargs_call1( +; CHECK-NEXT: %vararg_buffer = alloca { i64, i32, double } +; CHECK-NEXT: %vararg_lifetime_bitcast = bitcast { i64, i32, double }* %vararg_buffer to i8* +; CHECK-NEXT: call void @llvm.lifetime.start(i64 24, i8* %vararg_lifetime_bitcast) +; CHECK-NEXT: %vararg_ptr = getelementptr inbounds { i64, i32, double }, { i64, i32, double }* %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i64 222, i64* %vararg_ptr +; CHECK-NEXT: %vararg_ptr1 = getelementptr inbounds { i64, i32, double }, { i64, i32, double }* %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 333, i32* %vararg_ptr1 +; CHECK-NEXT: %vararg_ptr2 = getelementptr inbounds { i64, i32, double }, { i64, i32, double }* %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store double 4.{{0*}}e+00, double* %vararg_ptr2 +; CHECK-NEXT: %result = call i32 bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, { i64, i32, double }*)*)(i32 111, { i64, i32, double }* %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end(i64 24, i8* %vararg_lifetime_bitcast) +; CHECK-NEXT: ret i32 %result + + +; Check that the pass works when there are no variable arguments. +define i32 @call_with_zero_varargs() { + %result = call i32 (i32, ...) @varargs_func(i32 111) + ret i32 %result +} +; CHECK-LABEL: @call_with_zero_varargs( +; We have a dummy i32 field to deal with buggy programs: +; CHECK-NEXT: %vararg_buffer = alloca { i32 } +; CHECK-NEXT: %vararg_lifetime_bitcast = bitcast { i32 }* %vararg_buffer to i8* +; CHECK-NEXT: call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast) +; CHECK-NEXT: %result = call i32 bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, { i32 }*)*)(i32 111, { i32 }* %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast) +; CHECK-NEXT: ret i32 %result + + +; Check that "invoke" instructions are expanded out too. 
+define i32 @varargs_invoke() personality i8* null {
+  %result = invoke i32 (i32, ...) @varargs_func(i32 111, i64 222)
+      to label %cont unwind label %lpad
+cont:
+  ret i32 %result
+lpad:
+  %lp = landingpad { i8*, i32 } cleanup
+  ret i32 0
+}
+; CHECK-LABEL: @varargs_invoke(
+; CHECK: call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast)
+; CHECK: %result = invoke i32 bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, { i64 }*)*)(i32 111, { i64 }* %vararg_buffer)
+; CHECK-NEXT: to label %cont unwind label %lpad
+; CHECK: cont:
+; CHECK-NEXT: call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast)
+; CHECK: lpad:
+; CHECK: call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast)
+
+
+define void @varargs_multiple_calls() {
+  %call1 = call i32 (i32, ...) @varargs_func(i32 11, i64 22, i32 33)
+  %call2 = call i32 (i32, ...) @varargs_func(i32 44, i64 55, i32 66)
+  ret void
+}
+; CHECK-LABEL: @varargs_multiple_calls(
+; The added allocas should appear at the start of the function.
+; CHECK: %vararg_buffer{{.*}} = alloca { i64, i32 }
+; CHECK: %vararg_buffer{{.*}} = alloca { i64, i32 }
+; CHECK: %call1 = call i32 bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, { i64, i32 }*)*)(i32 11, { i64, i32 }* %vararg_buffer{{.*}})
+; CHECK: %call2 = call i32 bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, { i64, i32 }*)*)(i32 44, { i64, i32 }* %vararg_buffer{{.*}})
+
+
+
+define i32 @va_arg_i32(i8* %arglist) {
+  %result = va_arg i8* %arglist, i32
+  ret i32 %result
+}
+; CHECK-LABEL: define i32 @va_arg_i32(i8* %arglist) {
+; CHECK-NEXT: %arglist1 = bitcast i8* %arglist to i32**
+; CHECK-NEXT: %arglist_current = load i32*, i32** %arglist1
+; CHECK-NEXT: %1 = ptrtoint i32* %arglist_current to i32
+; CHECK-NEXT: %2 = add nuw i32 %1, sub nuw (i32 ptrtoint (i32* getelementptr ({ i1, i32 }, { i1, i32 }* null, i64 0, i32 1) to i32), i32 1)
+; CHECK-NEXT: %3 = and i32 %2, xor (i32 sub nuw (i32 ptrtoint (i32* getelementptr ({ i1, i32 }, { i1, i32 }* null, i64 0, i32 1) to i32), i32 1), i32 -1)
+; CHECK-NEXT: %4 = inttoptr i32 %3 to i32*
+; CHECK-NEXT: %result = load i32, i32* %4
+; CHECK-NEXT: %arglist_next = getelementptr inbounds i32, i32* %4, i32 1
+; CHECK-NEXT: store i32* %arglist_next, i32** %arglist1
+; CHECK-NEXT: ret i32 %result
+
+
+define i64 @va_arg_i64(i8* %arglist) {
+  %result = va_arg i8* %arglist, i64
+  ret i64 %result
+}
+; CHECK-LABEL: define i64 @va_arg_i64(i8* %arglist) {
+; CHECK-NEXT: %arglist1 = bitcast i8* %arglist to i64**
+; CHECK-NEXT: %arglist_current = load i64*, i64** %arglist1
+; CHECK-NEXT: %1 = ptrtoint i64* %arglist_current to i32
+; CHECK-NEXT: %2 = add nuw i32 %1, sub nuw (i32 ptrtoint (i64* getelementptr ({ i1, i64 }, { i1, i64 }* null, i64 0, i32 1) to i32), i32 1)
+; CHECK-NEXT: %3 = and i32 %2, xor (i32 sub nuw (i32 ptrtoint (i64* getelementptr ({ i1, i64 }, { i1, i64 }* null, i64 0, i32 1) to i32), i32 1), i32 -1)
+; CHECK-NEXT: %4 = inttoptr i32 %3 to i64*
+; CHECK-NEXT: %result = load i64, i64* %4
+; CHECK-NEXT: %arglist_next = getelementptr inbounds i64, i64* %4, i32 1
+; CHECK-NEXT: store i64* %arglist_next, i64** %arglist1
+; CHECK-NEXT: ret i64 %result
+
+
+define void @do_va_copy(i8* %dest, i8* %src) {
+  call void @llvm.va_copy(i8* %dest, i8* %src)
+  ret void
+}
+; CHECK-LABEL: define void @do_va_copy(
+; CHECK-NEXT: %vacopy_src = bitcast i8* %src to i8**
+; CHECK-NEXT: %vacopy_dest = bitcast i8* %dest to i8**
+; CHECK-NEXT: %vacopy_currentptr = load i8*, i8** %vacopy_src
+; CHECK-NEXT: store i8* %vacopy_currentptr, i8**
%vacopy_dest +; CHECK-NEXT: ret void diff --git a/test/Transforms/NaCl/fix-vector-load-store-alignment.ll b/test/Transforms/NaCl/fix-vector-load-store-alignment.ll new file mode 100644 index 000000000000..914da432782b --- /dev/null +++ b/test/Transforms/NaCl/fix-vector-load-store-alignment.ll @@ -0,0 +1,435 @@ +; RUN: opt -fix-vector-load-store-alignment %s -S | FileCheck %s + +; Test that vector load/store are always element-aligned when possible, and get +; converted to scalar load/store when not. + +; The datalayout is needed to determine the alignment of the load/stores. +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" + +; Load ========================================================================= + +define <4 x i1> @test_load_4xi1(<4 x i1>* %loc) { + ; CHECK-LABEL: test_load_4xi1 + ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i1>* %loc to i1* + ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 0 + ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i1, i1* %[[GEP0]], align 4 + ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <4 x i1> undef, i1 %[[LD0]], i32 0 + ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 1 + ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i1, i1* %[[GEP1]], align 1 + ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <4 x i1> %[[INS0]], i1 %[[LD1]], i32 1 + ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 2 + ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i1, i1* %[[GEP2]], align 2 + ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <4 x i1> %[[INS1]], i1 %[[LD2]], i32 2 + ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 3 + ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i1, i1* %[[GEP3]], align 1 + ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <4 x i1> %[[INS2]], i1 %[[LD3]], i32 3 + ; CHECK-NEXT: ret <4 x i1> %[[INS3]] + %loaded = load <4 x i1>, <4 x i1>* %loc + ret <4 x i1> %loaded +} + +define <8 x i1> @test_load_8xi1(<8 x i1>* %loc) { + ; CHECK-LABEL: test_load_8xi1 + ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <8 x i1>* %loc to i1* + ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 0 + ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i1, i1* %[[GEP0]], align 8 + ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <8 x i1> undef, i1 %[[LD0]], i32 0 + ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 1 + ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i1, i1* %[[GEP1]], align 1 + ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <8 x i1> %[[INS0]], i1 %[[LD1]], i32 1 + ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 2 + ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i1, i1* %[[GEP2]], align 2 + ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <8 x i1> %[[INS1]], i1 %[[LD2]], i32 2 + ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 3 + ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i1, i1* %[[GEP3]], align 1 + ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <8 x i1> %[[INS2]], i1 %[[LD3]], i32 3 + ; CHECK-NEXT: %[[GEP4:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 4 + ; CHECK-NEXT: %[[LD4:[0-9]+]] = load i1, i1* %[[GEP4]], align 4 + ; CHECK-NEXT: %[[INS4:[0-9]+]] = insertelement <8 x i1> %[[INS3]], i1 %[[LD4]], i32 4 + ; CHECK-NEXT: %[[GEP5:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 5 + ; CHECK-NEXT: %[[LD5:[0-9]+]] = load i1, i1* %[[GEP5]], align 1 + ; CHECK-NEXT: %[[INS5:[0-9]+]] = insertelement <8 x i1> %[[INS4]], i1 %[[LD5]], i32 5 + ; 
CHECK-NEXT: %[[GEP6:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 6 + ; CHECK-NEXT: %[[LD6:[0-9]+]] = load i1, i1* %[[GEP6]], align 2 + ; CHECK-NEXT: %[[INS6:[0-9]+]] = insertelement <8 x i1> %[[INS5]], i1 %[[LD6]], i32 6 + ; CHECK-NEXT: %[[GEP7:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 7 + ; CHECK-NEXT: %[[LD7:[0-9]+]] = load i1, i1* %[[GEP7]], align 1 + ; CHECK-NEXT: %[[INS7:[0-9]+]] = insertelement <8 x i1> %[[INS6]], i1 %[[LD7]], i32 7 + ; CHECK-NEXT: ret <8 x i1> %[[INS7]] + %loaded = load <8 x i1>, <8 x i1>* %loc + ret <8 x i1> %loaded +} + +define <16 x i1> @test_load_16xi1(<16 x i1>* %loc) { + ; CHECK-LABEL: test_load_16xi1 + ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <16 x i1>* %loc to i1* + ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 0 + ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i1, i1* %[[GEP0]], align 16 + ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <16 x i1> undef, i1 %[[LD0]], i32 0 + ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 1 + ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i1, i1* %[[GEP1]], align 1 + ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <16 x i1> %[[INS0]], i1 %[[LD1]], i32 1 + ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 2 + ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i1, i1* %[[GEP2]], align 2 + ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <16 x i1> %[[INS1]], i1 %[[LD2]], i32 2 + ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 3 + ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i1, i1* %[[GEP3]], align 1 + ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <16 x i1> %[[INS2]], i1 %[[LD3]], i32 3 + ; CHECK-NEXT: %[[GEP4:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 4 + ; CHECK-NEXT: %[[LD4:[0-9]+]] = load i1, i1* %[[GEP4]], align 4 + ; CHECK-NEXT: %[[INS4:[0-9]+]] = insertelement <16 x i1> %[[INS3]], i1 %[[LD4]], i32 4 + ; CHECK-NEXT: %[[GEP5:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 5 + ; CHECK-NEXT: %[[LD5:[0-9]+]] = load i1, i1* %[[GEP5]], align 1 + ; CHECK-NEXT: %[[INS5:[0-9]+]] = insertelement <16 x i1> %[[INS4]], i1 %[[LD5]], i32 5 + ; CHECK-NEXT: %[[GEP6:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 6 + ; CHECK-NEXT: %[[LD6:[0-9]+]] = load i1, i1* %[[GEP6]], align 2 + ; CHECK-NEXT: %[[INS6:[0-9]+]] = insertelement <16 x i1> %[[INS5]], i1 %[[LD6]], i32 6 + ; CHECK-NEXT: %[[GEP7:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 7 + ; CHECK-NEXT: %[[LD7:[0-9]+]] = load i1, i1* %[[GEP7]], align 1 + ; CHECK-NEXT: %[[INS7:[0-9]+]] = insertelement <16 x i1> %[[INS6]], i1 %[[LD7]], i32 7 + ; CHECK-NEXT: %[[GEP8:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 8 + ; CHECK-NEXT: %[[LD8:[0-9]+]] = load i1, i1* %[[GEP8]], align 8 + ; CHECK-NEXT: %[[INS8:[0-9]+]] = insertelement <16 x i1> %[[INS7]], i1 %[[LD8]], i32 8 + ; CHECK-NEXT: %[[GEP9:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 9 + ; CHECK-NEXT: %[[LD9:[0-9]+]] = load i1, i1* %[[GEP9]], align 1 + ; CHECK-NEXT: %[[INS9:[0-9]+]] = insertelement <16 x i1> %[[INS8]], i1 %[[LD9]], i32 9 + ; CHECK-NEXT: %[[GEP10:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 10 + ; CHECK-NEXT: %[[LD10:[0-9]+]] = load i1, i1* %[[GEP10]], align 2 + ; CHECK-NEXT: %[[INS10:[0-9]+]] = insertelement <16 x i1> %[[INS9]], i1 %[[LD10]], i32 10 + ; CHECK-NEXT: %[[GEP11:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 11 + ; CHECK-NEXT: %[[LD11:[0-9]+]] = load i1, i1* %[[GEP11]], align 1 + ; CHECK-NEXT: %[[INS11:[0-9]+]] = 
insertelement <16 x i1> %[[INS10]], i1 %[[LD11]], i32 11 + ; CHECK-NEXT: %[[GEP12:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 12 + ; CHECK-NEXT: %[[LD12:[0-9]+]] = load i1, i1* %[[GEP12]], align 4 + ; CHECK-NEXT: %[[INS12:[0-9]+]] = insertelement <16 x i1> %[[INS11]], i1 %[[LD12]], i32 12 + ; CHECK-NEXT: %[[GEP13:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 13 + ; CHECK-NEXT: %[[LD13:[0-9]+]] = load i1, i1* %[[GEP13]], align 1 + ; CHECK-NEXT: %[[INS13:[0-9]+]] = insertelement <16 x i1> %[[INS12]], i1 %[[LD13]], i32 13 + ; CHECK-NEXT: %[[GEP14:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 14 + ; CHECK-NEXT: %[[LD14:[0-9]+]] = load i1, i1* %[[GEP14]], align 2 + ; CHECK-NEXT: %[[INS14:[0-9]+]] = insertelement <16 x i1> %[[INS13]], i1 %[[LD14]], i32 14 + ; CHECK-NEXT: %[[GEP15:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 15 + ; CHECK-NEXT: %[[LD15:[0-9]+]] = load i1, i1* %[[GEP15]], align 1 + ; CHECK-NEXT: %[[INS15:[0-9]+]] = insertelement <16 x i1> %[[INS14]], i1 %[[LD15]], i32 15 + ; CHECK-NEXT: ret <16 x i1> %[[INS15]] + %loaded = load <16 x i1>, <16 x i1>* %loc + ret <16 x i1> %loaded +} + +define <4 x i32> @test_load_4xi32_align0(<4 x i32>* %loc) { + ; CHECK-LABEL: test_load_4xi32_align0 + ; CHECK-NEXT: %loaded = load <4 x i32>, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret <4 x i32> %loaded + %loaded = load <4 x i32>, <4 x i32>* %loc + ret <4 x i32> %loaded +} + +define <4 x i32> @test_load_4xi32_align1(<4 x i32>* %loc) { + ; CHECK-LABEL: test_load_4xi32_align1 + ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* + ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 0 + ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i32, i32* %[[GEP0]], align 1 + ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <4 x i32> undef, i32 %[[LD0]], i32 0 + ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 1 + ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i32, i32* %[[GEP1]], align 1 + ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <4 x i32> %[[INS0]], i32 %[[LD1]], i32 1 + ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 2 + ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i32, i32* %[[GEP2]], align 1 + ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <4 x i32> %[[INS1]], i32 %[[LD2]], i32 2 + ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 3 + ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i32, i32* %[[GEP3]], align 1 + ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <4 x i32> %[[INS2]], i32 %[[LD3]], i32 3 + ; CHECK-NEXT: ret <4 x i32> %[[INS3]] + %loaded = load <4 x i32>, <4 x i32>* %loc, align 1 + ret <4 x i32> %loaded +} + +define <4 x i32> @test_load_4xi32_align2(<4 x i32>* %loc) { + ; CHECK-LABEL: test_load_4xi32_align2 + ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* + ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 0 + ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i32, i32* %[[GEP0]], align 2 + ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <4 x i32> undef, i32 %[[LD0]], i32 0 + ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 1 + ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i32, i32* %[[GEP1]], align 2 + ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <4 x i32> %[[INS0]], i32 %[[LD1]], i32 1 + ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 2 + ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i32, i32* %[[GEP2]], align 2 + ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <4 x i32> 
%[[INS1]], i32 %[[LD2]], i32 2 + ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 3 + ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i32, i32* %[[GEP3]], align 2 + ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <4 x i32> %[[INS2]], i32 %[[LD3]], i32 3 + ; CHECK-NEXT: ret <4 x i32> %[[INS3]] + %loaded = load <4 x i32>, <4 x i32>* %loc, align 2 + ret <4 x i32> %loaded +} + +define <4 x i32> @test_load_4xi32_align4(<4 x i32>* %loc) { + ; CHECK-LABEL: test_load_4xi32_align4 + ; CHECK-NEXT: %loaded = load <4 x i32>, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret <4 x i32> %loaded + %loaded = load <4 x i32>, <4 x i32>* %loc, align 4 + ret <4 x i32> %loaded +} + +define <4 x i32> @test_load_4xi32_align8(<4 x i32>* %loc) { + ; CHECK-LABEL: test_load_4xi32_align8 + ; CHECK-NEXT: %loaded = load <4 x i32>, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret <4 x i32> %loaded + %loaded = load <4 x i32>, <4 x i32>* %loc, align 8 + ret <4 x i32> %loaded +} + +define <4 x i32> @test_load_4xi32_align16(<4 x i32>* %loc) { + ; CHECK-LABEL: test_load_4xi32_align16 + ; CHECK-NEXT: %loaded = load <4 x i32>, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret <4 x i32> %loaded + %loaded = load <4 x i32>, <4 x i32>* %loc, align 16 + ret <4 x i32> %loaded +} + +define <4 x i32> @test_load_4xi32_align32(<4 x i32>* %loc) { + ; CHECK-LABEL: test_load_4xi32_align32 + ; CHECK-NEXT: %loaded = load <4 x i32>, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret <4 x i32> %loaded + %loaded = load <4 x i32>, <4 x i32>* %loc, align 32 + ret <4 x i32> %loaded +} + +define <4 x float> @test_load_4xfloat_align0(<4 x float>* %loc) { + ; CHECK-LABEL: test_load_4xfloat_align0 + ; CHECK-NEXT: %loaded = load <4 x float>, <4 x float>* %loc, align 4 + ; CHECK-NEXT: ret <4 x float> %loaded + %loaded = load <4 x float>, <4 x float>* %loc + ret <4 x float> %loaded +} + +define <4 x float> @test_load_4xfloat_align2(<4 x float>* %loc) { + ; CHECK-LABEL: test_load_4xfloat_align2 + ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x float>* %loc to float* + ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds float, float* %[[BASE]], i32 0 + ; CHECK-NEXT: %[[LD0:[0-9]+]] = load float, float* %[[GEP0]], align 2 + ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <4 x float> undef, float %[[LD0]], i32 0 + ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds float, float* %[[BASE]], i32 1 + ; CHECK-NEXT: %[[LD1:[0-9]+]] = load float, float* %[[GEP1]], align 2 + ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <4 x float> %[[INS0]], float %[[LD1]], i32 1 + ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds float, float* %[[BASE]], i32 2 + ; CHECK-NEXT: %[[LD2:[0-9]+]] = load float, float* %[[GEP2]], align 2 + ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <4 x float> %[[INS1]], float %[[LD2]], i32 2 + ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds float, float* %[[BASE]], i32 3 + ; CHECK-NEXT: %[[LD3:[0-9]+]] = load float, float* %[[GEP3]], align 2 + ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <4 x float> %[[INS2]], float %[[LD3]], i32 3 + ; CHECK-NEXT: ret <4 x float> %[[INS3]] + %loaded = load <4 x float>, <4 x float>* %loc, align 2 + ret <4 x float> %loaded +} + +define <4 x float> @test_load_4xfloat_align4(<4 x float>* %loc) { + ; CHECK-LABEL: test_load_4xfloat_align4 + ; CHECK-NEXT: %loaded = load <4 x float>, <4 x float>* %loc, align 4 + ; CHECK-NEXT: ret <4 x float> %loaded + %loaded = load <4 x float>, <4 x float>* %loc, align 4 + ret <4 x float> %loaded +} + +define <8 x i16> @test_load_8xi16_align0(<8 x i16>* %loc) { + ; 
CHECK-LABEL: test_load_8xi16_align0 + ; CHECK-NEXT: %loaded = load <8 x i16>, <8 x i16>* %loc, align 2 + ; CHECK-NEXT: ret <8 x i16> %loaded + %loaded = load <8 x i16>, <8 x i16>* %loc + ret <8 x i16> %loaded +} + +define <8 x i16> @test_load_8xi16_align1(<8 x i16>* %loc) { + ; CHECK-LABEL: test_load_8xi16_align1 + ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <8 x i16>* %loc to i16* + ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i16, i16* %[[BASE]], i32 0 + ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i16, i16* %[[GEP0]], align 1 + ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <8 x i16> undef, i16 %[[LD0]], i32 0 + ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i16, i16* %[[BASE]], i32 1 + ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i16, i16* %[[GEP1]], align 1 + ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <8 x i16> %[[INS0]], i16 %[[LD1]], i32 1 + ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i16, i16* %[[BASE]], i32 2 + ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i16, i16* %[[GEP2]], align 1 + ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <8 x i16> %[[INS1]], i16 %[[LD2]], i32 2 + ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i16, i16* %[[BASE]], i32 3 + ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i16, i16* %[[GEP3]], align 1 + ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <8 x i16> %[[INS2]], i16 %[[LD3]], i32 3 + ; CHECK-NEXT: %[[GEP4:[0-9]+]] = getelementptr inbounds i16, i16* %[[BASE]], i32 4 + ; CHECK-NEXT: %[[LD4:[0-9]+]] = load i16, i16* %[[GEP4]], align 1 + ; CHECK-NEXT: %[[INS4:[0-9]+]] = insertelement <8 x i16> %[[INS3]], i16 %[[LD4]], i32 4 + ; CHECK-NEXT: %[[GEP5:[0-9]+]] = getelementptr inbounds i16, i16* %[[BASE]], i32 5 + ; CHECK-NEXT: %[[LD5:[0-9]+]] = load i16, i16* %[[GEP5]], align 1 + ; CHECK-NEXT: %[[INS5:[0-9]+]] = insertelement <8 x i16> %[[INS4]], i16 %[[LD5]], i32 5 + ; CHECK-NEXT: %[[GEP6:[0-9]+]] = getelementptr inbounds i16, i16* %[[BASE]], i32 6 + ; CHECK-NEXT: %[[LD6:[0-9]+]] = load i16, i16* %[[GEP6]], align 1 + ; CHECK-NEXT: %[[INS6:[0-9]+]] = insertelement <8 x i16> %[[INS5]], i16 %[[LD6]], i32 6 + ; CHECK-NEXT: %[[GEP7:[0-9]+]] = getelementptr inbounds i16, i16* %[[BASE]], i32 7 + ; CHECK-NEXT: %[[LD7:[0-9]+]] = load i16, i16* %[[GEP7]], align 1 + ; CHECK-NEXT: %[[INS7:[0-9]+]] = insertelement <8 x i16> %[[INS6]], i16 %[[LD7]], i32 7 + ; CHECK-NEXT: ret <8 x i16> %[[INS7]] + %loaded = load <8 x i16>, <8 x i16>* %loc, align 1 + ret <8 x i16> %loaded +} + +define <8 x i16> @test_load_8xi16_align2(<8 x i16>* %loc) { + ; CHECK-LABEL: test_load_8xi16_align2 + ; CHECK-NEXT: %loaded = load <8 x i16>, <8 x i16>* %loc, align 2 + ; CHECK-NEXT: ret <8 x i16> %loaded + %loaded = load <8 x i16>, <8 x i16>* %loc, align 2 + ret <8 x i16> %loaded +} + +define <16 x i8> @test_load_16xi8_align0(<16 x i8>* %loc) { + ; CHECK-LABEL: test_load_16xi8_align0 + ; CHECK-NEXT: %loaded = load <16 x i8>, <16 x i8>* %loc, align 1 + ; CHECK-NEXT: ret <16 x i8> %loaded + %loaded = load <16 x i8>, <16 x i8>* %loc + ret <16 x i8> %loaded +} + +define <16 x i8> @test_load_16xi8_align1(<16 x i8>* %loc) { + ; CHECK-LABEL: test_load_16xi8_align1 + ; CHECK-NEXT: %loaded = load <16 x i8>, <16 x i8>* %loc, align 1 + ; CHECK-NEXT: ret <16 x i8> %loaded + %loaded = load <16 x i8>, <16 x i8>* %loc, align 1 + ret <16 x i8> %loaded +} + + +; Store ======================================================================== + +define void @test_store_4xi1(<4 x i1> %val, <4 x i1>* %loc) { + ; CHECK-LABEL: test_store_4xi1 + ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i1>* %loc to i1* + ; 
CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 0 + ; CHECK-NEXT: %[[EXT0:[0-9]+]] = extractelement <4 x i1> %val, i32 0 + ; CHECK-NEXT: store i1 %[[EXT0]], i1* %[[GEP0]], align 4 + ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 1 + ; CHECK-NEXT: %[[EXT1:[0-9]+]] = extractelement <4 x i1> %val, i32 1 + ; CHECK-NEXT: store i1 %[[EXT1]], i1* %[[GEP1]], align 1 + ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 2 + ; CHECK-NEXT: %[[EXT2:[0-9]+]] = extractelement <4 x i1> %val, i32 2 + ; CHECK-NEXT: store i1 %[[EXT2]], i1* %[[GEP2]], align 2 + ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i1, i1* %[[BASE]], i32 3 + ; CHECK-NEXT: %[[EXT3:[0-9]+]] = extractelement <4 x i1> %val, i32 3 + ; CHECK-NEXT: store i1 %[[EXT3]], i1* %[[GEP3]], align 1 + ; CHECK-NEXT: ret void + store <4 x i1> %val, <4 x i1>* %loc + ret void +} + +define void @test_store_4xi32_align0(<4 x i32> %val, <4 x i32>* %loc) { + ; CHECK-LABEL: test_store_4xi32_align0 + ; CHECK-NEXT: store <4 x i32> %val, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret void + store <4 x i32> %val, <4 x i32>* %loc + ret void +} + +define void @test_store_4xi32_align1(<4 x i32> %val, <4 x i32>* %loc) { + ; CHECK-LABEL: test_store_4xi32_align1 + ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* + ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 0 + ; CHECK-NEXT: %[[EXT0:[0-9]+]] = extractelement <4 x i32> %val, i32 0 + ; CHECK-NEXT: store i32 %[[EXT0]], i32* %[[GEP0]], align 1 + ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 1 + ; CHECK-NEXT: %[[EXT1:[0-9]+]] = extractelement <4 x i32> %val, i32 1 + ; CHECK-NEXT: store i32 %[[EXT1]], i32* %[[GEP1]], align 1 + ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 2 + ; CHECK-NEXT: %[[EXT2:[0-9]+]] = extractelement <4 x i32> %val, i32 2 + ; CHECK-NEXT: store i32 %[[EXT2]], i32* %[[GEP2]], align 1 + ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 3 + ; CHECK-NEXT: %[[EXT3:[0-9]+]] = extractelement <4 x i32> %val, i32 3 + ; CHECK-NEXT: store i32 %[[EXT3]], i32* %[[GEP3]], align 1 + ; CHECK-NEXT: ret void + store <4 x i32> %val, <4 x i32>* %loc, align 1 + ret void +} + +define void @test_store_4xi32_align2(<4 x i32> %val, <4 x i32>* %loc) { + ; CHECK-LABEL: test_store_4xi32_align2 + ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* + ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 0 + ; CHECK-NEXT: %[[EXT0:[0-9]+]] = extractelement <4 x i32> %val, i32 0 + ; CHECK-NEXT: store i32 %[[EXT0]], i32* %[[GEP0]], align 2 + ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 1 + ; CHECK-NEXT: %[[EXT1:[0-9]+]] = extractelement <4 x i32> %val, i32 1 + ; CHECK-NEXT: store i32 %[[EXT1]], i32* %[[GEP1]], align 2 + ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 2 + ; CHECK-NEXT: %[[EXT2:[0-9]+]] = extractelement <4 x i32> %val, i32 2 + ; CHECK-NEXT: store i32 %[[EXT2]], i32* %[[GEP2]], align 2 + ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32, i32* %[[BASE]], i32 3 + ; CHECK-NEXT: %[[EXT3:[0-9]+]] = extractelement <4 x i32> %val, i32 3 + ; CHECK-NEXT: store i32 %[[EXT3]], i32* %[[GEP3]], align 2 + ; CHECK-NEXT: ret void + store <4 x i32> %val, <4 x i32>* %loc, align 2 + ret void +} + +define void @test_store_4xi32_align4(<4 x i32> %val, <4 x i32>* %loc) { + ; 
CHECK-LABEL: test_store_4xi32_align4 + ; CHECK-NEXT: store <4 x i32> %val, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret void + store <4 x i32> %val, <4 x i32>* %loc, align 4 + ret void +} + +define void @test_store_4xi32_align8(<4 x i32> %val, <4 x i32>* %loc) { + ; CHECK-LABEL: test_store_4xi32_align8 + ; CHECK-NEXT: store <4 x i32> %val, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret void + store <4 x i32> %val, <4 x i32>* %loc, align 8 + ret void +} + +define void @test_store_4xi32_align16(<4 x i32> %val, <4 x i32>* %loc) { + ; CHECK-LABEL: test_store_4xi32_align16 + ; CHECK-NEXT: store <4 x i32> %val, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret void + store <4 x i32> %val, <4 x i32>* %loc, align 16 + ret void +} + +define void @test_store_4xi32_align32(<4 x i32> %val, <4 x i32>* %loc) { + ; CHECK-LABEL: test_store_4xi32_align32 + ; CHECK-NEXT: store <4 x i32> %val, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret void + store <4 x i32> %val, <4 x i32>* %loc, align 32 + ret void +} + +define void @test_store_4xfloat_align0(<4 x float> %val, <4 x float>* %loc) { + ; CHECK-LABEL: test_store_4xfloat_align0 + ; CHECK-NEXT: store <4 x float> %val, <4 x float>* %loc, align 4 + ; CHECK-NEXT: ret void + store <4 x float> %val, <4 x float>* %loc + ret void +} + + +; Volatile ===================================================================== + +define <4 x i32> @test_volatile_load_4xi32_align0(<4 x i32>* %loc) { + ; CHECK-LABEL: test_volatile_load_4xi32_align0 + ; CHECK-NEXT: %loaded = load volatile <4 x i32>, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret <4 x i32> %loaded + %loaded = load volatile <4 x i32>, <4 x i32>* %loc + ret <4 x i32> %loaded +} + +define <4 x i32> @test_volatile_load_4xi32_align4(<4 x i32>* %loc) { + ; CHECK-LABEL: test_volatile_load_4xi32_align4 + ; CHECK-NEXT: %loaded = load volatile <4 x i32>, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret <4 x i32> %loaded + %loaded = load volatile <4 x i32>, <4 x i32>* %loc, align 4 + ret <4 x i32> %loaded +} + +define void @test_volatile_store_4xi32_align0(<4 x i32> %val, <4 x i32>* %loc) { + ; CHECK-LABEL: test_volatile_store_4xi32_align0 + ; CHECK-NEXT: store volatile <4 x i32> %val, <4 x i32>* %loc, align 4 + ; CHECK-NEXT: ret void + store volatile <4 x i32> %val, <4 x i32>* %loc + ret void +} diff --git a/test/Transforms/NaCl/flatten-globals.ll b/test/Transforms/NaCl/flatten-globals.ll new file mode 100644 index 000000000000..fa62104ae299 --- /dev/null +++ b/test/Transforms/NaCl/flatten-globals.ll @@ -0,0 +1,209 @@ +; RUN: opt -flatten-globals %s -S | FileCheck %s +; RUN: opt -flatten-globals %s -S | FileCheck %s -check-prefix=CLEANED + +target datalayout = "p:32:32:32" + + +; Check simple cases + +@var_i32 = global i32 258 +; CHECK: @var_i32 = global [4 x i8] c"\02\01\00\00" +; CLEANED-NOT: global i32 258 + +@external_var = external global i32 +; CHECK: @external_var = external global [4 x i8] + +@zero_init = global i32 0 +; CHECK: @zero_init = global [4 x i8] zeroinitializer + +@big_zero_init = global [2000 x i8] zeroinitializer +; CHECK: @big_zero_init = global [2000 x i8] zeroinitializer + +@null_ptr = global i32* null +; CHECK: @null_ptr = global [4 x i8] zeroinitializer + +@undef_value = global i32 undef +; CHECK: @undef_value = global [4 x i8] zeroinitializer + +%opaque = type opaque +@opaque_extern = external global %opaque +; CHECK: @opaque_extern = external global [0 x i8] + + +; Check various data types + +@var_i1 = global i8 1 +; CHECK: @var_i1 = global [1 x i8] c"\01" + +@var_i8 = global i8 65 +; CHECK: @var_i8 = global 
[1 x i8] c"A" + +@var_i16 = global i16 258 +; CHECK: @var_i16 = global [2 x i8] c"\02\01" + +@var_i64 = global i64 72623859790382856 +; CHECK: @var_i64 = global [8 x i8] c"\08\07\06\05\04\03\02\01" + +@var_i128 = global i128 1339673755198158349044581307228491536 +; CHECK: @var_i128 = global [16 x i8] c"\10\0F\0E\0D\0C\0B\0A\09\08\07\06\05\04\03\02\01" + +; Check that padding bits come out as zero. +@var_i121 = global i121 1339673755198158349044581307228491536 +; CHECK: @var_i121 = global [16 x i8] c"\10\0F\0E\0D\0C\0B\0A\09\08\07\06\05\04\03\02\01" + +@var_double = global double 123.456 +; CHECK: @var_double = global [8 x i8] c"w\BE\9F\1A/\DD^@" + +@var_float = global float 123.0 +; CHECK: @var_float = global [4 x i8] c"\00\00\F6B" + + +; Check aggregates + +@padded_struct = global { i8, i8, i32 } { i8 65, i8 66, i32 258 } +; CHECK: @padded_struct = global [8 x i8] c"AB\00\00\02\01\00\00" + +@packed_struct = global <{ i8, i8, i32 }> <{ i8 67, i8 68, i32 258 }> +; CHECK: @packed_struct = global [6 x i8] c"CD\02\01\00\00" + +@i8_array = global [6 x i8] c"Hello\00" +; CHECK: @i8_array = global [6 x i8] c"Hello\00" + +@i16_array = global [3 x i16] [ i16 1, i16 2, i16 3 ] +; CHECK: @i16_array = global [6 x i8] c"\01\00\02\00\03\00" + +%s = type { i8, i8 } +@struct_array = global [2 x %s] [%s { i8 1, i8 2 }, %s { i8 3, i8 4 }] +; CHECK: @struct_array = global [4 x i8] c"\01\02\03\04" + +@vector = global <2 x i32> +; CHECK: @vector = global [8 x i8] c"\03\01\00\00\08\02\00\00" + + +; Check that various attributes are preserved + +@constant_var = constant i32 259 +; CHECK: @constant_var = constant [4 x i8] c"\03\01\00\00" + +@weak_external_var = extern_weak global i32 +; CHECK: @weak_external_var = extern_weak global [4 x i8] + +@tls_var = external thread_local global i32 +; CHECK: @tls_var = external thread_local global [4 x i8] + +@aligned_var = global i32 260, align 8 +; CHECK: @aligned_var = global [4 x i8] c"\04\01\00\00", align 8 + + +; Check alignment handling + +@implicit_alignment_i32 = global i32 zeroinitializer +; CHECK: @implicit_alignment_i32 = global [4 x i8] zeroinitializer, align 4 + +@implicit_alignment_double = global double zeroinitializer +; CHECK: @implicit_alignment_double = global [8 x i8] zeroinitializer, align 8 + +@implicit_alignment_vector = global <16 x i8> zeroinitializer +; CHECK: @implicit_alignment_vector = global [16 x i8] zeroinitializer, align 16 + +; FlattenGlobals is not allowed to increase the alignment of the +; variable when an explicit section is specified (although PNaCl does +; not support this attribute). +@lower_alignment_section = global i32 0, section "mysection", align 1 +; CHECK: @lower_alignment_section = global [4 x i8] zeroinitializer, section "mysection", align 1 + +; FlattenGlobals could increase the alignment when no section is +; specified, but it does not. 
+@lower_alignment = global i32 0, align 1 +; CHECK: @lower_alignment = global [4 x i8] zeroinitializer, align 1 + + +; Check handling of global references + +@var1 = external global i32 +@var2 = external global i8 + +%ptrs1 = type { i32*, i8*, i32 } +@ptrs1 = global %ptrs1 { i32* @var1, i8* null, i32 259 } +; CHECK: @ptrs1 = global <{ i32, [8 x i8] }> <{ i32 ptrtoint ([4 x i8]* @var1 to i32), [8 x i8] c"\00\00\00\00\03\01\00\00" }> + +%ptrs2 = type { i32, i32*, i8* } +@ptrs2 = global %ptrs2 { i32 259, i32* @var1, i8* @var2 } +; CHECK: @ptrs2 = global <{ [4 x i8], i32, i32 }> <{ [4 x i8] c"\03\01\00\00", i32 ptrtoint ([4 x i8]* @var1 to i32), i32 ptrtoint ([1 x i8]* @var2 to i32) }> + +%ptrs3 = type { i32*, [3 x i8], i8* } +@ptrs3 = global %ptrs3 { i32* @var1, [3 x i8] c"foo", i8* @var2 } +; CHECK: @ptrs3 = global <{ i32, [4 x i8], i32 }> <{ i32 ptrtoint ([4 x i8]* @var1 to i32), [4 x i8] c"foo\00", i32 ptrtoint ([1 x i8]* @var2 to i32) }> + +@ptr = global i32* @var1 +; CHECK: @ptr = global i32 ptrtoint ([4 x i8]* @var1 to i32) + +@func_ptr = global i32* ()* @get_address +; CHECK: @func_ptr = global i32 ptrtoint (i32* ()* @get_address to i32) + +@block_addr = global i8* blockaddress(@func_with_block, %label) +; CHECK: @block_addr = global i32 ptrtoint (i8* blockaddress(@func_with_block, %label) to i32) + +@vector_reloc = global <2 x i32*> +; CHECK: global <{ i32, i32 }> <{ i32 ptrtoint ([4 x i8]* @var1 to i32), i32 ptrtoint ([4 x i8]* @var1 to i32) }> + + +; Global references with addends + +@reloc_addend = global i32* getelementptr (%ptrs1, %ptrs1* @ptrs1, i32 0, i32 2) +; CHECK: @reloc_addend = global i32 add (i32 ptrtoint (<{ i32, [8 x i8] }>* @ptrs1 to i32), i32 8) + +@negative_addend = global %ptrs1* getelementptr (%ptrs1, %ptrs1* @ptrs1, i32 -1) +; CHECK: @negative_addend = global i32 add (i32 ptrtoint (<{ i32, [8 x i8] }>* @ptrs1 to i32), i32 -12) + +@const_ptr = global i32* getelementptr (%ptrs1, %ptrs1* null, i32 0, i32 2) +; CHECK: @const_ptr = global [4 x i8] c"\08\00\00\00" + +@int_to_ptr = global i32* inttoptr (i16 260 to i32*) +; CHECK: @int_to_ptr = global [4 x i8] c"\04\01\00\00" + +; Clang allows "(uintptr_t) &var" as a global initializer, so we +; handle this case. +@ptr_to_int = global i32 ptrtoint (i8* @var2 to i32) +; CHECK: @ptr_to_int = global i32 ptrtoint ([1 x i8]* @var2 to i32) + +; This is handled via Constant folding. The getelementptr is +; converted to an undef when it is created, so the pass does not see a +; getelementptr here. +@undef_gep = global i32* getelementptr (%ptrs1, %ptrs1* undef, i32 0, i32 2) +; CHECK: @undef_gep = global [4 x i8] zeroinitializer + +; Adding an offset to a function address isn't useful, but check that +; the pass handles it anyway. +@func_addend = global i8* getelementptr ( + i8, + i8* bitcast (void ()* @func_with_block to i8*), i32 123) +; CHECK: @func_addend = global i32 add (i32 ptrtoint (void ()* @func_with_block to i32), i32 123) + +; Similarly, adding an offset to a label address isn't useful, but +; check it anyway. +@block_addend = global i8* getelementptr ( + i8, + i8* blockaddress(@func_with_block, %label), i32 100) +; CHECK: @block_addend = global i32 add (i32 ptrtoint (i8* blockaddress(@func_with_block, %label) to i32), i32 100) + + +; Special cases + +; Leave vars with "appending" linkage alone. 
+@appending = appending global [1 x i32*] [i32* @var1] +; CHECK: @appending = appending global [1 x i32*] [i32* bitcast ([4 x i8]* @var1 to i32*)] + + +define i32* @get_address() { + ret i32* @var_i32 +} +; CHECK: define i32* @get_address() { +; CHECK-NEXT: ret i32* bitcast ([4 x i8]* @var_i32 to i32*) + + +define void @func_with_block() { + br label %label +label: + ret void +} diff --git a/test/Transforms/NaCl/globalcleanup.ll b/test/Transforms/NaCl/globalcleanup.ll new file mode 100644 index 000000000000..57c814d8522e --- /dev/null +++ b/test/Transforms/NaCl/globalcleanup.ll @@ -0,0 +1,59 @@ +; RUN: opt < %s -nacl-global-cleanup -S | FileCheck %s +; RUN: opt < %s -nacl-global-cleanup -S | FileCheck -check-prefix=GV %s + +@a = global i8 42 + +@llvm.compiler.used = appending global [1 x i8*] [i8* @a], section "llvm.metadata" +; GV-NOT: llvm.compiler.used + +@llvm.used = appending global [1 x i8*] [i8* @a], section "llvm.metadata" +; The used list remains unchanged. +; CHECK: llvm.used + +@extern_weak_const = extern_weak constant i32 +@extern_weak_gv = extern_weak global i32 + +; GV-NOT: @extern_weak_const +; GV-NOT: @extern_weak_gv + +; CHECK: @weak_gv = internal global +@weak_gv = weak global i32 0 + +; CHECK: define void @_start +define void @_start() { + ret void +} + +define i32* @ewgv() { +; CHECK: %bc = getelementptr i8, i8* null, i32 0 + %bc = getelementptr i8, i8* bitcast (i32* @extern_weak_gv to i8*), i32 0 +; CHECK: ret i32* null + ret i32* @extern_weak_gv +} + +define i32* @ewc() { +; CHECK: %bc = getelementptr i8, i8* null, i32 0 + %bc = getelementptr i8, i8* bitcast (i32* @extern_weak_const to i8*), i32 0 +; CHECK: ret i32* null + ret i32* @extern_weak_gv +} + +; Make sure @weak_gv is actually used. +define i32* @wgv() { +; CHECK: ret i32* @weak_gv + ret i32* @weak_gv +} + +; GV-NOT: @extern_weak_func +declare extern_weak i32 @extern_weak_func() +; CHECK: @ewf +define i32 @ewf() { +; CHECK: %ret = call i32 null() + %ret = call i32 @extern_weak_func() + ret i32 %ret +} + +; CHECK: define internal void @weak_func +define weak void @weak_func() { + ret void +} diff --git a/test/Transforms/NaCl/globalize-constant-vectors.ll b/test/Transforms/NaCl/globalize-constant-vectors.ll new file mode 100644 index 000000000000..a77fb7c2214d --- /dev/null +++ b/test/Transforms/NaCl/globalize-constant-vectors.ll @@ -0,0 +1,204 @@ +; RUN: opt -globalize-constant-vectors %s -S | FileCheck %s +; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C4xi1 %s +; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C8xi1 %s +; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C16xi1 %s +; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C16xi8 %s +; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C8xi16 %s +; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C4xi32 %s +; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C4xfloat %s +; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=Cbranch %s +; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=Cduplicate %s +; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=Czeroinitializer %s +; RUN: opt -expand-constant-expr -globalize-constant-vectors %s -S | FileCheck -check-prefix=Cnestedconst %s + +; Run the test once per function so that each check can look at its +; globals as well as its function. + +; The datalayout is needed to determine the alignment of the globals. 
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" + +; Globals shouldn't get globalized. +; CHECK: @global_should_stay_untouched = internal constant <4 x i32> +@global_should_stay_untouched = internal constant <4 x i32> + +; Also test a global initializer with nested const-exprs. +; NOTE: Have the global share the same const-expr as an instruction below. +; CHECK: @global_with_nesting = internal global <{ <4 x i32>, <8 x i16> }> <{ <4 x i32> , <8 x i16> }> +@global_with_nesting = internal global <{ <4 x i32>, <8 x i16> }> <{ <4 x i32> , <8 x i16> }> + +; 4xi1 vectors should get globalized. +define void @test4xi1(<4 x i1> %in) { + %ft0 = and <4 x i1> %in, + %ft1 = and <4 x i1> , %in + ret void +} +; C4xi1: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <4 x i1> , align 4 +; C4xi1: @[[C2:[_a-z0-9]+]] = internal unnamed_addr constant <4 x i1> , align 4 +; C4xi1: define void @test4xi1(<4 x i1> %in) { +; C4xi1-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x i1>, <4 x i1>* @[[C1]], align 4 +; C4xi1-NEXT: %[[M2:[_a-z0-9]+]] = load <4 x i1>, <4 x i1>* @[[C2]], align 4 +; C4xi1-NEXT: %ft0 = and <4 x i1> %in, %[[M1]] +; C4xi1-NEXT: %ft1 = and <4 x i1> %[[M2]], %in +; C4xi1-NEXT: ret void + +; 8xi1 vectors should get globalized. +define void @test8xi1(<8 x i1> %in) { + %ft0 = and <8 x i1> %in, + %ft1 = and <8 x i1> , %in + ret void +} +; C8xi1: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <8 x i1> , align 8 +; C8xi1: @[[C2:[_a-z0-9]+]] = internal unnamed_addr constant <8 x i1> , align 8 +; C8xi1: define void @test8xi1(<8 x i1> %in) { +; C8xi1-NEXT: %[[M1:[_a-z0-9]+]] = load <8 x i1>, <8 x i1>* @[[C1]], align 8 +; C8xi1-NEXT: %[[M2:[_a-z0-9]+]] = load <8 x i1>, <8 x i1>* @[[C2]], align 8 +; C8xi1-NEXT: %ft0 = and <8 x i1> %in, %[[M1]] +; C8xi1-NEXT: %ft1 = and <8 x i1> %[[M2]], %in +; C8xi1-NEXT: ret void + +; 16xi1 vectors should get globalized. +define void @test16xi1(<16 x i1> %in) { + %ft0 = and <16 x i1> %in, + %ft1 = and <16 x i1> , %in + ret void +} +; C16xi1: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <16 x i1> , align 16 +; C16xi1: @[[C2:[_a-z0-9]+]] = internal unnamed_addr constant <16 x i1> , align 16 +; C16xi1: define void @test16xi1(<16 x i1> %in) { +; C16xi1-NEXT: %[[M1:[_a-z0-9]+]] = load <16 x i1>, <16 x i1>* @[[C1]], align 16 +; C16xi1-NEXT: %[[M2:[_a-z0-9]+]] = load <16 x i1>, <16 x i1>* @[[C2]], align 16 +; C16xi1-NEXT: %ft0 = and <16 x i1> %in, %[[M1]] +; C16xi1-NEXT: %ft1 = and <16 x i1> %[[M2]], %in +; C16xi1-NEXT: ret void + +; 16xi8 vectors should get globalized. +define void @test16xi8(<16 x i8> %in) { + %nonsquares = add <16 x i8> %in, + %sort = add <16 x i8> , %in + ret void +} +; C16xi8: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <16 x i8> , align 4 +; C16xi8: @[[C2:[_a-z0-9]+]] = internal unnamed_addr constant <16 x i8> , align 4 +; C16xi8: define void @test16xi8(<16 x i8> %in) { +; C16xi8-NEXT: %[[M1:[_a-z0-9]+]] = load <16 x i8>, <16 x i8>* @[[C1]], align 4 +; C16xi8-NEXT: %[[M2:[_a-z0-9]+]] = load <16 x i8>, <16 x i8>* @[[C2]], align 4 +; C16xi8-NEXT: %nonsquares = add <16 x i8> %in, %[[M1]] +; C16xi8-NEXT: %sort = add <16 x i8> %[[M2]], %in +; C16xi8-NEXT: ret void + +; 8xi16 vectors should get globalized. 
+define void @test8xi16(<8 x i16> %in) { + %fib = add <8 x i16> %in, + %answer = add <8 x i16> , %in + ret void +} +; C8xi16: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <8 x i16> , align 4 +; C8xi16: @[[C2:[_a-z0-9]+]] = internal unnamed_addr constant <8 x i16> , align 4 +; C8xi16: define void @test8xi16(<8 x i16> %in) { +; C8xi16-NEXT: %[[M1:[_a-z0-9]+]] = load <8 x i16>, <8 x i16>* @[[C1]], align 4 +; C8xi16-NEXT: %[[M2:[_a-z0-9]+]] = load <8 x i16>, <8 x i16>* @[[C2]], align 4 +; C8xi16-NEXT: %fib = add <8 x i16> %in, %[[M1]] +; C8xi16-NEXT: %answer = add <8 x i16> %[[M2]], %in +; C8xi16-NEXT: ret void + +; 4xi32 vectors should get globalized. +define void @test4xi32(<4 x i32> %in) { + %tetrahedral = add <4 x i32> %in, + %serauqs = add <4 x i32> , %in + ret void +} +; C4xi32: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <4 x i32> , align 4 +; C4xi32: @[[C2:[_a-z0-9]+]] = internal unnamed_addr constant <4 x i32> , align 4 +; C4xi32: define void @test4xi32(<4 x i32> %in) { +; C4xi32-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x i32>, <4 x i32>* @[[C1]], align 4 +; C4xi32-NEXT: %[[M2:[_a-z0-9]+]] = load <4 x i32>, <4 x i32>* @[[C2]], align 4 +; C4xi32-NEXT: %tetrahedral = add <4 x i32> %in, %[[M1]] +; C4xi32-NEXT: %serauqs = add <4 x i32> %[[M2]], %in +; C4xi32-NEXT: ret void + +; 4xfloat vectors should get globalized. +define void @test4xfloat(<4 x float> %in) { + %polyhex = fadd <4 x float> %in, + %poset = fadd <4 x float> , %in + ret void +} +; C4xfloat: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <4 x float> , align 4 +; C4xfloat: @[[C2:[_a-z0-9]+]] = internal unnamed_addr constant <4 x float> , align 4 +; C4xfloat: define void @test4xfloat(<4 x float> %in) { +; C4xfloat-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x float>, <4 x float>* @[[C1]], align 4 +; C4xfloat-NEXT: %[[M2:[_a-z0-9]+]] = load <4 x float>, <4 x float>* @[[C2]], align 4 +; C4xfloat-NEXT: %polyhex = fadd <4 x float> %in, %[[M1]] +; C4xfloat-NEXT: %poset = fadd <4 x float> %[[M2]], %in +; C4xfloat-NEXT: ret void + +; Globalized constant loads have to dominate their use. +define void @testbranch(i1 %cond, <4 x i32> %in) { + br i1 %cond, label %lhs, label %rhs +lhs: + %from_lhs = add <4 x i32> %in, + br label %done +rhs: + %from_rhs = add <4 x i32> , %in + br label %done +done: + %merged = phi <4 x i32> [ %from_lhs, %lhs ], [ %from_rhs, %rhs ] + ret void +} +; Cbranch: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <4 x i32> , align 4 +; Cbranch: @[[C2:[_a-z0-9]+]] = internal unnamed_addr constant <4 x i32> , align 4 +; Cbranch: define void @testbranch(i1 %cond, <4 x i32> %in) { +; Cbranch-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x i32>, <4 x i32>* @[[C1]], align 4 +; Cbranch-NEXT: %[[M2:[_a-z0-9]+]] = load <4 x i32>, <4 x i32>* @[[C2]], align 4 +; Cbranch-NEXT: br i1 %cond, label %lhs, label %rhs +; Cbranch: lhs: +; Cbranch-NEXT: %from_lhs = add <4 x i32> %in, %[[M1]] +; Cbranch-NEXT: br label %done +; Cbranch: rhs: +; Cbranch-NEXT: %from_rhs = add <4 x i32> %[[M2]], %in +; Cbranch-NEXT: br label %done +; Cbranch: done: +; Cbranch-NEXT: %merged = phi <4 x i32> [ %from_lhs, %lhs ], [ %from_rhs, %rhs ] +; Cbranch-NEXT: ret void + +; Globalizing redundant constants between functions should materialize +; them in each function, but there should only be a single global. 
+define void @testduplicate1() { + %foo = add <4 x i32> , undef + ret void +} +define void @testduplicate2() { + %foo = add <4 x i32> , undef + ret void +} +; Cduplicate: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <4 x i32> , align 4 +; Cduplicate: define void @testduplicate1() { +; Cduplicate-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x i32>, <4 x i32>* @[[C1]], align 4 +; Cduplicate-NEXT: %foo = add <4 x i32> %[[M1]], undef +; Cduplicate-NEXT: ret void +; Cduplicate: define void @testduplicate2() { +; Cduplicate-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x i32>, <4 x i32>* @[[C1]], align 4 +; Cduplicate-NEXT: %foo = add <4 x i32> %[[M1]], undef +; Cduplicate-NEXT: ret void + +; zeroinitializer vectors should get globalized. +define void @testzeroinitializer(<4 x float> %in) { + %id = fadd <4 x float> %in, + ret void +} +; Czeroinitializer: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <4 x float> zeroinitializer, align 4 +; Czeroinitializer: define void @testzeroinitializer(<4 x float> %in) { +; Czeroinitializer-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x float>, <4 x float>* @[[C1]], align 4 +; Czeroinitializer-NEXT: %id = fadd <4 x float> %in, %[[M1]] +; Czeroinitializer-NEXT: ret void + +; Nested constant exprs are handled by running -expand-constant-expr first. +define i64 @test_nested_const(i64 %x) { + %foo = add i64 bitcast (<8 x i8> to i64), %x + ret i64 %foo +} +; Cnestedconst: @[[C1:[_a-z0-9]+]] = internal unnamed_addr constant <8 x i8> , align 8 +; Cnestedconst: define i64 @test_nested_const(i64 %x) { +; Cnestedconst-NEXT: %[[M1:[_a-z0-9]+]] = load <8 x i8>, <8 x i8>* @[[C1]], align 8 +; Cnestedconst-NEXT: %[[X1:[_a-z0-9]+]] = bitcast <8 x i8> %[[M1]] to i64 +; Cnestedconst-NEXT: add i64 %[[X1]], %x +; Cnestedconst-NEXT: ret i64 %foo diff --git a/test/Transforms/NaCl/internalize-used-globals.ll b/test/Transforms/NaCl/internalize-used-globals.ll new file mode 100644 index 000000000000..f25bb6cbd0cf --- /dev/null +++ b/test/Transforms/NaCl/internalize-used-globals.ll @@ -0,0 +1,34 @@ +; RUN: opt %s -internalize-used-globals -S | FileCheck %s + +target datalayout = "e-p:32:32-i64:64" +target triple = "le32-unknown-nacl" + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata" +; The used list remains unchanged. +; CHECK: @llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata" + + +define hidden void @foo() #0 { + ret void +} +; Although in the used list, foo becomes internal. +; CHECK-LABEL: define internal void @foo + + +define i32 @_start() { +entry: + ret i32 0 +} +; @_start is left non-internal. +; CHECK-LABEL: define i32 @_start + +define internal void @my_internal() { + ret void +} + +; Internals are left as-is. +; CHECK-LABEL: define internal void @my_internal() + +!llvm.ident = !{!0} +!0 = !{!"clang version 3.5.0 "} + diff --git a/test/Transforms/NaCl/life.ll b/test/Transforms/NaCl/life.ll new file mode 100644 index 000000000000..dcaf02ff2e17 --- /dev/null +++ b/test/Transforms/NaCl/life.ll @@ -0,0 +1,66 @@ +; RUN: opt -pnacl-abi-simplify-postopt %s -S | \ +; RUN: opt -backend-canonicalize -S | FileCheck %s + +; Test that the SIMD game of life example from the NaCl SDK has an inner loop +; that contains the expected shufflevector instructions. First run the ABI +; simplifications on the code, then run the translator's peepholes. +; +; The stable PNaCl bitcode ABI doesn't have shufflevector nor constant vectors, +; it instead has insertelement, extractelement and load from globals. 
Note that +; `undef` becomes `0` in the constants. + +; The datalayout is needed to determine the alignment of the globals. +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" + +define <16 x i8> @InnerLoop(<16 x i8>* %pixel_line, <16 x i8> %src00, <16 x i8> %src01, <16 x i8> %src10, <16 x i8> %src11, <16 x i8> %src20, <16 x i8> %src21) { + ; CHECK-LABEL: InnerLoop + ; CHECK-NEXT: shufflevector <16 x i8> %src00, <16 x i8> %src01, <16 x i32> + ; CHECK-NEXT: shufflevector <16 x i8> %src00, <16 x i8> %src01, <16 x i32> + ; CHECK-NEXT: shufflevector <16 x i8> %src10, <16 x i8> %src11, <16 x i32> + ; CHECK-NEXT: shufflevector <16 x i8> %src10, <16 x i8> %src11, <16 x i32> + ; CHECK-NEXT: shufflevector <16 x i8> %src20, <16 x i8> %src21, <16 x i32> + ; CHECK-NEXT: shufflevector <16 x i8> %src20, <16 x i8> %src21, <16 x i32> + ; CHECK-NOT: load + ; CHECK-NOT: insertelement + ; CHECK-NOT: extractelement + %shuffle = shufflevector <16 x i8> %src00, <16 x i8> %src01, <16 x i32> + %shuffle3 = shufflevector <16 x i8> %src00, <16 x i8> %src01, <16 x i32> + %shuffle4 = shufflevector <16 x i8> %src10, <16 x i8> %src11, <16 x i32> + %shuffle5 = shufflevector <16 x i8> %src10, <16 x i8> %src11, <16 x i32> + %shuffle6 = shufflevector <16 x i8> %src20, <16 x i8> %src21, <16 x i32> + %shuffle7 = shufflevector <16 x i8> %src20, <16 x i8> %src21, <16 x i32> + %add = add <16 x i8> %shuffle, %src00 + %add8 = add <16 x i8> %add, %shuffle3 + %add9 = add <16 x i8> %add8, %src10 + %add10 = add <16 x i8> %add9, %shuffle5 + %add11 = add <16 x i8> %add10, %src20 + %add12 = add <16 x i8> %add11, %shuffle6 + %add13 = add <16 x i8> %add12, %shuffle7 + %add14 = shl <16 x i8> %add13, + %add15 = add <16 x i8> %add14, %shuffle4 + %cmp = icmp ugt <16 x i8> %add15, + %sext = sext <16 x i1> %cmp to <16 x i8> + %cmp16 = icmp ult <16 x i8> %add15, + ; CHECK: select + %and = select <16 x i1> %cmp16, <16 x i8> %sext, <16 x i8> zeroinitializer + ; CHECK-NEXT: shufflevector <16 x i8> %and, <16 x i8> , <16 x i32> + ; CHECK-NEXT: shufflevector <16 x i8> %and, <16 x i8> , <16 x i32> + ; CHECK-NEXT: shufflevector <16 x i8> %and, <16 x i8> , <16 x i32> + ; CHECK-NEXT: shufflevector <16 x i8> %and, <16 x i8> , <16 x i32> + ; CHECK-NOT: load + ; CHECK-NOT: insertelement + ; CHECK-NOT: extractelement + %shuffle18 = shufflevector <16 x i8> %and, <16 x i8> , <16 x i32> + %shuffle19 = shufflevector <16 x i8> %and, <16 x i8> , <16 x i32> + %shuffle20 = shufflevector <16 x i8> %and, <16 x i8> , <16 x i32> + %shuffle21 = shufflevector <16 x i8> %and, <16 x i8> , <16 x i32> + store <16 x i8> %shuffle18, <16 x i8>* %pixel_line, align 16 + %add.ptr22 = getelementptr inbounds <16 x i8>, <16 x i8>* %pixel_line, i32 1 + store <16 x i8> %shuffle19, <16 x i8>* %add.ptr22, align 16 + %add.ptr23 = getelementptr inbounds <16 x i8>, <16 x i8>* %pixel_line, i32 2 + store <16 x i8> %shuffle20, <16 x i8>* %add.ptr23, align 16 + %add.ptr24 = getelementptr inbounds <16 x i8>, <16 x i8>* %pixel_line, i32 3 + store <16 x i8> %shuffle21, <16 x i8>* %add.ptr24, align 16 + %and25 = and <16 x i8> %and, + ret <16 x i8> %and25 +} diff --git a/test/Transforms/NaCl/lit.local.cfg b/test/Transforms/NaCl/lit.local.cfg new file mode 100644 index 000000000000..a43fd3ebdd5a --- /dev/null +++ b/test/Transforms/NaCl/lit.local.cfg @@ -0,0 +1,3 @@ +# -*- Python -*- + +config.suffixes = ['.ll'] diff --git a/test/Transforms/NaCl/normalize-alignment.ll b/test/Transforms/NaCl/normalize-alignment.ll new file mode 100644 index 
000000000000..75cead528d66 --- /dev/null +++ b/test/Transforms/NaCl/normalize-alignment.ll @@ -0,0 +1,73 @@ +; RUN: opt -S -normalize-alignment %s 2>&1 | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" + +; Implicit default alignments are changed to explicit alignments. +define void @default_alignment_attrs(float %f, double %d) { + load i8, i8* null + load i32, i32* null + load float, float* null + load double, double* null + + store i8 100, i8* null + store i32 100, i32* null + store float %f, float* null + store double %d, double* null + ret void +} +; CHECK-LABEL: @default_alignment_attrs +; CHECK-NEXT: load i8, i8* null, align 1 +; CHECK-NEXT: load i32, i32* null, align 1 +; CHECK-NEXT: load float, float* null, align 4 +; CHECK-NEXT: load double, double* null, align 8 +; CHECK-NEXT: store i8 100, i8* null, align 1 +; CHECK-NEXT: store i32 100, i32* null, align 1 +; CHECK-NEXT: store float %f, float* null, align 4 +; CHECK-NEXT: store double %d, double* null, align 8 + +define void @reduce_alignment_assumptions() { + load i32, i32* null, align 4 + load float, float* null, align 2 + load float, float* null, align 4 + load float, float* null, align 8 + load double, double* null, align 2 + load double, double* null, align 8 + load double, double* null, align 16 + + ; Higher alignment assumptions must be retained for atomics. + load atomic i32, i32* null seq_cst, align 4 + load atomic i32, i32* null seq_cst, align 8 + store atomic i32 100, i32* null seq_cst, align 4 + store atomic i32 100, i32* null seq_cst, align 8 + ret void +} +; CHECK-LABEL: @reduce_alignment_assumptions +; CHECK-NEXT: load i32, i32* null, align 1 +; CHECK-NEXT: load float, float* null, align 1 +; CHECK-NEXT: load float, float* null, align 4 +; CHECK-NEXT: load float, float* null, align 4 +; CHECK-NEXT: load double, double* null, align 1 +; CHECK-NEXT: load double, double* null, align 8 +; CHECK-NEXT: load double, double* null, align 8 +; CHECK-NEXT: load atomic i32, i32* null seq_cst, align 4 +; CHECK-NEXT: load atomic i32, i32* null seq_cst, align 4 +; CHECK-NEXT: store atomic i32 100, i32* null seq_cst, align 4 +; CHECK-NEXT: store atomic i32 100, i32* null seq_cst, align 4 + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) + +define void @reduce_memcpy_alignment_assumptions(i8* %ptr) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %ptr, + i32 20, i32 4, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr, i8* %ptr, + i32 20, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* %ptr, i8 99, + i32 20, i32 4, i1 false) + ret void +} +; CHECK-LABEL: @reduce_memcpy_alignment_assumptions +; CHECK-NEXT: call void @llvm.memcpy.{{.*}} i32 20, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memmove.{{.*}} i32 20, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memset.{{.*}} i32 20, i32 1, i1 false) diff --git a/test/Transforms/NaCl/pnacl-abi-internalize-symbols-pso.ll b/test/Transforms/NaCl/pnacl-abi-internalize-symbols-pso.ll new file mode 100644 index 000000000000..1331e50a957f --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-internalize-symbols-pso.ll @@ -0,0 +1,22 @@ +; RUN: opt %s -pnacl-abi-simplify-preopt -S | FileCheck %s + +; Checks that PNaCl ABI pre-opt simplification correctly internalizes +; symbols except __pnacl_pso_root. 
+ + +@__pnacl_pso_root = global i32 123 +; CHECK: @__pnacl_pso_root = global i32 123 + +@global_var = global [4 x i8] c"abcd" +; CHECK: @global_var = internal global [4 x i8] c"abcd" + + +define void @main() { +; CHECK: define internal void @main + ret void +} + +define external void @foobarbaz() { +; CHECK: define internal void @foobarbaz + ret void +} diff --git a/test/Transforms/NaCl/pnacl-abi-internalize-symbols.ll b/test/Transforms/NaCl/pnacl-abi-internalize-symbols.ll new file mode 100644 index 000000000000..cd15439c2735 --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-internalize-symbols.ll @@ -0,0 +1,25 @@ +; RUN: opt %s -pnacl-abi-simplify-preopt -S | FileCheck %s + +; Checks that PNaCl ABI pre-opt simplification correctly internalizes +; symbols except _start. + + +@global_var = global [4 x i8] c"abcd" +; CHECK: @global_var = internal global [4 x i8] c"abcd" + + +define void @main() { +; CHECK: define internal void @main + ret void +} + +define external void @foobarbaz() { +; CHECK: define internal void @foobarbaz + ret void +} + +define void @_start() { +; CHECK: define void @_start + ret void +} + diff --git a/test/Transforms/NaCl/pnacl-abi-simplify-postopt.ll b/test/Transforms/NaCl/pnacl-abi-simplify-postopt.ll new file mode 100644 index 000000000000..76561d8a2f18 --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-simplify-postopt.ll @@ -0,0 +1,23 @@ +; RUN: opt %s -pnacl-abi-simplify-postopt -S | FileCheck %s +; RUN: opt %s -pnacl-abi-simplify-postopt -S \ +; RUN: | FileCheck %s -check-prefix=CLEANUP + +; "-pnacl-abi-simplify-postopt" runs various passes which are tested +; thoroughly in other *.ll files. This file is a smoke test to check +; that the passes work together OK. + +target datalayout = "p:32:32:32" + +@var = global i32 256 +; CHECK: @var = global [4 x i8] + +define i16 @read_var() { + %val = load i16, i16* bitcast (i32* @var to i16*) + ret i16 %val +} +; CHECK: = bitcast [4 x i8]* @var +; CHECK-NEXT: load i16, i16* + +; Check that dead prototypes are successfully removed. +declare void @unused_prototype(i8*) +; CLEANUP-NOT: unused_prototype diff --git a/test/Transforms/NaCl/pnacl-abi-simplify-preopt.ll b/test/Transforms/NaCl/pnacl-abi-simplify-preopt.ll new file mode 100644 index 000000000000..e34c46d80c8a --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-simplify-preopt.ll @@ -0,0 +1,50 @@ +; RUN: opt %s -pnacl-abi-simplify-preopt -S | FileCheck %s + +; "-pnacl-abi-simplify-preopt" runs various passes which are tested +; thoroughly in other *.ll files. This file is a smoke test to check +; that "-pnacl-abi-simplify-preopt" runs what it's supposed to run. + +declare void @ext_func() + + +define void @invoke_func() { + invoke void @ext_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret void +} +; CHECK-NOT: invoke void @ext_func() +; CHECK-NOT: landingpad + + +define void @varargs_func(...) { + ret void +} +; CHECK-NOT: @varargs_func(...) + + +%MyStruct = type { i32, i32 } + +; Checks that ExpandVarArgs and ExpandStructRegs are applied in the +; right order. 
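+; As a loose, hand-written sketch of the second half of that pipeline (the
+; function and value names are illustrative and nothing in this sketch is
+; checked): once ExpandVarArgs has turned the va_arg in
+; @get_struct_from_varargs below into an ordinary %MyStruct load,
+; ExpandStructRegs splits the struct load/store pair into per-field accesses,
+; roughly like this hand-written equivalent:
+define void @copy_struct_by_fields_sketch(%MyStruct* %src, %MyStruct* %dest) {
+  %f0.ptr = getelementptr %MyStruct, %MyStruct* %src, i32 0, i32 0
+  %f0 = load i32, i32* %f0.ptr
+  %f1.ptr = getelementptr %MyStruct, %MyStruct* %src, i32 0, i32 1
+  %f1 = load i32, i32* %f1.ptr
+  %d0.ptr = getelementptr %MyStruct, %MyStruct* %dest, i32 0, i32 0
+  store i32 %f0, i32* %d0.ptr
+  %d1.ptr = getelementptr %MyStruct, %MyStruct* %dest, i32 0, i32 1
+  store i32 %f1, i32* %d1.ptr
+  ret void
+}
+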
+define void @get_struct_from_varargs(i8* %va_list, %MyStruct* %dest) { + %val = va_arg i8* %va_list, %MyStruct + store %MyStruct %val, %MyStruct* %dest + ret void +} +; CHECK-NOT: va_arg + + +@llvm.global_ctors = appending global [0 x { i32, void ()* }] zeroinitializer +; CHECK-NOT: @llvm.global_ctors + +@tls_var = thread_local global i32 0 +; CHECK-NOT: thread_local + +@alias = alias i32* @tls_var +; CHECK-NOT: @alias + +@weak_ref = extern_weak global i8* +; CHECK-NOT: extern_weak diff --git a/test/Transforms/NaCl/pnacl-abi-simplify.ll b/test/Transforms/NaCl/pnacl-abi-simplify.ll new file mode 100644 index 000000000000..453990c0e8b0 --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-simplify.ll @@ -0,0 +1,54 @@ +; RUN: opt %s -pnacl-abi-simplify-preopt -pnacl-abi-simplify-postopt -S \ +; RUN: | FileCheck %s +; RUN: opt %s -enable-pnacl-sjlj-eh -pnacl-abi-simplify-preopt \ +; RUN: -pnacl-abi-simplify-postopt -S | FileCheck %s + +target datalayout = "p:32:32:32" + +; Check that the "tail" attribute is preserved on calls. +define void @tail_call() { + tail call void @tail_call() + ret void +} +; CHECK: tail call void @tail_call() + +; Check that unreachable blocks are pruned out, whether or not SJLJ-EH is used. +; Unreachable blocks can have instructions with strange properties like +; self references. Normally, self-references are disallowed. +define i32 @unreachable_block_self_ref() { +entry: + br label %bb1 + +bb0: ; preds = %bb0 + %x = add i32 %x, 0 + br i1 undef, label %bb1, label %bb0 + +bb1: ; preds = %bb0, %entry + %phi = phi i32 [ 321, %entry ], [ %x, %bb0 ] + ret i32 %phi +} +; CHECK-LABEL: unreachable_block_self_ref() { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 321 +; CHECK-NEXT: } + +declare void @my_exit(i32) + +; Another check for unreachable block pruning: in this case, the unreachable +; block can have instructions that confuse liveness analysis. +define i32 @unreachable_block_bad_liveness() { +entry: + %ret_val = add i32 undef, undef + call void @my_exit(i32 %ret_val) + unreachable +label: + ; ret_val has no reaching definitions, causing an inconsistency in + ; liveness analysis. + ret i32 %ret_val +} +; CHECK-LABEL: unreachable_block_bad_liveness() { +; CHECK-NEXT: entry: +; CHECK-NEXT: %ret_val = add i32 undef, undef +; CHECK-NEXT: call void @my_exit +; CHECK-NEXT: unreachable +; CHECK-NEXT: } diff --git a/test/Transforms/NaCl/pnacl-eh-exception-info.ll b/test/Transforms/NaCl/pnacl-eh-exception-info.ll new file mode 100644 index 000000000000..979ccf0449fc --- /dev/null +++ b/test/Transforms/NaCl/pnacl-eh-exception-info.ll @@ -0,0 +1,127 @@ +; RUN: opt %s -pnacl-sjlj-eh -S | FileCheck %s + +; Example std::type_info objects. +@exc_typeid1 = external global i8 +@exc_typeid2 = external global i8 +@exc_typeid3 = external global i8 + +; This must be declared for "-pnacl-sjlj-eh" to work. 
+@__pnacl_eh_stack = external thread_local global i8* + +declare i32 @llvm.eh.typeid.for(i8*) + +declare void @external_func() + + +@__pnacl_eh_type_table = external global i8* +@__pnacl_eh_action_table = external global i8* +@__pnacl_eh_filter_table = external global i8* + +; CHECK: %action_table_entry = type { i32, i32 } + +; CHECK: @__pnacl_eh_type_table = internal constant [4 x i8*] [i8* @exc_typeid1, i8* @exc_typeid2, i8* @exc_typeid3, i8* null] + +; CHECK: @__pnacl_eh_action_table = internal constant [7 x %action_table_entry] [%action_table_entry { i32 3, i32 0 }, %action_table_entry { i32 2, i32 1 }, %action_table_entry { i32 1, i32 2 }, %action_table_entry { i32 -1, i32 0 }, %action_table_entry { i32 -2, i32 0 }, %action_table_entry { i32 4, i32 0 }, %action_table_entry zeroinitializer] + +; CHECK: @__pnacl_eh_filter_table = internal constant [5 x i32] [i32 0, i32 2, i32 3, i32 1, i32 0] + + +; Exception type pointers are allocated IDs which specify the index +; into __pnacl_eh_type_table where the type may be found. +define void @test_eh_typeid(i32 %arg) { + %id1 = call i32 @llvm.eh.typeid.for(i8* @exc_typeid1) + %id2 = call i32 @llvm.eh.typeid.for(i8* @exc_typeid2) + %id3 = call i32 @llvm.eh.typeid.for(i8* @exc_typeid3) + %cmp1 = icmp eq i32 %arg, %id1 + %cmp2 = icmp eq i32 %arg, %id2 + %cmp3 = icmp eq i32 %arg, %id3 + ret void +} +; CHECK: define void @test_eh_typeid +; CHECK-NEXT: %cmp1 = icmp eq i32 %arg, 1 +; CHECK-NEXT: %cmp2 = icmp eq i32 %arg, 2 +; CHECK-NEXT: %cmp3 = icmp eq i32 %arg, 3 +; CHECK-NEXT: ret void + + +define void @test_single_catch_clause() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + catch i8* @exc_typeid3 + ret void +} +; CHECK: define void @test_single_catch_clause +; CHECK: store i32 1, i32* %exc_info_ptr + + +define void @test_multiple_catch_clauses() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + catch i8* @exc_typeid1 + catch i8* @exc_typeid2 + catch i8* @exc_typeid3 + ret void +} +; CHECK: define void @test_multiple_catch_clauses +; CHECK: store i32 3, i32* %exc_info_ptr + + +define void @test_empty_filter_clause() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + filter [0 x i8*] zeroinitializer + ret void +} +; CHECK: define void @test_empty_filter_clause +; CHECK: store i32 4, i32* %exc_info_ptr + + +define void @test_filter_clause() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + filter [3 x i8*] [i8* @exc_typeid2, + i8* @exc_typeid3, + i8* @exc_typeid1] + ret void +} +; CHECK: define void @test_filter_clause +; CHECK: store i32 5, i32* %exc_info_ptr + + +; "catch i8* null" means that any C++ exception matches. 
+define void @test_catch_all_clause() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + catch i8* null + ret void +} +; CHECK: define void @test_catch_all_clause +; CHECK: store i32 6, i32* %exc_info_ptr + + +define void @test_cleanup_clause() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + cleanup + ret void +} +; CHECK: define void @test_cleanup_clause +; CHECK: store i32 7, i32* %exc_info_ptr diff --git a/test/Transforms/NaCl/pnacl-sjlj-eh-bug.ll b/test/Transforms/NaCl/pnacl-sjlj-eh-bug.ll new file mode 100644 index 000000000000..2338e5bfe331 --- /dev/null +++ b/test/Transforms/NaCl/pnacl-sjlj-eh-bug.ll @@ -0,0 +1,81 @@ +; RUN: opt %s -pnacl-sjlj-eh -O2 -S | FileCheck %s + +; datalayout must be specified for GVN to work. +target datalayout = "p:32:32:32" + +; This must be declared for expanding "invoke" and "landingpad" instructions. +@__pnacl_eh_stack = external thread_local global i8* + +declare i1 @might_be_setjmp() +declare void @external_func(i32* %ptr) +declare void @var_is_nonzero() + + +; Test for a bug in which PNaClSjLjEH would transform +; @invoke_optimize_test() such that the call to @var_is_nonzero() +; could get optimized away by a later optimization pass. This +; happened because PNaClSjLjEH generated code similar to +; @branch_optimize_test() below. + +define void @invoke_optimize_test() { + %var = alloca i32 + store i32 0, i32* %var + + invoke void @external_func(i32* %var) + to label %exit unwind label %lpad + +lpad: + landingpad i32 personality i8* null + catch i8* null + %value = load i32, i32* %var + %is_zero = icmp eq i32 %value, 0 + br i1 %is_zero, label %exit, label %do_call + +do_call: + call void @var_is_nonzero() + ret void + +exit: + ret void +} +; CHECK: define void @invoke_optimize_test() +; CHECK: @var_is_nonzero() + + +; In @branch_optimize_test(), the optimizer can optimize away the call +; to @var_is_nonzero(), because it can assume that %var always +; contains 0 on the "iffalse" branch. +; +; The passes "-gvn -instcombine" are enough to do this. +; +; The optimizer can do this regardless of whether @might_be_setjmp() +; is setjmp() or a normal function. It doesn't need to know that +; @might_be_setjmp() might return twice, because storing to %var +; between setjmp() and longjmp() leaves %var pointing to an undefined +; value. + +define void @branch_optimize_test() { + %var = alloca i32 + store i32 0, i32* %var + + %cond = call i1 @might_be_setjmp() returns_twice + br i1 %cond, label %iftrue, label %iffalse + +iftrue: + call void @external_func(i32* %var) + ret void + +iffalse: + %value = load i32, i32* %var + %is_zero = icmp eq i32 %value, 0 + br i1 %is_zero, label %exit, label %do_call + +do_call: + call void @var_is_nonzero() + ret void + +exit: + ret void +} +; CHECK: define void @branch_optimize_test() +; CHECK-NOT: @var_is_nonzero diff --git a/test/Transforms/NaCl/pnacl-sjlj-eh.ll b/test/Transforms/NaCl/pnacl-sjlj-eh.ll new file mode 100644 index 000000000000..6e524a5b775a --- /dev/null +++ b/test/Transforms/NaCl/pnacl-sjlj-eh.ll @@ -0,0 +1,173 @@ +; RUN: opt %s -pnacl-sjlj-eh -S | FileCheck %s + +; This must be declared for expanding "invoke" and "landingpad" instructions. +@__pnacl_eh_stack = external thread_local global i8* + +; This must be declared for expanding "resume" instructions. 
+declare void @__pnacl_eh_resume(i32* %exception) + +declare i32 @external_func(i64 %arg) +declare void @external_func_void() +declare i32 @my_setjmp() + + +; CHECK: %ExceptionFrame = type { [1024 x i8], %ExceptionFrame*, i32 } + +define i32 @invoke_test(i64 %arg) { + %result = invoke i32 @external_func(i64 %arg) + to label %cont unwind label %lpad +cont: + ret i32 %result +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 999 +} +; CHECK: define i32 @invoke_test +; CHECK-NEXT: %invoke_result_ptr = alloca i32 +; CHECK-NEXT: %invoke_frame = alloca %ExceptionFrame, align 8 +; CHECK-NEXT: %exc_info_ptr = getelementptr %ExceptionFrame, %ExceptionFrame* %invoke_frame, i32 0, i32 2 +; CHECK-NEXT: %invoke_next = getelementptr %ExceptionFrame, %ExceptionFrame* %invoke_frame, i32 0, i32 1 +; CHECK-NEXT: %invoke_jmp_buf = getelementptr %ExceptionFrame, %ExceptionFrame* %invoke_frame, i32 0, i32 0, i32 0 +; CHECK-NEXT: %pnacl_eh_stack = bitcast i8** @__pnacl_eh_stack to %ExceptionFrame** +; CHECK-NEXT: %old_eh_stack = load %ExceptionFrame*, %ExceptionFrame** %pnacl_eh_stack +; CHECK-NEXT: store %ExceptionFrame* %old_eh_stack, %ExceptionFrame** %invoke_next +; CHECK-NEXT: store i32 {{[0-9]+}}, i32* %exc_info_ptr +; CHECK-NEXT: store %ExceptionFrame* %invoke_frame, %ExceptionFrame** %pnacl_eh_stack +; CHECK-NEXT: %invoke_is_exc = call i32 @invoke_test_setjmp_caller(i64 %arg, i32 (i64)* @external_func, i8* %invoke_jmp_buf, i32* %invoke_result_ptr) +; CHECK-NEXT: %result = load i32, i32* %invoke_result_ptr +; CHECK-NEXT: store %ExceptionFrame* %old_eh_stack, %ExceptionFrame** %pnacl_eh_stack +; CHECK-NEXT: %invoke_sj_is_zero = icmp eq i32 %invoke_is_exc, 0 +; CHECK-NEXT: br i1 %invoke_sj_is_zero, label %cont, label %lpad +; CHECK: cont: +; CHECK-NEXT: ret i32 %result +; CHECK: lpad: +; CHECK-NEXT: %landingpad_ptr = bitcast i8* %invoke_jmp_buf to { i8*, i32 }* +; CHECK-NEXT: %lp = load { i8*, i32 }, { i8*, i32 }* %landingpad_ptr +; CHECK-NEXT: ret i32 999 + +; Check definition of helper function: +; CHECK: define internal i32 @invoke_test_setjmp_caller(i64 %arg, i32 (i64)* %func_ptr, i8* %jmp_buf, i32* %result_ptr) { +; CHECK-NEXT: %invoke_sj = call i32 @llvm.nacl.setjmp(i8* %jmp_buf) [[RETURNS_TWICE:#[0-9]+]] +; CHECK-NEXT: %invoke_sj_is_zero = icmp eq i32 %invoke_sj, 0 +; CHECK-NEXT: br i1 %invoke_sj_is_zero, label %normal, label %exception +; CHECK: normal: +; CHECK-NEXT: %result = call i32 %func_ptr(i64 %arg) +; CHECK-NEXT: store i32 %result, i32* %result_ptr +; CHECK-NEXT: ret i32 0 +; CHECK: exception: +; CHECK-NEXT: ret i32 1 + + +; A landingpad block may be used by multiple "invoke" instructions. +define i32 @shared_landingpad(i64 %arg) { + %result1 = invoke i32 @external_func(i64 %arg) + to label %cont1 unwind label %lpad +cont1: + %result2 = invoke i32 @external_func(i64 %arg) + to label %cont2 unwind label %lpad +cont2: + ret i32 %result2 +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 999 +} +; CHECK: define i32 @shared_landingpad +; CHECK: br i1 %invoke_sj_is_zero{{[0-9]*}}, label %cont1, label %lpad +; CHECK: br i1 %invoke_sj_is_zero{{[0-9]*}}, label %cont2, label %lpad + + +; Check that the pass can handle a landingpad appearing before an invoke. +define i32 @landingpad_before_invoke() { + ret i32 123 + +dead_block: + %lp = landingpad i32 personality i8* null cleanup + ret i32 %lp +} +; CHECK: define i32 @landingpad_before_invoke +; CHECK: %lp = load i32, i32* %landingpad_ptr + + +; Test the expansion of the "resume" instruction. 
+define void @test_resume({ i8*, i32 } %arg) { + resume { i8*, i32 } %arg +} +; CHECK: define void @test_resume +; CHECK-NEXT: %resume_exc = extractvalue { i8*, i32 } %arg, 0 +; CHECK-NEXT: %resume_cast = bitcast i8* %resume_exc to i32* +; CHECK-NEXT: call void @__pnacl_eh_resume(i32* %resume_cast) +; CHECK-NEXT: unreachable + + +; Check that call attributes are preserved. +define i32 @call_attrs(i64 %arg) { + %result = invoke fastcc i32 @external_func(i64 inreg %arg) noreturn + to label %cont unwind label %lpad +cont: + ret i32 %result +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 999 +} +; CHECK: define i32 @call_attrs +; CHECK: %result = call fastcc i32 %func_ptr(i64 inreg %arg) [[NORETURN:#[0-9]+]] + + +; If the PNaClSjLjEH pass needs to insert any instructions into the +; non-exceptional path, check that PHI nodes are updated correctly. +; (An earlier version needed to do this, but the current version +; doesn't.) +define i32 @invoke_with_phi_nodes(i64 %arg) { +entry: + %result = invoke i32 @external_func(i64 %arg) + to label %cont unwind label %lpad +cont: + %cont_phi = phi i32 [ 100, %entry ] + ret i32 %cont_phi +lpad: + %lpad_phi = phi i32 [ 200, %entry ] + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 %lpad_phi +} +; CHECK: define i32 @invoke_with_phi_nodes +; CHECK: cont: +; CHECK-NEXT: %cont_phi = phi i32 [ 100, %entry ] +; CHECK-NEXT: ret i32 %cont_phi +; CHECK: lpad: +; CHECK-NEXT: %lpad_phi = phi i32 [ 200, %entry ] +; CHECK: ret i32 %lpad_phi + + +; Test "void" result type from "invoke". This requires special +; handling because void* is not a valid type. +define void @invoke_void_result() { + invoke void @external_func_void() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null cleanup + ret void +} +; CHECK: define void @invoke_void_result() +; "%result_ptr" argument is omitted from the helper function: +; CHECK: define internal i32 @invoke_void_result_setjmp_caller(void ()* %func_ptr, i8* %jmp_buf) + + +; A call to setjmp() cannot be moved into a helper function, so test +; that it isn't moved. +define void @invoke_setjmp() { + %x = invoke i32 @my_setjmp() returns_twice to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null cleanup + ret void +} +; CHECK: define void @invoke_setjmp() +; CHECK-NOT: call +; CHECK: %x = call i32 @my_setjmp() [[RETURNS_TWICE]] +; CHECK-NEXT: br label %cont + + +; CHECK: attributes [[RETURNS_TWICE]] = { returns_twice } +; CHECK: attributes [[NORETURN]] = { noreturn } diff --git a/test/Transforms/NaCl/promote-i1-ops.ll b/test/Transforms/NaCl/promote-i1-ops.ll new file mode 100644 index 000000000000..10d9d77c621d --- /dev/null +++ b/test/Transforms/NaCl/promote-i1-ops.ll @@ -0,0 +1,143 @@ +; RUN: opt %s -nacl-promote-i1-ops -S | FileCheck %s + +; Test that the PromoteI1Ops pass expands out i1 loads/stores and i1 +; comparison and arithmetic operations, with the exception of "and", +; "or" and "xor". + + +; i1 loads and stores are converted to i8 load and stores with +; explicit casts. 
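+; Note on the casts in the checks below: the load/store and add cases widen
+; with zext (true is represented as i8 1), while the signed compare widens with
+; sext, since as a signed 1-bit value true is -1 and sign-extending to i8
+; preserves the slt ordering.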
+ +define i1 @load(i1* %ptr) { + %val = load i1, i1* %ptr + ret i1 %val +} +; CHECK: define i1 @load +; CHECK-NEXT: %ptr.i8ptr = bitcast i1* %ptr to i8* +; CHECK-NEXT: %val.pre_trunc = load i8, i8* %ptr.i8ptr +; CHECK-NEXT: %val = trunc i8 %val.pre_trunc to i1 + +define void @store(i1 %val, i1* %ptr) { + store i1 %val, i1* %ptr + ret void +} +; CHECK: define void @store +; CHECK-NEXT: %ptr.i8ptr = bitcast i1* %ptr to i8* +; CHECK-NEXT: %val.expand_i1_val = zext i1 %val to i8 +; CHECK-NEXT: store i8 %val.expand_i1_val, i8* %ptr.i8ptr + + +; i1 arithmetic and comparisons are converted to their i8 equivalents +; with explicit casts. + +define i1 @add(i1 %x, i1 %y) { + %result = add i1 %x, %y + ret i1 %result +} +; CHECK: define i1 @add +; CHECK-NEXT: %x.expand_i1_val = zext i1 %x to i8 +; CHECK-NEXT: %y.expand_i1_val = zext i1 %y to i8 +; CHECK-NEXT: %result.pre_trunc = add i8 %x.expand_i1_val, %y.expand_i1_val +; CHECK-NEXT: %result = trunc i8 %result.pre_trunc to i1 + +define i1 @compare(i1 %x, i1 %y) { + %result = icmp slt i1 %x, %y + ret i1 %result +} +; CHECK: define i1 @compare +; CHECK-NEXT: %x.expand_i1_val = sext i1 %x to i8 +; CHECK-NEXT: %y.expand_i1_val = sext i1 %y to i8 +; CHECK-NEXT: %result = icmp slt i8 %x.expand_i1_val, %y.expand_i1_val + + +; Non-shift bitwise operations should not be modified. +define void @bitwise_ops(i1 %x, i1 %y) { + %and = and i1 %x, %y + %or = or i1 %x, %y + %xor = xor i1 %x, %y + ret void +} +; CHECK: define void @bitwise_ops +; CHECK-NEXT: %and = and i1 %x, %y +; CHECK-NEXT: %or = or i1 %x, %y +; CHECK-NEXT: %xor = xor i1 %x, %y + + +define void @unchanged_cases(i32 %x, i32 %y, i32* %ptr) { + %add = add i32 %x, %y + %cmp = icmp slt i32 %x, %y + %val = load i32, i32* %ptr + store i32 %x, i32* %ptr + ret void +} +; CHECK: define void @unchanged_cases +; CHECK-NEXT: %add = add i32 %x, %y +; CHECK-NEXT: %cmp = icmp slt i32 %x, %y +; CHECK-NEXT: %val = load i32, i32* %ptr +; CHECK-NEXT: store i32 %x, i32* %ptr + +define void @i1_switch(i1 %a) { +entry: + switch i1 %a, label %impossible [ + i1 true, label %truedest + i1 false, label %falsedest + ] + +impossible: + %phi = phi i32 [ 123, %entry ] + unreachable + +truedest: + unreachable + +falsedest: + unreachable +} +; CHECK-LABEL: define void @i1_switch +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 %a, label %truedest, label %falsedest +; CHECK-LABEL: impossible: +; CHECK-NEXT: unreachable +; CHECK-LABEL: truedest: +; CHECK-NEXT: unreachable +; CHECK-LABEL: falsedest: +; CHECK-NEXT: unreachable + +define void @i1_switch_default_true(i1 %a) { +entry: + switch i1 %a, label %truedest [ + i1 false, label %falsedest + ] + +truedest: + unreachable +falsedest: + unreachable +} +; CHECK-LABEL: define void @i1_switch_default_true(i1 %a) +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 %a, label %truedest, label %falsedest +; CHECK-LABEL: truedest: +; CHECK-NEXT: unreachable +; CHECK-LABEL: falsedest: +; CHECK-NEXT: unreachable + +define void @i1_switch_default_false(i1 %a) { +entry: + switch i1 %a, label %falsedest [ + i1 true, label %truedest + ] + +truedest: + unreachable +falsedest: + unreachable +} +; CHECK-LABEL: define void @i1_switch_default_false(i1 %a) +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 %a, label %truedest, label %falsedest +; CHECK-LABEL: truedest: +; CHECK-NEXT: unreachable +; CHECK-LABEL: falsedest: +; CHECK-NEXT: unreachable + diff --git a/test/Transforms/NaCl/promote-integer-signatures.ll b/test/Transforms/NaCl/promote-integer-signatures.ll new file mode 100644 index 000000000000..83bc38307407 --- 
/dev/null +++ b/test/Transforms/NaCl/promote-integer-signatures.ll @@ -0,0 +1,63 @@ +; RUN: opt %s -nacl-promote-ints -S | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" + +%struct.S0 = type { i24, i32 } +declare i32 @__gxx_personality_v0(...) + +declare i13 @ext_fct(i16, i24, i32) +; CHECK-LABEL: declare i16 @ext_fct(i16, i32, i32) + +define internal i16 @func(i32 %x, i24 %y, i32 %z) { + %lo = lshr i24 %y, 8 + %lo.tk = trunc i24 %lo to i16 + ret i16 %lo.tk +} +; CHECK-LABEL: define internal i16 @func(i32 %x, i32 %y, i32 %z) +; CHECK-NEXT: %y.clear = and i32 %y, 16777215 +; CHECK-NEXT: %lo = lshr i32 %y.clear, 8 +; CHECK-NEXT: %lo.tk = trunc i32 %lo to i16 +; CHECK-NEXT: ret i16 %lo.tk + + +define void @invoke_example(i16 %x, i24 %y, i32 %z) { +entry: + %tmp2 = invoke i13 @ext_fct(i16 %x, i24 %y, i32 %z) + to label %Cont unwind label %Cleanup +Cont: + ret void +Cleanup: + %exn = landingpad i13 personality i32 (...)* @__gxx_personality_v0 + cleanup + resume i13 %exn +} +; CHECK-LABEL: define void @invoke_example(i16 %x, i32 %y, i32 %z) +; CHECK-DAG: %tmp2 = invoke i16 @ext_fct(i16 %x, i32 %y, i32 %z) +; CHECK-DAG: %exn = landingpad i16 personality i32 (...)* @__gxx_personality_v0 +; CHECK-DAG: resume i16 %exn + +define i9 @a_func(i32 %x, i9* %y, i9 %z) { + ret i9 %z +} +; CHECK-LABEL: define i16 @a_func(i32 %x, i9* %y, i16 %z) +; CHECK-NEXT: ret i16 %z + +define i9 @applying_fct(i9* %x, i9 %y) { + %ret = call i9 @applicator(i9 (i32, i9*, i9)* @a_func, i9* %x, i9 %y) + ret i9 %ret +} +; CHECK-LABEL: define i16 @applying_fct(i9* %x, i16 %y) +; CHECK-NEXT: call i16 @applicator(i16 (i32, i9*, i16)* @a_func, i9* %x, i16 %y) +; CHECK-NEXT: ret i16 + +define i9 @applicator(i9 (i32, i9*, i9)* %fct, i9* %ptr, i9 %val) { + %ret = call i9 %fct(i32 0, i9* %ptr, i9 %val) +; CHECK: call i16 %fct(i32 0, i9* %ptr, i16 %val) + ret i9 %ret +} + +define i9 @plain_call(i9* %ptr, i9 %val) { + %ret = call i9 @applying_fct(i9* %ptr, i9 %val) +; CHECK: call i16 @applying_fct(i9* %ptr, i16 %val) + ret i9 %ret +} \ No newline at end of file diff --git a/test/Transforms/NaCl/promote-integers.ll b/test/Transforms/NaCl/promote-integers.ll new file mode 100644 index 000000000000..f700be815de0 --- /dev/null +++ b/test/Transforms/NaCl/promote-integers.ll @@ -0,0 +1,568 @@ +; RUN: opt < %s -nacl-promote-ints -S | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" + +declare void @consume_i16(i16 %a) + +; CHECK-LABEL: @sext_to_illegal( +; CHECK-NEXT: %a40 = sext i32 %a to i64 +; (0xFFFFFFFFFF) +define void @sext_to_illegal(i32 %a) { + %a40 = sext i32 %a to i40 + ret void +} + +; CHECK-LABEL: @sext_from_illegal( +define void @sext_from_illegal(i8 %a) { +; CHECK: call void @consume_i16(i16 -2) + %c12 = sext i12 -2 to i16 + call void @consume_i16(i16 %c12) +; CHECK: %a12 = sext i8 %a to i16 + %a12 = sext i8 %a to i12 +; CHECK: %a12.getsign = shl i16 %a12, 4 +; CHECK-NEXT: %a16 = ashr i16 %a12.getsign, 4 + %a16 = sext i12 %a12 to i16 +; CHECK: %a12.getsign1 = shl i16 %a12, 4 +; CHECK-NEXT: %a14 = ashr i16 %a12.getsign1, 4 +; (0x3FFF) + %a14 = sext i12 %a12 to i14 +; CHECK-NEXT: %a12.getsign2 = shl i16 %a12, 4 +; CHECK-NEXT: %a12.signed = ashr i16 %a12.getsign2, 4 +; CHECK-NEXT: %a24 = sext i16 %a12.signed to i32 +; (0xFFFFFF) + %a24 = sext i12 %a12 to i24 + + %a37 = zext i8 %a to i37 +; CHECK: %a37.getsign = shl i64 %a37, 27 +; CHECK-NEXT: %a64 = ashr i64 %a37.getsign, 27 + 
%a64 = sext i37 %a37 to i64 + ret void +} + +; CHECK-LABEL: @sext_from_undef( +define void @sext_from_undef(i8 %a) { +; CHECK-NEXT: %a12 = sext i8 undef to i16 + %a12 = sext i8 undef to i12 + ret void +} + +; CHECK-LABEL: @zext_to_illegal( +define void @zext_to_illegal(i32 %a) { +; CHECK: zext i32 %a to i64 +; CHECK-NOT: and + %a40 = zext i32 %a to i40 + ret void +} + +; CHECK-LABEL: @zext_from_illegal( +define void @zext_from_illegal(i8 %a) { +; get some illegal values to start with + %a24 = zext i8 %a to i24 + %a40 = zext i8 %a to i40 + %a18 = zext i8 %a to i18 + +; CHECK: %a32 = and i32 %a24, 16777215 +; (0xFFFFFF) + %a32 = zext i24 %a24 to i32 + +; CHECK: %b24 = and i32 %a18, 262143 +; (0x3FFFF) + %b24 = zext i18 %a18 to i24 + +; CHECK: %a24.clear = and i32 %a24, 16777215 +; CHECK: %b40 = zext i32 %a24.clear to i64 + %b40 = zext i24 %a24 to i40 + +; CHECK: call void @consume_i16(i16 4094) + %c16 = zext i12 -2 to i16 + call void @consume_i16(i16 %c16) +; CHECK: call void @consume_i16(i16 4094) + %c14 = zext i12 -2 to i14 + %c16.2 = zext i14 %c14 to i16 + call void @consume_i16(i16 %c16.2) + ret void +} + +; CHECK-LABEL: @trunc_from_illegal( +define void @trunc_from_illegal(i8 %a) { + %a24 = zext i8 %a to i24 +; CHECK: %a16 = trunc i32 %a24 to i16 + %a16 = trunc i24 %a24 to i16 + ret void +} + +; CHECK-LABEL: @trunc_to_illegal( +define void @trunc_to_illegal(i8 %a8) { + %a = zext i8 %a8 to i32 +; CHECK-NOT: trunc i32 %a +; CHECK-NOT: and + %a24 = trunc i32 %a to i24 + +; CHECK: %a12 = trunc i32 %a24 to i16 +; CHECK-NOT: and + %a12 = trunc i24 %a24 to i12 + ret void +} + +; CHECK-LABEL: @icmpsigned( +define void @icmpsigned(i32 %a) { + %shl = trunc i32 %a to i24 +; CHECK: %shl.getsign = shl i32 %shl, 8 +; CHECK-NEXT: %shl.signed = ashr i32 %shl.getsign, 8 +; CHECK-NEXT: %cmp = icmp slt i32 %shl.signed, -2 + %cmp = icmp slt i24 %shl, -2 + ret void +} + +; Bitcasts are left unchanged. 
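+; (Presumably this is because pointers to illegal integer types such as i40*
+; are not themselves rewritten; only the loads and stores made through them are
+; split up, as the load24/store24 cases further down show.)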
+%struct.ints = type { i32, i32 } +; CHECK-LABEL: @bc1( +; CHECK-NEXT: %bc1 = bitcast i32* %a to i40* +; CHECK-NEXT: %bc2 = bitcast i40* %bc1 to i32* +; CHECK-NEXT: %bc3 = bitcast %struct.ints* null to i40* +; CHECK-NEXT: %bc4 = bitcast i40* %bc1 to %struct.ints* +define i32* @bc1(i32* %a) { + %bc1 = bitcast i32* %a to i40* + %bc2 = bitcast i40* %bc1 to i32* + %bc3 = bitcast %struct.ints* null to i40* + %bc4 = bitcast i40* %bc1 to %struct.ints* + ret i32* %bc2 +} + +; CHECK: zext i32 %a to i64 +; CHECK: and i64 %a40, 255 +define void @and1(i32 %a) { + %a40 = zext i32 %a to i40 + %and = and i40 %a40, 255 + ret void +} + +; CHECK-LABEL: @andi3( +define void @andi3(i8 %a) { + %a3 = trunc i8 %a to i3 +; CHECK: and i8 %a3, 2 + %and = and i3 %a3, 2 + ret void +} + +; CHECK-LABEL: @ori7( +define void @ori7(i8 %a, i8 %b) { + %a7 = trunc i8 %a to i7 + %b7 = trunc i8 %b to i7 +; CHECK: %or = or i8 %a7, %b7 + %or = or i7 %a7, %b7 + ret void +} + +; CHECK-LABEL: @add1( +define void @add1(i16 %a) { +; CHECK-NEXT: %a24 = sext i16 %a to i32 + %a24 = sext i16 %a to i24 +; CHECK-NEXT: %sum = add i32 %a24, 16777214 + %sum = add i24 %a24, -2 +; CHECK-NEXT: %sumnsw = add nsw i32 %a24, 16777214 + %sumnsw = add nsw i24 %a24, -2 +; CHECK-NEXT: %sumnuw = add nuw i32 %a24, 16777214 + %sumnuw = add nuw i24 %a24, -2 +; CHECK-NEXT: %sumnw = add nuw nsw i32 %a24, 16777214 + %sumnw = add nuw nsw i24 %a24, -2 + ret void +} + +; CHECK-LABEL: @mul1( +define void @mul1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = sext i32 %a to i64 + %a33 = sext i32 %a to i33 +; CHECK-NEXT: %b33 = sext i32 %b to i64 + %b33 = sext i32 %b to i33 +; CHECK-NEXT: %product = mul i64 %a33, %b33 + %product = mul i33 %a33, %b33 +; CHECK-NEXT: %prodnw = mul nuw nsw i64 %a33, %b33 + %prodnw = mul nuw nsw i33 %a33, %b33 + ret void +} + +; CHECK-LABEL: @shl1( +define void @shl1(i16 %a) { + %a24 = zext i16 %a to i24 +; CHECK: %ashl = shl i32 %a24, 5 + %ashl = shl i24 %a24, 5 + +; CHECK-NEXT: %ashl2 = shl i32 %a24, 1 + %ashl2 = shl i24 %a24, 4278190081 ;0xFF000001 + + %b24 = zext i16 %a to i24 +; CHECK: %b24.clear = and i32 %b24, 16777215 +; CHECK-NEXT: %bshl = shl i32 %a24, %b24.clear + %bshl = shl i24 %a24, %b24 + ret void +} + +; CHECK-LABEL: @shlnuw( +define void @shlnuw(i16 %a) { + %a12 = trunc i16 %a to i12 +; CHECK: %ashl = shl nuw i16 %a12, 5 + %ashl = shl nuw i12 %a12, 5 + ret void +} + +; CHECK-LABEL: @lshr1( +define void @lshr1(i16 %a) { + %a24 = zext i16 %a to i24 +; CHECK: %a24.clear = and i32 %a24, 16777215 +; CHECK-NEXT: %b = lshr i32 %a24.clear, 20 + %b = lshr i24 %a24, 20 +; CHECK-NEXT: %a24.clear1 = and i32 %a24, 16777215 +; CHECK-NEXT: %c = lshr i32 %a24.clear1, 5 + %c = lshr i24 %a24, 5 + + %b24 = zext i16 %a to i24 + %d = lshr i24 %a24, %b24 +; CHECK: %a24.clear2 = and i32 %a24, 16777215 +; CHECK-NEXT: %b24.clear = and i32 %b24, 16777215 +; CHECK-NEXT: %d = lshr i32 %a24.clear2, %b24.clear + ret void +} + +; CHECK-LABEL: @ashr1( +define void @ashr1(i16 %a) { + %a24 = sext i16 %a to i24 +; CHECK: %a24.getsign = shl i32 %a24, 8 +; CHECK-NEXT: %b24 = ashr i32 %a24.getsign, 19 + %b24 = ashr i24 %a24, 11 +; CHECK-NEXT: %a24.getsign1 = shl i32 %a24, 8 +; CHECK-NEXT: %b24.clear = and i32 %b24, 16777215 +; CHECK-NEXT: %a24.shamt = add i32 %b24.clear, 8 +; CHECK-NEXT: %c = ashr i32 %a24.getsign1, %a24.shamt + %c = ashr i24 %a24, %b24 + ret void +} + +; CHECK-LABEL: @udiv1( +define void @udiv1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = zext i32 %a to i64 + %a33 = zext i32 %a to i33 +; CHECK-NEXT: %b33 = zext i32 %b to i64 + %b33 = zext i32 %b to i33 
+; CHECK-NEXT: %a33.clear = and i64 %a33, 8589934591 +; CHECK-NEXT: %b33.clear = and i64 %b33, 8589934591 +; CHECK-NEXT: %result = udiv i64 %a33.clear, %b33.clear + %result = udiv i33 %a33, %b33 + ret void +} + +; CHECK-LABEL: @sdiv1( +define void @sdiv1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = sext i32 %a to i64 + %a33 = sext i32 %a to i33 +; CHECK-NEXT: %b33 = sext i32 %b to i64 +; CHECK-NEXT: %a33.getsign = shl i64 %a33, 31 +; CHECK-NEXT: %a33.signed = ashr i64 %a33.getsign, 31 +; CHECK-NEXT: %b33.getsign = shl i64 %b33, 31 +; CHECK-NEXT: %b33.signed = ashr i64 %b33.getsign, 31 + %b33 = sext i32 %b to i33 +; CHECK-NEXT: %result = sdiv i64 %a33.signed, %b33.signed + %result = sdiv i33 %a33, %b33 + ret void +} + +; CHECK-LABEL: @urem1( +define void @urem1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = zext i32 %a to i64 + %a33 = zext i32 %a to i33 +; CHECK-NEXT: %b33 = zext i32 %b to i64 +; CHECK-NEXT: %a33.clear = and i64 %a33, 8589934591 +; CHECK-NEXT: %b33.clear = and i64 %b33, 8589934591 + %b33 = zext i32 %b to i33 +; CHECK-NEXT: %result = urem i64 %a33.clear, %b33.clear + %result = urem i33 %a33, %b33 + ret void +} + +; CHECK-LABEL: @srem1( +define void @srem1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = sext i32 %a to i64 + %a33 = sext i32 %a to i33 +; CHECK-NEXT: %b33 = sext i32 %b to i64 +; CHECK-NEXT: %a33.getsign = shl i64 %a33, 31 +; CHECK-NEXT: %a33.signed = ashr i64 %a33.getsign, 31 +; CHECK-NEXT: %b33.getsign = shl i64 %b33, 31 +; CHECK-NEXT: %b33.signed = ashr i64 %b33.getsign, 31 + %b33 = sext i32 %b to i33 +; CHECK-NEXT: %result = srem i64 %a33.signed, %b33.signed + %result = srem i33 %a33, %b33 + ret void +} + +; CHECK-LABEL: @phi_icmp( +define void @phi_icmp(i32 %a) { +entry: + br label %loop +loop: +; CHECK: %phi40 = phi i64 [ 1099511627774, %entry ], [ %phi40, %loop ] + %phi40 = phi i40 [ -2, %entry ], [ %phi40, %loop ] +; CHECK-NEXT: %phi40.clear = and i64 %phi40, 1099511627775 +; CHECK-NEXT: %b = icmp eq i64 %phi40.clear, 1099511627775 + %b = icmp eq i40 %phi40, -1 +; CHECK-NEXT: br i1 %b, label %loop, label %end + br i1 %b, label %loop, label %end +end: + ret void +} + +; CHECK-LABEL: @icmp_ult( +define void @icmp_ult(i32 %a) { + %a40 = zext i32 %a to i40 +; CHECK: %a40.clear = and i64 %a40, 1099511627775 +; CHECK-NEXT: %b = icmp ult i64 %a40.clear, 1099511627774 + %b = icmp ult i40 %a40, -2 + +; CHECK: %a40.clear1 = and i64 %a40, 1099511627775 +; CHECK-NEXT: %b40.clear = and i64 %b40, 1099511627775 +; CHECK-NEXT: %c = icmp ult i64 %a40.clear1, %b40.clear + %b40 = zext i32 %a to i40 + %c = icmp ult i40 %a40, %b40 + ret void +} + +; CHECK-LABEL: @select1( +define void @select1(i32 %a) { + %a40 = zext i32 %a to i40 +; CHECK: %s40 = select i1 true, i64 %a40, i64 1099511627775 + %s40 = select i1 true, i40 %a40, i40 -1 + ret void +} + +; Allocas are left unchanged. 
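+; A hand-written variation (illustrative only, not exercised by any CHECK
+; lines): the i40 alloca keeps its type, and a direct i40 load through it would
+; presumably be split into i32/i8 pieces just like the load24/load48/load56
+; cases below.
+define void @alloca40_direct_load_sketch() {
+  %a = alloca i40, align 8
+  %val = load i40, i40* %a
+  ret void
+}
+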
+; CHECK-LABEL: @alloca40( +; CHECK: %a = alloca i40, align 8 +define void @alloca40() { + %a = alloca i40, align 8 + %b = bitcast i40* %a to i8* + %c = load i8, i8* %b + ret void +} + +; CHECK-LABEL: @load24( +; CHECK: %bc.loty = bitcast i8* %a to i16* +; CHECK-NEXT: %load.lo = load i16, i16* %bc.loty, align 8 +; CHECK-NEXT: %load.lo.ext = zext i16 %load.lo to i32 +; CHECK-NEXT: %bc.hi = getelementptr i16, i16* %bc.loty, i32 1 +; CHECK-NEXT: %bc.hity = bitcast i16* %bc.hi to i8* +; CHECK-NEXT: %load.hi = load i8, i8* %bc.hity, align 2 +; CHECK-NEXT: %load.hi.ext = zext i8 %load.hi to i32 +; CHECK-NEXT: %load.hi.ext.sh = shl i32 %load.hi.ext, 16 +; CHECK-NEXT: %load = or i32 %load.lo.ext, %load.hi.ext.sh +define void @load24(i8* %a) { + %bc = bitcast i8* %a to i24* + %load = load i24, i24* %bc, align 8 + ret void +} + +; CHECK-LABEL: @load24_overaligned( +; CHECK: %load.lo = load i16, i16* %bc.loty, align 32 +; CHECK: %load.hi = load i8, i8* %bc.hity, align 2 +define void @load24_overaligned(i8* %a) { + %bc = bitcast i8* %a to i24* + %load = load i24, i24* %bc, align 32 + ret void +} + +; CHECK-LABEL: @load48( +; CHECK: %load.lo = load i32, i32* %a, align 8 +; CHECK-NEXT: %load.lo.ext = zext i32 %load.lo to i64 +; CHECK-NEXT: %bc.hi = getelementptr i32, i32* %a, i32 1 +; CHECK-NEXT: %bc.hity = bitcast i32* %bc.hi to i16* +; CHECK-NEXT: %load.hi = load i16, i16* %bc.hity, align 4 +; CHECK-NEXT: %load.hi.ext = zext i16 %load.hi to i64 +; CHECK-NEXT: %load.hi.ext.sh = shl i64 %load.hi.ext, 32 +; CHECK-NEXT: %load = or i64 %load.lo.ext, %load.hi.ext.sh +define void @load48(i32* %a) { + %bc = bitcast i32* %a to i48* + %load = load i48, i48* %bc, align 8 + ret void +} + +; CHECK-LABEL: @load56( +; CHECK: %bc = bitcast i32* %a to i56* +; CHECK-NEXT: %load.lo = load i32, i32* %a, align 8 +; CHECK-NEXT: %load.lo.ext = zext i32 %load.lo to i64 +; CHECK-NEXT: %bc.hi = getelementptr i32, i32* %a, i32 1 +; CHECK-NEXT: %bc.hity = bitcast i32* %bc.hi to i24* +; CHECK-NEXT: %bc.hity.loty = bitcast i32* %bc.hi to i16* +; CHECK-NEXT: %load.hi.lo = load i16, i16* %bc.hity.loty, align 4 +; CHECK-NEXT: %load.hi.lo.ext = zext i16 %load.hi.lo to i32 +; CHECK-NEXT: %bc.hity.hi = getelementptr i16, i16* %bc.hity.loty, i32 1 +; CHECK-NEXT: %bc.hity.hity = bitcast i16* %bc.hity.hi to i8* +; CHECK-NEXT: %load.hi.hi = load i8, i8* %bc.hity.hity, align 2 +; CHECK-NEXT: %load.hi.hi.ext = zext i8 %load.hi.hi to i32 +; CHECK-NEXT: %load.hi.hi.ext.sh = shl i32 %load.hi.hi.ext, 16 +; CHECK-NEXT: %load.hi = or i32 %load.hi.lo.ext, %load.hi.hi.ext.sh +; CHECK-NEXT: %load.hi.ext = zext i32 %load.hi to i64 +; CHECK-NEXT: %load.hi.ext.sh = shl i64 %load.hi.ext, 32 +; CHECK-NEXT: %load = or i64 %load.lo.ext, %load.hi.ext.sh +define void @load56(i32* %a) { + %bc = bitcast i32* %a to i56* + %load = load i56, i56* %bc + ret void +} + +; Ensure that types just above and just below large powers of 2 can be compiled. 
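+; (Concretely: i2056 is 2048 + 8 bits, one byte above 2^11, and i4088 is
+; 4096 - 8 bits, one byte below 2^12.)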
+; CHECK-LABEL: @load_large( +define void @load_large(i32* %a) { + %bc1 = bitcast i32* %a to i2056* + %load1 = load i2056, i2056* %bc1 + %bc2 = bitcast i32* %a to i4088* + %load2 = load i4088, i4088* %bc2 + ret void +} + +; CHECK-LABEL: @store24( +; CHECK: %b24 = zext i8 %b to i32 +; CHECK-NEXT: %bc.loty = bitcast i8* %a to i16* +; CHECK-NEXT: %b24.lo = trunc i32 %b24 to i16 +; CHECK-NEXT: store i16 %b24.lo, i16* %bc.loty, align 4 +; CHECK-NEXT: %b24.hi.sh = lshr i32 %b24, 16 +; CHECK-NEXT: %bc.hi = getelementptr i16, i16* %bc.loty, i32 1 +; CHECK-NEXT: %b24.hi = trunc i32 %b24.hi.sh to i8 +; CHECK-NEXT: %bc.hity = bitcast i16* %bc.hi to i8* +; CHECK-NEXT: store i8 %b24.hi, i8* %bc.hity, align 2 +define void @store24(i8* %a, i8 %b) { + %bc = bitcast i8* %a to i24* + %b24 = zext i8 %b to i24 + store i24 %b24, i24* %bc + ret void +} + +; CHECK-LABEL: @store24_overaligned( +; CHECK: store i16 %b24.lo, i16* %bc.loty, align 32 +; CHECK: store i8 %b24.hi, i8* %bc.hity, align 2 +define void @store24_overaligned(i8* %a, i8 %b) { + %bc = bitcast i8* %a to i24* + %b24 = zext i8 %b to i24 + store i24 %b24, i24* %bc, align 32 + ret void +} + +; CHECK-LABEL: @store56( +; CHECK: %b56 = zext i8 %b to i64 +; CHECK-NEXT: %bc.loty = bitcast i8* %a to i32* +; CHECK-NEXT: %b56.lo = trunc i64 %b56 to i32 +; CHECK-NEXT: store i32 %b56.lo, i32* %bc.loty, align 8 +; CHECK-NEXT: %b56.hi.sh = lshr i64 %b56, 32 +; CHECK-NEXT: %bc.hi = getelementptr i32, i32* %bc.loty, i32 1 +; CHECK-NEXT: %bc.hity = bitcast i32* %bc.hi to i24* +; CHECK-NEXT: %bc.hity.loty = bitcast i32* %bc.hi to i16* +; CHECK-NEXT: %b56.hi.sh.lo = trunc i64 %b56.hi.sh to i16 +; CHECK-NEXT: store i16 %b56.hi.sh.lo, i16* %bc.hity.loty, align 4 +; CHECK-NEXT: %b56.hi.sh.hi.sh = lshr i64 %b56.hi.sh, 16 +; CHECK-NEXT: %bc.hity.hi = getelementptr i16, i16* %bc.hity.loty, i32 1 +; CHECK-NEXT: %b56.hi.sh.hi = trunc i64 %b56.hi.sh.hi.sh to i8 +; CHECK-NEXT: %bc.hity.hity = bitcast i16* %bc.hity.hi to i8* +; CHECK-NEXT: store i8 %b56.hi.sh.hi, i8* %bc.hity.hity, align 2 +define void @store56(i8* %a, i8 %b) { + %bc = bitcast i8* %a to i56* + %b56 = zext i8 %b to i56 + store i56 %b56, i56* %bc + ret void +} + +; Ensure that types just above and just below large powers of 2 can be compiled. +; CHECK-LABEL: @store_large( +define void @store_large(i32* %a, i8 %b) { + %bc1 = bitcast i32* %a to i2056* + %b2056 = zext i8 %b to i2056 + store i2056 %b2056, i2056* %bc1 + %bc2 = bitcast i32* %a to i4088* + %b4088 = zext i8 %b to i4088 + store i4088 %b4088, i4088* %bc2 + ret void +} + +; Undef can be converted to anything that's convenient. 
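+; (Hence the {{.*}} wildcard in the check below: the widened undef operand of
+; the i40 "and" may become any i64 value, so only the shape of the instruction
+; is checked.)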
+; CHECK-LABEL: @undefoperand( +; CHECK-NEXT: %a40 = zext i32 %a to i64 +; CHECK-NEXT: %au = and i64 %a40, {{.*}} +define void @undefoperand(i32 %a) { + %a40 = zext i32 %a to i40 + %au = and i40 %a40, undef + ret void +} + +; CHECK-LABEL: @constoperand( +; CHECK-NEXT: %a40 = zext i32 %a to i64 +; CHECK-NEXT: %au = and i64 %a40, 1099494850815 +define void @constoperand(i32 %a) { + %a40 = zext i32 %a to i40 + %au = and i40 %a40, 1099494850815 ; 0xffff0000ff + ret void +} + +; CHECK-LABEL: @switch( +; CHECK-NEXT: %a24 = zext i16 %a to i32 +; CHECK-NEXT: %a24.clear = and i32 %a24, 16777215 +; CHECK-NEXT: switch i32 %a24.clear, label %end [ +; CHECK-NEXT: i32 0, label %if1 +; CHECK-NEXT: i32 1, label %if2 +define void @switch(i16 %a) { + %a24 = zext i16 %a to i24 + switch i24 %a24, label %end [ + i24 0, label %if1 + i24 1, label %if2 + ] +if1: + ret void +if2: + ret void +end: + ret void +} + + +; The getelementptr here should be handled unchanged. +; CHECK-LABEL: @pointer_to_array( +; CHECK: %element_ptr = getelementptr [2 x i40], [2 x i40]* %ptr, i32 0, i32 0 +define void @pointer_to_array([2 x i40]* %ptr) { + %element_ptr = getelementptr [2 x i40], [2 x i40]* %ptr, i32 0, i32 0 + load i40, i40* %element_ptr + ret void +} + +; Store 0x1222277777777 and make sure it's split up into 3 stores of each part. +; CHECK-LABEL: @constants( +; CHECK: store i32 2004318071, i32* %{{.*}}, align 4 +; CHECK: store i16 8738, i16* %{{.*}} +; CHECK: store i8 1, i8* %{{.*}} +define void @constants(i56* %ptr) { + store i56 319006405261175, i56* %ptr, align 4 + ret void +} + +@from = external global [300 x i8], align 4 +@to = external global [300 x i8], align 4 + +; CHECK-LABEL: @load_bc_to_i80( +; CHECK-NEXT: %expanded = bitcast [300 x i8]* @from to i64* +; CHECK-NEXT: %loaded.short.lo = load i64, i64* %expanded, align 4 +; CHECK-NEXT: %loaded.short.lo.ext = zext i64 %loaded.short.lo to i128 +; CHECK-NEXT: %expanded5 = bitcast [300 x i8]* @from to i64* +; CHECK-NEXT: %expanded4 = getelementptr i64, i64* %expanded5, i32 1 +; CHECK-NEXT: %expanded3 = bitcast i64* %expanded4 to i16* +; CHECK-NEXT: %loaded.short.hi = load i16, i16* %expanded3, align 4 +; CHECK-NEXT: %loaded.short.hi.ext = zext i16 %loaded.short.hi to i128 +; CHECK-NEXT: %loaded.short.hi.ext.sh = shl i128 %loaded.short.hi.ext, 64 +; CHECK-NEXT: %loaded.short = or i128 %loaded.short.lo.ext, %loaded.short.hi.ext.sh +; CHECK-NEXT: %loaded.short.lo1 = trunc i128 %loaded.short to i64 +; CHECK-NEXT: %expanded6 = bitcast [300 x i8]* @to to i64* +; CHECK-NEXT: store i64 %loaded.short.lo1, i64* %expanded6, align 4 +; CHECK-NEXT: %loaded.short.hi.sh = lshr i128 %loaded.short, 64 +; CHECK-NEXT: %loaded.short.hi2 = trunc i128 %loaded.short.hi.sh to i16 +; CHECK-NEXT: %expanded9 = bitcast [300 x i8]* @to to i64* +; CHECK-NEXT: %expanded8 = getelementptr i64, i64* %expanded9, i32 1 +; CHECK-NEXT: %expanded7 = bitcast i64* %expanded8 to i16* +; CHECK-NEXT: store i16 %loaded.short.hi2, i16* %expanded7, align 4 +define void @load_bc_to_i80() { + %loaded.short = load i80, i80* bitcast ([300 x i8]* @from to i80*), align 4 + store i80 %loaded.short, i80* bitcast ([300 x i8]* @to to i80*), align 4 + ret void +} diff --git a/test/Transforms/NaCl/remove-asm-memory.ll b/test/Transforms/NaCl/remove-asm-memory.ll new file mode 100644 index 000000000000..cd3f99c83e41 --- /dev/null +++ b/test/Transforms/NaCl/remove-asm-memory.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -nacl-rewrite-atomics -remove-asm-memory -S | \ +; RUN: FileCheck %s +; RUN: opt < %s -O3 -nacl-rewrite-atomics 
-remove-asm-memory -S | \ +; RUN: FileCheck %s +; RUN: opt < %s -O3 -nacl-rewrite-atomics -remove-asm-memory -S | \ +; RUN: FileCheck %s -check-prefix=ELIM +; RUN: opt < %s -nacl-rewrite-atomics -remove-asm-memory -S | \ +; RUN: FileCheck %s -check-prefix=CLEANED + +; ``asm("":::"memory")`` is used as a compiler barrier and the GCC-style +; builtin ``__sync_synchronize`` is intended as a barrier for all memory +; that could be observed by external threads. They both get rewritten +; for NaCl by Clang to a sequentially-consistent fence surrounded by +; ``call void asm sideeffect "", "~{memory}"``. +; +; The test is also run at O3 to make sure that non-volatile and +; non-atomic loads and stores to escaping objects (i.e. loads and stores +; which could be observed by other threads) don't get unexpectedly +; eliminated. + +; CLEANED-NOT: asm + +target datalayout = "p:32:32:32" + +@a = external global i32 +@b = external global i32 + +; Different triples encode ``asm("":::"memory")``'s "touch everything" +; constraints differently. They should get detected and removed. +define void @memory_assembly_encoding_test() { +; CHECK: @memory_assembly_encoding_test() + call void asm sideeffect "", "~{memory}"() + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect "", "~{foo},~{memory},~{bar}"() + + ret void + ; CHECK-NEXT: ret void +} + +define void @memory_assembly_ordering_test() { +; CHECK: @memory_assembly_ordering_test() + %1 = load i32, i32* @a, align 4 + store i32 %1, i32* @b, align 4 + call void asm sideeffect "", "~{memory}"() + fence seq_cst + call void asm sideeffect "", "~{memory}"() + ; CHECK-NEXT: %1 = load i32, i32* @a, align 4 + ; CHECK-NEXT: store i32 %1, i32* @b, align 4 + ; CHECK-NEXT: call void @llvm.nacl.atomic.fence.all() + + ; Redundant load from the previous location, and store to the same + ; location (making the previous one dead). Shouldn't get eliminated + ; because of the fence. + %2 = load i32, i32* @a, align 4 + store i32 %2, i32* @b, align 4 + call void asm sideeffect "", "~{memory}"() + fence seq_cst + call void asm sideeffect "", "~{memory}"() + ; CHECK-NEXT: %2 = load i32, i32* @a, align 4 + ; CHECK-NEXT: store i32 %2, i32* @b, align 4 + ; CHECK-NEXT: call void @llvm.nacl.atomic.fence.all() + + ; Same here. + %3 = load i32, i32* @a, align 4 + store i32 %3, i32* @b, align 4 + ; CHECK-NEXT: %3 = load i32, i32* @a, align 4 + ; CHECK-NEXT: store i32 %3, i32* @b, align 4 + + ret void + ; CHECK-NEXT: ret void +} + +; Same function as above, but without the barriers. At O3 some loads and +; stores should get eliminated. 
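+; (With no fences in between, the three loads can be folded into one and the
+; first two stores become dead, so the ELIM run is expected to be left with a
+; single load/store pair, as the ELIM lines inside the function check.)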
+define void @memory_ordering_test() { +; ELIM: @memory_ordering_test() + %1 = load i32, i32* @a, align 4 + store i32 %1, i32* @b, align 4 + %2 = load i32, i32* @a, align 4 + store i32 %2, i32* @b, align 4 + %3 = load i32, i32* @a, align 4 + store i32 %3, i32* @b, align 4 + ; ELIM-NEXT: %1 = load i32, i32* @a, align 4 + ; ELIM-NEXT: store i32 %1, i32* @b, align 4 + + ret void + ; ELIM-NEXT: ret void +} diff --git a/test/Transforms/NaCl/replace-ptrs-with-ints.ll b/test/Transforms/NaCl/replace-ptrs-with-ints.ll new file mode 100644 index 000000000000..8a9d3de22179 --- /dev/null +++ b/test/Transforms/NaCl/replace-ptrs-with-ints.ll @@ -0,0 +1,656 @@ +; RUN: opt %s -replace-ptrs-with-ints -S | FileCheck %s + +target datalayout = "p:32:32:32" + + +%struct = type { i32, i32 } + +declare %struct* @addr_taken_func(%struct*) + +@addr_of_func = global %struct* (%struct*)* @addr_taken_func +; CHECK: @addr_of_func = global %struct* (%struct*)* bitcast (i32 (i32)* @addr_taken_func to %struct* (%struct*)*) + +@blockaddr = global i8* blockaddress(@indirectbr, %l1) +; CHECK: @blockaddr = global i8* blockaddress(@indirectbr, %l1) + + +define i8* @pointer_arg(i8* %ptr, i64 %non_ptr) { + ret i8* %ptr +} +; CHECK: define i32 @pointer_arg(i32 %ptr, i64 %non_ptr) { +; CHECK-NEXT: ret i32 %ptr +; CHECK-NEXT: } + + +declare i8* @declared_func(i8*, i64) +; CHECK: declare i32 @declared_func(i32, i64) + + +define void @self_reference_phi(i8* %ptr) { +entry: + br label %loop +loop: + %x = phi i8* [ %x, %loop ], [ %ptr, %entry ] + br label %loop +} +; CHECK: define void @self_reference_phi(i32 %ptr) { +; CHECK: %x = phi i32 [ %x, %loop ], [ %ptr, %entry ] + +; Self-referencing bitcasts are possible in unreachable basic blocks. +; It is not very likely that we will encounter this, but we handle it +; for completeness. +define void @self_reference_bitcast(i8** %dest) { + ret void +unreachable_loop: + store i8* %self_ref, i8** %dest + %self_ref = bitcast i8* %self_ref to i8* + store i8* %self_ref, i8** %dest + br label %unreachable_loop +} +; CHECK: define void @self_reference_bitcast(i32 %dest) { +; CHECK: store i32 undef, i32* %dest.asptr +; CHECK: store i32 undef, i32* %dest.asptr + +define void @circular_reference_bitcasts(i8** %dest) { + ret void +unreachable_loop: + store i8* %cycle1, i8** %dest + %cycle1 = bitcast i8* %cycle2 to i8* + %cycle2 = bitcast i8* %cycle1 to i8* + br label %unreachable_loop +} +; CHECK: define void @circular_reference_bitcasts(i32 %dest) { +; CHECK: store i32 undef, i32* %dest.asptr + +define void @circular_reference_inttoptr(i8** %dest) { + ret void +unreachable_loop: + %ptr = inttoptr i32 %int to i8* + %int = ptrtoint i8* %ptr to i32 + store i8* %ptr, i8** %dest + br label %unreachable_loop +} +; CHECK: define void @circular_reference_inttoptr(i32 %dest) { +; CHECK: store i32 undef, i32* %dest.asptr + +define i8* @forwards_reference(%struct** %ptr) { + br label %block1 +block2: + ; Forwards reference to %val. + %cast = bitcast %struct* %val to i8* + br label %block3 +block1: + %val = load %struct*, %struct** %ptr + br label %block2 +block3: + ; Backwards reference to a forwards reference that has already been + ; resolved. 
+ ret i8* %cast +} +; CHECK: define i32 @forwards_reference(i32 %ptr) { +; CHECK-NEXT: br label %block1 +; CHECK: block2: +; CHECK-NEXT: br label %block3 +; CHECK: block1: +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %val = load i32, i32* %ptr.asptr +; CHECK-NEXT: br label %block2 +; CHECK: block3: +; CHECK-NEXT: ret i32 %val + + +define i8* @phi_multiple_entry(i1 %arg, i8* %ptr) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i8* [ %ptr, %entry ], [ %ptr, %entry ] + ret i8* %result +} +; CHECK: define i32 @phi_multiple_entry(i1 %arg, i32 %ptr) { +; CHECK: %result = phi i32 [ %ptr, %entry ], [ %ptr, %entry ] + + +define i8* @select(i1 %cond, i8* %val1, i8* %val2) { + %r = select i1 %cond, i8* %val1, i8* %val2 + ret i8* %r +} +; CHECK: define i32 @select(i1 %cond, i32 %val1, i32 %val2) { +; CHECK-NEXT: %r = select i1 %cond, i32 %val1, i32 %val2 + + +define i32* @ptrtoint_same_size(i32* %ptr) { + %a = ptrtoint i32* %ptr to i32 + %b = add i32 %a, 4 + %c = inttoptr i32 %b to i32* + ret i32* %c +} +; CHECK: define i32 @ptrtoint_same_size(i32 %ptr) { +; CHECK-NEXT: %b = add i32 %ptr, 4 +; CHECK-NEXT: ret i32 %b + + +define i32* @ptrtoint_different_size(i32* %ptr) { + %a = ptrtoint i32* %ptr to i64 + %b = add i64 %a, 4 + %c = inttoptr i64 %b to i32* + ret i32* %c +} +; CHECK: define i32 @ptrtoint_different_size(i32 %ptr) { +; CHECK-NEXT: %a = zext i32 %ptr to i64 +; CHECK-NEXT: %b = add i64 %a, 4 +; CHECK-NEXT: %c = trunc i64 %b to i32 +; CHECK-NEXT: ret i32 %c + +define i8 @ptrtoint_truncates_var(i32* %ptr) { + %a = ptrtoint i32* %ptr to i8 + ret i8 %a +} +; CHECK: define i8 @ptrtoint_truncates_var(i32 %ptr) { +; CHECK-NEXT: %a = trunc i32 %ptr to i8 + +define i8 @ptrtoint_truncates_global() { + %a = ptrtoint i32* @var to i8 + ret i8 %a +} +; CHECK: define i8 @ptrtoint_truncates_global() { +; CHECK-NEXT: %expanded = ptrtoint i32* @var to i32 +; CHECK-NEXT: %a = trunc i32 %expanded to i8 + + +define i32* @pointer_bitcast(i64* %ptr) { + %cast = bitcast i64* %ptr to i32* + ret i32* %cast +} +; CHECK: define i32 @pointer_bitcast(i32 %ptr) { +; CHECK-NEXT: ret i32 %ptr + +; Same-type non-pointer bitcasts happen to be left alone by this pass. 
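+; (No pointer type is involved in "bitcast i32 ... to i32", so the pass has
+; nothing to convert and the no-op cast simply survives, as checked below.)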
+define i32 @no_op_bitcast(i32 %val) { + %val2 = bitcast i32 %val to i32 + ret i32 %val2 +} +; CHECK: define i32 @no_op_bitcast(i32 %val) { +; CHECK-NEXT: %val2 = bitcast i32 %val to i32 + +define i64 @kept_bitcast(double %d) { + %i = bitcast double %d to i64 + ret i64 %i +} +; CHECK: define i64 @kept_bitcast(double %d) { +; CHECK-NEXT: %i = bitcast double %d to i64 + + +define i32 @constant_pointer_null() { + %val = ptrtoint i32* null to i32 + ret i32 %val +} +; CHECK: define i32 @constant_pointer_null() { +; CHECK-NEXT: ret i32 0 + +define i32 @constant_pointer_undef() { + %val = ptrtoint i32* undef to i32 + ret i32 %val +} +; CHECK: define i32 @constant_pointer_undef() { +; CHECK-NEXT: ret i32 undef + +define i16* @constant_pointer_null_load() { + %val = load i16*, i16** null + ret i16* %val +} +; CHECK: define i32 @constant_pointer_null_load() { +; CHECK-NEXT: %.asptr = inttoptr i32 0 to i32* +; CHECK-NEXT: %val = load i32, i32* %.asptr + +define i16* @constant_pointer_undef_load() { + %val = load i16*, i16** undef + ret i16* %val +} +; CHECK: define i32 @constant_pointer_undef_load() { +; CHECK-NEXT: %.asptr = inttoptr i32 undef to i32* +; CHECK-NEXT: %val = load i32, i32* %.asptr + + +define i8 @load(i8* %ptr) { + %x = load i8, i8* %ptr + ret i8 %x +} +; CHECK: define i8 @load(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: %x = load i8, i8* %ptr.asptr + +define void @store(i8* %ptr, i8 %val) { + store i8 %val, i8* %ptr + ret void +} +; CHECK: define void @store(i32 %ptr, i8 %val) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: store i8 %val, i8* %ptr.asptr + + +define i8* @load_ptr(i8** %ptr) { + %x = load i8*, i8** %ptr + ret i8* %x +} +; CHECK: define i32 @load_ptr(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %x = load i32, i32* %ptr.asptr + +define void @store_ptr(i8** %ptr, i8* %val) { + store i8* %val, i8** %ptr + ret void +} +; CHECK: define void @store_ptr(i32 %ptr, i32 %val) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: store i32 %val, i32* %ptr.asptr + + +define i8 @load_attrs(i8* %ptr) { + %x = load atomic volatile i8, i8* %ptr seq_cst, align 128 + ret i8 %x +} +; CHECK: define i8 @load_attrs(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: %x = load atomic volatile i8, i8* %ptr.asptr seq_cst, align 128 + +define void @store_attrs(i8* %ptr, i8 %val) { + store atomic volatile i8 %val, i8* %ptr singlethread release, align 256 + ret void +} +; CHECK: define void @store_attrs(i32 %ptr, i8 %val) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: store atomic volatile i8 %val, i8* %ptr.asptr singlethread release, align 256 + + +define i32 @cmpxchg(i32* %ptr, i32 %a, i32 %b) { + %r = cmpxchg i32* %ptr, i32 %a, i32 %b seq_cst seq_cst + %res = extractvalue { i32, i1 } %r, 0 + ret i32 %res +} +; CHECK: define i32 @cmpxchg(i32 %ptr, i32 %a, i32 %b) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %r = cmpxchg i32* %ptr.asptr, i32 %a, i32 %b seq_cst seq_cst + +define i32 @atomicrmw(i32* %ptr, i32 %x) { + %r = atomicrmw add i32* %ptr, i32 %x seq_cst + ret i32 %r +} +; CHECK: define i32 @atomicrmw(i32 %ptr, i32 %x) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %r = atomicrmw add i32* %ptr.asptr, i32 %x seq_cst + + +define i8* @indirect_call(i8* (i8*)* %func, i8* %arg) { + %result = call i8* %func(i8* %arg) + ret i8* %result +} +; CHECK: define i32 @indirect_call(i32 %func, i32 
%arg) {
+; CHECK-NEXT: %func.asptr = inttoptr i32 %func to i32 (i32)*
+; CHECK-NEXT: %result = call i32 %func.asptr(i32 %arg)
+; CHECK-NEXT: ret i32 %result
+
+
+; Test forwards reference
+define i8* @direct_call1(i8* %arg) {
+  %result = call i8* @direct_call2(i8* %arg)
+  ret i8* %result
+}
+; CHECK: define i32 @direct_call1(i32 %arg) {
+; CHECK-NEXT: %result = call i32 @direct_call2(i32 %arg)
+; CHECK-NEXT: ret i32 %result
+
+; Test backwards reference
+define i8* @direct_call2(i8* %arg) {
+  %result = call i8* @direct_call1(i8* %arg)
+  ret i8* %result
+}
+; CHECK: define i32 @direct_call2(i32 %arg) {
+; CHECK-NEXT: %result = call i32 @direct_call1(i32 %arg)
+; CHECK-NEXT: ret i32 %result
+
+
+@var = global i32 0
+
+define i32* @get_addr_of_global() {
+  ret i32* @var
+}
+; CHECK: define i32 @get_addr_of_global() {
+; CHECK-NEXT: %expanded = ptrtoint i32* @var to i32
+; CHECK-NEXT: ret i32 %expanded
+
+define %struct* (%struct*)* @get_addr_of_func() {
+  ret %struct* (%struct*)* @addr_taken_func
+}
+; CHECK: define i32 @get_addr_of_func() {
+; CHECK-NEXT: %expanded = ptrtoint i32 (i32)* @addr_taken_func to i32
+; CHECK-NEXT: ret i32 %expanded
+
+
+define i32 @load_global() {
+  %val = load i32, i32* @var
+  ret i32 %val
+}
+; CHECK: define i32 @load_global() {
+; CHECK-NEXT: %val = load i32, i32* @var
+; CHECK-NEXT: ret i32 %val
+
+define i16 @load_global_bitcast() {
+  %ptr = bitcast i32* @var to i16*
+  %val = load i16, i16* %ptr
+  ret i16 %val
+}
+; CHECK: define i16 @load_global_bitcast() {
+; CHECK-NEXT: %var.bc = bitcast i32* @var to i16*
+; CHECK-NEXT: %val = load i16, i16* %var.bc
+; CHECK-NEXT: ret i16 %val
+
+
+; Check that unsimplified allocas are properly handled:
+declare void @receive_alloca(%struct* %ptr)
+
+define void @unsimplified_alloca() {
+  %a = alloca %struct
+  call void @receive_alloca(%struct* %a)
+  unreachable
+}
+; CHECK-LABEL: define void @unsimplified_alloca()
+; CHECK-NEXT: %a = alloca %struct
+; CHECK-NEXT: %a.asint = ptrtoint %struct* %a to i32
+; CHECK-NEXT: call void @receive_alloca(i32 %a.asint)
+; CHECK-NEXT: unreachable
+
+
+define i1 @compare(i8* %ptr1, i8* %ptr2) {
+  %cmp = icmp ult i8* %ptr1, %ptr2
+  ret i1 %cmp
+}
+; CHECK: define i1 @compare(i32 %ptr1, i32 %ptr2) {
+; CHECK-NEXT: %cmp = icmp ult i32 %ptr1, %ptr2
+
+
+declare i8* @llvm.some.intrinsic(i8* %ptr)
+
+define i8* @preserve_intrinsic_type(i8* %ptr) {
+  %result = call i8* @llvm.some.intrinsic(i8* %ptr)
+  ret i8* %result
+}
+; CHECK: define i32 @preserve_intrinsic_type(i32 %ptr) {
+; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8*
+; CHECK-NEXT: %result = call i8* @llvm.some.intrinsic(i8* %ptr.asptr)
+; CHECK-NEXT: %result.asint = ptrtoint i8* %result to i32
+; CHECK-NEXT: ret i32 %result.asint
+
+
+; Just check that the pass does not crash on inline asm.
+define i16* @inline_asm1(i8* %ptr) {
+  %val = call i16* asm "foo", "=r,r"(i8* %ptr)
+  ret i16* %val
+}
+
+define i16** @inline_asm2(i8** %ptr) {
+  %val = call i16** asm "foo", "=r,r"(i8** %ptr)
+  ret i16** %val
+}
+
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+define void @debug_declare(i32 %val) {
+  ; We normally expect llvm.dbg.declare to be used on an alloca.
+ %var = alloca i32 + call void @llvm.dbg.declare(metadata i32* %var, metadata !11, metadata !12), !dbg !13 + call void @llvm.dbg.declare(metadata i32 %val, metadata !14, metadata !12), !dbg !13 + ret void +} +; CHECK: define void @debug_declare(i32 %val) { +; CHECK-NEXT: %var = alloca i32 +; CHECK-NEXT: call void @llvm.dbg.declare(metadata i32* %var, metadata !11, metadata !12), !dbg !13 +; This case is currently not converted. +; CHECK-NEXT: call void @llvm.dbg.declare(metadata !2, metadata !14, metadata !12) +; CHECK-NEXT: ret void + +; For now, debugging info for values is lost. replaceAllUsesWith() +; does not work for metadata references -- it converts them to nulls. +; This makes dbg.value too tricky to handle for now. +define void @debug_value(i32 %val, i8* %ptr) { + tail call void @llvm.dbg.value(metadata i32 %val, i64 1, metadata !11, metadata !12), !dbg !18 + tail call void @llvm.dbg.value(metadata i8* %ptr, i64 2, metadata !14, metadata !12), !dbg !18 + +; check that we don't crash when encountering odd things: + tail call void @llvm.dbg.value(metadata i8* null, i64 3, metadata !11, metadata !12), !dbg !18 + tail call void @llvm.dbg.value(metadata i8* undef, i64 4, metadata !11, metadata !12), !dbg !18 + tail call void @llvm.dbg.value(metadata !{}, i64 5, metadata !11, metadata !12), !dbg !18 + ret void +} +; CHECK: define void @debug_value(i32 %val, i32 %ptr) { +; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, i64 1, metadata !11, metadata !12) +; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, i64 2, metadata !14, metadata !12) +; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* null, i64 3, metadata !11, metadata !12) +; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* undef, i64 4, metadata !11, metadata !12) +; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, i64 5, metadata !11, metadata !12) +; CHECK-NEXT: ret void + + +declare void @llvm.lifetime.start(i64 %size, i8* %ptr) +declare {}* @llvm.invariant.start(i64 %size, i8* %ptr) +declare void @llvm.invariant.end({}* %start, i64 %size, i8* %ptr) + +; GVN can introduce the following horrible corner case of a lifetime +; marker referencing a PHI node. But we convert the phi to i32 type, +; and lifetime.start doesn't work on an inttoptr converting an i32 phi +; to a pointer. Because of this, we just strip out all lifetime +; markers. 
+ +define void @alloca_lifetime_via_phi() { +entry: + %buf = alloca i8 + br label %block +block: + %phi = phi i8* [ %buf, %entry ] + call void @llvm.lifetime.start(i64 -1, i8* %phi) + ret void +} +; CHECK: define void @alloca_lifetime_via_phi() { +; CHECK: %phi = phi i32 [ %buf.asint, %entry ] +; CHECK-NEXT: ret void + +define void @alloca_lifetime() { + %buf = alloca i8 + call void @llvm.lifetime.start(i64 -1, i8* %buf) + ret void +} +; CHECK: define void @alloca_lifetime() { +; CHECK-NEXT: %buf = alloca i8 +; CHECK-NEXT: ret void + +define void @alloca_lifetime_via_bitcast() { + %buf = alloca i32 + %buf_cast = bitcast i32* %buf to i8* + call void @llvm.lifetime.start(i64 -1, i8* %buf_cast) + ret void +} +; CHECK: define void @alloca_lifetime_via_bitcast() { +; CHECK-NEXT: %buf = alloca i32 +; CHECK-NEXT: ret void + + +define void @strip_invariant_markers() { + %buf = alloca i8 + %start = call {}* @llvm.invariant.start(i64 1, i8* %buf) + call void @llvm.invariant.end({}* %start, i64 1, i8* %buf) + ret void +} +; CHECK: define void @strip_invariant_markers() { +; CHECK-NEXT: %buf = alloca i8 +; CHECK-NEXT: ret void + + +; "nocapture" and "noalias" only apply to pointers, so must be stripped. +define void @nocapture_attr(i8* nocapture noalias %ptr) { + ret void +} +; CHECK: define void @nocapture_attr(i32 %ptr) { + + +define void @readonly_readnone(i8* readonly dereferenceable_or_null(4)) { + ret void +} +; CHECK-LABEL: define void @readonly_readnone(i32) + +define nonnull i8* @nonnull_ptr(i8* nonnull) { + ret i8* undef +} +; CHECK-LABEL: define i32 @nonnull_ptr(i32) + +define dereferenceable(16) i8* @dereferenceable_ptr(i8* dereferenceable(8)) { + ret i8* undef +} +; CHECK-LABEL: define i32 @dereferenceable_ptr(i32) + +; "nounwind" should be preserved. +define void @nounwind_func_attr() nounwind { + ret void +} +; CHECK: define void @nounwind_func_attr() [[NOUNWIND:#[0-9]+]] { + +define void @nounwind_call_attr() { + call void @nounwind_func_attr() nounwind + ret void +} +; CHECK: define void @nounwind_call_attr() { +; CHECK: call void @nounwind_func_attr() {{.*}}[[NOUNWIND]] + +define fastcc void @fastcc_func() { + ret void +} +; CHECK: define fastcc void @fastcc_func() { + +define void @fastcc_call() { + call fastcc void @fastcc_func() + ret void +} +; CHECK: define void @fastcc_call() { +; CHECK-NEXT: call fastcc void @fastcc_func() + +define void @tail_call() { + tail call void @tail_call() + ret void +} +; CHECK: define void @tail_call() +; CHECK-NEXT: tail call void @tail_call() + + +; Just check that the pass does not crash on getelementptr. (The pass +; should not depend unnecessarily on ExpandGetElementPtr having been +; run.) +define i8* @getelementptr(i8, i8* %ptr) { + %gep = getelementptr i8, i8* %ptr, i32 10 + ret i8* %gep +} + +; Just check that the pass does not crash on va_arg. 
+define i32* @va_arg(i8* %valist) { + %r = va_arg i8* %valist, i32* + ret i32* %r +} + + +define void @indirectbr(i8* %addr) { + indirectbr i8* %addr, [ label %l1, label %l2 ] +l1: + ret void +l2: + ret void +} +; CHECK: define void @indirectbr(i32 %addr) { +; CHECK-NEXT: %addr.asptr = inttoptr i32 %addr to i8* +; CHECK-NEXT: indirectbr i8* %addr.asptr, [label %l1, label %l2] + + +define i8* @invoke(i8* %val) { + %result = invoke i8* @direct_call1(i8* %val) + to label %cont unwind label %lpad +cont: + ret i8* %result +lpad: + %lp = landingpad { i8*, i32 } personality void (i8*)* @personality_func cleanup + %p = extractvalue { i8*, i32 } %lp, 0 + %s = insertvalue { i8*, i32 } %lp, i8* %val, 0 + ret i8* %p +} +; CHECK: define i32 @invoke(i32 %val) { +; CHECK-NEXT: %result = invoke i32 @direct_call1(i32 %val) +; CHECK-NEXT: to label %cont unwind label %lpad +; CHECK: %lp = landingpad { i8*, i32 } personality void (i8*)* bitcast (void (i32)* @personality_func to void (i8*)*) +; CHECK: %p = extractvalue { i8*, i32 } %lp, 0 +; CHECK-NEXT: %p.asint = ptrtoint i8* %p to i32 +; CHECK-NEXT: %val.asptr = inttoptr i32 %val to i8* +; CHECK-NEXT: %s = insertvalue { i8*, i32 } %lp, i8* %val.asptr, 0 +; CHECK-NEXT: ret i32 %p.asint + +define void @personality_func(i8* %arg) { + ret void +} + + +declare i32 @llvm.eh.typeid.for(i8*) + +@typeid = global i32 0 + +; The argument here must be left as a bitcast, otherwise the backend +; rejects it. +define void @typeid_for() { + %bc = bitcast i32* @typeid to i8* + call i32 @llvm.eh.typeid.for(i8* %bc) + ret void +} +; CHECK: define void @typeid_for() { +; CHECK-NEXT: %typeid.bc = bitcast i32* @typeid to i8* +; CHECK-NEXT: call i32 @llvm.eh.typeid.for(i8* %typeid.bc) + + +; Subprogram debug metadata may refer to a function. +; Make sure those are updated too. 
+; Regenerate the debug info from the following C program: +; void nop(void *ptr) { +; } + +define void @nop(i8* %ptr) { + tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !11, metadata !12), !dbg !19 + ret void, !dbg !19 +} +; CHECK: define void @nop(i32 %ptr) { +; CHECK-NEXT: call void @llvm.dbg.value{{.*}} +; CHECK-NEXT: ret void + + +; CHECK: attributes {{.*}}[[NOUNWIND]] = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +; CHECK: !4 = !MDSubprogram(name: "debug_declare", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, function: void (i32)* @debug_declare, variables: !2) + +!0 = !MDCompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 235150) (llvm/trunk 235152)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2) +!1 = !MDFile(filename: "foo.c", directory: "/s/llvm/cmakebuild") +!2 = !{} +!3 = !{!4} +!4 = !MDSubprogram(name: "debug_declare", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, function: void (i32)* @debug_declare, variables: !2) +!5 = !MDSubroutineType(types: !6) +!6 = !{null, !7} +!7 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.7.0 (trunk 235150) (llvm/trunk 235152)"} +!11 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "val", arg: 1, scope: !4, file: !1, line: 1, type: !7) +!12 = !MDExpression() +!13 = !MDLocation(line: 1, column: 24, scope: !4) + +!14 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "var", scope: !4, file: !1, line: 2, type: !15) +!15 = !MDCompositeType(tag: DW_TAG_array_type, baseType: !7, align: 32, elements: !16) +!16 = !{!17} +!17 = !MDSubrange(count: -1) +!18 = !MDLocation(line: 2, column: 11, scope: !4) +!19 = !MDLocation(line: 2, column: 3, scope: !4) diff --git a/test/Transforms/NaCl/resolve-aliases.ll b/test/Transforms/NaCl/resolve-aliases.ll new file mode 100644 index 000000000000..82ad54d74e95 --- /dev/null +++ b/test/Transforms/NaCl/resolve-aliases.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -resolve-aliases -S | FileCheck %s + +; CHECK-NOT: @alias + +@r1 = internal global i32 zeroinitializer +@a1 = alias i32* @r1 +define i32* @usea1() { +; CHECK: ret i32* @r1 + ret i32* @a1 +} + +@funcalias = alias i32* ()* @usea1 +; CHECK: @usefuncalias +define void @usefuncalias() { +; CHECK: call i32* @usea1 + %1 = call i32* @funcalias() + ret void +} + +@bc1 = global i8* bitcast (i32* @r1 to i8*) +@bcalias = alias i8* bitcast (i32* @r1 to i8*) + +; CHECK: @usebcalias +define i8* @usebcalias() { +; CHECK: ret i8* bitcast (i32* @r1 to i8*) + ret i8* @bcalias +} + + +@fa2 = alias i32* ()* @funcalias +; CHECK: @usefa2 +define void @usefa2() { +; CHECK: call i32* @usea1 + call i32* @fa2() + ret void +} diff --git a/test/Transforms/NaCl/resolve-pnacl-intrinsics-lock-free.ll b/test/Transforms/NaCl/resolve-pnacl-intrinsics-lock-free.ll new file mode 100644 index 000000000000..ba4f6e6a1c7f --- /dev/null +++ b/test/Transforms/NaCl/resolve-pnacl-intrinsics-lock-free.ll @@ -0,0 +1,99 @@ +; RUN: opt < %s -resolve-pnacl-intrinsics -mtriple=x86_64 -S | FileCheck %s -check-prefix=CLEANED +; 'CLEANED' only needs to check a single architecture. 
+; RUN: opt < %s -resolve-pnacl-intrinsics -mtriple=x86_64 -S | FileCheck %s -check-prefix=X8664 +; RUN: opt < %s -resolve-pnacl-intrinsics -mtriple=i386 -S | FileCheck %s -check-prefix=X8632 +; RUN: opt < %s -resolve-pnacl-intrinsics -mtriple=arm -S | FileCheck %s -check-prefix=ARM32 +; RUN: opt < %s -resolve-pnacl-intrinsics -mtriple=mipsel -S | FileCheck %s -check-prefix=MIPS32 +; RUN: opt < %s -resolve-pnacl-intrinsics -mtriple=asmjs -S | FileCheck %s -check-prefix=ASMJS + +; CLEANED-NOT: call {{.*}} @llvm.nacl.atomic + +declare i32 @llvm.nacl.setjmp(i8*) +declare void @llvm.nacl.longjmp(i8*, i32) +declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*) + +; These declarations must be here because the function pass expects +; to find them. In real life they're inserted by the translator +; before the function pass runs. +declare i32 @setjmp(i8*) +declare void @longjmp(i8*, i32) + + +; X8664-LABEL: @test_is_lock_free_1( +; X8632-LABEL: @test_is_lock_free_1( +; ARM32-LABEL: @test_is_lock_free_1( +; MIPS32-LABEL: @test_is_lock_free_1( +; ASMJS-LABEL: @test_is_lock_free_1( +; X8664: ret i1 true +; X8632: ret i1 true +; ARM32: ret i1 true +; MIPS32: ret i1 true +; ASMJS: ret i1 true +define i1 @test_is_lock_free_1(i8* %ptr) { + %res = call i1 @llvm.nacl.atomic.is.lock.free(i32 1, i8* %ptr) + ret i1 %res +} + +; X8664-LABEL: @test_is_lock_free_2( +; X8632-LABEL: @test_is_lock_free_2( +; ARM32-LABEL: @test_is_lock_free_2( +; MIPS32-LABEL: @test_is_lock_free_2( +; ASMJS-LABEL: @test_is_lock_free_2( +; X8664: ret i1 true +; X8632: ret i1 true +; ARM32: ret i1 true +; MIPS32: ret i1 true +; ASMJS: ret i1 true +define i1 @test_is_lock_free_2(i16* %ptr) { + %ptr2 = bitcast i16* %ptr to i8* + %res = call i1 @llvm.nacl.atomic.is.lock.free(i32 2, i8* %ptr2) + ret i1 %res +} + +; X8664-LABEL: @test_is_lock_free_4( +; X8632-LABEL: @test_is_lock_free_4( +; ARM32-LABEL: @test_is_lock_free_4( +; MIPS32-LABEL: @test_is_lock_free_4( +; ASMJS-LABEL: @test_is_lock_free_4( +; X8664: ret i1 true +; X8632: ret i1 true +; ARM32: ret i1 true +; MIPS32: ret i1 true +; ASMJS: ret i1 true +define i1 @test_is_lock_free_4(i32* %ptr) { + %ptr2 = bitcast i32* %ptr to i8* + %res = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr2) + ret i1 %res +} + +; X8664-LABEL: @test_is_lock_free_8( +; X8632-LABEL: @test_is_lock_free_8( +; ARM32-LABEL: @test_is_lock_free_8( +; MIPS32-LABEL: @test_is_lock_free_8( +; ASMJS-LABEL: @test_is_lock_free_8( +; X8664: ret i1 true +; X8632: ret i1 true +; ARM32: ret i1 true +; MIPS32: ret i1 false +; ASMJS: ret i1 false +define i1 @test_is_lock_free_8(i64* %ptr) { + %ptr2 = bitcast i64* %ptr to i8* + %res = call i1 @llvm.nacl.atomic.is.lock.free(i32 8, i8* %ptr2) + ret i1 %res +} + +; X8664-LABEL: @test_is_lock_free_16( +; X8632-LABEL: @test_is_lock_free_16( +; ARM32-LABEL: @test_is_lock_free_16( +; MIPS32-LABEL: @test_is_lock_free_16( +; ASMJS-LABEL: @test_is_lock_free_16( +; X8664: ret i1 false +; X8632: ret i1 false +; ARM32: ret i1 false +; MIPS32: ret i1 false +; ASMJS: ret i1 false +define i1 @test_is_lock_free_16(i128* %ptr) { + %ptr2 = bitcast i128* %ptr to i8* + %res = call i1 @llvm.nacl.atomic.is.lock.free(i32 16, i8* %ptr2) + ret i1 %res +} diff --git a/test/Transforms/NaCl/resolve-pnacl-intrinsics.ll b/test/Transforms/NaCl/resolve-pnacl-intrinsics.ll new file mode 100644 index 000000000000..8e2bbb66df4b --- /dev/null +++ b/test/Transforms/NaCl/resolve-pnacl-intrinsics.ll @@ -0,0 +1,293 @@ +; RUN: opt < %s -resolve-pnacl-intrinsics -S | FileCheck %s \ +; RUN: -check-prefix=CLEANED 
+; RUN: opt < %s -resolve-pnacl-intrinsics -S | FileCheck %s
+
+; CLEANED-NOT: call i32 @llvm.nacl.setjmp
+; CLEANED-NOT: call void @llvm.nacl.longjmp
+; CLEANED-NOT: call {{.*}} @llvm.nacl.atomic
+
+declare i32 @llvm.nacl.setjmp(i8*)
+declare void @llvm.nacl.longjmp(i8*, i32)
+
+; Intrinsic name mangling is based on overloaded parameters only,
+; including return type. Note that all pointer parameters are
+; overloaded on type-pointed-to in Intrinsics.td, and are therefore
+; mangled on the type-pointed-to only.
+declare i8 @llvm.nacl.atomic.load.i8(i8*, i32)
+declare i16 @llvm.nacl.atomic.load.i16(i16*, i32)
+declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
+declare i64 @llvm.nacl.atomic.load.i64(i64*, i32)
+declare void @llvm.nacl.atomic.store.i8(i8, i8*, i32)
+declare void @llvm.nacl.atomic.store.i16(i16, i16*, i32)
+declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)
+declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32)
+declare i8 @llvm.nacl.atomic.rmw.i8(i32, i8*, i8, i32)
+declare i16 @llvm.nacl.atomic.rmw.i16(i32, i16*, i16, i32)
+declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32)
+declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32)
+declare i8 @llvm.nacl.atomic.cmpxchg.i8(i8*, i8, i8, i32, i32)
+declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32)
+declare i32 @llvm.nacl.atomic.cmpxchg.i32(i32*, i32, i32, i32, i32)
+declare i64 @llvm.nacl.atomic.cmpxchg.i64(i64*, i64, i64, i32, i32)
+declare void @llvm.nacl.atomic.fence(i32)
+declare void @llvm.nacl.atomic.fence.all()
+
+; These declarations must be here because the function pass expects
+; to find them. In real life they're inserted by the translator
+; before the function pass runs.
+declare i32 @setjmp(i8*)
+declare void @longjmp(i8*, i32)
+
+; For correctness, the resulting call must get the "returns_twice" attribute.
+define i32 @call_setjmp(i8* %arg) {
+  %val = call i32 @llvm.nacl.setjmp(i8* %arg)
+; CHECK: %val = call i32 @setjmp(i8* %arg) [[RETURNS_TWICE:#[0-9]+]]
+  ret i32 %val
+}
+
+define void @call_longjmp(i8* %arg, i32 %num) {
+  call void @llvm.nacl.longjmp(i8* %arg, i32 %num)
+; CHECK: call void @longjmp(i8* %arg, i32 %num){{$}}
+  ret void
+}
+
+; atomics.
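+;
+; The i32 operand encodings exercised by the tests below (inferred from the
+; tests themselves rather than restated from the ABI definition): memory
+; order 3 = acquire, 4 = release, 5 = acq_rel, 6 = seq_cst; rmw operation
+; 1 = add, 2 = sub, 3 = or, 4 = and, 5 = xor, 6 = xchg. For example, a call
+; such as
+;   %old = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %p, i32 %v, i32 6)
+; (with %p and %v standing in for arbitrary operands) is rewritten to
+;   %old = atomicrmw add i32* %p, i32 %v seq_cst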
+ +; CHECK-LABEL: @test_atomic_acquire +define i32 @test_atomic_acquire(i32* %ptr) { + ; CHECK: %1 = load atomic i32, i32* %ptr acquire, align 4 + %1 = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 3) + ret i32 %1 +} + +; CHECK-LABEL: @test_atomic_release +define void @test_atomic_release(i32* %ptr, i32 %value) { + ; CHECK: store atomic i32 %value, i32* %ptr release, align 4 + call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 4) + ret void +} + +; CHECK-LABEL: @test_atomic_acquire_release +define i32 @test_atomic_acquire_release(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw add i32* %ptr, i32 %value acq_rel + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %value, i32 5) + ret i32 %1 +} + +; CHECK-LABEL: @test_fetch_and_add_i32 +define i32 @test_fetch_and_add_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw add i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK-LABEL: @test_fetch_and_sub_i32 +define i32 @test_fetch_and_sub_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw sub i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK-LABEL: @test_fetch_and_or_i32 +define i32 @test_fetch_and_or_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw or i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK-LABEL: @test_fetch_and_and_i32 +define i32 @test_fetch_and_and_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw and i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK-LABEL: @test_fetch_and_xor_i32 +define i32 @test_fetch_and_xor_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw xor i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; Test different compare-and-swap patterns that commonly occur and are a bit +; tricky because the PNaCl intrinsic only returns the value whereas the LLVM +; intrinsic also returns the success flag (equivalent to comparing the oldval +; with what was just loaded). 
+ +; CHECK-LABEL: @test_val_compare_and_swap_i32 +define i32 @test_val_compare_and_swap_i32(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK: %1 = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst seq_cst + ; CHECK-NEXT: %2 = extractvalue { i32, i1 } %1, 0 + ; CHECK-NEXT: ret i32 %2 + %1 = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + ret i32 %1 +} + +; CHECK-LABEL: @test_val_compare_and_swap_i32_new +define i32 @test_val_compare_and_swap_i32_new(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK: %1 = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst seq_cst + ; CHECK-NEXT: %res2 = extractvalue { i32, i1 } %1, 0 + ; CHECK-NEXT: ret i32 %res2 + %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + %success = icmp eq i32 %res, %oldval + %res.insert.value = insertvalue { i32, i1 } undef, i32 %res, 0 + %res.insert.success = insertvalue { i32, i1 } %res.insert.value, i1 %success, 1 + %val = extractvalue { i32, i1 } %res.insert.success, 0 + ret i32 %val +} + +; CHECK-LABEL: @test_bool_compare_and_swap_i32 +define i1 @test_bool_compare_and_swap_i32(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK: %1 = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst seq_cst + ; CHECK-NEXT: %success = extractvalue { i32, i1 } %1, 1 + ; CHECK-NEXT: ret i1 %success + %1 = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + %2 = icmp eq i32 %1, %oldval + ret i1 %2 +} + +; CHECK-LABEL: @test_bool_compare_and_swap_i32_new +define i1 @test_bool_compare_and_swap_i32_new(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK: %1 = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst seq_cst + ; CHECK-NEXT: %suc = extractvalue { i32, i1 } %1, 1 + ; CHECK-NEXT: ret i1 %suc + %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + %success = icmp eq i32 %res, %oldval + %res.insert.value = insertvalue { i32, i1 } undef, i32 %res, 0 + %res.insert.success = insertvalue { i32, i1 } %res.insert.value, i1 %success, 1 + %suc = extractvalue { i32, i1 } %res.insert.success, 1 + ret i1 %suc +} + +; CHECK-LABEL: @test_bool_compare_and_swap_i32_reordered +define i1 @test_bool_compare_and_swap_i32_reordered(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK: %1 = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst seq_cst + ; CHECK-NEXT: %success = extractvalue { i32, i1 } %1, 1 + ; CHECK-NEXT: ret i1 %success + %1 = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + %2 = icmp eq i32 %oldval, %1 ; Note operands are swapped from above. + ret i1 %2 +} + +; CHECK-LABEL: @test_struct_compare_and_swap_i32 +define { i32, i1 } @test_struct_compare_and_swap_i32(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK: %1 = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst seq_cst + ; CHECK-NEXT: ret { i32, i1 } %1 + %1 = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + %2 = icmp eq i32 %1, %oldval + %3 = insertvalue { i32, i1 } undef, i32 %1, 0 + %4 = insertvalue { i32, i1 } %3, i1 %2, 1 + ret { i32, i1 } %4 +} + +; Test all allowed cmpxchg success/failure memory orderings. 
+ +; CHECK-LABEL: @test_cmpxchg_seqcst_seqcst +define i32 @test_cmpxchg_seqcst_seqcst(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK: %1 = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst seq_cst + %1 = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + ret i32 %1 +} + +; CHECK-LABEL: @test_cmpxchg_seqcst_acquire +define i32 @test_cmpxchg_seqcst_acquire(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK: %1 = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst acquire + %1 = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 3) + ret i32 %1 +} + +; CHECK-LABEL: @test_cmpxchg_acquire_acquire +define i32 @test_cmpxchg_acquire_acquire(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK: %1 = cmpxchg i32* %ptr, i32 %oldval, i32 %newval acquire acquire + %1 = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 3, i32 3) + ret i32 %1 +} + +; CHECK-LABEL: @test_c11_fence +define void @test_c11_fence() { + ; CHECK: fence seq_cst + call void @llvm.nacl.atomic.fence(i32 6) + ret void +} + +; CHECK-LABEL: @test_synchronize +define void @test_synchronize() { + ; CHECK: call void asm sideeffect "", "~{memory}"() + ; CHECK: fence seq_cst + ; CHECK: call void asm sideeffect "", "~{memory}"() + call void @llvm.nacl.atomic.fence.all() + ret void +} + +; CHECK-LABEL: @test_lock_test_and_set_i32 +define i32 @test_lock_test_and_set_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw xchg i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK-LABEL: @test_lock_release_i32 +define void @test_lock_release_i32(i32* %ptr) { + ; Note that the 'release' was changed to a 'seq_cst'. + ; CHECK: store atomic i32 0, i32* %ptr seq_cst, align 4 + call void @llvm.nacl.atomic.store.i32(i32 0, i32* %ptr, i32 6) + ret void +} + +; CHECK-LABEL: @test_atomic_load_i8 +define zeroext i8 @test_atomic_load_i8(i8* %ptr) { + ; CHECK: %1 = load atomic i8, i8* %ptr seq_cst, align 1 + %1 = call i8 @llvm.nacl.atomic.load.i8(i8* %ptr, i32 6) + ret i8 %1 +} + +; CHECK-LABEL: @test_atomic_store_i8 +define void @test_atomic_store_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK: store atomic i8 %value, i8* %ptr seq_cst, align 1 + call void @llvm.nacl.atomic.store.i8(i8 %value, i8* %ptr, i32 6) + ret void +} + +; CHECK-LABEL: @test_atomic_load_i16 +define zeroext i16 @test_atomic_load_i16(i16* %ptr) { + ; CHECK: %1 = load atomic i16, i16* %ptr seq_cst, align 2 + %1 = call i16 @llvm.nacl.atomic.load.i16(i16* %ptr, i32 6) + ret i16 %1 +} + +; CHECK-LABEL: @test_atomic_store_i16 +define void @test_atomic_store_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK: store atomic i16 %value, i16* %ptr seq_cst, align 2 + call void @llvm.nacl.atomic.store.i16(i16 %value, i16* %ptr, i32 6) + ret void +} + +; CHECK-LABEL: @test_atomic_load_i32 +define i32 @test_atomic_load_i32(i32* %ptr) { + ; CHECK: %1 = load atomic i32, i32* %ptr seq_cst, align 4 + %1 = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6) + ret i32 %1 +} + +; CHECK-LABEL: @test_atomic_store_i32 +define void @test_atomic_store_i32(i32* %ptr, i32 %value) { + ; CHECK: store atomic i32 %value, i32* %ptr seq_cst, align 4 + call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + ret void +} + +; CHECK-LABEL: @test_atomic_load_i64 +define i64 @test_atomic_load_i64(i64* %ptr) { + ; CHECK: %1 = load atomic i64, i64* %ptr seq_cst, align 8 + %1 = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr, i32 6) + ret 
i64 %1 +} + +; CHECK-LABEL: @test_atomic_store_i64 +define void @test_atomic_store_i64(i64* %ptr, i64 %value) { + ; CHECK: store atomic i64 %value, i64* %ptr seq_cst, align 8 + call void @llvm.nacl.atomic.store.i64(i64 %value, i64* %ptr, i32 6) + ret void +} + +; CHECK: attributes [[RETURNS_TWICE]] = { returns_twice } diff --git a/test/Transforms/NaCl/rewrite-assume.ll b/test/Transforms/NaCl/rewrite-assume.ll new file mode 100644 index 000000000000..50e5d2bb6ff3 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-assume.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the @llvm.assume part of the RewriteLLVMIntrinsics pass + +declare void @llvm.assume(i1) + +; No declaration or definition of llvm.assume() should remain. +; CLEANED-NOT: @llvm.assume + +define void @call_assume(i1 %val) { +; CHECK: call_assume +; CHECK-NEXT: ret void + call void @llvm.assume(i1 %val) + ret void +} + +; A more complex example with a number of calls in several BBs. +define void @multiple_calls(i1 %val) { +; CHECK: multiple_calls +entryblock: +; CHECK: entryblock +; CHECK-NEXT: br + call void @llvm.assume(i1 %val) + br i1 %val, label %exitblock, label %never +never: +; CHECK: never: +; CHECK-NEXT: br + call void @llvm.assume(i1 %val) + br label %exitblock +exitblock: +; CHECK: exitblock: +; CHECK-NEXT: ret void + call void @llvm.assume(i1 %val) + ret void +} diff --git a/test/Transforms/NaCl/rewrite-call-with-libfunc-argument.ll b/test/Transforms/NaCl/rewrite-call-with-libfunc-argument.ll new file mode 100644 index 000000000000..56ee2d2c078e --- /dev/null +++ b/test/Transforms/NaCl/rewrite-call-with-libfunc-argument.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s + +; See https://code.google.com/p/nativeclient/issues/detail?id=3706 +; Make sure that when @longjmp is used as an argument in a call instruction, +; the rewrite pass does the right thing and doesn't get confused. + +; CHECK: define internal void @longjmp(i64* %env, i32 %val) { + +declare void @longjmp(i64*, i32) + +declare void @somefunc(i32, void (i64*, i32)*, i32) + +define void @foo() { +entry: + call void @somefunc(i32 1, void (i64*, i32)* @longjmp, i32 2) +; CHECK: call void @somefunc(i32 1, void (i64*, i32)* @longjmp, i32 2) + ret void +} diff --git a/test/Transforms/NaCl/rewrite-flt-rounds.ll b/test/Transforms/NaCl/rewrite-flt-rounds.ll new file mode 100644 index 000000000000..cb1a7e4a9924 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-flt-rounds.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the @llvm.flt.rounds part of the RewriteLLVMIntrinsics pass + +declare i32 @llvm.flt.rounds() + +; No declaration or definition of llvm.flt.rounds() should remain. +; CLEANED-NOT: @llvm.flt.rounds + +define i32 @call_flt_rounds() { +; CHECK: call_flt_rounds +; CHECK-NEXT: ret i32 1 + %val = call i32 @llvm.flt.rounds() + ret i32 %val +} + +; A more complex example with a number of calls in several BBs. 
+define i32 @multiple_calls(i64* %arg, i32 %num) { +; CHECK: multiple_calls +entryblock: +; CHECK: entryblock + %v1 = call i32 @llvm.flt.rounds() + br label %block1 +block1: +; CHECK: block1: +; CHECK-NEXT: %v3 = add i32 1, 1 + %v2 = call i32 @llvm.flt.rounds() + %v3 = add i32 %v2, %v1 + br label %exitblock +exitblock: +; CHECK: exitblock: +; CHECK-NEXT: %v4 = add i32 1, %v3 +; CHECK-NEXT: %v6 = add i32 1, %v4 + %v4 = add i32 %v2, %v3 + %v5 = call i32 @llvm.flt.rounds() + %v6 = add i32 %v5, %v4 + ret i32 %v6 +} diff --git a/test/Transforms/NaCl/rewrite-libcalls-wrong-signature.ll b/test/Transforms/NaCl/rewrite-libcalls-wrong-signature.ll new file mode 100644 index 000000000000..3ab64d9dd26e --- /dev/null +++ b/test/Transforms/NaCl/rewrite-libcalls-wrong-signature.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; Check how the pass behaves in the presence of library functions with wrong +; signatures. + +declare i8 @longjmp(i64) + +@flongjmp = global i8 (i64)* @longjmp +; CHECK: @flongjmp = global i8 (i64)* bitcast (void (i64*, i32)* @longjmp to i8 (i64)*) + +; CHECK: define internal void @longjmp(i64* %env, i32 %val) + +declare i8* @memcpy(i32) + +define i8* @call_bad_memcpy(i32 %arg) { + %result = call i8* @memcpy(i32 %arg) + ret i8* %result +} + +; CHECK: define i8* @call_bad_memcpy(i32 %arg) { +; CHECK: %result = call i8* bitcast (i8* (i8*, i8*, i32)* @memcpy to i8* (i32)*)(i32 %arg) + +declare i8 @setjmp() + +; This simulates a case where the original C file had a correct setjmp +; call but due to linking order a wrong declaration made it into the +; IR. In this case, the correct call is bitcasted to the correct type. +; The pass should treat this properly by creating a direct intrinsic +; call instead of going through the wrapper. +define i32 @call_valid_setjmp(i64* %buf) { + %result = call i32 bitcast (i8 ()* @setjmp to i32 (i64*)*)(i64* %buf) + ret i32 %result +} + +; CHECK: define i32 @call_valid_setjmp(i64* %buf) { +; CHECK-NEXT: %jmp_buf_i8 = bitcast i64* %buf to i8* +; CHECK-NEXT: %result = call i32 @llvm.nacl.setjmp(i8* %jmp_buf_i8) +; CHECK-NEXT: ret i32 %result +; CHECK-NEXT: } diff --git a/test/Transforms/NaCl/rewrite-longjmp-no-store.ll b/test/Transforms/NaCl/rewrite-longjmp-no-store.ll new file mode 100644 index 000000000000..134593ad3971 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-longjmp-no-store.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s -check-prefix=CLEANED +; Test that when there are no uses other than calls to longjmp, +; no function body is generated. + +declare void @longjmp(i64*, i32) + +; No declaration or definition of longjmp() should remain. +; CLEANED-NOT: @longjmp + +define void @call_longjmp(i64* %arg, i32 %num) { + call void @longjmp(i64* %arg, i32 %num) +; CHECK: call void @llvm.nacl.longjmp(i8* %jmp_buf_i8, i32 %num) + ret void +} + diff --git a/test/Transforms/NaCl/rewrite-longjmp-noncall-uses.ll b/test/Transforms/NaCl/rewrite-longjmp-noncall-uses.ll new file mode 100644 index 000000000000..ed7818ec9688 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-longjmp-noncall-uses.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; Check that the rewrite pass behaves correctly in the presence +; of various uses of longjmp that are not calls. 
+ +@fp = global void (i64*, i32)* @longjmp, align 8 +; CHECK: @fp = global void (i64*, i32)* @longjmp, align 8 +@arrfp = global [3 x void (i64*, i32)*] [void (i64*, i32)* null, void (i64*, i32)* @longjmp, void (i64*, i32)* null], align 16 +; CHECK: @arrfp = global [3 x void (i64*, i32)*] [void (i64*, i32)* null, void (i64*, i32)* @longjmp, void (i64*, i32)* null], align 16 + +; CHECK: define internal void @longjmp(i64* %env, i32 %val) { + +declare void @longjmp(i64*, i32) + +declare void @somefunc(i8*) + +define void @foo() { +entry: + call void @somefunc(i8* bitcast (void (i64*, i32)* @longjmp to i8*)) +; CHECK: call void @somefunc(i8* bitcast (void (i64*, i32)* @longjmp to i8*)) + ret void +} diff --git a/test/Transforms/NaCl/rewrite-memfuncs-no-store.ll b/test/Transforms/NaCl/rewrite-memfuncs-no-store.ll new file mode 100644 index 000000000000..e661fae83af5 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-memfuncs-no-store.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s -check-prefix=CLEANED + +declare i8* @memcpy(i8*, i8*, i32) +declare i8* @memmove(i8*, i8*, i32) +declare i8* @memset(i8*, i32, i32) + +; No declaration or definition of the library functions should remain, since +; the only uses of mem* functions are calls. +; CLEANED-NOT: @memcpy +; CLEANED-NOT: @memmove +; CLEANED-NOT: @memset + +define i8* @call_memcpy(i8* %dest, i8* %src, i32 %len) { + %result = call i8* @memcpy(i8* %dest, i8* %src, i32 %len) + ret i8* %result +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +} + +define i8* @call_memmove(i8* %dest, i8* %src, i32 %len) { + %result = call i8* @memmove(i8* %dest, i8* %src, i32 %len) + ret i8* %result +; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +} + +define i8* @call_memset(i8* %dest, i32 %c, i32 %len) { + %result = call i8* @memset(i8* %dest, i32 %c, i32 %len) + ret i8* %result +; CHECK: %trunc_byte = trunc i32 %c to i8 +; CHECK: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %trunc_byte, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +} diff --git a/test/Transforms/NaCl/rewrite-memfuncs-noncall-uses.ll b/test/Transforms/NaCl/rewrite-memfuncs-noncall-uses.ll new file mode 100644 index 000000000000..5c6bdfdcb596 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-memfuncs-noncall-uses.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; Check that the rewrite pass behaves correctly in the presence +; of various uses of mem* that are not calls. 
+ +@fpcpy = global i8* (i8*, i8*, i32)* @memcpy +; CHECK: @fpcpy = global i8* (i8*, i8*, i32)* @memcpy +@fpmove = global i8* (i8*, i8*, i32)* @memmove +; CHECK: @fpmove = global i8* (i8*, i8*, i32)* @memmove +@fpset = global i8* (i8*, i32, i32)* @memset +; CHECK: @fpset = global i8* (i8*, i32, i32)* @memset + +; CHECK: define internal i8* @memcpy(i8* %dest, i8* %src, i32 %len) { +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +; CHECK: } + +; CHECK: define internal i8* @memmove(i8* %dest, i8* %src, i32 %len) { +; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +; CHECK: } + +; CHECK: define internal i8* @memset(i8* %dest, i32 %val, i32 %len) { +; CHECK: %trunc_byte = trunc i32 %val to i8 +; CHECK: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %trunc_byte, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +; CHECK: } + +declare i8* @memcpy(i8*, i8*, i32) +declare i8* @memmove(i8*, i8*, i32) +declare i8* @memset(i8*, i32, i32) diff --git a/test/Transforms/NaCl/rewrite-prefetch.ll b/test/Transforms/NaCl/rewrite-prefetch.ll new file mode 100644 index 000000000000..0826bd32c988 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-prefetch.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the @llvm.prefetch part of the RewriteLLVMIntrinsics pass + +declare void @llvm.prefetch(i8 *%ptr, i32 %rw, i32 %locality, i32 %cache_type) + +; No declaration or definition of llvm.prefetch() should remain. +; CLEANED-NOT: @llvm.prefetch + +define void @call_prefetch(i8 *%ptr) { +; CHECK: call_prefetch +; CHECK-NEXT: ret void + call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 0) + ret void +} + +; A more complex example with a number of calls in several BBs. +define void @multiple_calls(i8 *%ptr) { +; CHECK: multiple_calls +entryblock: +; CHECK: entryblock +; CHECK-NEXT: br + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 2, i32 1) + br label %block1 +block1: +; CHECK: block1: +; CHECK-NEXT: br + call void @llvm.prefetch(i8 *%ptr, i32 0, i32 1, i32 0) + br label %exitblock +exitblock: +; CHECK: exitblock: +; CHECK-NEXT: ret void + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 3, i32 1) + ret void +} diff --git a/test/Transforms/NaCl/rewrite-setjmp-store-error.ll b/test/Transforms/NaCl/rewrite-setjmp-store-error.ll new file mode 100644 index 000000000000..05d7dedefba3 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-setjmp-store-error.ll @@ -0,0 +1,13 @@ +; RUN: not opt < %s -rewrite-pnacl-library-calls -S 2>&1 | FileCheck %s +; Test that the pass enforces not being able to store the address +; of setjmp. 
+ +declare i32 @setjmp(i64*) + +define i32 @takeaddr_setjmp(i64* %arg) { + %fp = alloca i32 (i64*)*, align 8 +; CHECK: Taking the address of setjmp is invalid + store i32 (i64*)* @setjmp, i32 (i64*)** %fp, align 8 + ret i32 7 +} + diff --git a/test/Transforms/NaCl/rewrite-setlongjmp-calls.ll b/test/Transforms/NaCl/rewrite-setlongjmp-calls.ll new file mode 100644 index 000000000000..f34f004d7f39 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-setlongjmp-calls.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the RewritePNaClLibraryCalls pass + +declare i32 @setjmp(i64*) +declare void @longjmp(i64*, i32) + +; No declaration or definition of setjmp() should remain. +; CLEANED-NOT: @setjmp + +; Since the address of longjmp is being taken here, a body is generated +; for it, which does a cast and calls an intrinsic + +; CHECK: define internal void @longjmp(i64* %env, i32 %val) { +; CHECK: entry: +; CHECK: %jmp_buf_i8 = bitcast i64* %env to i8* +; CHECK: call void @llvm.nacl.longjmp(i8* %jmp_buf_i8, i32 %val) +; CHECK: unreachable +; CHECK: } + +define i32 @call_setjmp(i64* %arg) { +; CHECK-NOT: call i32 @setjmp +; CHECK: %jmp_buf_i8 = bitcast i64* %arg to i8* +; CHECK-NEXT: %val = call i32 @llvm.nacl.setjmp(i8* %jmp_buf_i8) + %val = call i32 @setjmp(i64* %arg) + ret i32 %val +} + +define void @call_longjmp(i64* %arg, i32 %num) { +; CHECK-NOT: call void @longjmp +; CHECK: %jmp_buf_i8 = bitcast i64* %arg to i8* +; CHECK-NEXT: call void @llvm.nacl.longjmp(i8* %jmp_buf_i8, i32 %num) + call void @longjmp(i64* %arg, i32 %num) + ret void +} + +define i32 @takeaddr_longjmp(i64* %arg, i32 %num) { + %fp = alloca void (i64*, i32)*, align 8 +; CHECK: store void (i64*, i32)* @longjmp, void (i64*, i32)** %fp + store void (i64*, i32)* @longjmp, void (i64*, i32)** %fp, align 8 + ret i32 7 +} + +; A more complex example with a number of calls in several BBs +define void @multiple_calls(i64* %arg, i32 %num) { +entryblock: +; CHECK: entryblock +; CHECK: bitcast +; CHECK-NEXT: call void @llvm.nacl.longjmp + call void @longjmp(i64* %arg, i32 %num) + br label %block1 +block1: +; CHECK: block1 +; CHECK: bitcast +; CHECK-NEXT: call void @llvm.nacl.longjmp + call void @longjmp(i64* %arg, i32 %num) +; CHECK: call i32 @llvm.nacl.setjmp + %val = call i32 @setjmp(i64* %arg) + %num2 = add i32 %val, %num +; CHECK: bitcast +; CHECK-NEXT: call void @llvm.nacl.longjmp + call void @longjmp(i64* %arg, i32 %num2) + br label %exitblock +exitblock: + %num3 = add i32 %num, %num + call void @longjmp(i64* %arg, i32 %num3) + %num4 = add i32 %num, %num3 +; CHECK: bitcast +; CHECK-NEXT: call void @llvm.nacl.longjmp + call void @longjmp(i64* %arg, i32 %num4) + ret void +} + +; CHECK: declare i32 @llvm.nacl.setjmp(i8*) +; CHECK: declare void @llvm.nacl.longjmp(i8*, i32) + diff --git a/test/Transforms/NaCl/simplify-allocas.ll b/test/Transforms/NaCl/simplify-allocas.ll new file mode 100644 index 000000000000..fab28064c9c3 --- /dev/null +++ b/test/Transforms/NaCl/simplify-allocas.ll @@ -0,0 +1,207 @@ +; RUN: opt < %s -simplify-allocas -S | FileCheck %s + +target datalayout = "p:32:32:32" + +%struct = type { i32, i32 } + +declare void @receive_alloca(%struct* %ptr) +declare void @receive_vector_alloca(<4 x i32>* %ptr) + +define void @alloca_fixed() { + %buf = alloca %struct, align 128 + call void @receive_alloca(%struct* %buf) + ret void +} +; CHECK-LABEL: define void @alloca_fixed() { +; CHECK-NEXT: %buf = alloca i8, 
i32 8, align 128 +; CHECK-NEXT: %buf.bc = bitcast i8* %buf to %struct* +; CHECK-NEXT: call void @receive_alloca(%struct* %buf.bc) + +; When the size passed to alloca is a constant, it should be a +; constant in the output too. +define void @alloca_fixed_array() { + %buf = alloca %struct, i32 100 + call void @receive_alloca(%struct* %buf) + ret void +} +; CHECK-LABEL: define void @alloca_fixed_array() { +; CHECK-NEXT: %buf = alloca i8, i32 800, align 8 +; CHECK-NEXT: %buf.bc = bitcast i8* %buf to %struct* +; CHECK-NEXT: call void @receive_alloca(%struct* %buf.bc) + +define void @alloca_fixed_vector() { + %buf = alloca <4 x i32>, align 128 + call void @receive_vector_alloca(<4 x i32>* %buf) + ret void +} +; CHECK-LABEL: define void @alloca_fixed_vector() { +; CHECK-NEXT: %buf = alloca i8, i32 16, align 128 +; CHECK-NEXT: %buf.bc = bitcast i8* %buf to <4 x i32>* +; CHECK-NEXT: call void @receive_vector_alloca(<4 x i32>* %buf.bc) + +define void @alloca_variable(i32 %size) { + %buf = alloca %struct, i32 %size + call void @receive_alloca(%struct* %buf) + ret void +} +; CHECK-LABEL: define void @alloca_variable(i32 %size) { +; CHECK-NEXT: %buf.alloca_mul = mul i32 8, %size +; CHECK-NEXT: %buf = alloca i8, i32 %buf.alloca_mul +; CHECK-NEXT: %buf.bc = bitcast i8* %buf to %struct* +; CHECK-NEXT: call void @receive_alloca(%struct* %buf.bc) + +define void @alloca_alignment_i32() { + %buf = alloca i32 + ret void +} +; CHECK-LABEL: void @alloca_alignment_i32() { +; CHECK-NEXT: alloca i8, i32 4, align 4 + +define void @alloca_alignment_double() { + %buf = alloca double + ret void +} +; CHECK-LABEL: void @alloca_alignment_double() { +; CHECK-NEXT: alloca i8, i32 8, align 8 + +define void @alloca_lower_alignment() { + %buf = alloca i32, align 1 + ret void +} +; CHECK-LABEL: void @alloca_lower_alignment() { +; CHECK-NEXT: alloca i8, i32 4, align 1 + +define void @alloca_array_trunc() { + %a = alloca i32, i64 1024 + unreachable +} +; CHECK-LABEL: define void @alloca_array_trunc() +; CHECK-NEXT: %a = alloca i8, i32 4096 + +define void @alloca_array_zext() { + %a = alloca i32, i8 128 + unreachable +} +; CHECK-LABEL: define void @alloca_array_zext() +; CHECK-NEXT: %a = alloca i8, i32 512 + +define void @dyn_alloca_array_trunc(i64 %a) { + %b = alloca i32, i64 %a + unreachable +} +; CHECK-LABEL: define void @dyn_alloca_array_trunc(i64 %a) +; CHECK-NEXT: trunc i64 %a to i32 +; CHECK-NEXT: mul i32 4, +; CHECK-NEXT: alloca i8, i32 + +define void @dyn_alloca_array_zext(i8 %a) { + %b = alloca i32, i8 %a + unreachable +} +; CHECK-LABEL: define void @dyn_alloca_array_zext(i8 %a) +; CHECK-NEXT: zext i8 %a to i32 +; CHECK-NEXT: mul i32 4, +; CHECK-NEXT: alloca i8, i32 + +define void @dyn_inst_alloca_array(i32 %a) { + %b = add i32 1, %a + %c = alloca i32, i32 %b + unreachable +} +; CHECK-LABEL: define void @dyn_inst_alloca_array(i32 %a) +; CHECK-NEXT: %b = add i32 1, %a +; CHECK-NEXT: mul i32 4, %b +; CHECK-NEXT: %c = alloca i8, i32 + +define void @dyn_inst_alloca_array_trunc(i64 %a) { + %b = add i64 1, %a + %c = alloca i32, i64 %b + unreachable +} +; CHECK-LABEL: define void @dyn_inst_alloca_array_trunc(i64 %a) +; CHECK-NEXT: %b = add i64 1, %a +; CHECK-NEXT: trunc i64 %b to i32 +; CHECK-NEXT: mul i32 4, +; CHECK-NEXT: %c = alloca i8, i32 + +define void @dyn_inst_alloca_array_zext(i8 %a) { + %b = add i8 1, %a + %c = alloca i32, i8 %b + unreachable +} +; CHECK-LABEL: define void @dyn_inst_alloca_array_zext(i8 %a) +; CHECK-NEXT: %b = add i8 1, %a +; CHECK-NEXT: zext i8 %b to i32 +; CHECK-NEXT: mul i32 4, +; CHECK-NEXT: %c = 
alloca i8, i32 + +declare void @llvm.dbg.declare(metadata, metadata, metadata) +define void @debug_declare() { + %var = alloca i32 + call void @llvm.dbg.declare(metadata i32* %var, metadata !12, metadata !13), !dbg !14 + unreachable +} +; Ensure that the first arg to dbg.declare points to the alloca, not the bitcast +; CHECK-LABEL: define void @debug_declare +; CHECK-NEXT: %var = alloca i8, i32 4 +; CHECK: call void @llvm.dbg.declare(metadata i8* %var, metadata !12, metadata !13), !dbg !14 + +define void @debug_declare_morecasts() { + %var = alloca i32, i32 2, align 8 + %other_bc = bitcast i32* %var to i64* + %other_bc2 = bitcast i64* %other_bc to i16* + call void @llvm.dbg.declare(metadata i16* %other_bc2, metadata !15, metadata !13), !dbg !16 + unreachable +} +; Ensure that the first arg to dbg.declare points to the alloca, not bitcasts +; CHECK-LABEL: define void @debug_declare_morecasts +; CHECK-NEXT: %var = alloca i8, i32 8, align 8 +; CHECK: call void @llvm.dbg.declare(metadata i8* %var, metadata !15, metadata !13), !dbg !16 + +define void @debug_declare_inttoptr() { + %var = alloca i32, i32 2, align 8 + %i = ptrtoint i32* %var to i32 + %p = inttoptr i32 %i to i8* + call void @llvm.dbg.declare(metadata i8* %p, metadata !15, metadata !13), !dbg !16 + unreachable +} +; Ensure that we can look through ptrtoint/inttoptr +; CHECK-LABEL: define void @debug_declare_inttoptr +; CHECK-NEXT: alloca i8, i32 8, align 8 +; CHECK: call void @llvm.dbg.declare(metadata i8* %var, metadata !15, metadata !13), !dbg !16 + +declare i8* @foo() +define void @debug_declare_noalloca() { + %call = tail call i8* @foo() + %config_.i.i = getelementptr inbounds i8, i8* %call, i32 104, !dbg !16 + %bc = bitcast i8* %config_.i.i to i16*, !dbg !16 + tail call void @llvm.dbg.declare(metadata i16* %bc, metadata !15, metadata !13), !dbg !16 + unreachable +} +; Don't modify dbg.declares which don't ultimately point to an alloca. 
+; CHECK-LABEL: define void @debug_declare_noalloca() +; CHECK: call void @llvm.dbg.declare(metadata i16* %bc, metadata !15, metadata !13), !dbg !16 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} +!llvm.ident = !{!11} + +; CHECK: !4 = !MDSubprogram(name: "debug_declare", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @debug_declare, variables: !2) + +!0 = !MDCompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 235150) (llvm/trunk 235152)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2) +!1 = !MDFile(filename: "foo.c", directory: "/s/llvm/cmakebuild") +!2 = !{} +!3 = !{!4, !8} +!4 = !MDSubprogram(name: "debug_declare", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @debug_declare, variables: !2) +!5 = !MDSubroutineType(types: !6) +!6 = !{null, !7} +!7 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!8 = !MDSubprogram(name: "debug_declare_morecasts", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @debug_declare_morecasts, variables: !2) +!9 = !{i32 2, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{!"clang version 3.7.0 (trunk 235150) (llvm/trunk 235152)"} +!12 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "val", arg: 1, scope: !4, file: !1, line: 1, type: !7) +!13 = !MDExpression() +!14 = !MDLocation(line: 1, column: 24, scope: !4) +!15 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "var", arg: 1, scope: !8, file: !1, line: 9, type: !7) +!16 = !MDLocation(line: 9, column: 24, scope: !8) diff --git a/test/Transforms/NaCl/simplify-struct-reg-pad-crash.ll b/test/Transforms/NaCl/simplify-struct-reg-pad-crash.ll new file mode 100644 index 000000000000..2a8541353281 --- /dev/null +++ b/test/Transforms/NaCl/simplify-struct-reg-pad-crash.ll @@ -0,0 +1,21 @@ +; RUN: not opt < %s -simplify-struct-reg-signatures -S + +%struct = type { i32, i32 } + +declare i32 @__hypothetical_personality_1(%struct) + +declare void @something_to_invoke() + +; landingpad with struct +define void @landingpad_is_struct() { + invoke void @something_to_invoke() + to label %OK unwind label %Err + +OK: + ret void + +Err: + %exn = landingpad i32 personality i32(%struct)* @__hypothetical_personality_1 + cleanup + resume i32 %exn +} \ No newline at end of file diff --git a/test/Transforms/NaCl/simplify-struct-reg-resume-crash.ll b/test/Transforms/NaCl/simplify-struct-reg-resume-crash.ll new file mode 100644 index 000000000000..0f7e519a8793 --- /dev/null +++ b/test/Transforms/NaCl/simplify-struct-reg-resume-crash.ll @@ -0,0 +1,20 @@ +; RUN: not opt < %s -simplify-struct-reg-signatures -S + +%struct = type { i8*, void(%struct)* } + +declare i32 @__gxx_personality_v0(...) 
+declare void @something_to_invoke()
+
+; landingpad with struct
+define void @landingpad_is_struct(%struct %str) {
+  invoke void @something_to_invoke()
+    to label %OK unwind label %Err
+
+OK:
+  ret void
+
+Err:
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+    cleanup
+  resume %struct %str
+}
\ No newline at end of file
diff --git a/test/Transforms/NaCl/simplify-struct-reg-signatures.ll b/test/Transforms/NaCl/simplify-struct-reg-signatures.ll
new file mode 100644
index 000000000000..7f89cf91307e
--- /dev/null
+++ b/test/Transforms/NaCl/simplify-struct-reg-signatures.ll
@@ -0,0 +1,276 @@
+; RUN: opt %s -simplify-struct-reg-signatures -S | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+
+%struct = type { i32, i32 }
+
+%rec_struct = type {%rec_struct*}
+%rec_problem_struct = type{void (%rec_problem_struct)*}
+%rec_pair_1 = type {%rec_pair_2*}
+%rec_pair_2 = type {%rec_pair_1*}
+%rec_returning = type { %rec_returning (%rec_returning)* }
+%direct_def = type { void(%struct)*, %struct }
+
+; new type declarations:
+; CHECK: %struct = type { i32, i32 }
+; CHECK-NEXT: %rec_struct = type { %rec_struct* }
+; CHECK-NEXT: %rec_problem_struct.simplified = type { void (%rec_problem_struct.simplified*)* }
+; CHECK-NEXT: %rec_pair_1 = type { %rec_pair_2* }
+; CHECK-NEXT: %rec_pair_2 = type { %rec_pair_1* }
+; CHECK-NEXT: %rec_returning.simplified = type { void (%rec_returning.simplified*, %rec_returning.simplified*)* }
+; CHECK-NEXT: %direct_def.simplified = type { void (%struct*)*, %struct }
+
+; Leave intrinsics alone:
+; CHECK: { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+
+; CHECK-LABEL: define void @call_intrinsic()
+define void @call_intrinsic() {
+  %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 5, i32 5)
+; CHECK-NEXT: %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 5, i32 5)
+  ret void
+}
+
+; externs
+declare void @extern_func(%struct)
+declare %struct @struct_returning_extern(i32, %struct)
+
+; verify that parameters are mapped correctly: single param, two, and combo
+; with non-struct regs
+; CHECK-NOT: declare void @extern_func(%struct)
+; CHECK-NOT: declare %struct @struct_returning_extern(i32, %struct)
+; CHECK-LABEL: declare void @extern_func(%struct* byval)
+; CHECK-LABEL: declare void @struct_returning_extern(%struct* sret, i32, %struct* byval)
+
+define void @main(%struct* byval %ptr) {
+  %val = load %struct, %struct* %ptr
+  call void @extern_func(%struct %val)
+  ret void
+}
+
+define void @two_param_func(%struct %val1, %struct %val2) {
+  call void @extern_func(%struct %val1)
+  call void @extern_func(%struct %val2)
+  ret void
+}
+
+; CHECK-LABEL: define void @two_param_func(%struct* byval %val1.ptr, %struct* byval %val2.ptr)
+; CHECK-NOT: define void @two_param_func(%struct %val1, %struct %val2)
+
+define i32 @another_func(i32 %a, %struct %str, i64 %b) {
+  call void @two_param_func(%struct %str, %struct %str)
+  call void @extern_func(%struct %str)
+  ret i32 0
+}
+
+; CHECK-LABEL: define i32 @another_func(i32 %a, %struct* byval %str.ptr, i64 %b)
+; CHECK: call void @two_param_func(%struct* byval %str.sreg.ptr, %struct* byval %str.sreg.ptr1)
+
+define %struct @returns_struct(i32 %an_int, %struct %val) {
+  %tmp = call %struct @struct_returning_extern(i32 %an_int, %struct %val)
+  %tmp2 = invoke %struct @struct_returning_extern(i32 1, %struct %tmp)
+    to label %Cont unwind label %Cleanup
+
+Cont:
+  ret %struct %tmp2
+Cleanup:
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+    cleanup
+  resume {i8*, i32} %exn
+}
+
+; verify return value and codegen
+; CHECK-LABEL: define void @returns_struct(%struct* sret %retVal, i32 %an_int, %struct* byval %val.ptr)
+; CHECK-NEXT: %tmp2 = alloca %struct
+; CHECK-NEXT: %tmp.sreg.ptr = alloca %struct
+; CHECK-NEXT: %tmp = alloca %struct
+; CHECK-NEXT: %val.sreg.ptr = alloca %struct
+; CHECK-NEXT: %val.sreg = load %struct, %struct* %val.ptr
+; CHECK-NEXT: store %struct %val.sreg, %struct* %val.sreg.ptr
+; CHECK-NEXT: call void @struct_returning_extern(%struct* sret %tmp, i32 %an_int, %struct* byval %val.sreg.ptr)
+; CHECK-NEXT: %tmp.sreg = load %struct, %struct* %tmp
+; CHECK-NEXT: store %struct %tmp.sreg, %struct* %tmp.sreg.ptr
+; CHECK-NEXT: invoke void @struct_returning_extern(%struct* sret %tmp2, i32 1, %struct* byval %tmp.sreg.ptr)
+; CHECK-NEXT: to label %Cont unwind label %Cleanup
+; CHECK-DAG: Cont:
+; CHECK-NEXT: %tmp2.sreg = load %struct, %struct* %tmp2
+; CHECK-NEXT: store %struct %tmp2.sreg, %struct* %retVal
+; CHECK-NEXT: ret void
+; CHECK-DAG: Cleanup:
+; CHECK-NEXT: %exn = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: resume { i8*, i32 } %exn
+
+define i32 @lots_of_call_attrs() {
+  %tmp.0 = insertvalue %struct undef, i32 1, 0
+  %tmp.1 = insertvalue %struct %tmp.0, i32 2, 1
+  %ret = tail call zeroext i32 @another_func(i32 1, %struct %tmp.1, i64 2) readonly
+  ret i32 %ret
+}
+
+; verify attributes are copied
+; CHECK-LABEL: @lots_of_call_attrs
+; CHECK: %ret = tail call zeroext i32 @another_func(i32 1, %struct* byval %tmp.1.ptr, i64 2) #1
+; CHECK-NEXT: ret i32 %ret
+
+declare void @rec_struct_ok(%rec_struct*)
+declare void @rec_struct_mod(%rec_struct)
+
+; compliant recursive structs are kept as-is
+; CHECK-LABEL: declare void @rec_struct_ok(%rec_struct*)
+; CHECK-LABEL: declare void @rec_struct_mod(%rec_struct* byval)
+
+define void @rec_call_sreg(%rec_problem_struct %r) {
+  %tmp = extractvalue %rec_problem_struct %r, 0
+  call void %tmp(%rec_problem_struct %r)
+  ret void
+}
+
+; non-compliant structs are correctly mapped and calls are changed
+; CHECK-LABEL: define void @rec_call_sreg(%rec_problem_struct.simplified* byval %r.ptr)
+; CHECK: call void %tmp(%rec_problem_struct.simplified* byval %r.sreg.ptr)
+
+declare void @pairs(%rec_pair_1)
+
+define %rec_returning @rec_returning_fun(%rec_returning %str) {
+  %tmp = extractvalue %rec_returning %str, 0
+  %ret = call %rec_returning %tmp(%rec_returning %str)
+  ret %rec_returning %ret
+}
+
+; pair structs
+; CHECK-LABEL: declare void @pairs(%rec_pair_1* byval)
+; CHECK-LABEL: define void @rec_returning_fun(%rec_returning.simplified* sret %retVal, %rec_returning.simplified* byval %str.ptr)
+; CHECK-NEXT: %ret = alloca %rec_returning.simplified
+; CHECK-NEXT: %str.sreg.ptr = alloca %rec_returning.simplified
+; CHECK-NEXT: %str.sreg = load %rec_returning.simplified, %rec_returning.simplified* %str.ptr
+; CHECK-NEXT: %tmp = extractvalue %rec_returning.simplified %str.sreg, 0
+; CHECK-NEXT: store %rec_returning.simplified %str.sreg, %rec_returning.simplified* %str.sreg.ptr
+; CHECK-NEXT: call void %tmp(%rec_returning.simplified* sret %ret, %rec_returning.simplified* byval %str.sreg.ptr)
+; CHECK-NEXT: %ret.sreg = load %rec_returning.simplified, %rec_returning.simplified* %ret
+; CHECK-NEXT: store %rec_returning.simplified %ret.sreg, %rec_returning.simplified* %retVal
+; CHECK-NEXT: ret void
+
+define void @direct_caller(%direct_def %def) {
+  %func = extractvalue %direct_def %def, 0
+  %param = extractvalue %direct_def %def, 1
+  call void %func(%struct %param)
+  ret void
+}
+
+; CHECK-LABEL: define void @direct_caller(%direct_def.simplified* byval %def.ptr)
+; CHECK-NEXT: %param.ptr = alloca %struct
+; CHECK-NEXT: %def.sreg = load %direct_def.simplified, %direct_def.simplified* %def.ptr
+; CHECK-NEXT: %func = extractvalue %direct_def.simplified %def.sreg, 0
+; CHECK-NEXT: %param = extractvalue %direct_def.simplified %def.sreg, 1
+; CHECK-NEXT: store %struct %param, %struct* %param.ptr
+; CHECK-NEXT: call void %func(%struct* byval %param.ptr)
+; CHECK-NEXT: ret void
+
+; vararg functions are converted correctly
+declare void @vararg_ok(i32, ...)
+; CHECK-LABEL: declare void @vararg_ok(i32, ...)
+
+define void @vararg_problem(%rec_problem_struct %arg1, ...) {
+  ; CHECK-LABEL: define void @vararg_problem(%rec_problem_struct.simplified* byval %arg1.ptr, ...)
+  ret void
+}
+
+%vararg_fp_struct = type { i32, void (i32, ...)* }
+declare void @vararg_fp_fct(%vararg_fp_struct %arg)
+; CHECK-LABEL: declare void @vararg_fp_fct(%vararg_fp_struct* byval)
+
+define void @call_vararg(%vararg_fp_struct %param1, ...) {
+  %fptr = extractvalue %vararg_fp_struct %param1, 1
+  call void (i32, ...) %fptr(i32 0, i32 1)
+  ret void
+}
+
+; CHECK-LABEL: define void @call_vararg(%vararg_fp_struct* byval %param1.ptr, ...)
+; CHECK-NEXT: %param1.sreg = load %vararg_fp_struct, %vararg_fp_struct* %param1.ptr
+; CHECK-NEXT: %fptr = extractvalue %vararg_fp_struct %param1.sreg, 1
+; CHECK-NEXT: call void (i32, ...) %fptr(i32 0, i32 1)
+; CHECK-NEXT: ret void
+
+%vararg_fp_problem_struct = type { void(%vararg_fp_problem_struct)* }
+define void @vararg_fp_problem_call(%vararg_fp_problem_struct* byval %param) {
+  %fct_ptr = getelementptr %vararg_fp_problem_struct, %vararg_fp_problem_struct* %param, i32 0, i32 0
+  %fct = load void(%vararg_fp_problem_struct)*, void(%vararg_fp_problem_struct)** %fct_ptr
+  %param_for_call = load %vararg_fp_problem_struct, %vararg_fp_problem_struct* %param
+  call void %fct(%vararg_fp_problem_struct %param_for_call)
+  ret void
+}
+
+; CHECK-LABEL: define void @vararg_fp_problem_call(%vararg_fp_problem_struct.simplified* byval %param)
+; CHECK-NEXT: %param_for_call.ptr = alloca %vararg_fp_problem_struct.simplified
+; CHECK-NEXT: %fct_ptr = getelementptr %vararg_fp_problem_struct.simplified, %vararg_fp_problem_struct.simplified* %param, i32 0, i32 0
+; CHECK-NEXT: %fct = load void (%vararg_fp_problem_struct.simplified*)*, void (%vararg_fp_problem_struct.simplified*)** %fct_ptr
+; CHECK-NEXT: %param_for_call = load %vararg_fp_problem_struct.simplified, %vararg_fp_problem_struct.simplified* %param
+; CHECK-NEXT: store %vararg_fp_problem_struct.simplified %param_for_call, %vararg_fp_problem_struct.simplified* %param_for_call.ptr
+; CHECK-NEXT: call void %fct(%vararg_fp_problem_struct.simplified* byval %param_for_call.ptr)
+; CHECK-NEXT: ret void
+
+define void @call_with_array([4 x void(%struct)*] %fptrs, %struct %str) {
+  %fptr = extractvalue [4 x void(%struct)*] %fptrs, 2
+  call void %fptr(%struct %str)
+  ret void
+}
+
+; CHECK-LABEL: define void @call_with_array([4 x void (%struct*)*]* byval %fptrs.ptr, %struct* byval %str.ptr)
+; CHECK-NEXT: %str.sreg.ptr = alloca %struct
+; CHECK-NEXT: %fptrs.sreg = load [4 x void (%struct*)*], [4 x void (%struct*)*]* %fptrs.ptr
+; CHECK-NEXT: %str.sreg = load %struct, %struct* %str.ptr
+; CHECK-NEXT: %fptr = extractvalue [4 x void (%struct*)*] %fptrs.sreg, 2
+; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr
+; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr)
+; CHECK-NEXT: ret void
+
+define void @call_with_array_ptr([4 x void(%struct)*]* %fptrs, %struct %str) {
+  %fptr_ptr = getelementptr [4 x void(%struct)*], [4 x void(%struct)*]* %fptrs, i32 0, i32 2
+  %fptr = load void(%struct)*, void(%struct)** %fptr_ptr
+  call void %fptr(%struct %str)
+  ret void
+}
+
+; CHECK-LABEL: define void @call_with_array_ptr([4 x void (%struct*)*]* %fptrs, %struct* byval %str.ptr)
+; CHECK-NEXT: %str.sreg.ptr = alloca %struct
+; CHECK-NEXT: %str.sreg = load %struct, %struct* %str.ptr
+; CHECK-NEXT: %fptr_ptr = getelementptr [4 x void (%struct*)*], [4 x void (%struct*)*]* %fptrs, i32 0, i32 2
+; CHECK-NEXT: %fptr = load void (%struct*)*, void (%struct*)** %fptr_ptr
+; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr
+; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr)
+; CHECK-NEXT: ret void
+
+define void @call_with_vector(<4 x void (%struct)*> %fptrs, %struct %str) {
+  %fptr = extractelement <4 x void (%struct)*> %fptrs, i32 2
+  call void %fptr(%struct %str)
+  ret void
+}
+
+; CHECK-LABEL: define void @call_with_vector(<4 x void (%struct*)*> %fptrs, %struct* byval %str.ptr)
+; CHECK-NEXT: %str.sreg.ptr = alloca %struct
+; CHECK-NEXT: %str.sreg = load %struct, %struct* %str.ptr
+; CHECK-NEXT: %fptr = extractelement <4 x void (%struct*)*> %fptrs, i32 2
+; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr
+; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr)
+; CHECK-NEXT: ret void
+
+define void @call_with_array_vect([4 x <2 x void(%struct)*>] %fptrs, %struct %str) {
+  %vect = extractvalue [4 x <2 x void(%struct)*>] %fptrs, 2
+  %fptr = extractelement <2 x void (%struct)*> %vect, i32 1
+  call void %fptr(%struct %str)
+  ret void
+}
+
+; CHECK-LABEL: define void @call_with_array_vect([4 x <2 x void (%struct*)*>]* byval %fptrs.ptr, %struct* byval %str.ptr)
+; CHECK-NEXT: %str.sreg.ptr = alloca %struct
+; CHECK-NEXT: %fptrs.sreg = load [4 x <2 x void (%struct*)*>], [4 x <2 x void (%struct*)*>]* %fptrs.ptr
+; CHECK-NEXT: %str.sreg = load %struct, %struct* %str.ptr
+; CHECK-NEXT: %vect = extractvalue [4 x <2 x void (%struct*)*>] %fptrs.sreg, 2
+; CHECK-NEXT: %fptr = extractelement <2 x void (%struct*)*> %vect, i32 1
+; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr
+; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr)
+; CHECK-NEXT: ret void
+
+; this is at the end, corresponds to the call marked as readonly
+; CHECK: attributes #1 = { readonly }
\ No newline at end of file
diff --git a/test/Transforms/NaCl/simplify-struct-reg-vararg-crash.ll b/test/Transforms/NaCl/simplify-struct-reg-vararg-crash.ll
new file mode 100644
index 000000000000..2b0e59fe8334
--- /dev/null
+++ b/test/Transforms/NaCl/simplify-struct-reg-vararg-crash.ll
@@ -0,0 +1,10 @@
+; RUN: not opt < %s -simplify-struct-reg-signatures -S
+
+%struct = type { i32, i32 }
+
+declare void @vararg_fct(...)
+
+define void @vararg_caller_with_agg(%struct %str) {
+  call void(...)* @vararg_fct(%struct %str)
+  ret void
+}
\ No newline at end of file
diff --git a/test/Transforms/NaCl/strip-attributes.ll b/test/Transforms/NaCl/strip-attributes.ll
new file mode 100644
index 000000000000..9923e1cfcf78
--- /dev/null
+++ b/test/Transforms/NaCl/strip-attributes.ll
@@ -0,0 +1,66 @@
+; RUN: opt -S -nacl-strip-attributes %s 2>&1 | FileCheck %s
+
+
+; Check that we emit a warning for some special meaning sections:
+; CHECK: Warning: func_init_array will have its section (.init_array) stripped.
+; CHECK-NOT: Warning: __rustc_debug_gdb_scripts_section__ will have its section
+
+@var = unnamed_addr global i32 0
+; CHECK: @var = global i32 0
+
+@__rustc_debug_gdb_scripts_section__ = internal unnamed_addr constant [34 x i8] c"\01gdb_load_rust_pretty_printers.py\00", section ".debug_gdb_scripts", align 1
+; CHECK: @__rustc_debug_gdb_scripts_section__ = internal constant [34 x i8] c"\01gdb_load_rust_pretty_printers.py\00", align 1
+
+define void @func_section() section ".some_section" {
+  ret void
+}
+; CHECK-LABEL: define void @func_section() {
+
+define void @func_init_array() section ".init_array" {
+  ret void
+}
+; CHECK-LABEL: define void @func_init_array() {
+
+
+define fastcc void @func_attrs(i32 inreg, i32 zeroext)
+    unnamed_addr noreturn nounwind readonly align 8 {
+  ret void
+}
+; CHECK-LABEL: define void @func_attrs(i32, i32) {
+
+define hidden void @hidden_visibility() {
+  ret void
+}
+; CHECK-LABEL: define void @hidden_visibility() {
+
+define protected void @protected_visibility() {
+  ret void
+}
+; CHECK-LABEL: define void @protected_visibility() {
+
+
+define void @call_attrs() {
+  call fastcc void @func_attrs(i32 inreg 10, i32 zeroext 20) noreturn nounwind readonly
+  ret void
+}
+; CHECK-LABEL: define void @call_attrs()
+; CHECK: call void @func_attrs(i32 10, i32 20){{$}}
+
+
+; We currently don't attempt to strip attributes from intrinsic
+; declarations because the reader automatically inserts attributes
+; based on built-in knowledge of intrinsics, so it is difficult to get
+; rid of them here.
+declare i8* @llvm.nacl.read.tp()
+; CHECK: declare i8* @llvm.nacl.read.tp() #{{[0-9]+}}
+
+define void @arithmetic_attrs() {
+  %add = add nsw i32 1, 2
+  %shl = shl nuw i32 3, 4
+  %lshr = lshr exact i32 2, 1
+  ret void
+}
+; CHECK-LABEL: define void @arithmetic_attrs() {
+; CHECK-NEXT: %add = add i32 1, 2
+; CHECK-NEXT: %shl = shl i32 3, 4
+; CHECK-NEXT: %lshr = lshr i32 2, 1
diff --git a/test/Transforms/NaCl/strip-branchweight-metadata.ll b/test/Transforms/NaCl/strip-branchweight-metadata.ll
new file mode 100644
index 000000000000..a51f1852a7a6
--- /dev/null
+++ b/test/Transforms/NaCl/strip-branchweight-metadata.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -strip-metadata %s | FileCheck %s
+
+; Test that !prof metadata is removed from branches
+; CHECK: @foo
+; CHECK-NOT: !prof
+define i32 @foo(i32 %c) {
+  switch i32 %c, label %3 [
+    i32 5, label %4
+    i32 0, label %1
+    i32 4, label %2
+  ], !prof !0
+
;