[clang] Use different memory layout type for _BitInt(N) in LLVM IR (#91364)

There are two problems with _BitInt prior to this patch:
1. For at least some values of N, we cannot use LLVM's iN for the type
of struct elements, array elements, allocas, global variables, and so
on, because the LLVM layout for that type does not match the high-level
layout of _BitInt(N).
Example: currently, on targets with an i128:128 data layout, a correct
implementation is possible either for __int128 or for _BitInt(129+)
lowered to iN, but not both; and since a21abc7 the correct implementation
of __int128 is already in place. When this happens, an opaque [M x i8]
type is used instead, where M = sizeof(_BitInt(N)).
2. LLVM doesn't guarantee any particular extension behavior for integer
types whose width isn't a multiple of 8. For this reason, all _BitInt
types now have an in-memory representation that is a whole number of
bytes. For example, _BitInt(17) now has the memory layout type i32. (A
small sketch of this layout decision is shown after this list.)
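
The layout decision can be summarized with a small standalone sketch. This
is not Clang's actual API; the struct, function, and parameter names are
invented for illustration, and the sizes used in main() are assumptions
about plausible targets rather than guaranteed ABI values:

#include <cstdio>

struct BitIntLayout {
  unsigned memoryBits;     // width of the iBITS load/store type, BITS = sizeof * 8
  bool splitIntoByteArray; // true => memory layout type must be [sizeof x i8]
};

// astSizeInBytes: sizeof(_BitInt(N)) according to the AST/ABI layout.
// allocSizeOfIN:  alloc size the LLVM data layout gives the exact-width iN.
BitIntLayout classifyBitInt(unsigned astSizeInBytes, unsigned allocSizeOfIN) {
  BitIntLayout L;
  L.memoryBits = astSizeInBytes * 8;
  // If iN's alloc size disagrees with sizeof(_BitInt(N)), using an integer
  // type as a struct or array element would break layout, so the memory
  // layout type falls back to an opaque byte array of exactly sizeof bytes.
  L.splitIntoByteArray = (allocSizeOfIN != astSizeInBytes);
  return L;
}

int main() {
  // Assumed target A: sizeof(_BitInt(17)) == 4 and i17 also alloc-sizes to 4,
  // so the memory layout type is plain i32.
  BitIntLayout A = classifyBitInt(4, 4);
  // Assumed target B (i128:128 data layout): sizeof(_BitInt(129)) == 24 but
  // i129 alloc-sizes to 32, so the memory layout type becomes [24 x i8].
  BitIntLayout B = classifyBitInt(24, 32);
  std::printf("A: i%u split=%d; B: i%u split=%d\n", A.memoryBits,
              A.splitIntoByteArray, B.memoryBits, B.splitIntoByteArray);
  return 0;
}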

This patch also introduces the concept of a load/store type and adds an
API to CodeGenTypes that returns the IR type that should be used for load
and store operations. This is particularly useful when a _BitInt ends up
with an array of bytes as its memory layout type. For _BitInt(N), let M =
sizeof(_BitInt(N)) and BITS = M * 8. Loads and stores of iBITS both (1)
produce far better code from the backends and (2) are far more optimizable
by IR passes than loads and stores of [M x i8].
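
As a rough illustration of what the new scheme emits, here is a hedged
sketch (not code from this patch) built with the LLVM C++ API: for a signed
_BitInt(17) local whose load/store type is i32, stores widen the i17 value
and loads truncate it back, mirroring EmitToMemory and EmitFromMemory; when
the memory layout type is [M x i8], the same iBITS loads and stores are
simply issued against that slot.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::Module M("bitint_demo", Ctx);
  llvm::Type *I17 = llvm::Type::getIntNTy(Ctx, 17); // primary (value) type
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);    // load/store type

  auto *FnTy = llvm::FunctionType::get(I17, {I17}, /*isVarArg=*/false);
  auto *Fn = llvm::Function::Create(FnTy, llvm::Function::ExternalLinkage,
                                    "roundtrip", &M);
  llvm::IRBuilder<> B(llvm::BasicBlock::Create(Ctx, "entry", Fn));

  llvm::Value *Slot = B.CreateAlloca(I32, nullptr, "x"); // whole-byte slot
  // EmitToMemory-style widening before the store ("storedv" in the patch).
  B.CreateStore(B.CreateSExt(Fn->getArg(0), I32, "storedv"), Slot);
  // EmitFromMemory-style narrowing after the load ("loadedv" in the patch).
  llvm::Value *Loaded = B.CreateLoad(I32, Slot, "x.reload");
  B.CreateRet(B.CreateTrunc(Loaded, I17, "loadedv"));

  M.print(llvm::outs(), nullptr); // dump the resulting IR
  return 0;
}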

Fixes #85139
Fixes #83419

---------

Co-authored-by: John McCall <[email protected]>
Fznamznon and rjmccall authored Jul 15, 2024
1 parent 9ac2b89 commit 9ad72df
Showing 33 changed files with 2,579 additions and 2,188 deletions.
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CGCall.cpp
@@ -3863,7 +3863,8 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
LValue ArgVal =
LValue::MakeAddr(ArgAddr, RetTy, getContext(), BaseInfo, TBAAInfo);
EmitStoreOfScalar(
Builder.CreateLoad(ReturnValue), ArgVal, /*isInit*/ true);
EmitLoadOfScalar(MakeAddrLValue(ReturnValue, RetTy), EndLoc), ArgVal,
/*isInit*/ true);
break;
}
}
12 changes: 12 additions & 0 deletions clang/lib/CodeGen/CGDecl.cpp
@@ -33,6 +33,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Sema/Sema.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
@@ -1969,6 +1970,17 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
constant = constWithPadding(CGM, IsPattern::No,
replaceUndef(CGM, isPattern, constant));
}

if (D.getType()->isBitIntType() &&
CGM.getTypes().typeRequiresSplitIntoByteArray(D.getType())) {
// Constants for long _BitInt types are split into individual bytes.
// Try to fold these back into an integer constant so it can be stored
// properly.
llvm::Type *LoadType = CGM.getTypes().convertTypeForLoadStore(
D.getType(), constant->getType());
constant = llvm::ConstantFoldLoadFromConst(
constant, LoadType, llvm::APInt::getZero(32), CGM.getDataLayout());
}
}

if (!constant) {
57 changes: 32 additions & 25 deletions clang/lib/CodeGen/CGExpr.cpp
@@ -1986,6 +1986,9 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal();
}

Addr =
Addr.withElementType(convertTypeForLoadStore(Ty, Addr.getElementType()));

llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile);
if (isNontemporal) {
llvm::MDNode *Node = llvm::MDNode::get(
@@ -2008,27 +2011,33 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
return EmitFromMemory(Load, Ty);
}

/// Converts a scalar value from its primary IR type (as returned
/// by ConvertType) to its load/store type (as returned by
/// convertTypeForLoadStore).
llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
// Bool has a different representation in memory than in registers.
if (hasBooleanRepresentation(Ty)) {
// This should really always be an i1, but sometimes it's already
// an i8, and it's awkward to track those cases down.
if (Value->getType()->isIntegerTy(1))
return Builder.CreateZExt(Value, ConvertTypeForMem(Ty), "frombool");
assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) &&
"wrong value rep of bool");
if (hasBooleanRepresentation(Ty) || Ty->isBitIntType()) {
llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType());
bool Signed = Ty->isSignedIntegerOrEnumerationType();
return Builder.CreateIntCast(Value, StoreTy, Signed, "storedv");
}

if (Ty->isExtVectorBoolType()) {
llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType());
// Expand to the memory bit width.
unsigned MemNumElems = StoreTy->getPrimitiveSizeInBits();
// <N x i1> --> <P x i1>.
Value = emitBoolVecConversion(Value, MemNumElems, "insertvec");
// <P x i1> --> iP.
Value = Builder.CreateBitCast(Value, StoreTy);
}

return Value;
}

/// Converts a scalar value from its load/store type (as returned
/// by convertTypeForLoadStore) to its primary IR type (as returned
/// by ConvertType).
llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
// Bool has a different representation in memory than in registers.
if (hasBooleanRepresentation(Ty)) {
assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) &&
"wrong value rep of bool");
return Builder.CreateTrunc(Value, Builder.getInt1Ty(), "tobool");
}
if (Ty->isExtVectorBoolType()) {
const auto *RawIntTy = Value->getType();
// Bitcast iP --> <P x i1>.
@@ -2041,6 +2050,11 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
return emitBoolVecConversion(V, ValNumElems, "extractvec");
}

if (hasBooleanRepresentation(Ty) || Ty->isBitIntType()) {
llvm::Type *ResTy = ConvertType(Ty);
return Builder.CreateTrunc(Value, ResTy, "loadedv");
}

return Value;
}

@@ -2093,17 +2107,10 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
llvm::Type *SrcTy = Value->getType();
if (const auto *ClangVecTy = Ty->getAs<VectorType>()) {
auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy);
if (VecTy && ClangVecTy->isExtVectorBoolType()) {
auto *MemIntTy = cast<llvm::IntegerType>(Addr.getElementType());
// Expand to the memory bit width.
unsigned MemNumElems = MemIntTy->getPrimitiveSizeInBits();
// <N x i1> --> <P x i1>.
Value = emitBoolVecConversion(Value, MemNumElems, "insertvec");
// <P x i1> --> iP.
Value = Builder.CreateBitCast(Value, MemIntTy);
} else if (!CGM.getCodeGenOpts().PreserveVec3Type) {
if (!CGM.getCodeGenOpts().PreserveVec3Type) {
// Handle vec3 special.
if (VecTy && cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) {
if (VecTy && !ClangVecTy->isExtVectorBoolType() &&
cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) {
// Our source is a vec3, do a shuffle vector to make it a vec4.
Value = Builder.CreateShuffleVector(Value, ArrayRef<int>{0, 1, 2, -1},
"extractVec");
@@ -2477,7 +2484,7 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
llvm::Value **Result) {
const CGBitFieldInfo &Info = Dst.getBitFieldInfo();
llvm::Type *ResLTy = ConvertTypeForMem(Dst.getType());
llvm::Type *ResLTy = convertTypeForLoadStore(Dst.getType());
Address Ptr = Dst.getBitFieldAddress();

// Get the source value, truncated to the width of the bit-field.
57 changes: 44 additions & 13 deletions clang/lib/CodeGen/CGExprConstant.cpp
@@ -585,7 +585,7 @@ class ConstStructBuilder {
bool AllowOverwrite = false);

bool AppendBitField(const FieldDecl *Field, uint64_t FieldOffset,
llvm::ConstantInt *InitExpr, bool AllowOverwrite = false);
llvm::Constant *InitExpr, bool AllowOverwrite = false);

bool Build(const InitListExpr *ILE, bool AllowOverwrite);
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
Expand All @@ -609,9 +609,25 @@ bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars,
return Builder.add(InitCst, StartOffset + FieldOffsetInChars, AllowOverwrite);
}

bool ConstStructBuilder::AppendBitField(
const FieldDecl *Field, uint64_t FieldOffset, llvm::ConstantInt *CI,
bool AllowOverwrite) {
bool ConstStructBuilder::AppendBitField(const FieldDecl *Field,
uint64_t FieldOffset, llvm::Constant *C,
bool AllowOverwrite) {

llvm::ConstantInt *CI = dyn_cast<llvm::ConstantInt>(C);
if (!CI) {
// Constants for long _BitInt types are sometimes split into individual
// bytes. Try to fold these back into an integer constant. If that doesn't
// work out, then we are trying to initialize a bitfield with a non-trivial
// constant, this must require run-time code.
llvm::Type *LoadType =
CGM.getTypes().convertTypeForLoadStore(Field->getType(), C->getType());
llvm::Constant *FoldedConstant = llvm::ConstantFoldLoadFromConst(
C, LoadType, llvm::APInt::getZero(32), CGM.getDataLayout());
CI = dyn_cast_if_present<llvm::ConstantInt>(FoldedConstant);
if (!CI)
return false;
}

const CGRecordLayout &RL =
CGM.getTypes().getCGRecordLayout(Field->getParent());
const CGBitFieldInfo &Info = RL.getBitFieldInfo(Field);
@@ -762,15 +778,9 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
AllowOverwrite = true;
} else {
// Otherwise we have a bitfield.
if (auto *CI = dyn_cast<llvm::ConstantInt>(EltInit)) {
if (!AppendBitField(Field, Layout.getFieldOffset(FieldNo), CI,
AllowOverwrite))
return false;
} else {
// We are trying to initialize a bitfield with a non-trivial constant,
// this must require run-time code.
if (!AppendBitField(Field, Layout.getFieldOffset(FieldNo), EltInit,
AllowOverwrite))
return false;
}
}
}

@@ -871,7 +881,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
} else {
// Otherwise we have a bitfield.
if (!AppendBitField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits,
cast<llvm::ConstantInt>(EltInit), AllowOverwrite))
EltInit, AllowOverwrite))
return false;
}
}
@@ -1888,6 +1898,27 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
return Res;
}

if (destType->isBitIntType()) {
ConstantAggregateBuilder Builder(CGM);
llvm::Type *LoadStoreTy = CGM.getTypes().convertTypeForLoadStore(destType);
// ptrtoint/inttoptr should not involve _BitInt in constant expressions, so
// casting to ConstantInt is safe here.
auto *CI = cast<llvm::ConstantInt>(C);
llvm::Constant *Res = llvm::ConstantFoldCastOperand(
destType->isSignedIntegerOrEnumerationType() ? llvm::Instruction::SExt
: llvm::Instruction::ZExt,
CI, LoadStoreTy, CGM.getDataLayout());
if (CGM.getTypes().typeRequiresSplitIntoByteArray(destType, C->getType())) {
// Long _BitInt has array of bytes as in-memory type.
// So, split constant into individual bytes.
llvm::Type *DesiredTy = CGM.getTypes().ConvertTypeForMem(destType);
llvm::APInt Value = cast<llvm::ConstantInt>(Res)->getValue();
Builder.addBits(Value, /*OffsetInBits=*/0, /*AllowOverwrite=*/false);
return Builder.build(DesiredTy, /*AllowOversized*/ false);
}
return Res;
}

return C;
}

7 changes: 4 additions & 3 deletions clang/lib/CodeGen/CGExprScalar.cpp
@@ -436,9 +436,10 @@ class ScalarExprEmitter

if (Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
if (E->isGLValue())
return CGF.Builder.CreateLoad(Address(
Result, CGF.ConvertTypeForMem(E->getType()),
CGF.getContext().getTypeAlignInChars(E->getType())));
return CGF.EmitLoadOfScalar(
Address(Result, CGF.convertTypeForLoadStore(E->getType()),
CGF.getContext().getTypeAlignInChars(E->getType())),
/*Volatile*/ false, E->getType(), E->getExprLoc());
return Result;
}
return Visit(E->getSubExpr());
3 changes: 1 addition & 2 deletions clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -427,8 +427,7 @@ CGRecordLowering::accumulateBitFields(bool isNonVirtualBaseType,
continue;
}
uint64_t BitOffset = getFieldBitOffset(*Field);
llvm::Type *Type =
Types.ConvertTypeForMem(Field->getType(), /*ForBitField=*/true);
llvm::Type *Type = Types.ConvertTypeForMem(Field->getType());
// If we don't have a run yet, or don't live within the previous run's
// allocated storage then we allocate some storage and start a new run.
if (Run == FieldEnd || BitOffset >= Tail) {
10 changes: 8 additions & 2 deletions clang/lib/CodeGen/CGStmt.cpp
@@ -1537,9 +1537,15 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
Builder.CreateStore(Result.getScalarVal(), ReturnValue);
} else {
switch (getEvaluationKind(RV->getType())) {
case TEK_Scalar:
Builder.CreateStore(EmitScalarExpr(RV), ReturnValue);
case TEK_Scalar: {
llvm::Value *Ret = EmitScalarExpr(RV);
if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect)
EmitStoreOfScalar(Ret, MakeAddrLValue(ReturnValue, RV->getType()),
/*isInit*/ true);
else
Builder.CreateStore(Ret, ReturnValue);
break;
}
case TEK_Complex:
EmitComplexExprIntoLValue(RV, MakeAddrLValue(ReturnValue, RV->getType()),
/*isInit*/ true);
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
@@ -233,6 +233,11 @@ llvm::Type *CodeGenFunction::ConvertType(QualType T) {
return CGM.getTypes().ConvertType(T);
}

llvm::Type *CodeGenFunction::convertTypeForLoadStore(QualType ASTTy,
llvm::Type *LLVMTy) {
return CGM.getTypes().convertTypeForLoadStore(ASTTy, LLVMTy);
}

TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) {
type = type.getCanonicalType();
while (true) {
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
@@ -2576,6 +2576,8 @@ class CodeGenFunction : public CodeGenTypeCache {

llvm::Type *ConvertTypeForMem(QualType T);
llvm::Type *ConvertType(QualType T);
llvm::Type *convertTypeForLoadStore(QualType ASTTy,
llvm::Type *LLVMTy = nullptr);
llvm::Type *ConvertType(const TypeDecl *T) {
return ConvertType(getContext().getTypeDeclType(T));
}
65 changes: 60 additions & 5 deletions clang/lib/CodeGen/CodeGenTypes.cpp
@@ -89,7 +89,14 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD,
/// ConvertType in that it is used to convert to the memory representation for
/// a type. For example, the scalar representation for _Bool is i1, but the
/// memory representation is usually i8 or i32, depending on the target.
llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
///
/// We generally assume that the alloc size of this type under the LLVM
/// data layout is the same as the size of the AST type. The alignment
/// does not have to match: Clang should always use explicit alignments
/// and packed structs as necessary to produce the layout it needs.
/// But the size does need to be exactly right or else things like struct
/// layout will break.
llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
if (T->isConstantMatrixType()) {
const Type *Ty = Context.getCanonicalType(T).getTypePtr();
const ConstantMatrixType *MT = cast<ConstantMatrixType>(Ty);
@@ -107,17 +114,65 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
return llvm::IntegerType::get(FixedVT->getContext(), BytePadded);
}

// If this is a bool type, or a bit-precise integer type in a bitfield
// representation, map this integer to the target-specified size.
if ((ForBitField && T->isBitIntType()) ||
(!T->isBitIntType() && R->isIntegerTy(1)))
// If T is _Bool or a _BitInt type, ConvertType will produce an IR type
// with the exact semantic bit-width of the AST type; for example,
// _BitInt(17) will turn into i17. In memory, however, we need to store
// such values extended to their full storage size as decided by AST
// layout; this is an ABI requirement. Ideally, we would always use an
// integer type that's just the bit-size of the AST type; for example, if
// sizeof(_BitInt(17)) == 4, _BitInt(17) would turn into i32. That is what's
// returned by convertTypeForLoadStore. However, that type does not
// always satisfy the size requirement on memory representation types
// describe above. For example, a 32-bit platform might reasonably set
// sizeof(_BitInt(65)) == 12, but i96 is likely to have to have an alloc size
// of 16 bytes in the LLVM data layout. In these cases, we simply return
// a byte array of the appropriate size.
if (T->isBitIntType()) {
if (typeRequiresSplitIntoByteArray(T, R))
return llvm::ArrayType::get(CGM.Int8Ty,
Context.getTypeSizeInChars(T).getQuantity());
return llvm::IntegerType::get(getLLVMContext(),
(unsigned)Context.getTypeSize(T));
}

if (R->isIntegerTy(1))
return llvm::IntegerType::get(getLLVMContext(),
(unsigned)Context.getTypeSize(T));

// Else, don't map it.
return R;
}

bool CodeGenTypes::typeRequiresSplitIntoByteArray(QualType ASTTy,
llvm::Type *LLVMTy) {
if (!LLVMTy)
LLVMTy = ConvertType(ASTTy);

CharUnits ASTSize = Context.getTypeSizeInChars(ASTTy);
CharUnits LLVMSize =
CharUnits::fromQuantity(getDataLayout().getTypeAllocSize(LLVMTy));
return ASTSize != LLVMSize;
}

llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T,
llvm::Type *LLVMTy) {
if (!LLVMTy)
LLVMTy = ConvertType(T);

if (T->isBitIntType())
return llvm::Type::getIntNTy(
getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8);

if (LLVMTy->isIntegerTy(1))
return llvm::IntegerType::get(getLLVMContext(),
(unsigned)Context.getTypeSize(T));

if (T->isExtVectorBoolType())
return ConvertTypeForMem(T);

return LLVMTy;
}

/// isRecordLayoutComplete - Return true if the specified type is already
/// completely laid out.
bool CodeGenTypes::isRecordLayoutComplete(const Type *Ty) const {