From e1467a4ceeeb74cdc54830e03df8d58893e38892 Mon Sep 17 00:00:00 2001
From: Daniel Dunbar <daniel@zuster.org>
Date: Thu, 22 Apr 2010 02:35:46 +0000
Subject: [PATCH] IRgen: Rewrite bit-field access policy to not access data
 beyond the bounds of the structure, which we also now verify as part of the
 post-layout consistency checks.  - This fixes some pedantic bugs with packed
 structures, as well as major problems with -fno-bitfield-type-align.

 - Fixes PR5591, PR5567, and all known -fno-bitfield-type-align issues.

 - Review appreciated.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@102045 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/CGRecordLayoutBuilder.cpp | 122 ++++++++++++++++++--------
 test/CodeGen/bitfield-2.c             |  52 ++++++++++-
 2 files changed, 132 insertions(+), 42 deletions(-)

diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 71d0290548..530a41c3d6 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -151,6 +151,10 @@ static CGBitFieldInfo ComputeBitFieldInfo(CodeGenTypes &Types,
                                           const FieldDecl *FD,
                                           uint64_t FieldOffset,
                                           uint64_t FieldSize) {
+  const RecordDecl *RD = FD->getParent();
+  uint64_t ContainingTypeSizeInBits =
+    Types.getContext().getASTRecordLayout(RD).getSize();
+
   const llvm::Type *Ty = Types.ConvertTypeForMemRecursive(FD->getType());
   uint64_t TypeSizeInBytes = Types.getTargetData().getTypeAllocSize(Ty);
   uint64_t TypeSizeInBits = TypeSizeInBytes * 8;
@@ -170,42 +174,65 @@ static CGBitFieldInfo ComputeBitFieldInfo(CodeGenTypes &Types,
     FieldSize = TypeSizeInBits;
   }
 
-  unsigned StartBit = FieldOffset % TypeSizeInBits;
-
-  // The current policy is to always access the bit-field using the source type
-  // of the bit-field. With the C bit-field rules, this implies that we always
-  // use either one or two accesses, and two accesses can only occur with a
-  // packed structure when the bit-field straddles an alignment boundary.
-  CGBitFieldInfo::AccessInfo Components[2];
-
-  unsigned LowBits = std::min(FieldSize, TypeSizeInBits - StartBit);
-  bool NeedsHighAccess = LowBits != FieldSize;
-  unsigned NumComponents = 1 + NeedsHighAccess;
-
-  // FIXME: This access policy is probably wrong on big-endian systems.
-  CGBitFieldInfo::AccessInfo &LowAccess = Components[0];
-  LowAccess.FieldIndex = 0;
-  LowAccess.FieldByteOffset =
-    TypeSizeInBytes * ((FieldOffset / 8) / TypeSizeInBytes);
-  LowAccess.FieldBitStart = StartBit;
-  LowAccess.AccessWidth = TypeSizeInBits;
-  // FIXME: This might be wrong!
-  LowAccess.AccessAlignment = 0;
-  LowAccess.TargetBitOffset = 0;
-  LowAccess.TargetBitWidth = LowBits;
-
-  if (NeedsHighAccess) {
-    CGBitFieldInfo::AccessInfo &HighAccess = Components[1];
-    HighAccess.FieldIndex = 0;
-    HighAccess.FieldByteOffset = LowAccess.FieldByteOffset + TypeSizeInBytes;
-    HighAccess.FieldBitStart = 0;
-    HighAccess.AccessWidth = TypeSizeInBits;
+  // Compute the access components. The policy we use is to start by attempting
+  // to access using the width of the bit-field type itself and to always access
+  // at aligned indices of that type. If such an access would fail because it
+  // extends past the bound of the type, then we reduce size to the next smaller
+  // power of two and retry. The current algorithm assumes pow2 sized types,
+  // although this is easy to fix.
+  //
+  // FIXME: This algorithm is wrong on big-endian systems, I think.
+  assert(llvm::isPowerOf2_32(TypeSizeInBits) && "Unexpected type size!");
+  CGBitFieldInfo::AccessInfo Components[3];
+  unsigned NumComponents = 0;
+  unsigned AccessedTargetBits = 0;       // The tumber of target bits accessed.
+  unsigned AccessWidth = TypeSizeInBits; // The current access width to attempt.
+
+  // Round down from the field offset to find the first access position that is
+  // at an aligned offset of the initial access type.
+  uint64_t AccessStart = FieldOffset - (FieldOffset % TypeSizeInBits);
+
+  while (AccessedTargetBits < FieldSize) {
+    // Check that we can access using a type of this size, without reading off
+    // the end of the structure. This can occur with packed structures and
+    // -fno-bitfield-type-align, for example.
+    if (AccessStart + AccessWidth > ContainingTypeSizeInBits) {
+      // If so, reduce access size to the next smaller power-of-two and retry.
+      AccessWidth >>= 1;
+      assert(AccessWidth >= 8 && "Cannot access under byte size!");
+      continue;
+    }
+
+    // Otherwise, add an access component.
+
+    // First, compute the bits inside this access which are part of the
+    // target. We are reading bits [AccessStart, AccessStart + AccessWidth); the
+    // intersection with [FieldOffset, FieldOffset + FieldSize) gives the bits
+    // in the target that we are reading.
+    uint64_t AccessBitsInFieldStart = std::max(AccessStart, FieldOffset);
+    uint64_t AccessBitsInFieldSize =
+      std::min(AccessWidth - (AccessBitsInFieldStart - AccessStart),
+               FieldSize - (AccessBitsInFieldStart-FieldOffset));
+
+    assert(NumComponents < 3 && "Unexpected number of components!");
+    CGBitFieldInfo::AccessInfo &AI = Components[NumComponents++];
+    AI.FieldIndex = 0;
+    // FIXME: We still follow the old access pattern of only using the field
+    // byte offset. We should switch this once we fix the struct layout to be
+    // pretty.
+    AI.FieldByteOffset = AccessStart / 8;
+    AI.FieldBitStart = AccessBitsInFieldStart - AccessStart;
+    AI.AccessWidth = AccessWidth;
     // FIXME: This might be wrong!
-    HighAccess.AccessAlignment = 0;
-    HighAccess.TargetBitOffset = LowBits;
-    HighAccess.TargetBitWidth = FieldSize - LowBits;
+    AI.AccessAlignment = 0;
+    AI.TargetBitOffset = AccessedTargetBits;
+    AI.TargetBitWidth = AccessBitsInFieldSize;
+
+    AccessStart += AccessWidth;
+    AccessedTargetBits += AI.TargetBitWidth;
   }
 
+  assert(AccessedTargetBits == FieldSize && "Invalid bit-field access!");
   return CGBitFieldInfo(FieldSize, NumComponents, Components, IsSigned);
 }
 
@@ -565,13 +592,13 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D) {
     RL->dump();
   }
 
+#ifndef NDEBUG
   // Verify that the computed LLVM struct size matches the AST layout size.
-  assert(getContext().getASTRecordLayout(D).getSize() / 8 ==
-         getTargetData().getTypeAllocSize(Ty) &&
+  uint64_t TypeSizeInBits = getContext().getASTRecordLayout(D).getSize();
+  assert(TypeSizeInBits == getTargetData().getTypeAllocSizeInBits(Ty) &&
          "Type size mismatch!");
 
   // Verify that the LLVM and AST field offsets agree.
-#ifndef NDEBUG
   const llvm::StructType *ST =
     dyn_cast<llvm::StructType>(RL->getLLVMType());
   const llvm::StructLayout *SL = getTargetData().getStructLayout(ST);
@@ -580,12 +607,29 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D) {
   RecordDecl::field_iterator it = D->field_begin();
   for (unsigned i = 0, e = AST_RL.getFieldCount(); i != e; ++i, ++it) {
     const FieldDecl *FD = *it;
-    if (FD->isBitField()) {
-      // FIXME: Verify assorted things.
-    } else {
+
+    // For non-bit-fields, just check that the LLVM struct offset matches the
+    // AST offset.
+    if (!FD->isBitField()) {
       unsigned FieldNo = RL->getLLVMFieldNo(FD);
       assert(AST_RL.getFieldOffset(i) == SL->getElementOffsetInBits(FieldNo) &&
              "Invalid field offset!");
+      continue;
+    }
+
+    // Ignore unnamed bit-fields.
+    if (!FD->getDeclName())
+      continue;
+
+    const CGBitFieldInfo &Info = RL->getBitFieldInfo(FD);
+    for (unsigned i = 0, e = Info.getNumComponents(); i != e; ++i) {
+      const CGBitFieldInfo::AccessInfo &AI = Info.getComponent(i);
+
+      // Verify that every component access is within the structure.
+      uint64_t FieldOffset = SL->getElementOffsetInBits(AI.FieldIndex);
+      uint64_t AccessBitOffset = FieldOffset + AI.FieldByteOffset * 8;
+      assert(AccessBitOffset + AI.AccessWidth <= TypeSizeInBits &&
+             "Invalid bit-field access (out of range)!");
     }
   }
 #endif
diff --git a/test/CodeGen/bitfield-2.c b/test/CodeGen/bitfield-2.c
index 872312f622..7df4da9da9 100644
--- a/test/CodeGen/bitfield-2.c
+++ b/test/CodeGen/bitfield-2.c
@@ -1,10 +1,25 @@
-// RUN: %clang_cc1 -emit-llvm -triple x86_64 -O3 -o - %s | \
-// RUN:   FileCheck -check-prefix=CHECK-OPT %s
+// RUN: %clang_cc1 -emit-llvm -triple x86_64 -O3 -o %t.opt.ll %s \
+// RUN:   -fdump-record-layouts 2> %t.dump.txt
+// RUN: FileCheck -check-prefix=CHECK-RECORD < %t.dump.txt %s
+// RUN: FileCheck -check-prefix=CHECK-OPT < %t.opt.ll %s
 
 /****/
 
+// Check that we don't read off the end a packed 24-bit structure.
 // PR6176
 
+// CHECK-RECORD: *** Dumping IRgen Record Layout
+// CHECK-RECORD: Record: struct s0
+// CHECK-RECORD: Layout: <CGRecordLayout
+// CHECK-RECORD:   LLVMType:<{ [3 x i8] }>
+// CHECK-RECORD:   ContainsPointerToDataMember:0
+// CHECK-RECORD:   BitFields:[
+// CHECK-RECORD:     <CGBitFieldInfo Size:24 IsSigned:1
+// CHECK-RECORD:                     NumComponents:2 Components: [
+// CHECK-RECORD:         <AccessInfo FieldIndex:0 FieldByteOffset:0 FieldBitStart:0 AccessWidth:16
+// CHECK-RECORD:                     AccessAlignment:0 TargetBitOffset:0 TargetBitWidth:16>
+// CHECK-RECORD:         <AccessInfo FieldIndex:0 FieldByteOffset:2 FieldBitStart:0 AccessWidth:8
+// CHECK-RECORD:                     AccessAlignment:0 TargetBitOffset:16 TargetBitWidth:8>
 struct __attribute((packed)) s0 {
   int f0 : 24;
 };
@@ -38,6 +53,24 @@ unsigned long long test_0() {
 
 // PR5591
 
+// CHECK-RECORD: *** Dumping IRgen Record Layout
+// CHECK-RECORD: Record: struct s1
+// CHECK-RECORD: Layout: <CGRecordLayout
+// CHECK-RECORD:   LLVMType:<{ [2 x i8], i8 }>
+// CHECK-RECORD:   ContainsPointerToDataMember:0
+// CHECK-RECORD:   BitFields:[
+// CHECK-RECORD:     <CGBitFieldInfo Size:10 IsSigned:1
+// CHECK-RECORD:                     NumComponents:1 Components: [
+// CHECK-RECORD:         <AccessInfo FieldIndex:0 FieldByteOffset:0 FieldBitStart:0 AccessWidth:16
+// CHECK-RECORD:                     AccessAlignment:0 TargetBitOffset:0 TargetBitWidth:10>
+// CHECK-RECORD:     ]>
+// CHECK-RECORD:     <CGBitFieldInfo Size:10 IsSigned:1
+// CHECK-RECORD:                     NumComponents:2 Components: [
+// CHECK-RECORD:         <AccessInfo FieldIndex:0 FieldByteOffset:0 FieldBitStart:10 AccessWidth:16
+// CHECK-RECORD:                     AccessAlignment:0 TargetBitOffset:0 TargetBitWidth:6>
+// CHECK-RECORD:         <AccessInfo FieldIndex:0 FieldByteOffset:2 FieldBitStart:0 AccessWidth:8
+// CHECK-RECORD:                     AccessAlignment:0 TargetBitOffset:6 TargetBitWidth:4>
+
 #pragma pack(push)
 #pragma pack(1)
 struct __attribute((packed)) s1 {
@@ -73,9 +106,22 @@ unsigned long long test_1() {
 
 /****/
 
+// Check that we don't access beyond the bounds of a union.
+//
 // PR5567
 
-union u2 {
+// CHECK-RECORD: *** Dumping IRgen Record Layout
+// CHECK-RECORD: Record: union u2
+// CHECK-RECORD: Layout: <CGRecordLayout
+// CHECK-RECORD:   LLVMType:<{ i8 }>
+// CHECK-RECORD:   ContainsPointerToDataMember:0
+// CHECK-RECORD:   BitFields:[
+// CHECK-RECORD:     <CGBitFieldInfo Size:3 IsSigned:0
+// CHECK-RECORD:                     NumComponents:1 Components: [
+// CHECK-RECORD:         <AccessInfo FieldIndex:0 FieldByteOffset:0 FieldBitStart:0 AccessWidth:8
+// CHECK-RECORD:                     AccessAlignment:0 TargetBitOffset:0 TargetBitWidth:3>
+
+union __attribute__((packed)) u2 {
   unsigned long long f0 : 3;
 };
 
-- 
2.40.0