return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
}
-static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
- char TestAnd, char Size,
- bool Locked = false) {
- Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
- Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+// Get the properties of an x86 BT* instruction: the action character ('\0' for
+// a plain bit test, 'c' for complement, 'r' for reset, 's' for set), the size
+// suffix the assembler expects ('l' or 'q'), and whether to add the lock
+// prefix.
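+// For example, _interlockedbittestandset64 maps to {'s', 'q', true}, which
+// EmitX86BitTestIntrinsic below turns into "lock btsq".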
+static std::tuple<char, char, bool>
+getBitTestActionSizeAndLocking(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ case Builtin::BI_bittest:
+ return {'\0', 'l', false};
+ case Builtin::BI_bittestandcomplement:
+ return {'c', 'l', false};
+ case Builtin::BI_bittestandreset:
+ return {'r', 'l', false};
+ case Builtin::BI_bittestandset:
+ return {'s', 'l', false};
+ case Builtin::BI_interlockedbittestandreset:
+ return {'r', 'l', /*Locked=*/true};
+ case Builtin::BI_interlockedbittestandset:
+ return {'s', 'l', /*Locked=*/true};
+
+ case Builtin::BI_bittest64:
+ return {'\0', 'q', false};
+ case Builtin::BI_bittestandcomplement64:
+ return {'c', 'q', false};
+ case Builtin::BI_bittestandreset64:
+ return {'r', 'q', false};
+ case Builtin::BI_bittestandset64:
+ return {'s', 'q', false};
+ case Builtin::BI_interlockedbittestandreset64:
+ return {'r', 'q', /*Locked=*/true};
+ case Builtin::BI_interlockedbittestandset64:
+ return {'s', 'q', /*Locked=*/true};
+ }
+ llvm_unreachable("expected only bittest builtins");
+}
+
+static RValue EmitX86BitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID,
+ const CallExpr *E, Value *BitBase,
+ Value *BitPos) {
+ char Action, Size;
+ bool Locked;
+ std::tie(Action, Size, Locked) = getBitTestActionSizeAndLocking(BuiltinID);
// Build the assembly.
SmallString<64> Asm;
raw_svector_ostream AsmOS(Asm);
if (Locked)
AsmOS << "lock ";
AsmOS << "bt";
- if (TestAnd)
- AsmOS << TestAnd;
+ if (Action)
+ AsmOS << Action;
AsmOS << Size << " $2, ($1)\n\tsetc ${0:b}";
// Build the constraints. FIXME: We should support immediates when possible.
- std::string Constraints = "=r,r,r,~{cc},~{flags},~{memory},~{fpsr}";
+ std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
llvm::IntegerType *IntType = llvm::IntegerType::get(
CGF.getLLVMContext(),
CGF.getContext().getTypeSize(E->getArg(1)->getType()));
llvm::Type *IntPtrType = IntType->getPointerTo();
llvm::FunctionType *FTy =
    llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
llvm::InlineAsm *IA =
    llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
CallSite CS = CGF.Builder.CreateCall(IA, {BitBase, BitPos});
return RValue::get(CS.getInstruction());
}
+/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
+/// bits and a bit position and read and optionally modify the bit at that
+/// position. The position index can be arbitrarily large, i.e. it can be larger
+/// than 31 or 63, so we need an indexed load in the general case.
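+/// For example, _bittestandset(p, 100) returns the old value of bit 100 of the
+/// array at p and then sets it; on x86 this lowers to a single bts instruction,
+/// and on other targets to the byte-indexed load/store sequence emitted below.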
+static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID,
+ const CallExpr *E) {
+ Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
+ Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+
+ // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
+ // indexing operation internally. Use them if possible.
+ llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
+ if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
+ return EmitX86BitTestIntrinsic(CGF, BuiltinID, E, BitBase, BitPos);
+
+ // Otherwise, use generic code to load one byte and test the bit. Use all but
+ // the bottom three bits as the array index, and the bottom three bits to form
+ // a mask.
+ // Bit = (BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7))) != 0;
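+ // For example, BitPos = 100 selects byte 12 (100 >> 3) and tests bit 4 within
+ // it (100 & 0x7).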
+ Value *ByteIndex = CGF.Builder.CreateAShr(
+ BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
+ Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
+ Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
+ ByteIndex, "bittest.byteaddr"),
+ CharUnits::One());
+ Value *PosLow =
+ CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
+ llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
+
+ // The updating instructions will need a mask.
+ Value *Mask = nullptr;
+ if (BuiltinID != Builtin::BI_bittest && BuiltinID != Builtin::BI_bittest64) {
+ Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
+ "bittest.mask");
+ }
+
+ // Emit a combined atomicrmw load/store operation for the interlocked
+ // intrinsics.
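+ // atomicrmw returns the value of the byte before the update, so the shared
+ // shift-and-mask code at the end still extracts the original bit.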
+ Value *OldByte = nullptr;
+ switch (BuiltinID) {
+ case Builtin::BI_interlockedbittestandreset:
+ case Builtin::BI_interlockedbittestandreset64:
+ OldByte = CGF.Builder.CreateAtomicRMW(
+ AtomicRMWInst::And, ByteAddr.getPointer(), CGF.Builder.CreateNot(Mask),
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ break;
+ case Builtin::BI_interlockedbittestandset:
+ case Builtin::BI_interlockedbittestandset64:
+ OldByte = CGF.Builder.CreateAtomicRMW(
+ AtomicRMWInst::Or, ByteAddr.getPointer(), Mask,
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ break;
+ default:
+ break;
+ }
+
+ // Emit a plain load for the non-interlocked intrinsics.
+ if (!OldByte) {
+ OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
+ Value *NewByte = nullptr;
+ switch (BuiltinID) {
+ case Builtin::BI_bittest:
+ case Builtin::BI_bittest64:
+ // Don't store anything.
+ break;
+ case Builtin::BI_bittestandcomplement:
+ case Builtin::BI_bittestandcomplement64:
+ NewByte = CGF.Builder.CreateXor(OldByte, Mask);
+ break;
+ case Builtin::BI_bittestandreset:
+ case Builtin::BI_bittestandreset64:
+ NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
+ break;
+ case Builtin::BI_bittestandset:
+ case Builtin::BI_bittestandset64:
+ NewByte = CGF.Builder.CreateOr(OldByte, Mask);
+ break;
+ default:
+ llvm_unreachable("non bittest family builtin");
+ }
+ if (NewByte)
+ CGF.Builder.CreateStore(NewByte, ByteAddr);
+ }
+
+ // However we loaded the old byte, either by plain load or atomicrmw, shift
+ // the bit into the low position and mask it to 0 or 1.
+ Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
+ return RValue::get(CGF.Builder.CreateAnd(
+ ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res"));
+}
+
// Many MSVC builtins are available on both x64 and ARM; to avoid repeating
// code, we handle them here.
enum class CodeGenFunction::MSVCIntrin {
case Builtin::BI_InterlockedXor:
return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
+ case Builtin::BI_bittest64:
case Builtin::BI_bittest:
- return EmitBitTestIntrinsic(*this, E, '\0', 'l');
+ case Builtin::BI_bittestandcomplement64:
case Builtin::BI_bittestandcomplement:
- return EmitBitTestIntrinsic(*this, E, 'c', 'l');
+ case Builtin::BI_bittestandreset64:
case Builtin::BI_bittestandreset:
- return EmitBitTestIntrinsic(*this, E, 'r', 'l');
+ case Builtin::BI_bittestandset64:
case Builtin::BI_bittestandset:
- return EmitBitTestIntrinsic(*this, E, 's', 'l');
case Builtin::BI_interlockedbittestandreset:
- return EmitBitTestIntrinsic(*this, E, 'r', 'l', /*Locked=*/true);
- case Builtin::BI_interlockedbittestandset:
- return EmitBitTestIntrinsic(*this, E, 's', 'l', /*Locked=*/true);
-
- case Builtin::BI_bittest64:
- return EmitBitTestIntrinsic(*this, E, '\0', 'q');
- case Builtin::BI_bittestandcomplement64:
- return EmitBitTestIntrinsic(*this, E, 'c', 'q');
- case Builtin::BI_bittestandreset64:
- return EmitBitTestIntrinsic(*this, E, 'r', 'q');
- case Builtin::BI_bittestandset64:
- return EmitBitTestIntrinsic(*this, E, 's', 'q');
case Builtin::BI_interlockedbittestandreset64:
- return EmitBitTestIntrinsic(*this, E, 'r', 'q', /*Locked=*/true);
case Builtin::BI_interlockedbittestandset64:
- return EmitBitTestIntrinsic(*this, E, 's', 'q', /*Locked=*/true);
+ case Builtin::BI_interlockedbittestandset:
+ return EmitBitTestIntrinsic(*this, BuiltinID, E);
case Builtin::BI__exception_code:
case Builtin::BI_exception_code:
-// RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=X64
+// RUN: %clang_cc1 -fms-extensions -triple thumbv7-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=ARM
+// RUN: %clang_cc1 -fms-extensions -triple aarch64-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=ARM
volatile unsigned char sink = 0;
void test32(long *base, long idx) {
  sink = _bittest(base, idx);
  sink = _bittestandcomplement(base, idx);
  sink = _bittestandreset(base, idx);
  sink = _bittestandset(base, idx);
  sink = _interlockedbittestandreset(base, idx);
  sink = _interlockedbittestandset(base, idx);
}

void test64(__int64 *base, __int64 idx) {
  sink = _bittest64(base, idx);
  sink = _bittestandcomplement64(base, idx);
  sink = _bittestandreset64(base, idx);
  sink = _bittestandset64(base, idx);
  sink = _interlockedbittestandreset64(base, idx);
  sink = _interlockedbittestandset64(base, idx);
}
-// CHECK-LABEL: define dso_local void @test32(i32* %base, i32 %idx)
-// CHECK: call i8 asm sideeffect "btl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btcl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-
-// CHECK-LABEL: define dso_local void @test64(i64* %base, i64 %idx)
-// CHECK: call i8 asm sideeffect "btq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "btcq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64-LABEL: define dso_local void @test32(i32* %base, i32 %idx)
+// X64: call i8 asm sideeffect "btl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "btcl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "lock btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "lock btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+
+// X64-LABEL: define dso_local void @test64(i64* %base, i64 %idx)
+// X64: call i8 asm sideeffect "btq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "btcq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "lock btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "lock btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+
+// ARM-LABEL: define dso_local {{.*}}void @test32(i32* %base, i32 %idx)
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NEWBYTE:[^ ]*]] = xor i8 %[[BYTE]], %[[MASK]]
+// ARM: store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NOTMASK:[^ ]*]] = xor i8 %[[MASK]], -1
+// ARM: %[[NEWBYTE:[^ ]*]] = and i8 %[[BYTE]], %[[NOTMASK]]
+// ARM: store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NEWBYTE:[^ ]*]] = or i8 %[[BYTE]], %[[MASK]]
+// ARM: store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[NOTMASK:[^ ]*]] = xor i8 %[[MASK]], -1
+// ARM: %[[BYTE:[^ ]*]] = atomicrmw and i8* %[[BYTEADDR]], i8 %[[NOTMASK]] seq_cst
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = atomicrmw or i8* %[[BYTEADDR]], i8 %[[MASK]] seq_cst
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM-LABEL: define dso_local {{.*}}void @test64(i64* %base, i64 %idx)
+// ARM: %[[IDXHI:[^ ]*]] = ashr i64 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i64* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i64 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i64 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ... the rest is the same, but with i64 instead of i32.