From 644422a4d3f3afaeec80f0183065af9cb894411d Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 17 Jun 2019 23:39:51 +0000 Subject: [PATCH] hwasan: Use bits [3..11) of the ring buffer entry address as the base stack tag. This saves roughly 32 bytes of instructions per function with stack objects and causes us to preserve enough information that we can recover the original tags of all stack variables. Now that stack tags are deterministic, we no longer need to pass -hwasan-generate-tags-with-calls during check-hwasan. This also means that the new stack tag generation mechanism is exercised by check-hwasan. Differential Revision: https://reviews.llvm.org/D63360 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363636 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/HWAddressSanitizer.cpp | 50 ++++++++++++------- .../dbg-declare-tag-offset.ll | 8 +-- .../HWAddressSanitizer/prologue.ll | 7 ++- 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 6d70e3bcbf0..4f6dae0cc40 100644 --- a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -218,7 +218,7 @@ public: Value *getUARTag(IRBuilder<> &IRB, Value *StackTag); Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty); - Value *emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord); + void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord); private: LLVMContext *C; @@ -284,6 +284,7 @@ private: Constant *ShadowGlobal; Value *LocalDynamicShadow = nullptr; + Value *StackBaseTag = nullptr; GlobalValue *ThreadPtrGlobal = nullptr; }; @@ -750,10 +751,16 @@ static unsigned RetagMask(unsigned AllocaNo) { // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these // masks. // The list does not include the value 255, which is used for UAR. - static unsigned FastMasks[] = { - 0, 1, 2, 3, 4, 6, 7, 8, 12, 14, 15, 16, 24, - 28, 30, 31, 32, 48, 56, 60, 62, 63, 64, 96, 112, 120, - 124, 126, 127, 128, 192, 224, 240, 248, 252, 254}; + // + // Because we are more likely to use earlier elements of this list than later + // ones, it is sorted in increasing order of probability of collision with a + // mask allocated (temporally) nearby. The program that generated this list + // can be found at: + // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py + static unsigned FastMasks[] = {0, 128, 64, 192, 32, 96, 224, 112, 240, + 48, 16, 120, 248, 56, 24, 8, 124, 252, + 60, 28, 12, 4, 126, 254, 62, 30, 14, + 6, 2, 127, 63, 31, 15, 7, 3, 1}; return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))]; } @@ -764,6 +771,8 @@ Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) { Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) { if (ClGenerateTagsWithCalls) return getNextTagWithCall(IRB); + if (StackBaseTag) + return StackBaseTag; // FIXME: use addressofreturnaddress (but implement it in aarch64 backend // first). Module *M = IRB.GetInsertBlock()->getParent()->getParent(); @@ -881,13 +890,16 @@ void HWAddressSanitizer::createFrameGlobal(Function &F, GV->setComdat(Comdat); } -Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, - bool WithFrameRecord) { - if (!Mapping.InTls) - return getDynamicShadowNonTls(IRB); +void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) { + if (!Mapping.InTls) { + LocalDynamicShadow = getDynamicShadowNonTls(IRB); + return; + } - if (!WithFrameRecord && TargetTriple.isAndroid()) - return getDynamicShadowIfunc(IRB); + if (!WithFrameRecord && TargetTriple.isAndroid()) { + LocalDynamicShadow = getDynamicShadowIfunc(IRB); + return; + } Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy); assert(SlotPtr); @@ -920,6 +932,8 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong); if (WithFrameRecord) { + StackBaseTag = IRB.CreateAShr(ThreadLong, 3); + // Prepare ring buffer data. auto PC = IRB.CreatePtrToInt(F, IntptrTy); auto GetStackPointerFn = @@ -928,7 +942,7 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, IRB.CreateCall(GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())}), IntptrTy); - // Mix SP and PC. TODO: also add the tag to the mix. + // Mix SP and PC. // Assumptions: // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero) // SP is 0xsssssssssssSSSS0 (4 lower bits are zero) @@ -959,13 +973,12 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, // Get shadow base address by aligning RecordPtr up. // Note: this is not correct if the pointer is already aligned. // Runtime library will make sure this never happens. - Value *ShadowBase = IRB.CreateAdd( + LocalDynamicShadow = IRB.CreateAdd( IRB.CreateOr( ThreadLongMaybeUntagged, ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)), ConstantInt::get(IntptrTy, 1), "hwasan.shadow"); - ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy); - return ShadowBase; + LocalDynamicShadow = IRB.CreateIntToPtr(LocalDynamicShadow, Int8PtrTy); } bool HWAddressSanitizer::instrumentLandingPads( @@ -1115,9 +1128,9 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { Instruction *InsertPt = &*F.getEntryBlock().begin(); IRBuilder<> EntryIRB(InsertPt); - LocalDynamicShadow = emitPrologue(EntryIRB, - /*WithFrameRecord*/ ClRecordStackHistory && - !AllocasToInstrument.empty()); + emitPrologue(EntryIRB, + /*WithFrameRecord*/ ClRecordStackHistory && + !AllocasToInstrument.empty()); bool Changed = false; if (!AllocasToInstrument.empty()) { @@ -1146,6 +1159,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { Changed |= instrumentMemAccess(Inst); LocalDynamicShadow = nullptr; + StackBaseTag = nullptr; return Changed; } diff --git a/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll b/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll index 8474b271d76..b81fa1fd68a 100644 --- a/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll +++ b/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll @@ -13,13 +13,13 @@ entry: %nodebug3 = alloca i8* %a = alloca i8* %b = alloca i8* - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4) + ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 32) call void @llvm.dbg.declare(metadata i8** %a, metadata !12, metadata !DIExpression()), !dbg !14 - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4) + ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 32) call void @llvm.dbg.declare(metadata i8** %a, metadata !12, metadata !DIExpression()), !dbg !14 - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 6) + ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 96) call void @llvm.dbg.declare(metadata i8** %b, metadata !13, metadata !DIExpression()), !dbg !14 - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 6) + ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 96) call void @llvm.dbg.declare(metadata i8** %b, metadata !13, metadata !DIExpression()), !dbg !14 call void @g(i8** %nodebug0, i8** %nodebug1, i8** %nodebug2, i8** %nodebug3, i8** %a, i8** %b) ret void, !dbg !15 diff --git a/test/Instrumentation/HWAddressSanitizer/prologue.ll b/test/Instrumentation/HWAddressSanitizer/prologue.ll index d9913f84f42..f197930253a 100644 --- a/test/Instrumentation/HWAddressSanitizer/prologue.ll +++ b/test/Instrumentation/HWAddressSanitizer/prologue.ll @@ -56,6 +56,7 @@ define void @test_alloca() sanitize_hwaddress { ; CHECK-TLS: %[[B:[^ ]*]] = getelementptr i8, i8* %[[A]], i32 48 ; CHECK-TLS: %[[C:[^ ]*]] = bitcast i8* %[[B]] to i64* ; CHECK-TLS: %[[D:[^ ]*]] = load i64, i64* %[[C]] +; CHECK-TLS: %[[E:[^ ]*]] = ashr i64 %[[D]], 3 ; CHECK-NOHISTORY-NOT: store i64 @@ -68,8 +69,10 @@ define void @test_alloca() sanitize_hwaddress { ; CHECK-HISTORY: %[[D5:[^ ]*]] = and i64 %[[D4]], %[[D3]] ; CHECK-HISTORY: store i64 %[[D5]], i64* %[[C]] -; CHECK-TLS: %[[E:[^ ]*]] = or i64 %[[D]], 4294967295 -; CHECK-TLS: = add i64 %[[E]], 1 +; CHECK-TLS: %[[F:[^ ]*]] = or i64 %[[D]], 4294967295 +; CHECK-TLS: = add i64 %[[F]], 1 + +; CHECK-HISTORY: = xor i64 %[[E]], 0 ; CHECK-NOHISTORY-NOT: store i64 -- 2.40.0