From: Vedant Kumar Date: Fri, 17 Feb 2017 02:03:51 +0000 (+0000) Subject: Retry: [ubsan] Reduce null checking of C++ object pointers (PR27581) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5e121b46e9c2e1f1ed13b89e118e645cb385427f;p=clang Retry: [ubsan] Reduce null checking of C++ object pointers (PR27581) This patch teaches ubsan to insert exactly one null check for the 'this' pointer per method/lambda. Previously, given a load of a member variable from an instance method ('this->x'), ubsan would insert a null check for 'this', and another null check for '&this->x', before allowing the load to occur. Similarly, given a call to a method from another method bound to the same instance ('this->foo()'), ubsan would a redundant null check for 'this'. There is also a redundant null check in the case where the object pointer is a reference ('Ref.foo()'). This patch teaches ubsan to remove the redundant null checks identified above. Testing: check-clang and check-ubsan. I also compiled X86FastISel.cpp with -fsanitize=null using patched/unpatched clangs based on r293572. Here are the number of null checks emitted: ------------------------------------- | Setup | # of null checks | ------------------------------------- | unpatched, -O0 | 21767 | | patched, -O0 | 10758 | ------------------------------------- Changes since the initial commit: don't rely on IRGen of C labels in the test. Differential Revision: https://reviews.llvm.org/D29530 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@295401 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index e5e34a5f3e..c0a7f5d1ad 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -947,15 +947,45 @@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E, E->getType()); } +bool CodeGenFunction::CanElideObjectPointerNullCheck(const Expr *Obj) { + if (isa(Obj)) + return true; + + const Expr *Base = Obj; + while (!isa(Base)) { + // The result of a dynamic_cast can be null. + if (isa(Base)) + return false; + + if (const auto *CE = dyn_cast(Base)) { + Base = CE->getSubExpr(); + } else if (const auto *PE = dyn_cast(Base)) { + Base = PE->getSubExpr(); + } else if (const auto *UO = dyn_cast(Base)) { + if (UO->getOpcode() == UO_Extension) + Base = UO->getSubExpr(); + else + return false; + } else { + return false; + } + } + return true; +} + LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) { LValue LV; if (SanOpts.has(SanitizerKind::ArrayBounds) && isa(E)) LV = EmitArraySubscriptExpr(cast(E), /*Accessed*/true); else LV = EmitLValue(E); - if (!isa(E) && !LV.isBitField() && LV.isSimple()) + if (!isa(E) && !LV.isBitField() && LV.isSimple()) { + bool SkipNullCheck = false; + if (const auto *ME = dyn_cast(E)) + SkipNullCheck = CanElideObjectPointerNullCheck(ME->getBase()); EmitTypeCheck(TCK, E->getExprLoc(), LV.getPointer(), - E->getType(), LV.getAlignment()); + E->getType(), LV.getAlignment(), SkipNullCheck); + } return LV; } @@ -3335,7 +3365,9 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { AlignmentSource AlignSource; Address Addr = EmitPointerWithAlignment(BaseExpr, &AlignSource); QualType PtrTy = BaseExpr->getType()->getPointeeType(); - EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy); + bool SkipNullCheck = CanElideObjectPointerNullCheck(BaseExpr); + EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy, + /*Alignment=*/CharUnits::Zero(), SkipNullCheck); BaseLV = MakeAddrLValue(Addr, PtrTy, AlignSource); } else BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess); diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index ebe0841b3c..1c6a577a95 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -290,10 +290,15 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (CE) CallLoc = CE->getExprLoc(); - EmitTypeCheck(isa(CalleeDecl) - ? CodeGenFunction::TCK_ConstructorCall - : CodeGenFunction::TCK_MemberCall, - CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent())); + bool SkipNullCheck = false; + if (const auto *CMCE = dyn_cast(CE)) + SkipNullCheck = + CanElideObjectPointerNullCheck(CMCE->getImplicitObjectArgument()); + EmitTypeCheck( + isa(CalleeDecl) ? CodeGenFunction::TCK_ConstructorCall + : CodeGenFunction::TCK_MemberCall, + CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent()), + /*Alignment=*/CharUnits::Zero(), SkipNullCheck); // FIXME: Uses of 'MD' past this point need to be audited. We may need to use // 'CalleeDecl' instead. diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 00d5b5fe68..ddcdc030d9 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -948,6 +948,11 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // fast register allocator would be happier... CXXThisValue = CXXABIThisValue; } + + // Sanitize the 'this' pointer once per function, if it's available. + if (CXXThisValue) + EmitTypeCheck(TCK_MemberAccess, Loc, CXXThisValue, + MD->getThisType(getContext())); } // If any of the arguments have a variably modified type, make sure to diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index b830df73ca..406e3db796 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2030,6 +2030,9 @@ public: llvm::BlockAddress *GetAddrOfLabel(const LabelDecl *L); llvm::BasicBlock *GetIndirectGotoBlock(); + /// Check if the null check for \p ObjectPointer can be skipped. + static bool CanElideObjectPointerNullCheck(const Expr *ObjectPointer); + /// EmitNullInitialization - Generate code to set a value of the given type to /// null, If the type contains data member pointers, they will be initialized /// to -1 in accordance with the Itanium C++ ABI. diff --git a/test/CodeGen/catch-undef-behavior.c b/test/CodeGen/catch-undef-behavior.c index d7a26f8a7d..d3fc17620c 100644 --- a/test/CodeGen/catch-undef-behavior.c +++ b/test/CodeGen/catch-undef-behavior.c @@ -1,6 +1,5 @@ // RUN: %clang_cc1 -fsanitize=alignment,null,object-size,shift-base,shift-exponent,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift-base,shift-exponent,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-UBSAN // RUN: %clang_cc1 -fsanitize-trap=alignment,null,object-size,shift-base,shift-exponent,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift-base,shift-exponent,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize=alignment,null,object-size,shift-base,shift-exponent,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift-base,shift-exponent,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-TRAP -// RUN: %clang_cc1 -fsanitize=null -fsanitize-recover=null -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-NULL // RUN: %clang_cc1 -fsanitize=signed-integer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-OVERFLOW // CHECK-UBSAN: @[[INT:.*]] = private unnamed_addr constant { i16, i16, [6 x i8] } { i16 0, i16 11, [6 x i8] c"'int'\00" } @@ -30,25 +29,20 @@ // CHECK-UBSAN: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 10 {{.*}} @[[FP16]], {{.*}} } // CHECK-UBSAN: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 10 {{.*}} @{{.*}} } -// CHECK-NULL: @[[LINE_100:.*]] = private unnamed_addr global {{.*}}, i32 100, i32 5 {{.*}} - // PR6805 // CHECK-COMMON-LABEL: @foo -// CHECK-NULL-LABEL: @foo void foo() { union { int i; } u; - // CHECK-COMMON: %[[CHECK0:.*]] = icmp ne {{.*}}* %[[PTR:.*]], null - // CHECK-COMMON: %[[I8PTR:.*]] = bitcast i32* %[[PTR]] to i8* + // CHECK-COMMON: %[[I8PTR:.*]] = bitcast i32* %[[PTR:.*]] to i8* // CHECK-COMMON-NEXT: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* %[[I8PTR]], i1 false) - // CHECK-COMMON-NEXT: %[[CHECK1:.*]] = icmp uge i64 %[[SIZE]], 4 + // CHECK-COMMON-NEXT: %[[CHECK0:.*]] = icmp uge i64 %[[SIZE]], 4 // CHECK-COMMON: %[[PTRTOINT:.*]] = ptrtoint {{.*}}* %[[PTR]] to i64 // CHECK-COMMON-NEXT: %[[MISALIGN:.*]] = and i64 %[[PTRTOINT]], 3 - // CHECK-COMMON-NEXT: %[[CHECK2:.*]] = icmp eq i64 %[[MISALIGN]], 0 + // CHECK-COMMON-NEXT: %[[CHECK1:.*]] = icmp eq i64 %[[MISALIGN]], 0 - // CHECK-COMMON: %[[CHECK01:.*]] = and i1 %[[CHECK0]], %[[CHECK1]] - // CHECK-COMMON-NEXT: %[[OK:.*]] = and i1 %[[CHECK01]], %[[CHECK2]] + // CHECK-COMMON: %[[OK:.*]] = and i1 %[[CHECK0]], %[[CHECK1]] // CHECK-UBSAN: br i1 %[[OK]], {{.*}} !prof ![[WEIGHT_MD:.*]], !nosanitize // CHECK-TRAP: br i1 %[[OK]], {{.*}} @@ -58,11 +52,6 @@ void foo() { // CHECK-TRAP: call void @llvm.trap() [[NR_NUW:#[0-9]+]] // CHECK-TRAP-NEXT: unreachable - - // With -fsanitize=null, only perform the null check. - // CHECK-NULL: %[[NULL:.*]] = icmp ne {{.*}}, null - // CHECK-NULL: br i1 %[[NULL]] - // CHECK-NULL: call void @__ubsan_handle_type_mismatch_v1(i8* bitcast ({{.*}} @[[LINE_100]] to i8*), i64 %{{.*}}) #line 100 u.i=1; } diff --git a/test/CodeGen/sanitize-recover.c b/test/CodeGen/sanitize-recover.c index dd8734e971..6cf7af84ba 100644 --- a/test/CodeGen/sanitize-recover.c +++ b/test/CodeGen/sanitize-recover.c @@ -19,20 +19,17 @@ void test() { void foo() { union { int i; } u; u.i=1; - // PARTIAL: %[[CHECK0:.*]] = icmp ne {{.*}}* %[[PTR:.*]], null - // PARTIAL: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false) - // PARTIAL-NEXT: %[[CHECK1:.*]] = icmp uge i64 %[[SIZE]], 4 + // PARTIAL-NEXT: %[[CHECK0:.*]] = icmp uge i64 %[[SIZE]], 4 // PARTIAL: %[[MISALIGN:.*]] = and i64 {{.*}}, 3 - // PARTIAL-NEXT: %[[CHECK2:.*]] = icmp eq i64 %[[MISALIGN]], 0 + // PARTIAL-NEXT: %[[CHECK1:.*]] = icmp eq i64 %[[MISALIGN]], 0 - // PARTIAL: %[[CHECK02:.*]] = and i1 %[[CHECK0]], %[[CHECK2]] - // PARTIAL-NEXT: %[[CHECK012:.*]] = and i1 %[[CHECK02]], %[[CHECK1]] + // PARTIAL: %[[CHECK01:.*]] = and i1 %[[CHECK1]], %[[CHECK0]] - // PARTIAL: br i1 %[[CHECK012]], {{.*}} !prof ![[WEIGHT_MD:.*]], !nosanitize + // PARTIAL: br i1 %[[CHECK01]], {{.*}} !nosanitize + // PARTIAL: br i1 %[[CHECK1]], {{.*}} !nosanitize - // PARTIAL: br i1 %[[CHECK02]], {{.*}} // PARTIAL: call void @__ubsan_handle_type_mismatch_v1_abort( // PARTIAL-NEXT: unreachable // PARTIAL: call void @__ubsan_handle_type_mismatch_v1( diff --git a/test/CodeGenCXX/ubsan-suppress-null-checks.cpp b/test/CodeGenCXX/ubsan-suppress-null-checks.cpp new file mode 100644 index 0000000000..967c08195f --- /dev/null +++ b/test/CodeGenCXX/ubsan-suppress-null-checks.cpp @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-darwin10 -emit-llvm -o - %s -fsanitize=null | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-darwin10 -emit-llvm -o - %s -fsanitize=null -DCHECK_LAMBDA | FileCheck %s --check-prefix=LAMBDA + +struct A { + int foo; + + // CHECK-LABEL: define linkonce_odr void @_ZN1A10do_nothingEv + void do_nothing() { + // CHECK: icmp ne %struct.A* %[[THIS1:[a-z0-9]+]], null, !nosanitize + // CHECK: ptrtoint %struct.A* %[[THIS1]] to i64, !nosanitize + // CHECK-NEXT: call void @__ubsan_handle_type_mismatch + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + // CHECK: ret void + } + +#ifdef CHECK_LAMBDA + // LAMBDA-LABEL: define linkonce_odr void @_ZN1A22do_nothing_with_lambdaEv + void do_nothing_with_lambda() { + // LAMBDA: icmp ne %struct.A* %[[THIS2:[a-z0-9]+]], null, !nosanitize + // LAMBDA: ptrtoint %struct.A* %[[THIS2]] to i64, !nosanitize + // LAMBDA-NEXT: call void @__ubsan_handle_type_mismatch + + auto f = [&] { + foo = 0; + }; + f(); + + // LAMBDA: ret void + } + +// Check the IR for the lambda: +// +// LAMBDA-LABEL: define linkonce_odr void @_ZZN1A22do_nothing_with_lambdaEvENKUlvE_clEv +// LAMBDA: call void @__ubsan_handle_type_mismatch +// LAMBDA-NOT: call void @__ubsan_handle_type_mismatch +// LAMBDA: ret void +#endif + + // CHECK-LABEL: define linkonce_odr i32 @_ZN1A11load_memberEv + int load_member() { + // CHECK: icmp ne %struct.A* %[[THIS3:[a-z0-9]+]], null, !nosanitize + // CHECK: ptrtoint %struct.A* %[[THIS3]] to i64, !nosanitize + // CHECK-NEXT: call void @__ubsan_handle_type_mismatch + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + return foo; + // CHECK: ret i32 + } + + // CHECK-LABEL: define linkonce_odr i32 @_ZN1A11call_methodEv + int call_method() { + // CHECK: icmp ne %struct.A* %[[THIS4:[a-z0-9]+]], null, !nosanitize + // CHECK: ptrtoint %struct.A* %[[THIS4]] to i64, !nosanitize + // CHECK-NEXT: call void @__ubsan_handle_type_mismatch + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + return load_member(); + // CHECK: ret i32 + } + + // CHECK-LABEL: define linkonce_odr void @_ZN1A15assign_member_1Ev + void assign_member_1() { + // CHECK: icmp ne %struct.A* %[[THIS5:[a-z0-9]+]], null, !nosanitize + // CHECK: ptrtoint %struct.A* %[[THIS5]] to i64, !nosanitize + // CHECK-NEXT: call void @__ubsan_handle_type_mismatch + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + foo = 0; + // CHECK: ret void + } + + // CHECK-LABEL: define linkonce_odr void @_ZN1A15assign_member_2Ev + void assign_member_2() { + // CHECK: icmp ne %struct.A* %[[THIS6:[a-z0-9]+]], null, !nosanitize + // CHECK: ptrtoint %struct.A* %[[THIS6]] to i64, !nosanitize + // CHECK-NEXT: call void @__ubsan_handle_type_mismatch + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + (__extension__ (this))->foo = 0; + // CHECK: ret void + } + + // CHECK-LABEL: define linkonce_odr void @_ZNK1A15assign_member_3Ev + void assign_member_3() const { + // CHECK: icmp ne %struct.A* %[[THIS7:[a-z0-9]+]], null, !nosanitize + // CHECK: ptrtoint %struct.A* %[[THIS7]] to i64, !nosanitize + // CHECK-NEXT: call void @__ubsan_handle_type_mismatch + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + const_cast(this)->foo = 0; + // CHECK: ret void + } + + // CHECK-LABEL: define linkonce_odr i32 @_ZN1A22call_through_referenceERS_ + static int call_through_reference(A &a) { + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + return a.load_member(); + // CHECK: ret i32 + } + + // CHECK-LABEL: define linkonce_odr i32 @_ZN1A20call_through_pointerEPS_ + static int call_through_pointer(A *a) { + // CHECK: call void @__ubsan_handle_type_mismatch + return a->load_member(); + // CHECK: ret i32 + } +}; + +struct B { + operator A*() const { return nullptr; } + + // CHECK-LABEL: define linkonce_odr i32 @_ZN1B11load_memberEv + static int load_member() { + // Null-check &b before converting it to an A*. + // CHECK: call void @__ubsan_handle_type_mismatch + // + // Null-check the result of the conversion before using it. + // CHECK: call void @__ubsan_handle_type_mismatch + // + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + B b; + return static_cast(b)->load_member(); + // CHECK: ret i32 + } +}; + +struct Base { + int foo; + + virtual int load_member_1() = 0; +}; + +struct Derived : public Base { + int bar; + + // CHECK-LABEL: define linkonce_odr i32 @_ZN7Derived13load_member_2Ev + int load_member_2() { + // CHECK: icmp ne %struct.Derived* %[[THIS8:[a-z0-9]+]], null, !nosanitize + // CHECK: ptrtoint %struct.Derived* %[[THIS8]] to i64, !nosanitize + // CHECK-NEXT: call void @__ubsan_handle_type_mismatch + // + // Null-check the result of the cast before using it. + // CHECK: call void @__ubsan_handle_type_mismatch + // + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + return dynamic_cast(this)->load_member_1(); + // CHECK: ret i32 + } + + // CHECK-LABEL: define linkonce_odr i32 @_ZN7Derived13load_member_3Ev + int load_member_3() { + // CHECK: icmp ne %struct.Derived* %[[THIS9:[a-z0-9]+]], null, !nosanitize + // CHECK: ptrtoint %struct.Derived* %[[THIS9]] to i64, !nosanitize + // CHECK-NEXT: call void @__ubsan_handle_type_mismatch + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + return reinterpret_cast(static_cast(this))->foo; + // CHECK: ret i32 + } + + // CHECK-LABEL: define linkonce_odr i32 @_ZN7Derived13load_member_1Ev + int load_member_1() override { + // CHECK: icmp ne %struct.Derived* %[[THIS10:[a-z0-9]+]], null, !nosanitize + // CHECK: ptrtoint %struct.Derived* %[[THIS10]] to i64, !nosanitize + // CHECK-NEXT: call void @__ubsan_handle_type_mismatch + // CHECK-NOT: call void @__ubsan_handle_type_mismatch + return foo + bar; + // CHECK: ret i32 + } +}; + +void force_irgen() { + A *a; + a->do_nothing(); +#ifdef CHECK_LAMBDA + a->do_nothing_with_lambda(); +#endif + a->load_member(); + a->call_method(); + a->assign_member_1(); + a->assign_member_2(); + a->assign_member_3(); + A::call_through_reference(*a); + A::call_through_pointer(a); + + B::load_member(); + + Base *b = new Derived; + b->load_member_1(); + + Derived *d; + d->load_member_2(); + d->load_member_3(); +}