// Ignore blockaddress uses.
if (isa<BlockAddress>(UR)) continue;
- // Used by a non-instruction, or not the callee of a function, do not
- // transform.
- if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
- return false;
-
- CallSite CS(cast<Instruction>(UR));
- if (!CS.isCallee(&U))
+ // If no abstract call site was created, we did not understand the use; bail.
+ AbstractCallSite ACS(&U);
+ if (!ACS)
return false;
// Check out all of the potentially constant arguments. Note that we don't
// inspect varargs here.
- CallSite::arg_iterator AI = CS.arg_begin();
Function::arg_iterator Arg = F.arg_begin();
- for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
- ++i, ++AI, ++Arg) {
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++Arg) {
// If this argument is known non-constant, ignore it.
if (ArgumentConstants[i].second)
continue;
- Constant *C = dyn_cast<Constant>(*AI);
+ Value *V = ACS.getCallArgOperand(i);
+ Constant *C = dyn_cast_or_null<Constant>(V);
+
+ // We can only propagate thread-independent values through callbacks.
+ // This differs from direct/indirect call sites, where the caller and the
+ // callee are known to execute on the same thread. For callbacks this is
+ // not guaranteed, so a thread-dependent value could differ between the
+ // caller and the callee, making it invalid to propagate.
+ if (C && ACS.isCallbackCall() && C->isThreadDependent()) {
+ // Argument became non-constant. If all arguments are non-constant now,
+ // give up on this function.
+ if (++NumNonconstant == ArgumentConstants.size())
+ return false;
+
+ ArgumentConstants[i].second = true;
+ continue;
+ }
+
if (C && ArgumentConstants[i].first == nullptr) {
ArgumentConstants[i].first = C; // First constant seen.
} else if (C && ArgumentConstants[i].first == C) {
// Still the constant value we think it is.
- } else if (*AI == &*Arg) {
+ } else if (V == &*Arg) {
// Ignore recursive calls passing argument down.
} else {
// Argument became non-constant. If all arguments are non-constant now,
--- /dev/null
+; RUN: opt -ipconstprop -S < %s | FileCheck %s
+;
+;
+;                   /---------------------------------------------|
+;                   |                /----------------------------|----|
+;                   |                |                /-----------|    |
+;                   V                V                V           |    |
+; void broker(int (*cb0)(int), int (*cb1)(int), int (*cb2)(int), int, int);
+;
+; static int cb0(int zero) {
+; return zero;
+; }
+; static int cb1(int unknown) {
+; return unknown;
+; }
+; static int cb2(int unknown) {
+; cb0(0);
+; return unknown;
+; }
+; static int cb3(int unknown) {
+; return unknown;
+; }
+; static int cb4(int unknown) {
+; return unknown;
+; }
+;
+; void foo() {
+; cb0(0);
+; cb3(1);
+; broker(cb0, cb1, cb0, 0, 1);
+; broker(cb1, cb2, cb2, 0, 1);
+; broker(cb3, cb2, cb3, 0, 1);
+; broker(cb4, cb4, cb4, 0, 1);
+; }
+;
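+; Only @cb0 receives the same constant (0) at every call site, so only its
+; argument is replaced; every other callback sees conflicting values.
+;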
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define internal i32 @cb0(i32 %zero) {
+entry:
+; CHECK: @cb0
+; CHECK-NEXT: entry
+; CHECK-NEXT: ret i32 0
+ ret i32 %zero
+}
+
+define internal i32 @cb1(i32 %unknown) {
+entry:
+; CHECK: ret i32 %unknown
+ ret i32 %unknown
+}
+
+define internal i32 @cb2(i32 %unknown) {
+entry:
+ %call = call i32 @cb0(i32 0)
+; CHECK: ret i32 %unknown
+ ret i32 %unknown
+}
+
+define internal i32 @cb3(i32 %unknown) {
+entry:
+; CHECK: ret i32 %unknown
+ ret i32 %unknown
+}
+
+define internal i32 @cb4(i32 %unknown) {
+entry:
+; CHECK: ret i32 %unknown
+ ret i32 %unknown
+}
+
+define void @foo() {
+entry:
+ %call = call i32 @cb0(i32 0)
+ %call1 = call i32 @cb3(i32 1)
+ call void @broker(i32 (i32)* nonnull @cb0, i32 (i32)* nonnull @cb1, i32 (i32)* nonnull @cb0, i32 0, i32 1)
+ call void @broker(i32 (i32)* nonnull @cb1, i32 (i32)* nonnull @cb2, i32 (i32)* nonnull @cb2, i32 0, i32 1)
+ call void @broker(i32 (i32)* nonnull @cb3, i32 (i32)* nonnull @cb2, i32 (i32)* nonnull @cb3, i32 0, i32 1)
+ call void @broker(i32 (i32)* nonnull @cb4, i32 (i32)* nonnull @cb4, i32 (i32)* nonnull @cb4, i32 0, i32 1)
+ ret void
+}
+
+declare !callback !3 void @broker(i32 (i32)*, i32 (i32)*, i32 (i32)*, i32, i32)
+
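+; Each callback encoding below is !{callee operand index, forwarded operand
+; indices..., i1 varargs-forwarded}: the callees passed as operands 0 and 2
+; receive operand 3, and the callee passed as operand 1 receives operand 4.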
+!0 = !{i64 0, i64 3, i1 false}
+!1 = !{i64 1, i64 4, i1 false}
+!2 = !{i64 2, i64 3, i1 false}
+!3 = !{!0, !2, !1}
--- /dev/null
+; RUN: opt -S -ipconstprop < %s | FileCheck %s
+;
+; void bar(int, float, double);
+;
+; void foo(int N) {
+; float p = 3;
+; double q = 5;
+; N = 7;
+;
+; #pragma omp parallel for firstprivate(q)
+; for (int i = 2; i < N; i++) {
+; bar(i, p, q);
+; }
+; }
+;
+; Verify the constant value of q is propagated into the outlined function.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
+@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 514, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+
+define dso_local void @foo(i32 %N) {
+entry:
+ %N.addr = alloca i32, align 4
+ %p = alloca float, align 4
+ store i32 %N, i32* %N.addr, align 4
+ store float 3.000000e+00, float* %p, align 4
+ store i32 7, i32* %N.addr, align 4
+ call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nonnull %N.addr, float* nonnull %p, i64 4617315517961601024)
+ ret void
+}
+
+define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %N, float* dereferenceable(4) %p, i64 %q) {
+entry:
+ %q.addr = alloca i64, align 8
+ %.omp.lb = alloca i32, align 4
+ %.omp.ub = alloca i32, align 4
+ %.omp.stride = alloca i32, align 4
+ %.omp.is_last = alloca i32, align 4
+; CHECK: store i64 4617315517961601024, i64* %q.addr, align 8
+ store i64 %q, i64* %q.addr, align 8
+ %conv = bitcast i64* %q.addr to double*
+ %tmp = load i32, i32* %N, align 4
+ %sub3 = add nsw i32 %tmp, -3
+ %cmp = icmp sgt i32 %tmp, 2
+ br i1 %cmp, label %omp.precond.then, label %omp.precond.end
+
+omp.precond.then: ; preds = %entry
+ store i32 0, i32* %.omp.lb, align 4
+ store i32 %sub3, i32* %.omp.ub, align 4
+ store i32 1, i32* %.omp.stride, align 4
+ store i32 0, i32* %.omp.is_last, align 4
+ %tmp5 = load i32, i32* %.global_tid., align 4
+ call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull @0, i32 %tmp5, i32 34, i32* nonnull %.omp.is_last, i32* nonnull %.omp.lb, i32* nonnull %.omp.ub, i32* nonnull %.omp.stride, i32 1, i32 1)
+ %tmp6 = load i32, i32* %.omp.ub, align 4
+ %cmp6 = icmp sgt i32 %tmp6, %sub3
+ br i1 %cmp6, label %cond.true, label %cond.false
+
+cond.true: ; preds = %omp.precond.then
+ br label %cond.end
+
+cond.false: ; preds = %omp.precond.then
+ %tmp7 = load i32, i32* %.omp.ub, align 4
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %sub3, %cond.true ], [ %tmp7, %cond.false ]
+ store i32 %cond, i32* %.omp.ub, align 4
+ %tmp8 = load i32, i32* %.omp.lb, align 4
+ br label %omp.inner.for.cond
+
+omp.inner.for.cond: ; preds = %omp.inner.for.inc, %cond.end
+ %.omp.iv.0 = phi i32 [ %tmp8, %cond.end ], [ %add11, %omp.inner.for.inc ]
+ %tmp9 = load i32, i32* %.omp.ub, align 4
+ %cmp8 = icmp sgt i32 %.omp.iv.0, %tmp9
+ br i1 %cmp8, label %omp.inner.for.cond.cleanup, label %omp.inner.for.body
+
+omp.inner.for.cond.cleanup: ; preds = %omp.inner.for.cond
+ br label %omp.inner.for.end
+
+omp.inner.for.body: ; preds = %omp.inner.for.cond
+ %add10 = add nsw i32 %.omp.iv.0, 2
+ %tmp10 = load float, float* %p, align 4
+ %tmp11 = load double, double* %conv, align 8
+ call void @bar(i32 %add10, float %tmp10, double %tmp11)
+ br label %omp.body.continue
+
+omp.body.continue: ; preds = %omp.inner.for.body
+ br label %omp.inner.for.inc
+
+omp.inner.for.inc: ; preds = %omp.body.continue
+ %add11 = add nsw i32 %.omp.iv.0, 1
+ br label %omp.inner.for.cond
+
+omp.inner.for.end: ; preds = %omp.inner.for.cond.cleanup
+ br label %omp.loop.exit
+
+omp.loop.exit: ; preds = %omp.inner.for.end
+ %tmp12 = load i32, i32* %.global_tid., align 4
+ call void @__kmpc_for_static_fini(%struct.ident_t* nonnull @0, i32 %tmp12)
+ br label %omp.precond.end
+
+omp.precond.end: ; preds = %omp.loop.exit, %entry
+ ret void
+}
+
+declare dso_local void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
+
+declare dso_local void @bar(i32, float, double)
+
+declare dso_local void @__kmpc_for_static_fini(%struct.ident_t*, i32)
+
+declare !callback !0 dso_local void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+
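+; The callback encoding states that operand 2 of @__kmpc_fork_call is the
+; callback callee, the callee's first two parameters are unknown (-1, -1),
+; and the remaining variadic operands of the call are forwarded (i1 true).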
+!1 = !{i64 2, i64 -1, i64 -1, i1 true}
+!0 = !{!1}
--- /dev/null
+; RUN: opt -ipconstprop -S < %s | FileCheck %s
+;
+; #include <pthread.h>
+;
+; void *GlobalVPtr;
+;
+; static void *foo(void *arg) { return arg; }
+; static void *bar(void *arg) { return arg; }
+;
+; int main() {
+; pthread_t thread;
+; pthread_create(&thread, NULL, foo, NULL);
+; pthread_create(&thread, NULL, bar, &GlobalVPtr);
+; return 0;
+; }
+;
+; Verify the constant values NULL and &GlobalVPtr are propagated into foo and
+; bar, respectively.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%union.pthread_attr_t = type { i64, [48 x i8] }
+
+@GlobalVPtr = common dso_local global i8* null, align 8
+
+define dso_local i32 @main() {
+entry:
+ %thread = alloca i64, align 8
+ %call = call i32 @pthread_create(i64* nonnull %thread, %union.pthread_attr_t* null, i8* (i8*)* nonnull @foo, i8* null)
+ %call1 = call i32 @pthread_create(i64* nonnull %thread, %union.pthread_attr_t* null, i8* (i8*)* nonnull @bar, i8* bitcast (i8** @GlobalVPtr to i8*))
+ ret i32 0
+}
+
+declare !callback !0 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)
+
+define internal i8* @foo(i8* %arg) {
+entry:
+; CHECK: ret i8* null
+ ret i8* %arg
+}
+
+define internal i8* @bar(i8* %arg) {
+entry:
+; CHECK: ret i8* bitcast (i8** @GlobalVPtr to i8*)
+ ret i8* %arg
+}
+
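+; The callback encoding states that operand 2 of @pthread_create is the
+; callback callee and operand 3 is forwarded to it as its argument; no
+; varargs are forwarded (i1 false).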
+!1 = !{i64 2, i64 3, i1 false}
+!0 = !{!1}
--- /dev/null
+; RUN: opt -ipconstprop -S < %s | FileCheck %s
+;
+; #include <threads.h>
+; thread_local int gtl = 0;
+; int gsh = 0;
+;
+; static int callee(int *thread_local_ptr, int *shared_ptr) {
+; return *thread_local_ptr + *shared_ptr;
+; }
+;
+; void broker(int *, int (*callee)(int *, int *), int *);
+;
+; void caller() {
+; broker(&gtl, callee, &gsh);
+; }
+;
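+; Verify that the shared global &gsh is propagated into @callee while the
+; thread-local &gtl is not, since the callback may run on a different thread.
+;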
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@gtl = dso_local thread_local global i32 0, align 4
+@gsh = dso_local global i32 0, align 4
+
+define internal i32 @callee(i32* %thread_local_ptr, i32* %shared_ptr) {
+entry:
+; CHECK: %tmp = load i32, i32* %thread_local_ptr, align 4
+; CHECK: %tmp1 = load i32, i32* @gsh, align 4
+; CHECK: %add = add nsw i32 %tmp, %tmp1
+ %tmp = load i32, i32* %thread_local_ptr, align 4
+ %tmp1 = load i32, i32* %shared_ptr, align 4
+ %add = add nsw i32 %tmp, %tmp1
+ ret i32 %add
+}
+
+define dso_local void @caller() {
+entry:
+ call void @broker(i32* nonnull @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nonnull @gsh)
+ ret void
+}
+
+declare !callback !0 dso_local void @broker(i32*, i32 (i32*, i32*)*, i32*)
+
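+; The callback encoding states that operand 1 of @broker is the callback
+; callee and operands 0 and 2 are forwarded to it as its two arguments.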
+!1 = !{i64 1, i64 0, i64 2, i1 false}
+!0 = !{!1}