llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
- SourceLocation Loc = D.getLocStart();
-
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
- SourceLocation &Loc;
bool &IsInParallelRegion;
bool PrevIsInParallelRegion;
public:
- NVPTXPrePostActionTy(SourceLocation &Loc, bool &IsInParallelRegion)
- : Loc(Loc), IsInParallelRegion(IsInParallelRegion) {}
+ NVPTXPrePostActionTy(bool &IsInParallelRegion)
+ : IsInParallelRegion(IsInParallelRegion) {}
void Enter(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitGenericVarsProlog(CGF, Loc);
PrevIsInParallelRegion = IsInParallelRegion;
IsInParallelRegion = true;
}
void Exit(CodeGenFunction &CGF) override {
IsInParallelRegion = PrevIsInParallelRegion;
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitGenericVarsEpilog(CGF);
}
- } Action(Loc, IsInParallelRegion);
+ } Action(IsInParallelRegion);
CodeGen.setAction(Action);
bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
IsInTargetMasterThreadRegion = false;
// parallel region
// CHECK: define {{.*}}void @{{.*}}(i32* noalias {{.*}}, i32* noalias {{.*}}, i32* dereferenceable{{.*}})
-// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
-// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
-// CHECK: [[B_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK: [[B_ADDR:%.+]] = alloca i32,
// CHECK: call {{.*}}[[FOO:@.*foo.*]](i32* dereferenceable{{.*}} [[B_ADDR]])
// CHECK: call {{.*}}[[BAR:@.*bar.*]]()
-// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
// CHECK: ret void
// CHECK: define {{.*}}[[FOO]](i32* dereferenceable{{.*}})
/// outlined function for the second parallel region ///
// CK1: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.+}}, i32* dereferenceable{{.+}})
-// CK1: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
-// CK1: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_TY:%.+]]*
-// CK1: [[C_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i32 0, i32 0
+// CK1-NOT: call i8* @__kmpc_data_sharing_push_stack(
+// CK1: [[C_ADDR:%.+]] = alloca i32,
// CK1: store i32* [[C_ADDR]], i32** %
-// CK1: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CK1-NOT: call void @__kmpc_data_sharing_pop_stack(
/// ========= In the data sharing wrapper function ========= ///
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]])
// CHECK: define internal void [[PARALLEL]](
-// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
-// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]])
+// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
#endif
// CHECK: call void @__kmpc_spmd_kernel_deinit()
-// CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i32* dereferenceable{{.*}})
-// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 8, i16 0)
-// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_TY:%.+]]*
-// CHECK: [[I:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[ARGC_VAL:%.+]] = load i32, i32* %
-// CHECK: [[ARGC:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-// CHECK: store i32 [[ARGC_VAL]], i32* [[ARGC]],
+// CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} [[ARGC:%.+]], i32* dereferenceable{{.*}})
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[ARGC_ADDR:%.+]] = alloca i{{32|64}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[I:%.+]] = alloca i32,
+// CHECK-32: store i32 [[ARGC]], i32* [[ARGC_ADDR]],
+// CHECK-64: store i{{64|32}} [[ARGC]], i{{64|32}}* [[ARGC_ADDR]],
+// CHECK-64: [[ARGC:%.+]] = bitcast i64* [[ARGC_ADDR]] to i32*
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call i32 [[FOO:@.+foo.+]](i32* [[I]])
// CHECK: call i32 [[FOO]](i32* %{{.+}})
-// CHECK: call i32 [[FOO]](i32* [[ARGC]])
+// CHECK-32: call i32 [[FOO]](i32* [[ARGC_ADDR]])
+// CHECK-64: call i32 [[FOO]](i32* [[ARGC]])
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
#endif