From: Nemanja Ivanovic Date: Thu, 27 Oct 2016 06:23:57 +0000 (+0000) Subject: [PPC] add vector byte reverse functions to altivec.h X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f1626ac6b7bd8e6164adc97988935a1364d2497f;p=clang [PPC] add vector byte reverse functions to altivec.h This patch corresponds to review https://reviews.llvm.org/D25915. Committing on behalf of Sean Fertile. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@285268 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h index 6deef6aabb..94e0e083e9 100644 --- a/lib/Headers/altivec.h +++ b/lib/Headers/altivec.h @@ -15125,6 +15125,121 @@ static inline __ATTRS_o_ai vector double vec_reve(vector double __a) { } #endif +/* vec_revb */ +static __inline__ vector bool char __ATTRS_o_ai +vec_revb(vector bool char __a) { + return __a; +} + +static __inline__ vector signed char __ATTRS_o_ai +vec_revb(vector signed char __a) { + return __a; +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_revb(vector unsigned char __a) { + return __a; +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_revb(vector bool short __a) { + vector unsigned char __indices = + { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }; + return vec_perm(__a, __a, __indices); +} + +static __inline__ vector signed short __ATTRS_o_ai +vec_revb(vector signed short __a) { + vector unsigned char __indices = + { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }; + return vec_perm(__a, __a, __indices); +} + +static __inline__ vector unsigned short __ATTRS_o_ai +vec_revb(vector unsigned short __a) { + vector unsigned char __indices = + { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }; + return vec_perm(__a, __a, __indices); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_revb(vector bool int __a) { + vector unsigned char __indices = + { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; + return vec_perm(__a, __a, __indices); +} + +static __inline__ vector signed int __ATTRS_o_ai +vec_revb(vector signed int __a) { + vector unsigned char __indices = + { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; + return vec_perm(__a, __a, __indices); +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_revb(vector unsigned int __a) { + vector unsigned char __indices = + { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; + return vec_perm(__a, __a, __indices); +} + +static __inline__ vector float __ATTRS_o_ai +vec_revb(vector float __a) { + vector unsigned char __indices = + { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; + return vec_perm(__a, __a, __indices); +} + +#ifdef __VSX__ +static __inline__ vector bool long long __ATTRS_o_ai +vec_revb(vector bool long long __a) { + vector unsigned char __indices = + { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }; + return vec_perm(__a, __a, __indices); +} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_revb(vector signed long long __a) { + vector unsigned char __indices = + { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }; + return vec_perm(__a, __a, __indices); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_revb(vector unsigned long long __a) { + vector unsigned char __indices = + { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }; + return vec_perm(__a, __a, __indices); +} + +static __inline__ vector double __ATTRS_o_ai +vec_revb(vector double __a) { + vector unsigned char __indices = + { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 
12, 11, 10, 9, 8 }; + return vec_perm(__a, __a, __indices); +} +#endif /* End __VSX__ */ + +#if defined(__POWER8_VECTOR__) && defined(__powerpc64__) +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_revb(vector signed __int128 __a) { + vector unsigned char __indices = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + return (vector signed __int128)vec_perm((vector signed int)__a, + (vector signed int)__a, + __indices); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_revb(vector unsigned __int128 __a) { + vector unsigned char __indices = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + return (vector unsigned __int128)vec_perm((vector signed int)__a, + (vector signed int)__a, + __indices); +} +#endif /* END __POWER8_VECTOR__ && __powerpc64__ */ + #undef __ATTRS_o_ai #endif /* __ALTIVEC_H */ diff --git a/test/CodeGen/builtins-ppc-altivec.c b/test/CodeGen/builtins-ppc-altivec.c index 895938cb44..8f0de015d7 100644 --- a/test/CodeGen/builtins-ppc-altivec.c +++ b/test/CodeGen/builtins-ppc-altivec.c @@ -8997,9 +8997,10 @@ void test7() { // CHECK-LE: @llvm.ppc.altivec.vcmpgefp.p(i32 2 } +/* ------------------------------ optional ---------------------------------- */ void test8() { - // CHECK-LABEL: define void @test8 - // CHECK-LE-LABEL: define void @test8 +// CHECK-LABEL: define void @test8 +// CHECK-LE-LABEL: define void @test8 res_vbc = vec_reve(vbc); // CHECK: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> @@ -9040,4 +9041,89 @@ void test8() { // CHECK: shufflevector <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x i32> // CHECK-LE: shufflevector <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x i32> + res_vbc = vec_revb(vbc); +// CHECK: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vbc, align 16 +// CHECK: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16 +// CHECK: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16 +// CHECK: store <16 x i8> [[T3]], <16 x i8>* @res_vbc, align 16 +// CHECK-LE: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vbc, align 16 +// CHECK-LE: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16 +// CHECK-LE: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16 +// CHECK-LE: store <16 x i8> [[T3]], <16 x i8>* @res_vbc, align 16 + + res_vsc = vec_revb(vsc); +// CHECK: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vsc, align 16 +// CHECK: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16 +// CHECK: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16 +// CHECK: store <16 x i8> [[T3]], <16 x i8>* @res_vsc, align 16 +// CHECK-LE: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vsc, align 16 +// CHECK-LE: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16 +// CHECK-LE: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16 +// CHECK-LE: store <16 x i8> [[T3]], <16 x i8>* @res_vsc, align 16 + + res_vuc = vec_revb(vuc); +// CHECK: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vuc, align 16 +// CHECK: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16 +// CHECK: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16 +// CHECK: store <16 x i8> [[T3]], <16 x i8>* @res_vuc, align 16 +// CHECK-LE: [[T1:%.+]] = load <16 x i8>, <16 x i8>* @vuc, align 16 +// CHECK-LE: store <16 x i8> [[T1]], <16 x i8>* [[T2:%.+]], align 16 +// CHECK-LE: [[T3:%.+]] = load <16 x i8>, <16 x i8>* [[T2]], align 16 +// CHECK-LE: store <16 x i8> [[T3]], <16 x i8>* @res_vuc, align 16 + + res_vbs = vec_revb(vbs); +// CHECK: 
store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) + + res_vs = vec_revb(vs); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) + + res_vus = vec_revb(vus); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) + + res_vbi = vec_revb(vbi); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) + + res_vi = vec_revb(vi); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) + + res_vui = vec_revb(vui); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) + + res_vf = vec_revb(vf); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) } diff --git a/test/CodeGen/builtins-ppc-quadword.c b/test/CodeGen/builtins-ppc-quadword.c index f381642c42..2a6bbaeb9f 100644 --- a/test/CodeGen/builtins-ppc-quadword.c +++ b/test/CodeGen/builtins-ppc-quadword.c @@ -157,4 +157,12 @@ void test1() { // CHECK-LE: @llvm.ppc.altivec.vsubecuq // CHECK-PPC: error: assigning to '__vector unsigned __int128' (vector of 1 'unsigned __int128' value) from incompatible type 'int' + 
res_vulll = vec_revb(vulll); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK_PPC: error: call to 'vec_revb' is ambiguous } diff --git a/test/CodeGen/builtins-ppc-vsx.c b/test/CodeGen/builtins-ppc-vsx.c index 481d7dc44e..bacc70a62f 100644 --- a/test/CodeGen/builtins-ppc-vsx.c +++ b/test/CodeGen/builtins-ppc-vsx.c @@ -1096,4 +1096,36 @@ void test1() { res_vd = vec_reve(vd); // CHECK: shufflevector <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x i32> // CHECK-LE: shufflevector <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x i32> + + res_vbll = vec_revb(vbll); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) + + res_vsll = vec_revb(vsll); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) + + res_vull = vec_revb(vull); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) + + res_vd = vec_revb(vd); +// CHECK: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> , <16 x i8>* {{%.+}}, align 16 +// CHECK-LE: xor <16 x i8> +// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) }
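
For readers who want to try the new interface, below is a minimal usage sketch. It is not part of the patch: it assumes a PowerPC target compiled with -maltivec, and the variable names and element values are purely illustrative.

/* Usage sketch only -- not part of the patch.  Assumes a PowerPC
 * target built with -maltivec; values are illustrative. */
#include <altivec.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  vector unsigned int v = { 0x01020304, 0x05060708,
                            0x090A0B0C, 0x0D0E0F10 };

  /* vec_revb reverses the bytes within each element, so an element
   * holding 0x01020304 should come back as 0x04030201. */
  vector unsigned int r = vec_revb(v);

  /* Copy the result into a plain array for printing. */
  unsigned int out[4];
  memcpy(out, &r, sizeof(out));
  for (int i = 0; i < 4; ++i)
    printf("0x%08X\n", out[i]);
  return 0;
}

Note that the char overloads added above are deliberately identity functions, since reversing the bytes of a one-byte element is a no-op, while the long long, double, and __int128 overloads are only available under the __VSX__ and __POWER8_VECTOR__/__powerpc64__ guards shown in the diff.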