From c6caf602de7d07d4ba7669f2e4264d1bd9e867fd Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic
Date: Fri, 11 Nov 2016 22:34:44 +0000
Subject: [PATCH] [PowerPC] Implement remaining permute builtins in altivec.h - Clang portion

This patch corresponds to review:
https://reviews.llvm.org/D26479

It adds the remaining vector permute/rotate builtins to altivec.h.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@286650 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review notes (ignored by git-am, not part of the commit message):
  * test81 in builtins-ppc-p9vector.c is declared as returning vector float,
    the type returned by the vec_pack(vector double, vector double) overload
    added to altivec.h below (it was declared as vector double).
  * test81 uses the CHECK-BE/CHECK prefix pair, like test74-test80 in the
    same file, instead of CHECK/CHECK-LE.
  Neither change alters the number of lines in any hunk.

 include/clang/Basic/BuiltinsPPC.def  |   8 ++
 lib/Headers/altivec.h                | 129 +++++++++++++++++++++++++++
 test/CodeGen/builtins-ppc-crypto.c   |  24 +++++
 test/CodeGen/builtins-ppc-p8vector.c |  20 +++++
 test/CodeGen/builtins-ppc-p9vector.c |  99 +++++++++++++++++++-
 5 files changed, 279 insertions(+), 1 deletion(-)

diff --git a/include/clang/Basic/BuiltinsPPC.def b/include/clang/Basic/BuiltinsPPC.def
index 07d01388c8..eb502f5dda 100644
--- a/include/clang/Basic/BuiltinsPPC.def
+++ b/include/clang/Basic/BuiltinsPPC.def
@@ -293,6 +293,12 @@ BUILTIN(__builtin_altivec_vabsduw, "V4UiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vslv, "V16UcV16UcV16Uc", "")
 BUILTIN(__builtin_altivec_vsrv, "V16UcV16UcV16Uc", "")
 
+// P9 Vector rotate built-ins
+BUILTIN(__builtin_altivec_vrlwmi, "V4UiV4UiV4UiV4Ui", "")
+BUILTIN(__builtin_altivec_vrldmi, "V2ULLiV2ULLiV2ULLiV2ULLi", "")
+BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "")
+BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "")
+
 // VSX built-ins.
 
 BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
@@ -390,6 +396,8 @@ BUILTIN(__builtin_vsx_xvcvsxdsp, "V4fV2SLLi", "")
 BUILTIN(__builtin_vsx_xvcvuxdsp, "V4fV2ULLi", "")
 BUILTIN(__builtin_vsx_xvcvdpsp, "V4fV2d", "")
 
+BUILTIN(__builtin_vsx_xvcvsphp, "V4fV4f", "")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")
diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h
index 68f79aa79b..55617fb7b6 100644
--- a/lib/Headers/altivec.h
+++ b/lib/Headers/altivec.h
@@ -4815,6 +4815,34 @@ vec_mergee(vector unsigned int __a, vector unsigned int __b) {
                                          0x18, 0x19, 0x1A, 0x1B));
 }
 
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_mergee(vector bool long long __a, vector bool long long __b) {
+  return vec_mergeh(__a, __b);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_mergee(vector signed long long __a, vector signed long long __b) {
+  return vec_mergeh(__a, __b);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_mergee(vector unsigned long long __a, vector unsigned long long __b) {
+  return vec_mergeh(__a, __b);
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_mergee(vector float __a, vector float __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+                                         0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B,
+                                         0x18, 0x19, 0x1A, 0x1B));
+}
+
+static __inline__ vector double __ATTRS_o_ai
+vec_mergee(vector double __a, vector double __b) {
+  return vec_mergeh(__a, __b);
+}
+
 /* vec_mergeo */
 
 static __inline__ vector bool int __ATTRS_o_ai vec_mergeo(vector bool int __a,
@@ -4841,6 +4869,34 @@ vec_mergeo(vector unsigned int __a, vector unsigned int __b) {
                                          0x1C, 0x1D, 0x1E, 0x1F));
 }
 
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_mergeo(vector bool long long __a, vector bool long long __b) {
+  return vec_mergel(__a, __b);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_mergeo(vector signed long long __a, vector signed long long __b) {
+  return vec_mergel(__a, __b);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_mergeo(vector unsigned long long __a, vector unsigned long long __b) {
+  return vec_mergel(__a, __b);
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_mergeo(vector float __a, vector float __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x14, 0x15,
+                                         0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F,
+                                         0x1C, 0x1D, 0x1E, 0x1F));
+}
+
+static __inline__ vector double __ATTRS_o_ai
+vec_mergeo(vector double __a, vector double __b) {
+  return vec_mergel(__a, __b);
+}
+
 #endif
 
 /* vec_mfvscr */
@@ -6548,8 +6604,25 @@ vec_pack(vector bool long long __a, vector bool long long __b) {
 #endif
 }
 
+static __inline__ vector float __ATTRS_o_ai
+vec_pack(vector double __a, vector double __b) {
+  return (vector float) (__a[0], __a[1], __b[0], __b[1]);
+}
+#endif
+
+#ifdef __POWER9_VECTOR__
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_pack_to_short_fp32(vector float __a, vector float __b) {
+  vector float __resa = __builtin_vsx_xvcvsphp(__a);
+  vector float __resb = __builtin_vsx_xvcvsphp(__b);
+#ifdef __LITTLE_ENDIAN__
+  return (vector unsigned short)vec_mergee(__resa, __resb);
+#else
+  return (vector unsigned short)vec_mergeo(__resa, __resb);
 #endif
+}
+#endif
 
 /* vec_vpkuhum */
 
 #define __builtin_altivec_vpkuhum vec_vpkuhum
@@ -7336,6 +7409,34 @@ vec_rl(vector unsigned long long __a, vector unsigned long long __b) {
 }
 #endif
 
+/* vec_rlmi */
+#ifdef __POWER9_VECTOR__
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_rlmi(vector unsigned int __a, vector unsigned int __b,
+         vector unsigned int __c) {
+  return __builtin_altivec_vrlwmi(__a, __c, __b);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_rlmi(vector unsigned long long __a, vector unsigned long long __b,
+         vector unsigned long long __c) {
+  return __builtin_altivec_vrldmi(__a, __c, __b);
+}
+
+/* vec_rlnm */
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_rlnm(vector unsigned int __a, vector unsigned int __b,
+         vector unsigned int __c) {
+  return __builtin_altivec_vrlwnm(__a, __b) & __c;
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_rlnm(vector unsigned long long __a, vector unsigned long long __b,
+         vector unsigned long long __c) {
+  return __builtin_altivec_vrldnm(__a, __b) & __c;
+}
+#endif
+
 /* vec_vrlb */
 
 static __inline__ vector signed char __ATTRS_o_ai
@@ -11282,6 +11383,11 @@ vec_unpackh(vector bool int __a) {
   return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a);
 #endif
 }
+
+static __inline__ vector double __ATTRS_o_ai
+vec_unpackh(vector float __a) {
+  return (vector double)(__a[0], __a[1]);
+}
 #endif
 
 /* vec_vupkhsb */
@@ -11416,6 +11522,11 @@ vec_unpackl(vector bool int __a) {
   return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a);
 #endif
 }
+
+static __inline__ vector double __ATTRS_o_ai
+vec_unpackl(vector float __a) {
+  return (vector double)(__a[2], __a[3]);
+}
 #endif
 
 /* vec_vupklsb */
@@ -15600,6 +15711,24 @@ __builtin_crypto_vncipherlast(vector unsigned long long __a,
 #endif
 
 #ifdef __POWER8_VECTOR__
+static __inline__ vector bool char __ATTRS_o_ai
+vec_permxor(vector bool char __a, vector bool char __b,
+            vector bool char __c) {
+  return __builtin_altivec_crypto_vpermxor(__a, __b, __c);
+}
+
+static __inline__ vector signed char __ATTRS_o_ai
+vec_permxor(vector signed char __a, vector signed char __b,
+            vector signed char __c) {
+  return __builtin_altivec_crypto_vpermxor(__a, __b, __c);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_permxor(vector unsigned char __a, vector unsigned char __b,
+            vector unsigned char __c) {
+  return __builtin_altivec_crypto_vpermxor(__a, __b, __c);
+}
+
 static __inline__ vector unsigned char __ATTRS_o_ai
 __builtin_crypto_vpermxor(vector unsigned char __a, vector unsigned char __b,
                           vector unsigned char __c) {
diff --git a/test/CodeGen/builtins-ppc-crypto.c b/test/CodeGen/builtins-ppc-crypto.c
index 60bdc4982d..eaf568b09f 100644
--- a/test/CodeGen/builtins-ppc-crypto.c
+++ b/test/CodeGen/builtins-ppc-crypto.c
@@ -108,6 +108,30 @@ vector unsigned long long test_vpermxord(void)
 // CHECK: @llvm.ppc.altivec.crypto.vpermxor
 }
 
+// CHECK-LABEL: test_vpermxorbc
+vector bool char test_vpermxorbc(vector bool char a,
+                                 vector bool char b,
+                                 vector bool char c) {
+  return vec_permxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
+}
+
+// CHECK-LABEL: test_vpermxorsc
+vector signed char test_vpermxorsc(vector signed char a,
+                                   vector signed char b,
+                                   vector signed char c) {
+  return vec_permxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
+}
+
+// CHECK-LABEL: test_vpermxoruc
+vector unsigned char test_vpermxoruc(vector unsigned char a,
+                                     vector unsigned char b,
+                                     vector unsigned char c) {
+  return vec_permxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
+}
+
 // CHECK-LABEL: define <2 x i64> @test_vcipher
 vector unsigned long long test_vcipher(void)
 {
diff --git a/test/CodeGen/builtins-ppc-p8vector.c b/test/CodeGen/builtins-ppc-p8vector.c
index 6271a1c1a8..5e16825ac7 100644
--- a/test/CodeGen/builtins-ppc-p8vector.c
+++ b/test/CodeGen/builtins-ppc-p8vector.c
@@ -136,6 +136,26 @@ void test1() {
 // CHECK-LE: @llvm.ppc.altivec.vperm
 // CHECK-PPC: warning: implicit declaration of function 'vec_mergee'
 
+  res_vbll = vec_mergee(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vsll = vec_mergee(vsll, vsll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vull = vec_mergee(vull, vull);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vf = vec_mergee(vfa, vfa);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vd = vec_mergee(vda, vda);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
   /* vec_mergeo */
   res_vbi = vec_mergeo(vbi, vbi);
 // CHECK: @llvm.ppc.altivec.vperm
diff --git a/test/CodeGen/builtins-ppc-p9vector.c b/test/CodeGen/builtins-ppc-p9vector.c
index 32c84f1c7f..5e942a38f6 100644
--- a/test/CodeGen/builtins-ppc-p9vector.c
+++ b/test/CodeGen/builtins-ppc-p9vector.c
@@ -827,4 +827,101 @@ vector unsigned char test73(void) {
 // CHECK-NEXT: ret <16 x i8>
   return vec_srv (vuca, vucb);
 }
-
+vector unsigned short test74(void) {
+// CHECK-BE: @llvm.ppc.vsx.xvcvsphp(<4 x float>
+// CHECK-BE: @llvm.ppc.vsx.xvcvsphp(<4 x float>
+// CHECK-BE: @llvm.ppc.altivec.vperm
+// CHECK: @llvm.ppc.vsx.xvcvsphp(<4 x float>
+// CHECK: @llvm.ppc.vsx.xvcvsphp(<4 x float>
+// CHECK: @llvm.ppc.altivec.vperm
+  return vec_pack_to_short_fp32(vfa, vfb);
+}
+vector unsigned int test75(void) {
+// CHECK-BE: @llvm.ppc.altivec.vrlwmi(<4 x i32
+// CHECK-BE-NEXT: ret <4 x i32>
+// CHECK: @llvm.ppc.altivec.vrlwmi(<4 x i32
+// CHECK-NEXT: ret <4 x i32>
+  return vec_rlmi(vuia, vuia, vuia);
+}
+vector unsigned long long test76(void) {
+// CHECK-BE: @llvm.ppc.altivec.vrldmi(<2 x i64
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: @llvm.ppc.altivec.vrldmi(<2 x i64
+// CHECK-NEXT: ret <2 x i64>
+  return vec_rlmi(vula, vula, vula);
+}
+vector unsigned int test77(void) {
+// CHECK-BE: @llvm.ppc.altivec.vrlwnm(<4 x i32
+// CHECK-BE: and <4 x i32
+// CHECK-BE: ret <4 x i32>
+// CHECK: @llvm.ppc.altivec.vrlwnm(<4 x i32
+// CHECK: and <4 x i32
+// CHECK: ret <4 x i32>
+  return vec_rlnm(vuia, vuia, vuia);
+}
+vector unsigned long long test78(void) {
+// CHECK-BE: @llvm.ppc.altivec.vrldnm(<2 x i64
+// CHECK-BE: and <2 x i64
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: @llvm.ppc.altivec.vrldnm(<2 x i64
+// CHECK: and <2 x i64
+// CHECK-NEXT: ret <2 x i64>
+  return vec_rlnm(vula, vula, vula);
+}
+vector double test79(void) {
+// CHECK-BE: extractelement <4 x float>
+// CHECK-BE: fpext float
+// CHECK-BE: insertelement <2 x double>
+// CHECK-BE: extractelement <4 x float>
+// CHECK-BE: fpext float
+// CHECK-BE: insertelement <2 x double>
+// CHECK: extractelement <4 x float>
+// CHECK: fpext float
+// CHECK: insertelement <2 x double>
+// CHECK: extractelement <4 x float>
+// CHECK: fpext float
+// CHECK: insertelement <2 x double>
+  return vec_unpackh(vfa);
+}
+vector double test80(void) {
+// CHECK-BE: extractelement <4 x float>
+// CHECK-BE: fpext float
+// CHECK-BE: insertelement <2 x double>
+// CHECK-BE: extractelement <4 x float>
+// CHECK-BE: fpext float
+// CHECK-BE: insertelement <2 x double>
+// CHECK: extractelement <4 x float>
+// CHECK: fpext float
+// CHECK: insertelement <2 x double>
+// CHECK: extractelement <4 x float>
+// CHECK: fpext float
+// CHECK: insertelement <2 x double>
+  return vec_unpackl(vfa);
+}
+vector float test81(void) {
+  // CHECK-BE: extractelement <2 x double>
+  // CHECK-BE: fptrunc double
+  // CHECK-BE: insertelement <4 x float>
+  // CHECK-BE: extractelement <2 x double>
+  // CHECK-BE: fptrunc double
+  // CHECK-BE: insertelement <4 x float>
+  // CHECK-BE: extractelement <2 x double>
+  // CHECK-BE: fptrunc double
+  // CHECK-BE: insertelement <4 x float>
+  // CHECK-BE: extractelement <2 x double>
+  // CHECK-BE: fptrunc double
+  // CHECK-BE: insertelement <4 x float>
+  // CHECK: extractelement <2 x double>
+  // CHECK: fptrunc double
+  // CHECK: insertelement <4 x float>
+  // CHECK: extractelement <2 x double>
+  // CHECK: fptrunc double
+  // CHECK: insertelement <4 x float>
+  // CHECK: extractelement <2 x double>
+  // CHECK: fptrunc double
+  // CHECK: insertelement <4 x float>
+  // CHECK: extractelement <2 x double>
+  // CHECK: fptrunc double
+  // CHECK: insertelement <4 x float>
+  return vec_pack(vda, vdb);
+}
-- 
2.40.0