From a04f51fdfec64bd6ea1679832a1a75c8d3ecc68a Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 11 Jun 2014 15:48:46 +0000 Subject: [PATCH] [PPC64LE] Fix vec_sld and vec_vsldoi for little endian The vec_sld and vec_vsldoi interfaces perform a left-shift on vector arguments for both big and little endian. However, because they rely on the vec_perm interface which is endian-dependent, the permutation vector needs to be reversed for LE to get the proper shift direction. I've added some extra testing for these interfaces for LE in the builtins-ppc-altivec.c. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@210657 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Headers/altivec.h | 96 +++++++++++++++++++++++++++++ test/CodeGen/builtins-ppc-altivec.c | 32 ++++++++++ 2 files changed, 128 insertions(+) diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h index bda5a0ea39..bee9e2de8a 100644 --- a/lib/Headers/altivec.h +++ b/lib/Headers/altivec.h @@ -5224,65 +5224,113 @@ vec_vslw(vector unsigned int __a, vector unsigned int __b) static vector signed char __ATTRS_o_ai vec_sld(vector signed char __a, vector signed char __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector unsigned char __ATTRS_o_ai vec_sld(vector unsigned char __a, vector unsigned char __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector short __ATTRS_o_ai vec_sld(vector short __a, vector short __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector unsigned short __ATTRS_o_ai vec_sld(vector unsigned short __a, vector unsigned short __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector pixel __ATTRS_o_ai vec_sld(vector pixel __a, vector pixel __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector int __ATTRS_o_ai vec_sld(vector int __a, vector int __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector unsigned int __ATTRS_o_ai vec_sld(vector unsigned int __a, vector unsigned int __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector float __ATTRS_o_ai vec_sld(vector float __a, vector float __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } /* vec_vsldoi */ @@ -5290,65 +5338,113 @@ vec_sld(vector float __a, vector float __b, unsigned char __c) static vector signed char __ATTRS_o_ai vec_vsldoi(vector signed char __a, vector signed char __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector unsigned char __ATTRS_o_ai vec_vsldoi(vector unsigned char __a, vector unsigned char __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector short __ATTRS_o_ai vec_vsldoi(vector short __a, vector short __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector unsigned short __ATTRS_o_ai vec_vsldoi(vector unsigned short __a, vector unsigned short __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector pixel __ATTRS_o_ai vec_vsldoi(vector pixel __a, vector pixel __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector int __ATTRS_o_ai vec_vsldoi(vector int __a, vector int __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector unsigned int __ATTRS_o_ai vec_vsldoi(vector unsigned int __a, vector unsigned int __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } static vector float __ATTRS_o_ai vec_vsldoi(vector float __a, vector float __b, unsigned char __c) { +#ifdef __LITTLE_ENDIAN__ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7, + __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15)); +#else return vec_perm(__a, __b, (vector unsigned char) (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +#endif } /* vec_sll */ diff --git a/test/CodeGen/builtins-ppc-altivec.c b/test/CodeGen/builtins-ppc-altivec.c index c94656e6b5..b161426769 100644 --- a/test/CodeGen/builtins-ppc-altivec.c +++ b/test/CodeGen/builtins-ppc-altivec.c @@ -3258,66 +3258,98 @@ void test6() { /* vec_sld */ res_vsc = vec_sld(vsc, vsc, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vuc = vec_sld(vuc, vuc, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vs = vec_sld(vs, vs, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vus = vec_sld(vus, vus, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vp = vec_sld(vp, vp, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vi = vec_sld(vi, vi, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vui = vec_sld(vui, vui, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vf = vec_sld(vf, vf, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vsc = vec_vsldoi(vsc, vsc, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vuc = vec_vsldoi(vuc, vuc, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vs = vec_vsldoi(vs, vs, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vus = vec_vsldoi(vus, vus, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vp = vec_vsldoi(vp, vp, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vi = vec_vsldoi(vi, vi, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vui = vec_vsldoi(vui, vui, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm res_vf = vec_vsldoi(vf, vf, 0); // CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1 +// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15 // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_sll */ -- 2.40.0