From 2bf3cc07f09341d5cb995efe8d84010ca3228253 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 20 Dec 2018 13:09:09 +0000 Subject: [PATCH] [SystemZ] Fix wrong codegen caused by typos in vecintrin.h The following two bugs in SystemZ high-level vector intrinsics are fixed by this patch: - The float case of vec_insert_and_zero should generate a VLLEZF pattern, but currently erroneously generates VLLEZLF. - The float and double versions of vec_orc erroneously generate and-with-complement instead of or-with-complement. The patch also fixes a couple of typos in the associated test. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@349751 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Headers/vecintrin.h | 6 +++--- test/CodeGen/builtins-systemz-zvector2.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/Headers/vecintrin.h b/lib/Headers/vecintrin.h index f7061e8894..e627389838 100644 --- a/lib/Headers/vecintrin.h +++ b/lib/Headers/vecintrin.h @@ -381,7 +381,7 @@ vec_insert_and_zero(const unsigned long long *__ptr) { static inline __ATTRS_o_ai vector float vec_insert_and_zero(const float *__ptr) { vector float __vec = (vector float)0; - __vec[0] = *__ptr; + __vec[1] = *__ptr; return __vec; } #endif @@ -5942,13 +5942,13 @@ vec_orc(vector unsigned long long __a, vector unsigned long long __b) { static inline __ATTRS_o_ai vector float vec_orc(vector float __a, vector float __b) { - return (vector float)((vector unsigned int)__a & + return (vector float)((vector unsigned int)__a | ~(vector unsigned int)__b); } static inline __ATTRS_o_ai vector double vec_orc(vector double __a, vector double __b) { - return (vector double)((vector unsigned long long)__a & + return (vector double)((vector unsigned long long)__a | ~(vector unsigned long long)__b); } #endif diff --git a/test/CodeGen/builtins-systemz-zvector2.c b/test/CodeGen/builtins-systemz-zvector2.c index d9607b3a87..acdaebd7a5 100644 --- a/test/CodeGen/builtins-systemz-zvector2.c 
+++ b/test/CodeGen/builtins-systemz-zvector2.c @@ -65,9 +65,9 @@ void test_core(void) { d = vec_extract(vd, idx); // CHECK: extractelement <2 x double> %{{.*}}, i32 %{{.*}} - vf = vec_insert(d, vf, idx); + vf = vec_insert(f, vf, idx); // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 %{{.*}} - vd = vec_insert(f, vd, idx); + vd = vec_insert(d, vd, idx); // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 %{{.*}} vf = vec_promote(f, idx); @@ -76,7 +76,7 @@ void test_core(void) { // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 %{{.*}} vf = vec_insert_and_zero(cptrf); - // CHECK: insertelement <4 x float> , float %{{.*}}, i32 0 + // CHECK: insertelement <4 x float> , float %{{.*}}, i32 1 vd = vec_insert_and_zero(cptrd); // CHECK: insertelement <2 x double> , double %{{.*}}, i32 0 @@ -227,8 +227,8 @@ void test_compare(void) { idx = vec_all_lt(vd, vd); // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchdbs(<2 x double> %{{.*}}, <2 x double> %{{.*}}) - idx = vec_all_nge(vd, vd); - // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}}) + idx = vec_all_nge(vf, vf); + // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfchesbs(<4 x float> %{{.*}}, <4 x float> %{{.*}}) idx = vec_all_nge(vd, vd); // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}}) -- 2.40.0