From: Craig Topper Date: Mon, 12 Feb 2018 22:38:52 +0000 (+0000) Subject: [X86] Reverse the operand order of the implementation of the kunpack builtins. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9a5369fb11cd49c31002b9a4c4d60548f71bc770;p=clang [X86] Reverse the operand order of the implementation of the kunpack builtins. The second operand needs to be in the lower bits of the concatenation. This matches llvm 5.0, gcc, and icc behavior. Fixes PR36360. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@324954 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 8a48b4eae7..49dfc60d0c 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -8846,7 +8846,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, RHS = Builder.CreateShuffleVector(RHS, RHS, makeArrayRef(Indices, NumElts / 2)); // Concat the vectors. - Value *Res = Builder.CreateShuffleVector(LHS, RHS, + // NOTE: Operands are swapped to match the intrinsic definition. + Value *Res = Builder.CreateShuffleVector(RHS, LHS, makeArrayRef(Indices, NumElts)); return Builder.CreateBitCast(Res, Ops[0]->getType()); } diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c index bfdf46d261..4288c08692 100644 --- a/test/CodeGen/avx512bw-builtins.c +++ b/test/CodeGen/avx512bw-builtins.c @@ -1632,7 +1632,7 @@ __mmask64 test_mm512_kunpackd(__m512i __A, __m512i __B, __m512i __C, __m512i __D // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> // CHECK: [[LHS2:%.*]] = shufflevector <64 x i1> [[LHS]], <64 x i1> [[LHS]], <32 x i32> // CHECK: [[RHS2:%.*]] = shufflevector <64 x i1> [[RHS]], <64 x i1> [[RHS]], <32 x i32> - // CHECK: [[CONCAT:%.*]] = shufflevector <32 x i1> [[LHS2]], <32 x i1> [[RHS2]], <64 x i32> + // CHECK: [[CONCAT:%.*]] = shufflevector <32 x i1> [[RHS2]], <32 x i1> [[LHS2]], <64 x i32> // CHECK: bitcast <64 x i1> [[CONCAT]] to i64 return _mm512_mask_cmpneq_epu8_mask(_mm512_kunpackd(_mm512_cmpneq_epu8_mask(__B, __A),_mm512_cmpneq_epu8_mask(__C, __D)), __E, __F); } @@ -1643,7 +1643,7 @@ __mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> // CHECK: [[LHS2:%.*]] = shufflevector <32 x i1> [[LHS]], <32 x i1> [[LHS]], <16 x i32> // CHECK: [[RHS2:%.*]] = shufflevector <32 x i1> [[RHS]], <32 x i1> [[RHS]], <16 x i32> - // CHECK: [[CONCAT:%.*]] = shufflevector <16 x i1> [[LHS2]], <16 x i1> [[RHS2]], <32 x i32> + // CHECK: [[CONCAT:%.*]] = shufflevector <16 x i1> [[RHS2]], <16 x i1> [[LHS2]], <32 x i32> return _mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, __A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F); } diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index 8e6ed959ae..5847f311ef 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -6281,7 +6281,7 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[LHS2:%.*]] = shufflevector <16 x i1> [[LHS]], <16 x i1> [[LHS]], <8 x i32> // CHECK: [[RHS2:%.*]] = shufflevector <16 x i1> [[RHS]], <16 x i1> [[RHS]], <8 x i32> - // CHECK: [[CONCAT:%.*]] = shufflevector <8 x i1> [[LHS2]], <8 x i1> [[RHS2]], <16 x i32> + // CHECK: [[CONCAT:%.*]] = shufflevector <8 x i1> [[RHS2]], <8 x i1> [[LHS2]], <16 x i32> // CHECK: bitcast <16 x i1> [[CONCAT]] to i16 return _mm512_mask_cmpneq_epu32_mask(_mm512_kunpackb(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)),