From: Craig Topper Date: Fri, 31 Aug 2018 18:22:52 +0000 (+0000) Subject: [X86] Add kshift intrinsics to match gcc and icc. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ead0755dee7343dfd3840891fd34cb6fb743e487;p=clang [X86] Add kshift intrinsics to match gcc and icc. This adds the following intrinsics: _kshiftli_mask8 _kshiftli_mask16 _kshiftli_mask32 _kshiftli_mask64 _kshiftri_mask8 _kshiftri_mask16 _kshiftri_mask32 _kshiftri_mask64 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@341234 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index f227584952..5b90b73155 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1770,6 +1770,14 @@ TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kxordi, "ULLiULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "ULLiULLiIUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kshiftridi, "ULLiULLiIUi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl") diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 7bb1a652d6..50e9b5ed09 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -9929,6 +9929,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, "psrldq"); return Builder.CreateBitCast(SV, ResultType, "cast"); } + case X86::BI__builtin_ia32_kshiftliqi: + case X86::BI__builtin_ia32_kshiftlihi: + case X86::BI__builtin_ia32_kshiftlisi: + case X86::BI__builtin_ia32_kshiftlidi: { + unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff; + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + + if (ShiftVal >= NumElts) + return llvm::Constant::getNullValue(Ops[0]->getType()); + + Value *In = getMaskVecValue(*this, Ops[0], NumElts); + + uint32_t Indices[64]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = NumElts + i - ShiftVal; + + Value *Zero = llvm::Constant::getNullValue(In->getType()); + Value *SV = Builder.CreateShuffleVector(Zero, In, + makeArrayRef(Indices, NumElts), + "kshiftl"); + return Builder.CreateBitCast(SV, Ops[0]->getType()); + } + case X86::BI__builtin_ia32_kshiftriqi: + case X86::BI__builtin_ia32_kshiftrihi: + case X86::BI__builtin_ia32_kshiftrisi: + case X86::BI__builtin_ia32_kshiftridi: { + unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff; + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + + if (ShiftVal >= NumElts) + return llvm::Constant::getNullValue(Ops[0]->getType()); + + Value *In = getMaskVecValue(*this, Ops[0], NumElts); + + uint32_t Indices[64]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i + ShiftVal; + + Value *Zero = llvm::Constant::getNullValue(In->getType()); + Value *SV = Builder.CreateShuffleVector(In, Zero, + makeArrayRef(Indices, NumElts), + "kshiftr"); + return Builder.CreateBitCast(SV, Ops[0]->getType()); + } case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: case X86::BI__builtin_ia32_movntsd: diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h index fdf26765b6..d7a03c961b 100644 --- a/lib/Headers/avx512bwintrin.h +++ b/lib/Headers/avx512bwintrin.h @@ -155,6 +155,18 @@ _kadd_mask64(__mmask64 __A, __mmask64 __B) return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); } +#define _kshiftli_mask32(A, I) \ + (__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)) + +#define _kshiftri_mask32(A, I) \ + (__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)) + +#define _kshiftli_mask64(A, I) \ + (__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)) + +#define _kshiftri_mask64(A, I) \ + (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)) + /* Integer compare */ #define _mm512_cmp_epi8_mask(a, b, p) \ diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h index a9a35794b4..b54e5474df 100644 --- a/lib/Headers/avx512dqintrin.h +++ b/lib/Headers/avx512dqintrin.h @@ -98,6 +98,12 @@ _kadd_mask16(__mmask16 __A, __mmask16 __B) return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B); } +#define _kshiftli_mask8(A, I) \ + (__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I)) + +#define _kshiftri_mask8(A, I) \ + (__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I)) + static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A * (__v8du) __B); diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index b5ef07a425..262f46cf31 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -8394,6 +8394,12 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B) #define _kxnor_mask16 _mm512_kxnor #define _kxor_mask16 _mm512_kxor +#define _kshiftli_mask16(A, I) \ + (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)) + +#define _kshiftri_mask16(A, I) \ + (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)) + static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512 (__m512i * __P, __m512i __A) { diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 7826446b79..ac7c046dff 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -3626,6 +3626,14 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_psrldqi128_byteshift: case X86::BI__builtin_ia32_psrldqi256_byteshift: case X86::BI__builtin_ia32_psrldqi512_byteshift: + case X86::BI__builtin_ia32_kshiftliqi: + case X86::BI__builtin_ia32_kshiftlihi: + case X86::BI__builtin_ia32_kshiftlisi: + case X86::BI__builtin_ia32_kshiftlidi: + case X86::BI__builtin_ia32_kshiftriqi: + case X86::BI__builtin_ia32_kshiftrihi: + case X86::BI__builtin_ia32_kshiftrisi: + case X86::BI__builtin_ia32_kshiftridi: i = 1; l = 0; u = 255; break; case X86::BI__builtin_ia32_vperm2f128_pd256: diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c index f1707a1ed9..2075a0ccba 100644 --- a/test/CodeGen/avx512bw-builtins.c +++ b/test/CodeGen/avx512bw-builtins.c @@ -248,6 +248,38 @@ __mmask64 test_kadd_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +__mmask32 test_kshiftli_mask32(__m512i A, __m512i B, __m512i C, __m512i D) { + // CHECK-LABEL: @test_kshiftli_mask32 + // CHECK: [[VAL:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RES:%.*]] = shufflevector <32 x i1> zeroinitializer, <32 x i1> [[VAL]], <32 x i32> + // CHECK: bitcast <32 x i1> [[RES]] to i32 + return _mm512_mask_cmpneq_epu16_mask(_kshiftli_mask32(_mm512_cmpneq_epu16_mask(A, B), 31), C, D); +} + +__mmask32 test_kshiftri_mask32(__m512i A, __m512i B, __m512i C, __m512i D) { + // CHECK-LABEL: @test_kshiftri_mask32 + // CHECK: [[VAL:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // CHECK: [[RES:%.*]] = shufflevector <32 x i1> [[VAL]], <32 x i1> zeroinitializer, <32 x i32> + // CHECK: bitcast <32 x i1> [[RES]] to i32 + return _mm512_mask_cmpneq_epu16_mask(_kshiftri_mask32(_mm512_cmpneq_epu16_mask(A, B), 31), C, D); +} + +__mmask64 test_kshiftli_mask64(__m512i A, __m512i B, __m512i C, __m512i D) { + // CHECK-LABEL: @test_kshiftli_mask64 + // CHECK: [[VAL:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RES:%.*]] = shufflevector <64 x i1> zeroinitializer, <64 x i1> [[VAL]], <64 x i32> + // CHECK: bitcast <64 x i1> [[RES]] to i64 + return _mm512_mask_cmpneq_epu8_mask(_kshiftli_mask64(_mm512_cmpneq_epu8_mask(A, B), 32), C, D); +} + +__mmask64 test_kshiftri_mask64(__m512i A, __m512i B, __m512i C, __m512i D) { + // CHECK-LABEL: @test_kshiftri_mask64 + // CHECK: [[VAL:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // CHECK: [[RES:%.*]] = shufflevector <64 x i1> [[VAL]], <64 x i1> zeroinitializer, <64 x i32> + // CHECK: bitcast <64 x i1> [[RES]] to i64 + return _mm512_mask_cmpneq_epu8_mask(_kshiftri_mask64(_mm512_cmpneq_epu8_mask(A, B), 32), C, D); +} + __mmask64 test_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) { // CHECK-LABEL: @test_mm512_cmpeq_epi8_mask // CHECK: icmp eq <64 x i8> %{{.*}}, %{{.*}} diff --git a/test/CodeGen/avx512dq-builtins.c b/test/CodeGen/avx512dq-builtins.c index f8f3e4bc62..6d2f93d71e 100644 --- a/test/CodeGen/avx512dq-builtins.c +++ b/test/CodeGen/avx512dq-builtins.c @@ -136,6 +136,22 @@ __mmask16 test_kadd_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +__mmask8 test_kshiftli_mask8(__m512i A, __m512i B, __m512i C, __m512i D) { + // CHECK-LABEL: @test_kshiftli_mask8 + // CHECK: [[VAL:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RES:%.*]] = shufflevector <8 x i1> zeroinitializer, <8 x i1> [[VAL]], <8 x i32> + // CHECK: bitcast <8 x i1> [[RES]] to i8 + return _mm512_mask_cmpneq_epu64_mask(_kshiftli_mask8(_mm512_cmpneq_epu64_mask(A, B), 2), C, D); +} + +__mmask8 test_kshiftri_mask8(__m512i A, __m512i B, __m512i C, __m512i D) { + // CHECK-LABEL: @test_kshiftri_mask8 + // CHECK: [[VAL:%.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: [[RES:%.*]] = shufflevector <8 x i1> [[VAL]], <8 x i1> zeroinitializer, <8 x i32> + // CHECK: bitcast <8 x i1> [[RES]] to i8 + return _mm512_mask_cmpneq_epu64_mask(_kshiftri_mask8(_mm512_cmpneq_epu64_mask(A, B), 2), C, D); +} + __m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mullo_epi64 // CHECK: mul <8 x i64> diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index 39ac42f25f..8ddabc5003 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -8296,6 +8296,22 @@ __mmask16 test_kxor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +__mmask16 test_kshiftli_mask16(__m512i A, __m512i B, __m512i C, __m512i D) { + // CHECK-LABEL: @test_kshiftli_mask16 + // CHECK: [[VAL:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RES:%.*]] = shufflevector <16 x i1> zeroinitializer, <16 x i1> [[VAL]], <16 x i32> + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_kshiftli_mask16(_mm512_cmpneq_epu32_mask(A, B), 1), C, D); +} + +__mmask16 test_kshiftri_mask16(__m512i A, __m512i B, __m512i C, __m512i D) { + // CHECK-LABEL: @test_kshiftri_mask16 + // CHECK: [[VAL:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RES:%.*]] = shufflevector <16 x i1> [[VAL]], <16 x i1> zeroinitializer, <16 x i32> + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_kshiftri_mask16(_mm512_cmpneq_epu32_mask(A, B), 1), C, D); +} + void test_mm512_stream_si512(__m512i * __P, __m512i __A) { // CHECK-LABEL: @test_mm512_stream_si512 // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 64, !nontemporal