From af26b710854e15cdd4a6a2489bca9447f4537e7a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 31 Mar 2017 17:24:29 +0000 Subject: [PATCH] [AVX-512] Update lowering for gather/scatter prefetch intrinsics to match the immediate encodings the frontend uses based on the _MM_HINT_T0/T1 constant values in clang's headers. Our _MM_HINT_T0/T1 constant values are 3/2 which matches gcc, but not icc or Intel documentation. Interestingly gcc had this same bug on their implementation of the gather/scatter builtins at one point too. Fixes PR32411. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299234 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 +++--- test/CodeGen/X86/avx512-gather-scatter-intrin.ll | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index aa741336f92..11b3c2e32f6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -20433,9 +20433,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, case PREFETCH: { SDValue Hint = Op.getOperand(6); unsigned HintVal = cast(Hint)->getZExtValue(); - assert((HintVal == 1 || HintVal == 2) && - "Wrong prefetch hint in intrinsic: should be 1 or 2"); - unsigned Opcode = (HintVal != 1 ? IntrData->Opc1 : IntrData->Opc0); + assert((HintVal == 2 || HintVal == 3) && + "Wrong prefetch hint in intrinsic: should be 2 or 3"); + unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0); SDValue Chain = Op.getOperand(0); SDValue Mask = Op.getOperand(2); SDValue Index = Op.getOperand(3); diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll index 3eaee2d313f..4890afec216 100644 --- a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll +++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll @@ -285,9 +285,9 @@ define void @prefetch(<8 x i64> %ind, i8* %base) { ; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1) + call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 3) call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, i8* %base, i32 4, i32 2) - call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, i8* %base, i32 2, i32 1) + call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, i8* %base, i32 2, i32 3) call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, i8* %base, i32 2, i32 2) ret void } -- 2.50.1