[X86][AVX-512] Don't raise inexact in ceil, floor, round, trunc.

author Ahmed Bougacha <ahmed.bougacha@gmail.com>

Mon, 26 Jun 2017 16:00:24 +0000 (16:00 +0000)

committer Ahmed Bougacha <ahmed.bougacha@gmail.com>

Mon, 26 Jun 2017 16:00:24 +0000 (16:00 +0000)
author Ahmed Bougacha <ahmed.bougacha@gmail.com>
Mon, 26 Jun 2017 16:00:24 +0000 (16:00 +0000)
committer Ahmed Bougacha <ahmed.bougacha@gmail.com>
Mon, 26 Jun 2017 16:00:24 +0000 (16:00 +0000)
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td

index d46262573f71a32e73efb641fd5d3e993b53e9ae..01a70323224c3251993b1e43925e52bee0e67a27 100644 (file)
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -7265,13 +7265,13 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
    let Predicates = [HasAVX512] in {
    def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
               (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
-             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))), _.FRC)>;
+             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>;
    def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
               (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
-             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x2))), _.FRC)>;
+             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>;
    def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
               (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
-             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x3))), _.FRC)>;
+             (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>;
    def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
               (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>;
@@ -7281,13 +7281,13 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
  
    def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
               (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
-             addr:$src, (i32 0x1))), _.FRC)>;
+             addr:$src, (i32 0x9))), _.FRC)>;
    def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
               (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
-             addr:$src, (i32 0x2))), _.FRC)>;
+             addr:$src, (i32 0xa))), _.FRC)>;
    def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
               (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
-             addr:$src, (i32 0x3))), _.FRC)>;
+             addr:$src, (i32 0xb))), _.FRC)>;
    def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
               (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
               addr:$src, (i32 0x4))), _.FRC)>;
@@ -8471,26 +8471,26 @@ multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16f32 (ffloor VR512:$src)),
-          (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>;
+          (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
  def : Pat<(v16f32 (fnearbyint VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
  def : Pat<(v16f32 (fceil VR512:$src)),
-          (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>;
+          (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
  def : Pat<(v16f32 (frint VR512:$src)),
            (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
  def : Pat<(v16f32 (ftrunc VR512:$src)),
-          (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>;
+          (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
  
  def : Pat<(v8f64 (ffloor VR512:$src)),
-          (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>;
+          (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
  def : Pat<(v8f64 (fnearbyint VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
  def : Pat<(v8f64 (fceil VR512:$src)),
-          (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>;
+          (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
  def : Pat<(v8f64 (frint VR512:$src)),
            (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
  def : Pat<(v8f64 (ftrunc VR512:$src)),
-          (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>;
+          (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
  }
  
  defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
diff --git a/test/CodeGen/X86/avx512-round.ll b/test/CodeGen/X86/avx512-round.ll

index c4f417e75ab0975f0d0ef43ce1468876488873f7..b23af2b09a789a984a5227ebc9d4732f50716fdd 100644 (file)
--- a/test/CodeGen/X86/avx512-round.ll
+++ b/test/CodeGen/X86/avx512-round.ll
@@ -2,7 +2,7 @@
  
  define <16 x float> @floor_v16f32(<16 x float> %a) {
  ; CHECK-LABEL: floor_v16f32
-; CHECK: vrndscaleps $1, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x01]
+; CHECK: vrndscaleps $9, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x09]
    %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %a)
    ret <16 x float> %res
  }
@@ -10,7 +10,7 @@ declare <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  
  define <8 x double> @floor_v8f64(<8 x double> %a) {
  ; CHECK-LABEL: floor_v8f64
-; CHECK: vrndscalepd $1, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x01]
+; CHECK: vrndscalepd $9, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x09]
    %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %a)
    ret <8 x double> %res
  }
@@ -18,7 +18,7 @@ declare <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  
  define <16 x float> @ceil_v16f32(<16 x float> %a) {
  ; CHECK-LABEL: ceil_v16f32
-; CHECK: vrndscaleps $2, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x02]
+; CHECK: vrndscaleps $10, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0a]
    %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %a)
    ret <16 x float> %res
  }
@@ -26,7 +26,7 @@ declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
  
  define <8 x double> @ceil_v8f64(<8 x double> %a) {
  ; CHECK-LABEL: ceil_v8f64
-; CHECK: vrndscalepd $2, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x02]
+; CHECK: vrndscalepd $10, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0a]
    %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %a)
    ret <8 x double> %res
  }
@@ -34,7 +34,7 @@ declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
  
  define <16 x float> @trunc_v16f32(<16 x float> %a) {
  ; CHECK-LABEL: trunc_v16f32
-; CHECK: vrndscaleps $3, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x03]
+; CHECK: vrndscaleps $11, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
    %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %a)
    ret <16 x float> %res
  }
@@ -42,7 +42,7 @@ declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
  
  define <8 x double> @trunc_v8f64(<8 x double> %a) {
  ; CHECK-LABEL: trunc_v8f64
-; CHECK: vrndscalepd $3, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x03]
+; CHECK: vrndscalepd $11, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
    %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %a)
    ret <8 x double> %res
  }
@@ -90,7 +90,7 @@ declare double @llvm.nearbyint.f64(double %p)
  
  define float @floor_f32(float %a) {
  ; CHECK-LABEL: floor_f32
-; CHECK: vrndscaless $1, {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x01]
+; CHECK: vrndscaless $9, {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x09]
    %res = call float @llvm.floor.f32(float %a)
    ret float %res
  }
@@ -98,7 +98,7 @@ declare float @llvm.floor.f32(float %p)
  
  define float @floor_f32m(float* %aptr) {
  ; CHECK-LABEL: floor_f32m
-; CHECK: vrndscaless $1, (%rdi), {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x01]
+; CHECK: vrndscaless $9, (%rdi), {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x09]
    %a = load float, float* %aptr, align 4
    %res = call float @llvm.floor.f32(float %a)
    ret float %res
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Mon, 26 Jun 2017 16:00:24 +0000 (16:00 +0000)
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Mon, 26 Jun 2017 16:00:24 +0000 (16:00 +0000)
lib/Target/X86/X86InstrAVX512.td		patch | blob | history
test/CodeGen/X86/avx512-round.ll		patch | blob | history