[AArch64][GlobalISel] Select G_INTRINSIC_ROUND

author Jessica Paquette <jpaquette@apple.com>

Tue, 23 Apr 2019 23:03:03 +0000 (23:03 +0000)

committer Jessica Paquette <jpaquette@apple.com>

Tue, 23 Apr 2019 23:03:03 +0000 (23:03 +0000)
author Jessica Paquette <jpaquette@apple.com>
Tue, 23 Apr 2019 23:03:03 +0000 (23:03 +0000)
committer Jessica Paquette <jpaquette@apple.com>
Tue, 23 Apr 2019 23:03:03 +0000 (23:03 +0000)
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp

index 40fec6c2669451c78706261d7d8bdb5828bc107e..51501c1a9c78b5a747269d8330ed99fb5c143dd3 100644 (file)
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -103,6 +103,7 @@ private:
                                        MachineRegisterInfo &MRI) const;
    bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
    unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
    MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                           MachineIRBuilder &MIRBuilder) const;
@@ -1849,6 +1850,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
    }
    case TargetOpcode::G_INTRINSIC_TRUNC:
      return selectIntrinsicTrunc(I, MRI);
+  case TargetOpcode::G_INTRINSIC_ROUND:
+    return selectIntrinsicRound(I, MRI);
    case TargetOpcode::G_BUILD_VECTOR:
      return selectBuildVector(I, MRI);
    case TargetOpcode::G_MERGE_VALUES:
@@ -1923,6 +1926,61 @@ bool AArch64InstructionSelector::selectIntrinsicTrunc(
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  
+bool AArch64InstructionSelector::selectIntrinsicRound(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
+
+  // Select the correct opcode.
+  unsigned Opc = 0;
+  if (!SrcTy.isVector()) {
+    switch (SrcTy.getSizeInBits()) {
+    default:
+    case 16:
+      Opc = AArch64::FRINTAHr;
+      break;
+    case 32:
+      Opc = AArch64::FRINTASr;
+      break;
+    case 64:
+      Opc = AArch64::FRINTADr;
+      break;
+    }
+  } else {
+    unsigned NumElts = SrcTy.getNumElements();
+    switch (SrcTy.getElementType().getSizeInBits()) {
+    default:
+      break;
+    case 16:
+      if (NumElts == 4)
+        Opc = AArch64::FRINTAv4f16;
+      else if (NumElts == 8)
+        Opc = AArch64::FRINTAv8f16;
+      break;
+    case 32:
+      if (NumElts == 2)
+        Opc = AArch64::FRINTAv2f32;
+      else if (NumElts == 4)
+        Opc = AArch64::FRINTAv4f32;
+      break;
+    case 64:
+      if (NumElts == 2)
+        Opc = AArch64::FRINTAv2f64;
+      break;
+    }
+  }
+
+  if (!Opc) {
+    // Didn't get an opcode above, bail.
+    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
+    return false;
+  }
+
+  // Legalization would have set us up perfectly for this; we just need to
+  // set the opcode and move on.
+  I.setDesc(TII.get(Opc));
+  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
  bool AArch64InstructionSelector::selectVectorICmp(
      MachineInstr &I, MachineRegisterInfo &MRI) const {
    unsigned DstReg = I.getOperand(0).getReg();
diff --git a/test/CodeGen/AArch64/GlobalISel/select-intrinsic-round.mir b/test/CodeGen/AArch64/GlobalISel/select-intrinsic-round.mir

new file mode 100644 (file)

index 0000000..4efec65
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/select-intrinsic-round.mir
@@ -0,0 +1,198 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -mattr=+fullfp16 -o - | FileCheck %s
+
+...
+---
+name:            test_f64.intrinsic_round
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_f64.intrinsic_round
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[FRINTADr:%[0-9]+]]:fpr64 = FRINTADr [[COPY]]
+    ; CHECK: $d0 = COPY [[FRINTADr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s64) = COPY $d0
+    %1:fpr(s64) = G_INTRINSIC_ROUND %0
+    $d0 = COPY %1(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_f32.intrinsic_round
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $s0
+
+    ; CHECK-LABEL: name: test_f32.intrinsic_round
+    ; CHECK: liveins: $s0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[FRINTASr:%[0-9]+]]:fpr32 = FRINTASr [[COPY]]
+    ; CHECK: $s0 = COPY [[FRINTASr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = G_INTRINSIC_ROUND %0
+    $s0 = COPY %1(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            test_f16.intrinsic_round
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $h0
+
+    ; CHECK-LABEL: name: test_f16.intrinsic_round
+    ; CHECK: liveins: $h0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY $h0
+    ; CHECK: [[FRINTAHr:%[0-9]+]]:fpr16 = FRINTAHr [[COPY]]
+    ; CHECK: $h0 = COPY [[FRINTAHr]]
+    ; CHECK: RET_ReallyLR implicit $h0
+    %0:fpr(s16) = COPY $h0
+    %1:fpr(s16) = G_INTRINSIC_ROUND %0
+    $h0 = COPY %1(s16)
+    RET_ReallyLR implicit $h0
+
+...
+---
+name:            test_v4f16.intrinsic_round
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_v4f16.intrinsic_round
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[FRINTAv4f16_:%[0-9]+]]:fpr64 = FRINTAv4f16 [[COPY]]
+    ; CHECK: $d0 = COPY [[FRINTAv4f16_]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<4 x s16>) = COPY $d0
+    %1:fpr(<4 x s16>) = G_INTRINSIC_ROUND %0
+    $d0 = COPY %1(<4 x s16>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_v8f16.intrinsic_round
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v8f16.intrinsic_round
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FRINTAv8f16_:%[0-9]+]]:fpr128 = FRINTAv8f16 [[COPY]]
+    ; CHECK: $q0 = COPY [[FRINTAv8f16_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<8 x s16>) = COPY $q0
+    %1:fpr(<8 x s16>) = G_INTRINSIC_ROUND %0
+    $q0 = COPY %1(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v2f32.intrinsic_round
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_v2f32.intrinsic_round
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[FRINTAv2f32_:%[0-9]+]]:fpr64 = FRINTAv2f32 [[COPY]]
+    ; CHECK: $d0 = COPY [[FRINTAv2f32_]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %1:fpr(<2 x s32>) = G_INTRINSIC_ROUND %0
+    $d0 = COPY %1(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_v4f32.intrinsic_round
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v4f32.intrinsic_round
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FRINTAv4f32_:%[0-9]+]]:fpr128 = FRINTAv4f32 [[COPY]]
+    ; CHECK: $q0 = COPY [[FRINTAv4f32_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:fpr(<4 x s32>) = G_INTRINSIC_ROUND %0
+    $q0 = COPY %1(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v2f64.intrinsic_round
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v2f64.intrinsic_round
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FRINTAv2f64_:%[0-9]+]]:fpr128 = FRINTAv2f64 [[COPY]]
+    ; CHECK: $q0 = COPY [[FRINTAv2f64_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<2 x s64>) = COPY $q0
+    %1:fpr(<2 x s64>) = G_INTRINSIC_ROUND %0
+    $q0 = COPY %1(<2 x s64>)
+    RET_ReallyLR implicit $q0
diff --git a/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll

index 16344184b27baf45c7ee7b731f49f9de6ada8209..1718ed4d9a963c747ba84275eb940e4171d6a778 100644 (file)
--- a/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@@ -230,6 +230,11 @@ define %v4f16 @test_v4f16.round(%v4f16 %a) {
    ; CHECK-FP16-NOT:       fcvt
    ; CHECK-FP16:           frinta.4h
    ; CHECK-FP16-NEXT:      ret
+  ; GISEL-LABEL:          test_v4f16.round:
+  ; GISEL-NOFP16-COUNT-4: frinta s{{[0-9]+}}, s{{[0-9]+}}
+  ; GISEL-FP16-NOT:       fcvt
+  ; GISEL-FP16:           frinta.4h
+  ; GISEL-FP16-NEXT:      ret
    %1 =  call %v4f16 @llvm.round.v4f16(%v4f16 %a)
    ret %v4f16 %1
  }
@@ -475,6 +480,11 @@ define %v8f16 @test_v8f16.round(%v8f16 %a) {
    ; CHECK-FP16-NOT:       fcvt
    ; CHECK-FP16:           frinta.8h
    ; CHECK-FP16-NEXT:      ret
+  ; GISEL-LABEL:          test_v8f16.round:
+  ; GISEL-NOFP16-COUNT-8: frinta s{{[0-9]+}}, s{{[0-9]+}}
+  ; GISEL-FP16-NOT:       fcvt
+  ; GISEL-FP16:           frinta.8h
+  ; GISEL-FP16-NEXT:      ret
    %1 =  call %v8f16 @llvm.round.v8f16(%v8f16 %a)
    ret %v8f16 %1
  }
diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll

index b677feecf672e9456ca594bfe5bb5ce03607474a..1ed4bdec9fb9a19ca3bf544702e4b8d8ae7b1f34 100644 (file)
--- a/test/CodeGen/AArch64/f16-instructions.ll
+++ b/test/CodeGen/AArch64/f16-instructions.ll
@@ -1273,10 +1273,21 @@ define half @test_nearbyint(half %a) #0 {
  ; CHECK-CVT-NEXT: fcvt h0, [[INT32]]
  ; CHECK-CVT-NEXT: ret
  
+; GISEL-CVT-LABEL: test_round:
+; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
+; GISEL-CVT-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]]
+; GISEL-CVT-NEXT: fcvt h0, [[INT32]]
+; GISEL-CVT-NEXT: ret
+
+
  ; CHECK-FP16-LABEL: test_round:
  ; CHECK-FP16-NEXT: frinta h0, h0
  ; CHECK-FP16-NEXT: ret
  
+; GISEL-FP16-LABEL: test_round:
+; GISEL-FP16-NEXT: frinta h0, h0
+; GISEL-FP16-NEXT: ret
+
  define half @test_round(half %a) #0 {
    %r = call half @llvm.round.f16(half %a)
    ret half %r
author	Jessica Paquette <jpaquette@apple.com>
	Tue, 23 Apr 2019 23:03:03 +0000 (23:03 +0000)
committer	Jessica Paquette <jpaquette@apple.com>
	Tue, 23 Apr 2019 23:03:03 +0000 (23:03 +0000)
lib/Target/AArch64/AArch64InstructionSelector.cpp		patch \| blob \| history
test/CodeGen/AArch64/GlobalISel/select-intrinsic-round.mir	[new file with mode: 0644]	patch \| blob
test/CodeGen/AArch64/arm64-vfloatintrinsics.ll		patch \| blob \| history
test/CodeGen/AArch64/f16-instructions.ll		patch \| blob \| history