[AArch64][GlobalISel] Select arithmetic extended register patterns

author Jessica Paquette <jpaquette@apple.com>

Thu, 29 Aug 2019 21:53:58 +0000 (21:53 +0000)

committer Jessica Paquette <jpaquette@apple.com>

Thu, 29 Aug 2019 21:53:58 +0000 (21:53 +0000)
author Jessica Paquette <jpaquette@apple.com>
Thu, 29 Aug 2019 21:53:58 +0000 (21:53 +0000)
committer Jessica Paquette <jpaquette@apple.com>
Thu, 29 Aug 2019 21:53:58 +0000 (21:53 +0000)
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td

index fc3b0efb43d15fd57939446ef1232734ebd15de6..19b10fc0806f447cd43c6d44517cd31a1cc5652c 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -944,6 +944,21 @@ class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>,
    let MIOperandInfo = (ops GPR32, arith_extend64);
  }
  
+// Named instantiations of the arithmetic extended-register operand classes.
+// Each one is paired with a GlobalISel complex operand matcher that calls
+// selectArithExtendedRegister and is declared equivalent to the named operand.
+def arith_extended_reg32_i32 : arith_extended_reg32<i32>;
+def gi_arith_extended_reg32_i32 :
+    GIComplexOperandMatcher<s32, "selectArithExtendedRegister">,
+    GIComplexPatternEquiv<arith_extended_reg32_i32>;
+
+def arith_extended_reg32_i64 : arith_extended_reg32<i64>;
+def gi_arith_extended_reg32_i64 :
+    GIComplexOperandMatcher<s64, "selectArithExtendedRegister">,
+    GIComplexPatternEquiv<arith_extended_reg32_i64>;
+
+def arith_extended_reg32to64_i64 : arith_extended_reg32to64<i64>;
+def gi_arith_extended_reg32to64_i64 :
+    GIComplexOperandMatcher<s64, "selectArithExtendedRegister">,
+    GIComplexPatternEquiv<arith_extended_reg32to64_i64>;
+
  // Floating-point immediate.
  def fpimm16 : Operand<f16>,
                FPImmLeaf<f16, [{
@@ -2215,11 +2230,11 @@ multiclass AddSub<bit isSub, string mnemonic, string alias,
    // Add/Subtract extended register
    let AddedComplexity = 1, hasSideEffects = 0 in {
    def Wrx : BaseAddSubEReg<isSub, 0, GPR32sp, GPR32sp,
-                           arith_extended_reg32<i32>, mnemonic, OpNode> {
+                           arith_extended_reg32_i32, mnemonic, OpNode> {
      let Inst{31} = 0;
    }
    def Xrx : BaseAddSubEReg<isSub, 0, GPR64sp, GPR64sp,
-                           arith_extended_reg32to64<i64>, mnemonic, OpNode> {
+                           arith_extended_reg32to64_i64, mnemonic, OpNode> {
      let Inst{31} = 1;
    }
    }
@@ -2289,11 +2304,11 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
    // Add/Subtract extended register
    let AddedComplexity = 1 in {
    def Wrx : BaseAddSubEReg<isSub, 1, GPR32, GPR32sp,
-                           arith_extended_reg32<i32>, mnemonic, OpNode> {
+                           arith_extended_reg32_i32, mnemonic, OpNode> {
      let Inst{31} = 0;
    }
    def Xrx : BaseAddSubEReg<isSub, 1, GPR64, GPR64sp,
-                           arith_extended_reg32<i64>, mnemonic, OpNode> {
+                           arith_extended_reg32_i64, mnemonic, OpNode> {
      let Inst{31} = 1;
    }
    }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td

index dddfaab7f71864c146bd63180bf6f6cd10109a5e..5128fc6bd1d2b66b842658d83219db066279fd0f 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1035,10 +1035,10 @@ def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
  def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
            (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
  let AddedComplexity = 1 in {
-def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
-          (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
-def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
-          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
+def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
+          (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
+def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
+          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
  }
  
  // Because of the immediate format for add/sub-imm instructions, the
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp

index 1865e577b21cb7227f376ae3ba5d9075c6e4375c..8503a0dbb062a94815d9fe033666ce0c68230429 100644 (file)
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -227,6 +227,14 @@ private:
      return selectShiftedRegister(Root);
    }
  
+  /// Instructions that accept extend modifiers like UXTW expect the register
+  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
+  /// subregister copy if necessary. Return either ExtReg, or the result of the
+  /// new copy.
+  Register narrowExtendRegIfNeeded(Register ExtReg,
+                                             MachineIRBuilder &MIB) const;
+  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
+
    void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
    void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
    void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;
@@ -246,6 +254,11 @@ private:
    /// Return true if \p MI is a load or store of \p NumBytes bytes.
    bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
  
+  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
+  /// register zeroed out. In other words, the result of MI has been explicitly
+  /// zero extended.
+  bool isDef32(const MachineInstr &MI) const;
+
    const AArch64TargetMachine &TM;
    const AArch64Subtarget &STI;
    const AArch64InstrInfo &TII;
@@ -363,7 +376,7 @@ static bool getSubRegForClass(const TargetRegisterClass *RC,
      SubReg = AArch64::hsub;
      break;
    case 32:
-    if (RC == &AArch64::GPR32RegClass)
+    if (RC != &AArch64::FPR32RegClass)
        SubReg = AArch64::sub_32;
      else
        SubReg = AArch64::ssub;
@@ -676,35 +689,35 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
        return false;
      }
  
-    // Is this a cross-bank copy?
-    if (DstRegBank.getID() != SrcRegBank.getID()) {
-      // If we're doing a cross-bank copy on different-sized registers, we need
-      // to do a bit more work.
-      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
-      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
-
-      if (SrcSize > DstSize) {
-        // We're doing a cross-bank copy into a smaller register. We need a
-        // subregister copy. First, get a register class that's on the same bank
-        // as the destination, but the same size as the source.
-        const TargetRegisterClass *SubregRC =
-            getMinClassForRegBank(DstRegBank, SrcSize, true);
-        assert(SubregRC && "Didn't get a register class for subreg?");
-
-        // Get the appropriate subregister for the destination.
-        unsigned SubReg = 0;
-        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
-          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
-          return false;
-        }
-
-        // Now, insert a subregister copy using the new register class.
-        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
-        return CheckCopy();
+    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
+    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
+
+    // If the source register is wider than the destination, we need to do a
+    // bit more work, whether or not the copy crosses register banks.
+    if (SrcSize > DstSize) {
+      // We're copying into a smaller register. We need a subregister copy.
+      // First, get a register class that's on the same bank as the
+      // destination, but the same size as the source.
+      const TargetRegisterClass *SubregRC =
+          getMinClassForRegBank(DstRegBank, SrcSize, true);
+      assert(SubregRC && "Didn't get a register class for subreg?");
+
+      // Get the appropriate subregister for the destination.
+      unsigned SubReg = 0;
+      if (!getSubRegForClass(DstRC, TRI, SubReg)) {
+        LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
+        return false;
        }
  
-      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
-               SrcSize == 16) {
+      // Now, insert a subregister copy using the new register class.
+      selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
+      return CheckCopy();
+    }
+
+    // Is this a cross-bank copy?
+    if (DstRegBank.getID() != SrcRegBank.getID()) {
+      if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
+          SrcSize == 16) {
          // Special case for FPR16 to GPR32.
          // FIXME: This can probably be generalized like the above case.
          Register PromoteReg =
@@ -4472,6 +4485,146 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
             [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
  }
  
+/// Get the correct ShiftExtendType for an extend instruction.
+///
+/// Recognizes G_SEXT and G_SEXT_INREG (signed extends), G_ZEXT and G_ANYEXT
+/// (both mapped to unsigned extends), and a G_AND whose RHS is a constant
+/// 0xFF, 0xFFFF or 0xFFFFFFFF mask. Returns AArch64_AM::InvalidShiftExtend
+/// for any other instruction or any unsupported width/mask.
+static AArch64_AM::ShiftExtendType
+getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  unsigned Opc = MI.getOpcode();
+
+  // Handle explicit extend instructions first.
+  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
+    // G_SEXT extends from the width of its source type. G_SEXT_INREG has a
+    // source of the same type as its result, so the width being extended from
+    // is the immediate in operand 2 rather than the type of operand 1.
+    unsigned Size;
+    if (Opc == TargetOpcode::G_SEXT)
+      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    else
+      Size = MI.getOperand(2).getImm();
+    assert(Size != 64 && "Extend from 64 bits?");
+    switch (Size) {
+    case 8:
+      return AArch64_AM::SXTB;
+    case 16:
+      return AArch64_AM::SXTH;
+    case 32:
+      return AArch64_AM::SXTW;
+    default:
+      return AArch64_AM::InvalidShiftExtend;
+    }
+  }
+
+  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
+    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    assert(Size != 64 && "Extend from 64 bits?");
+    switch (Size) {
+    case 8:
+      return AArch64_AM::UXTB;
+    case 16:
+      return AArch64_AM::UXTH;
+    case 32:
+      return AArch64_AM::UXTW;
+    default:
+      return AArch64_AM::InvalidShiftExtend;
+    }
+  }
+
+  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
+  // on the RHS.
+  if (Opc != TargetOpcode::G_AND)
+    return AArch64_AM::InvalidShiftExtend;
+
+  Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
+  if (!MaybeAndMask)
+    return AArch64_AM::InvalidShiftExtend;
+  uint64_t AndMask = *MaybeAndMask;
+  // Only masks that keep exactly the low 8, 16 or 32 bits act as a zero
+  // extend.
+  switch (AndMask) {
+  default:
+    return AArch64_AM::InvalidShiftExtend;
+  case 0xFF:
+    return AArch64_AM::UXTB;
+  case 0xFFFF:
+    return AArch64_AM::UXTH;
+  case 0xFFFFFFFF:
+    return AArch64_AM::UXTW;
+  }
+}
+
+/// Return a register usable as the GPR32 operand of an extended-register
+/// instruction: \p ExtReg itself when its type is already 32 bits wide,
+/// otherwise the result of a freshly built and selected copy into a GPR32.
+Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
+    Register ExtReg, MachineIRBuilder &MIB) const {
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  const bool AlreadyS32 = MRI.getType(ExtReg).getSizeInBits() == 32;
+  if (AlreadyS32)
+    return ExtReg;
+
+  // Build a generic copy into a new GPR32 virtual register, then hand it to
+  // selectCopy so it is selected right away.
+  Register GPR32Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+  auto NarrowCopy = MIB.buildCopy({GPR32Reg}, {ExtReg});
+  selectCopy(*NarrowCopy, TII, MRI, TRI, RBI);
+  return NarrowCopy.getReg(0);
+}
+
+/// Match an "extended register" operand: an extend, optionally followed by a
+/// left shift by a constant of at most 4, folded into a single operand. On
+/// success, renders the (possibly narrowed) register being extended followed
+/// by the encoded extend/shift immediate.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectArithExtendedRegister(
+    MachineOperand &Root) const {
+  if (!Root.isReg())
+    return None;
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
+  if (!RootDef || !isWorthFoldingIntoExtendedReg(*RootDef, MRI))
+    return None;
+
+  // Unless the root is a shift, the root itself must be the extend and the
+  // shift amount is zero.
+  uint64_t ShiftAmt = 0;
+  MachineInstr *ExtendMI = RootDef;
+  const bool HasShift = RootDef->getOpcode() == TargetOpcode::G_SHL;
+  if (HasShift) {
+    // The shift amount must be a constant no larger than 4.
+    Optional<uint64_t> MaybeShiftAmt = getImmedFromMO(RootDef->getOperand(2));
+    if (!MaybeShiftAmt)
+      return None;
+    ShiftAmt = *MaybeShiftAmt;
+    if (ShiftAmt > 4)
+      return None;
+
+    // The value being shifted must itself be produced by an extend.
+    ExtendMI = getDefIgnoringCopies(RootDef->getOperand(1).getReg(), MRI);
+    if (!ExtendMI)
+      return None;
+  }
+
+  AArch64_AM::ShiftExtendType Ext = getExtendTypeForInst(*ExtendMI, MRI);
+  if (Ext == AArch64_AM::InvalidShiftExtend)
+    return None;
+  Register ExtReg = ExtendMI->getOperand(1).getReg();
+
+  // With no shift involved: if a 32 bit instruction which zeroes out the high
+  // half of a register feeds the extend, the zero extend is implicit and free,
+  // so don't fold it.
+  // FIXME: We actually emit the extend right now even though we don't have
+  // to.
+  if (!HasShift && Ext == AArch64_AM::UXTW &&
+      MRI.getType(ExtReg).getSizeInBits() == 32) {
+    MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
+    if (ExtInst && isDef32(*ExtInst))
+      return None;
+  }
+
+  // The operand has to be a GPR32; copy ExtReg into one first if its type is
+  // not already 32 bits wide.
+  MachineIRBuilder Builder(*RootDef);
+  ExtReg = narrowExtendRegIfNeeded(ExtReg, Builder);
+
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
+           [=](MachineInstrBuilder &MIB) {
+             MIB.addImm(getArithExtendImm(Ext, ShiftAmt));
+           }}};
+}
+
  void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI) const {
    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
@@ -4506,6 +4659,26 @@ bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    return (*MI.memoperands_begin())->getSize() == NumBytes;
  }
  
+bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  const unsigned DefSize =
+      MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+  if (DefSize != 32)
+    return false;
+
+  // Truncates can be subregister copies, which leave the high bits of the
+  // 64-bit register untouched. Copies and other copy-like instructions can be
+  // fed by truncates, or could themselves be lowered as subregister copies.
+  // Every other opcode is treated as zeroing the high half.
+  const unsigned Opc = MI.getOpcode();
+  const bool MayKeepHighBits =
+      Opc == TargetOpcode::COPY || Opc == TargetOpcode::G_BITCAST ||
+      Opc == TargetOpcode::G_TRUNC || Opc == TargetOpcode::G_PHI;
+  return !MayKeepHighBits;
+}
+
  namespace llvm {
  InstructionSelector *
  createAArch64InstructionSelector(const AArch64TargetMachine &TM,
diff --git a/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir b/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir

new file mode 100644 (file)

index 0000000..3107750
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir
@@ -0,0 +1,634 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            add_sext_s32_to_s64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x2
+    ; CHECK-LABEL: name: add_sext_s32_to_s64
+    ; CHECK: liveins: $w1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr64sp = COPY $x2
+    ; CHECK: %res:gpr64sp = ADDXrx %add_lhs, [[COPY]], 48
+    ; CHECK: $x3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $x3
+    %1:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %1(s32)
+    %add_lhs:gpr(s64) = COPY $x2
+    %res:gpr(s64) = G_ADD %add_lhs, %ext
+    $x3 = COPY %res(s64)
+    RET_ReallyLR implicit $x3
+...
+---
+name:            add_and_s32_to_s64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x1, $x2
+    ; CHECK-LABEL: name: add_and_s32_to_s64
+    ; CHECK: liveins: $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY $x1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+    ; CHECK: %add_lhs:gpr64sp = COPY $x2
+    ; CHECK: %res:gpr64sp = ADDXrx %add_lhs, [[COPY2]], 16
+    ; CHECK: $x3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $x3
+    %1:gpr(s64) = COPY $x1
+    %mask:gpr(s64) = G_CONSTANT i64 4294967295 ; 0xffffffff
+    %ext:gpr(s64) = G_AND %1(s64), %mask
+    %add_lhs:gpr(s64) = COPY $x2
+    %res:gpr(s64) = G_ADD %add_lhs, %ext
+    $x3 = COPY %res(s64)
+    RET_ReallyLR implicit $x3
+...
+---
+name:            add_sext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_sext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 40
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s16)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_zext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_zext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 8
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ZEXT %1(s16)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_anyext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_anyext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 8
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ANYEXT %1(s16)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_and_s16_to_s32_uxtb
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_and_s16_to_s32_uxtb
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, [[COPY]], 0
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_and_s16_to_s32_uxth
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_and_s16_to_s32_uxth
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, [[COPY]], 8
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 65535 ; 0xffff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_sext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_sext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 32
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s8)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_zext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_zext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 0
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ZEXT %1(s8)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_anyext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_anyext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 0
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ANYEXT %1(s8)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_sext_with_shl
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_sext_with_shl
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 43
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s16)
+    %imm:gpr(s32) = G_CONSTANT i32 3
+    %shl:gpr(s32) = G_SHL %ext, %imm
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %shl
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_and_with_shl
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_and_with_shl
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, [[COPY]], 3
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %imm:gpr(s32) = G_CONSTANT i32 3
+    %shl:gpr(s32) = G_SHL %ext, %imm
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %shl
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            dont_fold_invalid_mask
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    ; Check that we only fold when we have a supported AND mask.
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: dont_fold_invalid_mask
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %mask:gpr32 = MOVi32imm 42
+    ; CHECK: %ext:gpr32 = ANDWrr [[COPY]], %mask
+    ; CHECK: %add_lhs:gpr32 = COPY $w2
+    ; CHECK: %res:gpr32 = ADDWrr %add_lhs, %ext
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 42
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            dont_fold_invalid_shl
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: dont_fold_invalid_shl
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %ext:gpr32 = SBFMWri %wide_1, 0, 15
+    ; CHECK: %add_lhs:gpr32 = COPY $w2
+    ; CHECK: %res:gpr32 = ADDWrs %add_lhs, %ext, 5
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s16)
+    %imm:gpr(s32) = G_CONSTANT i32 5
+    %shl:gpr(s32) = G_SHL %ext, %imm
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %shl
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_sext_s32_to_s64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x2
+    ; CHECK-LABEL: name: sub_sext_s32_to_s64
+    ; CHECK: liveins: $w1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr64sp = COPY $x2
+    ; CHECK: %res:gpr64 = SUBSXrx %sub_lhs, [[COPY]], 48, implicit-def $nzcv
+    ; CHECK: $x3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $x3
+    %1:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %1(s32)
+    %sub_lhs:gpr(s64) = COPY $x2
+    %res:gpr(s64) = G_SUB %sub_lhs, %ext
+    $x3 = COPY %res(s64)
+    RET_ReallyLR implicit $x3
+...
+---
+name:            sub_sext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_sext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 40, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s16)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_zext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_zext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 8, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ZEXT %1(s16)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_anyext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_anyext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 8, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ANYEXT %1(s16)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_and_s16_to_s32_uxtb
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_and_s16_to_s32_uxtb
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, [[COPY]], 0, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_and_s16_to_s32_uxth
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_and_s16_to_s32_uxth
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, [[COPY]], 8, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 65535 ; 0xffff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_sext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_sext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 32, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s8)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_zext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_zext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 0, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ZEXT %1(s8)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_anyext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_anyext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 0, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ANYEXT %1(s8)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+---
+...
+---
+name:            sub_sext_with_shl
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_sext_with_shl
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 43, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s16)
+    %imm:gpr(s32) = G_CONSTANT i32 3
+    %shl:gpr(s32) = G_SHL %ext, %imm
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %shl
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_and_with_shl
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_and_with_shl
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, [[COPY]], 3, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %imm:gpr(s32) = G_CONSTANT i32 3
+    %shl:gpr(s32) = G_SHL %ext, %imm
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %shl
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll

index df1b9fe7855fb7903ea68191b4c76a19c8c0affa..573ce9b6f33ac91ccdb013bc86abbfe0c1411eb8 100644 (file)
--- a/test/CodeGen/AArch64/addsub_ext.ll
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -1,4 +1,9 @@
-; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -global-isel -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s --check-prefix=GISEL
+
+; FIXME: GISel only knows how to handle explicit G_SEXT instructions, so when
+; G_SEXT is lowered to anything else, it won't be folded into an sxt* extend.
+; FIXME: GISel doesn't currently handle folding the addressing mode into a cmp.
  
  @var8 = global i8 0
  @var16 = global i16 0
@@ -7,6 +12,7 @@
  
  define void @addsub_i8rhs() minsize {
  ; CHECK-LABEL: addsub_i8rhs:
+; GISEL-LABEL: addsub_i8rhs:
      %val8_tmp = load i8, i8* @var8
      %lhs32 = load i32, i32* @var32
      %lhs64 = load i64, i64* @var64
@@ -20,23 +26,26 @@ define void @addsub_i8rhs() minsize {
      %res32_zext = add i32 %lhs32, %rhs32_zext
      store volatile i32 %res32_zext, i32* @var32
  ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
  
     %rhs32_zext_shift = shl i32 %rhs32_zext, 3
     %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
     store volatile i32 %res32_zext_shift, i32* @var32
  ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
-
+; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
  
  ; Zero-extending to 64-bits
      %rhs64_zext = zext i8 %val8 to i64
      %res64_zext = add i64 %lhs64, %rhs64_zext
      store volatile i64 %res64_zext, i64* @var64
  ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
  
     %rhs64_zext_shift = shl i64 %rhs64_zext, 1
     %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
     store volatile i64 %res64_zext_shift, i64* @var64
  ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
  
  ; Sign-extending to 32-bits
      %rhs32_sext = sext i8 %val8 to i32
@@ -95,23 +104,26 @@ define void @sub_i8rhs() minsize {
      %res32_zext = sub i32 %lhs32, %rhs32_zext
      store volatile i32 %res32_zext, i32* @var32
  ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
  
     %rhs32_zext_shift = shl i32 %rhs32_zext, 3
     %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
     store volatile i32 %res32_zext_shift, i32* @var32
  ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
-
+; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
  
  ; Zero-extending to 64-bits
      %rhs64_zext = zext i8 %val8 to i64
      %res64_zext = sub i64 %lhs64, %rhs64_zext
      store volatile i64 %res64_zext, i64* @var64
  ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
  
     %rhs64_zext_shift = shl i64 %rhs64_zext, 1
     %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
     store volatile i64 %res64_zext_shift, i64* @var64
  ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
  
  ; Sign-extending to 32-bits
      %rhs32_sext = sext i8 %val8 to i32
@@ -140,6 +152,7 @@ define void @sub_i8rhs() minsize {
  
  define void @addsub_i16rhs() minsize {
  ; CHECK-LABEL: addsub_i16rhs:
+; GISEL-LABEL: addsub_i16rhs:
      %val16_tmp = load i16, i16* @var16
      %lhs32 = load i32, i32* @var32
      %lhs64 = load i64, i64* @var64
@@ -153,23 +166,26 @@ define void @addsub_i16rhs() minsize {
      %res32_zext = add i32 %lhs32, %rhs32_zext
      store volatile i32 %res32_zext, i32* @var32
  ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
+; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
  
     %rhs32_zext_shift = shl i32 %rhs32_zext, 3
     %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
     store volatile i32 %res32_zext_shift, i32* @var32
  ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
-
+; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
  
  ; Zero-extending to 64-bits
      %rhs64_zext = zext i16 %val16 to i64
      %res64_zext = add i64 %lhs64, %rhs64_zext
      store volatile i64 %res64_zext, i64* @var64
  ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
  
     %rhs64_zext_shift = shl i64 %rhs64_zext, 1
     %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
     store volatile i64 %res64_zext_shift, i64* @var64
  ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
  
  ; Sign-extending to 32-bits
      %rhs32_sext = sext i16 %val16 to i32
@@ -215,6 +231,7 @@ end:
  
  define void @sub_i16rhs() minsize {
  ; CHECK-LABEL: sub_i16rhs:
+; GISEL-LABEL: sub_i16rhs:
      %val16_tmp = load i16, i16* @var16
      %lhs32 = load i32, i32* @var32
      %lhs64 = load i64, i64* @var64
@@ -228,23 +245,26 @@ define void @sub_i16rhs() minsize {
      %res32_zext = sub i32 %lhs32, %rhs32_zext
      store volatile i32 %res32_zext, i32* @var32
  ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
+; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
  
     %rhs32_zext_shift = shl i32 %rhs32_zext, 3
     %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
     store volatile i32 %res32_zext_shift, i32* @var32
  ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
-
+; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
  
  ; Zero-extending to 64-bits
      %rhs64_zext = zext i16 %val16 to i64
      %res64_zext = sub i64 %lhs64, %rhs64_zext
      store volatile i64 %res64_zext, i64* @var64
  ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
  
     %rhs64_zext_shift = shl i64 %rhs64_zext, 1
     %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
     store volatile i64 %res64_zext_shift, i64* @var64
  ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
  
  ; Sign-extending to 32-bits
      %rhs32_sext = sext i16 %val16 to i32
@@ -276,6 +296,7 @@ define void @sub_i16rhs() minsize {
  ; in the face of "add/sub (shifted register)" so I don't intend to.
  define void @addsub_i32rhs(i32 %in32) minsize {
  ; CHECK-LABEL: addsub_i32rhs:
+; GISEL-LABEL: addsub_i32rhs:
      %val32_tmp = load i32, i32* @var32
      %lhs64 = load i64, i64* @var64
  
@@ -285,22 +306,26 @@ define void @addsub_i32rhs(i32 %in32) minsize {
      %res64_zext = add i64 %lhs64, %rhs64_zext
      store volatile i64 %res64_zext, i64* @var64
  ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
  
      %rhs64_zext2 = zext i32 %val32 to i64
      %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
      %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
      store volatile i64 %res64_zext_shift, i64* @var64
  ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
  
      %rhs64_sext = sext i32 %val32 to i64
      %res64_sext = add i64 %lhs64, %rhs64_sext
      store volatile i64 %res64_sext, i64* @var64
  ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
  
      %rhs64_sext_shift = shl i64 %rhs64_sext, 2
      %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
      store volatile i64 %res64_sext_shift, i64* @var64
  ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
  
      ret void
  }
@@ -316,12 +341,14 @@ define void @sub_i32rhs(i32 %in32) minsize {
      %res64_zext = sub i64 %lhs64, %rhs64_zext
      store volatile i64 %res64_zext, i64* @var64
  ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
  
      %rhs64_zext2 = zext i32 %val32 to i64
      %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
      %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
      store volatile i64 %res64_zext_shift, i64* @var64
  ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
  
      %rhs64_sext = sext i32 %val32 to i64
      %res64_sext = sub i64 %lhs64, %rhs64_sext
@@ -339,11 +366,15 @@ define void @sub_i32rhs(i32 %in32) minsize {
  ; Check that implicit zext from w reg write is used instead of uxtw form of add.
  define i64 @add_fold_uxtw(i32 %x, i64 %y) {
  ; CHECK-LABEL: add_fold_uxtw:
+; GISEL-LABEL: add_fold_uxtw:
  entry:
  ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
+; FIXME: Global ISel produces an unnecessary ubfx here.
    %m = and i32 %x, 3
    %ext = zext i32 %m to i64
  ; CHECK-NEXT: add x0, x1, x[[TMP]]
+; GISEL: add x0, x1, x[[TMP]]
    %ret = add i64 %y, %ext
    ret i64 %ret
  }
@@ -352,11 +383,14 @@ entry:
  ; form of sub and that mov WZR is folded to form a neg instruction.
  define i64 @sub_fold_uxtw_xzr(i32 %x)  {
  ; CHECK-LABEL: sub_fold_uxtw_xzr:
+; GISEL-LABEL: sub_fold_uxtw_xzr:
  entry:
  ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
    %m = and i32 %x, 3
    %ext = zext i32 %m to i64
  ; CHECK-NEXT: neg x0, x[[TMP]]
+; GISEL: negs x0, x[[TMP]]
    %ret = sub i64 0, %ext
    ret i64 %ret
  }
@@ -378,10 +412,13 @@ entry:
  ; form of add, leading to madd selection.
  define i64 @madd_fold_uxtw(i32 %x, i64 %y) {
  ; CHECK-LABEL: madd_fold_uxtw:
+; GISEL-LABEL: madd_fold_uxtw:
  entry:
  ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
    %m = and i32 %x, 3
    %ext = zext i32 %m to i64
+; GISEL: madd x0, x1, x1, x[[TMP]]
  ; CHECK-NEXT: madd x0, x1, x1, x[[TMP]]
    %mul = mul i64 %y, %y
    %ret = add i64 %mul, %ext
@@ -408,11 +445,14 @@ entry:
  ; form of add and add of -1 gets selected as sub.
  define i64 @add_imm_fold_uxtw(i32 %x) {
  ; CHECK-LABEL: add_imm_fold_uxtw:
+; GISEL-LABEL: add_imm_fold_uxtw:
  entry:
  ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
    %m = and i32 %x, 3
    %ext = zext i32 %m to i64
  ; CHECK-NEXT: sub x0, x[[TMP]], #1
+; GISEL: subs x0, x[[TMP]], #1
    %ret = add i64 %ext, -1
    ret i64 %ret
  }
@@ -421,12 +461,15 @@ entry:
  ; form of add and add lsl form gets selected.
  define i64 @add_lsl_fold_uxtw(i32 %x, i64 %y) {
  ; CHECK-LABEL: add_lsl_fold_uxtw:
+; GISEL-LABEL: add_lsl_fold_uxtw:
  entry:
  ; CHECK: orr w[[TMP:[0-9]+]], w0, #0x3
+; GISEL: orr w[[TMP:[0-9]+]], w0, #0x3
    %m = or i32 %x, 3
    %ext = zext i32 %m to i64
    %shift = shl i64 %y, 3
  ; CHECK-NEXT: add x0, x[[TMP]], x1, lsl #3
+; GISEL: add x0, x[[TMP]], x1, lsl #3
    %ret = add i64 %ext, %shift
    ret i64 %ret
  }
author	Jessica Paquette <jpaquette@apple.com>
	Thu, 29 Aug 2019 21:53:58 +0000 (21:53 +0000)
committer	Jessica Paquette <jpaquette@apple.com>
	Thu, 29 Aug 2019 21:53:58 +0000 (21:53 +0000)
lib/Target/AArch64/AArch64InstrFormats.td		patch \| blob \| history
lib/Target/AArch64/AArch64InstrInfo.td		patch \| blob \| history
lib/Target/AArch64/AArch64InstructionSelector.cpp		patch \| blob \| history
test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir	[new file with mode: 0644]	patch \| blob
test/CodeGen/AArch64/addsub_ext.ll		patch \| blob \| history