From: Tim Northover <tnorthover@apple.com>
Date: Mon, 6 Feb 2017 21:56:47 +0000 (+0000)
Subject: GlobalISel: legalize G_INSERT instructions
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bfa46663d1c362d19cebd7644bdb2e6b2f7b159f;p=llvm

GlobalISel: legalize G_INSERT instructions

We don't handle all cases yet (see arm64-fallback.ll for an example), but this
is enough to cover most common C++ code so it's a good place to start.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294247 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 56c444ca46b..1b7d86b2dd4 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -95,6 +95,15 @@ private:
   void extractParts(unsigned Reg, LLT Ty, int NumParts,
                     SmallVectorImpl<unsigned> &Ops);
 
+  /// Set \p CurOp and \p EndOp to the range of G_INSERT operands that fall
+  /// inside the bit-range specified by \p DstStart and \p DstEnd. Assumes \p
+  /// CurOp is initially pointing at one of the (Reg, Offset) pairs in \p MI (or
+  /// at the end), which should be a G_INSERT instruction.
+  void findInsertionsForRange(int64_t DstStart, int64_t DstEnd,
+                              MachineInstr::mop_iterator &CurOp,
+                              MachineInstr::mop_iterator &EndOp,
+                              MachineInstr &MI);
+
   MachineIRBuilder MIRBuilder;
   MachineRegisterInfo &MRI;
 };
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6da02646250..27b6dc755a2 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -119,6 +119,17 @@ LegalizerHelper::libcall(MachineInstr &MI) {
   }
 }
 
+void LegalizerHelper::findInsertionsForRange(
+    int64_t DstStart, int64_t DstEnd, MachineInstr::mop_iterator &CurOp,
+    MachineInstr::mop_iterator &EndOp, MachineInstr &MI) {
+  while (CurOp != MI.operands_end() && std::next(CurOp)->getImm() < DstStart)
+    CurOp += 2;
+
+  EndOp = CurOp;
+  while (EndOp != MI.operands_end() && std::next(EndOp)->getImm() < DstEnd)
+    EndOp += 2;
+}
+
 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                               unsigned TypeIdx,
                                                               LLT NarrowTy) {
@@ -161,6 +172,65 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_INSERT: {
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
+    unsigned NarrowSize = NarrowTy.getSizeInBits();
+    int NumParts =
+        MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+
+    SmallVector<unsigned, 2> SrcRegs, DstRegs;
+    SmallVector<uint64_t, 2> Indexes;
+    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+    MachineInstr::mop_iterator CurOp = MI.operands_begin() + 2, EndOp;
+    for (int i = 0; i < NumParts; ++i) {
+      unsigned DstStart = i * NarrowSize;
+      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+      Indexes.push_back(DstStart);
+
+      findInsertionsForRange(DstStart, DstStart + NarrowSize, CurOp, EndOp, MI);
+
+      if (CurOp == EndOp) {
+        // No part of the insert affects this subregister, forward the original.
+        DstRegs.push_back(SrcRegs[i]);
+        continue;
+      } else if (MRI.getType(CurOp->getReg()) == NarrowTy &&
+                 std::next(CurOp)->getImm() == DstStart) {
+        // The entire subregister is defined by this insert, forward the new
+        // value.
+        DstRegs.push_back(CurOp->getReg());
+        continue;
+      }
+
+      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_INSERT)
+        .addDef(DstReg)
+        .addUse(SrcRegs[i]);
+
+      for (; CurOp != EndOp; CurOp += 2) {
+        unsigned Reg = CurOp->getReg();
+        uint64_t Offset = std::next(CurOp)->getImm() - DstStart;
+
+        // Make sure we don't have a cross-register insert.
+        if (Offset + MRI.getType(Reg).getSizeInBits() > NarrowSize) {
+          // FIXME: we should handle this case, though it's unlikely to be
+          // common given ABI-related layout restrictions.
+          return UnableToLegalize;
+        }
+
+        MIB.addUse(Reg);
+        MIB.addImm(Offset);
+      }
+
+      DstRegs.push_back(DstReg);
+    }
+
+    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+    MIRBuilder.buildSequence(MI.getOperand(0).getReg(), DstRegs, Indexes);
+    MI.eraseFromParent();
+    return Legalized;
+  }
   case TargetOpcode::G_LOAD: {
     unsigned NarrowSize = NarrowTy.getSizeInBits();
     int NumParts =
@@ -309,6 +379,26 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_INSERT: {
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
+    unsigned Src = MI.getOperand(1).getReg();
+    unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
+    MIRBuilder.buildAnyExt(SrcExt, Src);
+
+    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+    auto MIB = MIRBuilder.buildInsert(DstExt, SrcExt, MI.getOperand(2).getReg(),
+                                      MI.getOperand(3).getImm());
+    for (unsigned OpNum = 4; OpNum < MI.getNumOperands(); OpNum += 2) {
+      MIB.addReg(MI.getOperand(OpNum).getReg());
+      MIB.addImm(MI.getOperand(OpNum + 1).getImm());
+    }
+
+    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+    MI.eraseFromParent();
+    return Legalized;
+  }
   case TargetOpcode::G_LOAD: {
     assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
                WideTy.getSizeInBits() &&
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index e49662075ed..e757f2587ac 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -41,6 +41,7 @@ LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
   DefaultActions[TargetOpcode::G_STORE] = NarrowScalar;
 
   DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
+  DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar;
 }
 
 void LegalizerInfo::computeTables() {
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 86037c97731..838cb673b1a 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -78,6 +78,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
   setAction({G_FREM, s32}, Libcall);
   setAction({G_FREM, s64}, Libcall);
 
+  // FIXME: what should we do about G_INSERTs with more than one source value?
+  // For now the default of not specifying means we'll fall back.
+  for (auto Ty : {s32, s64}) {
+    setAction({G_INSERT, Ty}, Legal);
+    setAction({G_INSERT, 1, Ty}, Legal);
+  }
+  for (auto Ty : {s1, s8, s16}) {
+    setAction({G_INSERT, Ty}, WidenScalar);
+    // FIXME: Can't widen the sources because that violates the constraints on
+    // G_INSERT (It seems entirely reasonable that inputs shouldn't overlap).
+  }
+
   for (unsigned MemOp : {G_LOAD, G_STORE}) {
     for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
       setAction({MemOp, Ty}, Legal);
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index e4a2bb2dbf5..84f2a1c6e70 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -78,7 +78,7 @@ define void @sequence_mapping([2 x i64] %in) {
   ; Legalizer was asserting when it enountered an unexpected default action.
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for legal_default
 ; FALLBACK-WITH-REPORT-LABEL: legal_default:
-define void @legal_default(i64 %in) {
-  insertvalue [2 x i64] undef, i64 %in, 0
+define void @legal_default([8 x i8] %in) {
+  insertvalue { [4 x i8], [8 x i8], [4 x i8] } undef, [8 x i8] %in, 1
   ret void
 }
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir b/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
new file mode 100644
index 00000000000..9e1b5084d1e
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
@@ -0,0 +1,100 @@
+# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_inserts_1() { ret void }
+  define void @test_inserts_2() { ret void }
+  define void @test_inserts_3() { ret void }
+  define void @test_inserts_4() { ret void }
+...
+
+---
+name:            test_inserts_1
+body: |
+  bb.0:
+    liveins: %w0
+
+      ; Low part of insertion wipes out the old register entirely, so %0 gets
+      ; forwarded to the G_STORE. Hi part is unchanged so (split) G_LOAD gets
+      ; forwarded.
+    ; CHECK-LABEL: name: test_inserts_1
+    ; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD
+    ; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD
+    ; CHECK: G_STORE %0(s64)
+    ; CHECK: G_STORE [[HI]]
+    %0:_(s64) = COPY %x0
+    %1:_(s32) = COPY %w1
+    %2:_(p0) = COPY %x2
+    %3:_(s128) = G_LOAD %2(p0) :: (load 16)
+    %4:_(s128) = G_INSERT %3(s128), %0(s64), 0
+    G_STORE %4(s128), %2(p0) :: (store 16)
+    RET_ReallyLR
+...
+
+---
+name:            test_inserts_2
+body: |
+  bb.0:
+    liveins: %w0
+
+      ; Low insertion wipes out the old register entirely, so %0 gets forwarded
+      ; to the G_STORE again. Second insertion is real.
+    ; CHECK-LABEL: name: test_inserts_2
+    ; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD
+    ; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD
+    ; CHECK: [[NEWHI:%[0-9]+]](s64) = G_INSERT [[HI]](s64), %1(s32), 0
+    ; CHECK: G_STORE %0(s64)
+    ; CHECK: G_STORE [[NEWHI]]
+    %0:_(s64) = COPY %x0
+    %1:_(s32) = COPY %w1
+    %2:_(p0) = COPY %x2
+    %3:_(s128) = G_LOAD %2(p0) :: (load 16)
+    %4:_(s128) = G_INSERT %3(s128), %0(s64), 0
+    %5:_(s128) = G_INSERT %4(s128), %1(s32), 64
+    G_STORE %5(s128), %2(p0) :: (store 16)
+    RET_ReallyLR
+...
+
+---
+name:            test_inserts_3
+body: |
+  bb.0:
+    liveins: %w0
+
+      ; I'm not entirely convinced inserting a p0 into an s64 is valid, but it's
+      ; certainly better than the alternative of directly forwarding the value
+      ; which would cause a nasty type mismatch.
+    ; CHECK-LABEL: name: test_inserts_3
+    ; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD
+    ; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD
+    ; CHECK: [[NEWLO:%[0-9]+]](s64) = G_INSERT [[LO]](s64), %0(p0), 0
+    ; CHECK: G_STORE [[NEWLO]](s64)
+    ; CHECK: G_STORE [[HI]]
+    %0:_(p0) = COPY %x0
+    %1:_(s32) = COPY %w1
+    %2:_(p0) = COPY %x2
+    %3:_(s128) = G_LOAD %2(p0) :: (load 16)
+    %4:_(s128) = G_INSERT %3(s128), %0(p0), 0
+    G_STORE %4(s128), %2(p0) :: (store 16)
+    RET_ReallyLR
+...
+
+---
+name:            test_inserts_4
+body: |
+  bb.0:
+    liveins: %w0
+
+      ; A narrow insert gets surrounded by a G_ANYEXT/G_TRUNC pair.
+    ; CHECK-LABEL: name: test_inserts_4
+    ; CHECK: [[VALEXT:%[0-9]+]](s32) = G_ANYEXT %1(s8)
+    ; CHECK: [[VAL:%[0-9]+]](s32) = G_INSERT [[VALEXT]](s32), %0(s1), 0
+    ; CHECK: %3(s8) = G_TRUNC [[VAL]](s32)
+    %0:_(s1) = COPY %w0
+    %1:_(s8) = COPY %w1
+    %2:_(p0) = COPY %x2
+    %3:_(s8) = G_INSERT %1(s8), %0(s1), 0
+    G_STORE %3(s8), %2(p0) :: (store 1)
+    RET_ReallyLR
+...