unsigned Reg) const {
return hasVGPRs(getRegClassForReg(MRI, Reg));
}
+
+bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC) const {
+ unsigned SrcSize = SrcRC->getSize();
+ unsigned DstSize = DstRC->getSize();
+ unsigned NewSize = NewRC->getSize();
+
+ // Do not increase size of registers beyond dword, we would need to allocate
+ // adjacent registers and constraint regalloc more than needed.
+
+ // Always allow dword coalescing.
+ if (SrcSize <= 4 || DstSize <= 4)
+ return true;
+
+ return NewSize <= DstSize || NewSize <= SrcSize;
+}
; XVI-NOT: v_cvt_f32_f16
; GCN: buffer_load_dwordx2 v{{\[}}[[IN_LO:[0-9]+]]:[[IN_HI:[0-9]+]]
-; VI: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
-; GCN: v_cvt_f32_f16_e32 [[Z32:v[0-9]+]], v[[IN_HI]]
-; GCN: v_cvt_f32_f16_e32 [[X32:v[0-9]+]], v[[IN_LO]]
-; SI: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
-; GCN: v_cvt_f32_f16_e32 [[Y32:v[0-9]+]], [[Y16]]
-
-; GCN: v_cvt_f64_f32_e32 [[Z:v\[[0-9]+:[0-9]+\]]], [[Z32]]
-; GCN: v_cvt_f64_f32_e32 v{{\[}}[[XLO:[0-9]+]]:{{[0-9]+}}], [[X32]]
-; GCN: v_cvt_f64_f32_e32 v[{{[0-9]+}}:[[YHI:[0-9]+]]{{\]}}, [[Y32]]
+; VI-DAG: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
+; GCN-DAG: v_cvt_f32_f16_e32 [[Z32:v[0-9]+]], v[[IN_HI]]
+; GCN-DAG: v_cvt_f32_f16_e32 [[X32:v[0-9]+]], v[[IN_LO]]
+; SI: v_lshrrev_b32_e32 [[Y16:v[0-9]+]], 16, v[[IN_LO]]
+; GCN-DAG: v_cvt_f32_f16_e32 [[Y32:v[0-9]+]], [[Y16]]
+
+; GCN-DAG: v_cvt_f64_f32_e32 [[Z:v\[[0-9]+:[0-9]+\]]], [[Z32]]
+; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[XLO:[0-9]+]]:{{[0-9]+}}], [[X32]]
+; GCN-DAG: v_cvt_f64_f32_e32 v[{{[0-9]+}}:[[YHI:[0-9]+]]{{\]}}, [[Y32]]
; GCN-NOT: v_cvt_f64_f32_e32
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[XLO]]:[[YHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
--- /dev/null
+# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -o - %s | FileCheck %s
+
+# Check that coalescer does not create wider register tuple than in source
+
+# CHECK: - { id: 2, class: vreg_64 }
+# CHECK: - { id: 3, class: vreg_64 }
+# CHECK: - { id: 4, class: vreg_64 }
+# CHECK: - { id: 5, class: vreg_96 }
+# CHECK: - { id: 6, class: vreg_96 }
+# CHECK: - { id: 7, class: vreg_128 }
+# CHECK: - { id: 8, class: vreg_128 }
+# No more registers shall be defined
+# CHECK-NEXT: liveins:
+# CHECK: FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %4,
+# CHECK: FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %6,
+
+---
+name: main
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 1, class: sreg_32_xm0, preferred-register: '%1' }
+ - { id: 2, class: vreg_64, preferred-register: '%2' }
+ - { id: 3, class: vreg_64 }
+ - { id: 4, class: vreg_64 }
+ - { id: 5, class: vreg_64 }
+ - { id: 6, class: vreg_96 }
+ - { id: 7, class: vreg_96 }
+ - { id: 8, class: vreg_128 }
+ - { id: 9, class: vreg_128 }
+liveins:
+ - { reg: '%sgpr6', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %sgpr0, %vgpr0_vgpr1
+
+ %3 = IMPLICIT_DEF
+ undef %4.sub0 = COPY %sgpr0
+ %4.sub1 = COPY %3.sub0
+ undef %5.sub0 = COPY %4.sub1
+ %5.sub1 = COPY %4.sub0
+ FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, 0, implicit %exec, implicit %flat_scr
+
+ %6 = IMPLICIT_DEF
+ undef %7.sub0_sub1 = COPY %6
+ %7.sub2 = COPY %3.sub0
+ FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, 0, implicit %exec, implicit %flat_scr
+
+ %8 = IMPLICIT_DEF
+ undef %9.sub0_sub1_sub2 = COPY %8
+ %9.sub3 = COPY %3.sub0
+ FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, 0, implicit %exec, implicit %flat_scr
+...