def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
}
-def load_align8_local_m0 : LoadFrag <load_glue>, LocalAddress {
- let MinAlignment = 8;
+// Non-extending load built on load_local_m0 that only matches accesses with
+// MinAlignment = 8.
+def load_align8_local_m0 : PatFrag<(ops node:$ptr),
+ (load_local_m0 node:$ptr)> {
+ let IsLoad = 1;
let IsNonExtLoad = 1;
+ let MinAlignment = 8;
}
-def load_align16_local_m0 : LoadFrag <load_glue>, LocalAddress {
- let MinAlignment = 16;
+// Non-extending load built on load_local_m0 that only matches accesses with
+// MinAlignment = 16.
+def load_align16_local_m0 : PatFrag<(ops node:$ptr),
+ (load_local_m0 node:$ptr)> {
+ let IsLoad = 1;
let IsNonExtLoad = 1;
+ let MinAlignment = 16;
}
} // End IsLoad = 1
-def atomic_load_32_local_m0 : LoadFrag<atomic_load_32_glue>, LocalAddress;
-def atomic_load_64_local_m0 : LoadFrag<atomic_load_64_glue>, LocalAddress;
+// Atomic load fragments wrapping atomic_load_32_glue / atomic_load_64_glue,
+// restricted to the address spaces in LoadAddress_local.AddrSpaces.
+let AddressSpaces = LoadAddress_local.AddrSpaces in {
+
+def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_32_glue node:$ptr)> {
+ let IsAtomic = 1;
+}
+def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_64_glue node:$ptr)> {
+ let IsAtomic = 1;
+}
+} // End let AddressSpaces = LoadAddress_local.AddrSpaces
+// ISD::STORE node (SDTStore profile) that has a chain, may store, carries a
+// memory operand, and takes an incoming glue operand.
def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;
-def atomic_store_glue : PatFrag<(ops node:$ptr, node:$val),
- (AMDGPUatomic_st_glue node:$ptr, node:$val)> {
-}
-
+// Store fragment (IsStore = 1) that forwards (val, ptr) to AMDGPUst_glue.
def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUst_glue node:$val, node:$ptr)> {
let IsStore = 1;
}
let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
-def store_glue_align8 : PatFrag<(ops node:$val, node:$ptr),
- (store_glue node:$val, node:$ptr)>, Aligned<8>;
-def store_glue_align16 : PatFrag<(ops node:$val, node:$ptr),
- (store_glue node:$val, node:$ptr)>, Aligned<16>;
-
+// Non-truncating store (IsTruncStore = 0); this change rebuilds it on
+// store_glue instead of unindexedstore_glue.
def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
- (unindexedstore_glue node:$val, node:$ptr)> {
+ (store_glue node:$val, node:$ptr)> {
let IsStore = 1;
let IsTruncStore = 0;
}
let MemoryVT = i8;
}
-
+// Store fragment over unindexedstore_glue whose memory type is i16.
def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
(unindexedstore_glue node:$val, node:$ptr)> {
let IsStore = 1;
let MemoryVT = i16;
}
+}
-// FIXME: atomic store doesn't work.
-def atomic_store_local_m0 : StoreFrag<AMDGPUatomic_st_glue>, LocalAddress;
-def store_align8_local_m0 : StoreFrag<store_glue_align8>, LocalAddress {
+// Non-truncating store built on store_local_m0 that only matches accesses
+// with MinAlignment = 16.
+def store_align16_local_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (store_local_m0 node:$value, node:$ptr)> {
+ let IsStore = 1;
let IsTruncStore = 0;
+ let MinAlignment = 16;
}
-def store_align16_local_m0 : StoreFrag<store_glue_align16>, LocalAddress {
+// Non-truncating store built on store_local_m0 that only matches accesses
+// with MinAlignment = 8.
+def store_align8_local_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (store_local_m0 node:$value, node:$ptr)> {
+ let IsStore = 1;
let IsTruncStore = 0;
+ let MinAlignment = 8;
+}
+
+// Atomic store fragments over AMDGPUatomic_st_glue with MemoryVT i32 / i64,
+// restricted to the address spaces in StoreAddress_local.AddrSpaces.
+// NOTE(review): operands are named (value, ptr) here, while the removed
+// atomic_store_glue named them (ptr, val) -- confirm the operand order
+// AMDGPUatomic_st_glue actually expects.
+let AddressSpaces = StoreAddress_local.AddrSpaces in {
+
+def atomic_store_local_32_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i32;
}
+def atomic_store_local_64_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i64;
}
+} // End let AddressSpaces = StoreAddress_local.AddrSpaces
+
def si_setcc_uniform : PatFrag <
(ops node:$lhs, node:$rhs, node:$cond),
G_STORE %1, %0 :: (store 1, align 1, addrspace 3)
...
+
+---
+
+# Addrspace 3 store of an s64 value: 8 bytes at align 4. The CHECK lines expect
+# the G_STORE to be left unselected under every prefix; GFX6/GFX7 additionally
+# expect `$m0 = S_MOV_B32 -1` before it.
+name: store_local_s64_align4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; GFX6-LABEL: name: store_local_s64_align4
+ ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX7-LABEL: name: store_local_s64_align4
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX9-LABEL: name: store_local_s64_align4
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX9: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = COPY $vgpr2
+ G_STORE %0, %1 :: (store 8, align 4, addrspace 3)
+
+...
+
+---
+
+# Addrspace 3 store of a p1 value: 8 bytes at align 4. The CHECK lines expect
+# the G_STORE to be left unselected under every prefix; GFX6/GFX7 additionally
+# expect `$m0 = S_MOV_B32 -1` before it.
+name: store_local_p1_align4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; GFX6-LABEL: name: store_local_p1_align4
+ ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX7-LABEL: name: store_local_p1_align4
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX9-LABEL: name: store_local_p1_align4
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = COPY $vgpr2
+ G_STORE %0, %1 :: (store 8, align 4, addrspace 3)
+
+...
+
+---
+
+# Addrspace 3 store of a <2 x s32> value: 8 bytes at align 4. The CHECK lines
+# expect the G_STORE to be left unselected under every prefix; GFX6/GFX7
+# additionally expect `$m0 = S_MOV_B32 -1` before it.
+name: store_local_v2s32_align4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; GFX6-LABEL: name: store_local_v2s32_align4
+ ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX7-LABEL: name: store_local_v2s32_align4
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX9-LABEL: name: store_local_v2s32_align4
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = COPY $vgpr2
+ G_STORE %0, %1 :: (store 8, align 4, addrspace 3)
+
+...
+
+---
+
+# Addrspace 3 store of a <4 x s16> value: 8 bytes at align 4. The CHECK lines
+# expect the G_STORE to be left unselected under every prefix; GFX6/GFX7
+# additionally expect `$m0 = S_MOV_B32 -1` before it.
+name: store_local_v4s16_align4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; GFX6-LABEL: name: store_local_v4s16_align4
+ ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX7-LABEL: name: store_local_v4s16_align4
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX9-LABEL: name: store_local_v4s16_align4
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = COPY $vgpr2
+ G_STORE %0, %1 :: (store 8, align 4, addrspace 3)
+
+...
+
+---
+
+# Addrspace 3 store of an s64 value: 8 bytes at align 8. GFX6/GFX7 expect
+# `$m0 = S_MOV_B32 -1` followed by DS_WRITE_B64 (implicit $m0); GFX9 expects
+# DS_WRITE_B64_gfx9 with no $m0 setup.
+name: store_local_s64_align8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; GFX6-LABEL: name: store_local_s64_align8
+ ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3)
+ ; GFX7-LABEL: name: store_local_s64_align8
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3)
+ ; GFX9-LABEL: name: store_local_s64_align8
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3)
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = COPY $vgpr2
+ G_STORE %0, %1 :: (store 8, align 8, addrspace 3)
+
+...
+
+---
+
+# Addrspace 3 store of a p1 value: 8 bytes at align 8. GFX6/GFX7 expect
+# `$m0 = S_MOV_B32 -1` followed by DS_WRITE_B64 (implicit $m0); GFX9 expects
+# DS_WRITE_B64_gfx9 with no $m0 setup.
+name: store_local_p1_align8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; GFX6-LABEL: name: store_local_p1_align8
+ ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3)
+ ; GFX7-LABEL: name: store_local_p1_align8
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3)
+ ; GFX9-LABEL: name: store_local_p1_align8
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3)
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = COPY $vgpr2
+ G_STORE %0, %1 :: (store 8, align 8, addrspace 3)
+
+...
+
+---
+
+# Addrspace 3 store of a <2 x s32> value: 8 bytes at align 8. GFX6/GFX7 expect
+# `$m0 = S_MOV_B32 -1` followed by DS_WRITE_B64 (implicit $m0); GFX9 expects
+# DS_WRITE_B64_gfx9 with no $m0 setup.
+name: store_local_v2s32_align8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; GFX6-LABEL: name: store_local_v2s32_align8
+ ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3)
+ ; GFX7-LABEL: name: store_local_v2s32_align8
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3)
+ ; GFX9-LABEL: name: store_local_v2s32_align8
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3)
+ %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = COPY $vgpr2
+ G_STORE %0, %1 :: (store 8, align 8, addrspace 3)
+
+...
+
+---
+
+# Addrspace 3 store of a <4 x s16> value: 8 bytes at align 8. GFX6/GFX7 expect
+# `$m0 = S_MOV_B32 -1` followed by DS_WRITE_B64 (implicit $m0); GFX9 expects
+# DS_WRITE_B64_gfx9 with no $m0 setup.
+name: store_local_v4s16_align8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; GFX6-LABEL: name: store_local_v4s16_align8
+ ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3)
+ ; GFX7-LABEL: name: store_local_v4s16_align8
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3)
+ ; GFX9-LABEL: name: store_local_v4s16_align8
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3)
+ %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = COPY $vgpr2
+ G_STORE %0, %1 :: (store 8, align 8, addrspace 3)
+
+...