};
}
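+// Matches vector types with an odd number of elements.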
+static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
+  };
+}
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
                                         const GCNTargetMachine &TM) {
GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
.clampScalar(0, S32, S64)
- .fewerElementsIf(
-   [=](const LegalityQuery &Query) {
-     if (Query.Types[1].isVector())
-       return true;
-
-     LLT Ty = Query.Types[0];
-
-     // FIXME: Hack until odd splits handled
-     return Ty.isVector() &&
-       (Ty.getScalarSizeInBits() > 32 || Ty.getNumElements() % 2 != 0);
-   },
-   scalarize(0))
- // FIXME: Handle 16-bit vectors better
- .fewerElementsIf(
-   [=](const LegalityQuery &Query) {
-     return Query.Types[0].isVector() &&
-            Query.Types[0].getElementType().getSizeInBits() < 32;},
-   scalarize(0))
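+ // Widen small odd vectors by one element; any vector still left with an
+ // odd element count is split down to scalars.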
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .fewerElementsIf(numElementsNotEven(0), scalarize(0))
.scalarize(1)
.clampMaxNumElements(0, S32, 2)
.clampMaxNumElements(0, LocalPtr, 2)
.clampMaxNumElements(0, PrivatePtr, 2)
+ .scalarize(0)
.legalIf(all(isPointer(0), typeIs(1, S1)));
// TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
; CHECK: [[TRUNC1:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY2]](<3 x s32>)
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
- ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<3 x s8>)
- ; CHECK: [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC1]](<3 x s8>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0
+ ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[TRUNC1]](<3 x s8>), 0
+ ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>)
+ ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
- ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+ ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]]
; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s32)
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
- ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+ ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]]
; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s32)
; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
- ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+ ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]]
; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s32)
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8)
- ; CHECK: [[ANYEXT6:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<3 x s8>)
- ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT6]](<3 x s32>)
+ ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+ ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
+ ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT6]], [[ANYEXT7]]
+ ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0
+ ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>)
+ ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 0
%2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY2]](<4 x s16>), 0
- ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
- ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
- ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
- ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]]
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT]](s32)
- ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
- ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
- ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]]
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32)
- ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
- ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
- ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]]
- ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32)
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s16>), 0
- ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+ ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+ ; CHECK: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[INSERT]], [[INSERT1]]
+ ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[SELECT]](<4 x s16>), 0
+ ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0
+ ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
%0:_(s32) = COPY $vgpr0
%1:_(<4 x s16>) = COPY $vgpr1_vgpr2
%2:_(<4 x s16>) = COPY $vgpr3_vgpr4