}
break;
}
- case X86::VPERMILPSrm:
+
case X86::VPERMILPDrm:
- case X86::VPERMILPSYrm:
- case X86::VPERMILPDYrm: {
+ case X86::VPERMILPDYrm:
+ case X86::VPERMILPDZ128rm:
+ case X86::VPERMILPDZ256rm:
+ case X86::VPERMILPDZrm: {
if (!OutStreamer->isVerboseAsm())
break;
assert(MI->getNumOperands() > 5 &&
const MachineOperand &SrcOp = MI->getOperand(1);
const MachineOperand &MaskOp = MI->getOperand(5);
- unsigned ElSize;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::VPERMILPSrm: case X86::VPERMILPSYrm: ElSize = 32; break;
- case X86::VPERMILPDrm: case X86::VPERMILPDYrm: ElSize = 64; break;
+ if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ SmallVector<int, 8> Mask;
+ DecodeVPERMILPMask(C, 64, Mask);
+ if (!Mask.empty())
+ OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
}
+ break;
+ }
+
+ case X86::VPERMILPSrm:
+ case X86::VPERMILPSYrm:
+ case X86::VPERMILPSZ128rm:
+ case X86::VPERMILPSZ256rm:
+ case X86::VPERMILPSZrm: {
+ if (!OutStreamer->isVerboseAsm())
+ break;
+ assert(MI->getNumOperands() > 5 &&
+ "We should always have at least 5 operands!");
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ const MachineOperand &MaskOp = MI->getOperand(5);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
- DecodeVPERMILPMask(C, ElSize, Mask);
+ DecodeVPERMILPMask(C, 32, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
}
define <16 x float> @combine_vpermilvar_16f32_230146759A8BCFDE(<16 x float> %x0) {
; CHECK-LABEL: combine_vpermilvar_16f32_230146759A8BCFDE:
; CHECK: # BB#0:
-; CHECK-NEXT: vpermilps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,3,0,1,4,6,7,5,9,10,8,11,12,15,13,14]
; CHECK-NEXT: retq
%res0 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 0, i32 3, i32 2>, <16 x float> undef, i16 -1)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %res0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 1, i32 0, i32 2, i32 3, i32 0, i32 2, i32 1, i32 1, i32 2, i32 0, i32 3>, <16 x float> undef, i16 -1)