From c90b4243b5f9377b0f4d75f5d5a8f7d41901097e Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Fri, 18 Oct 2019 14:49:53 +0000 Subject: [PATCH] [AMDGPU][MC][GFX10] Added sdwa/dpp versions of v_cndmask_b32 See https://bugs.llvm.org/show_bug.cgi?id=43608 Reviewers: arsenm, rampitec Differential Revision: https://reviews.llvm.org/D69096 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375241 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 3 + lib/Target/AMDGPU/VOP2Instructions.td | 129 +++++++++++------- test/MC/AMDGPU/gfx10_asm_dpp8.s | 20 +++ test/MC/AMDGPU/wave32.s | 24 ++++ .../Disassembler/AMDGPU/gfx10_dasm_dpp16.txt | 8 ++ test/MC/Disassembler/AMDGPU/wave32.txt | 8 ++ 6 files changed, 140 insertions(+), 52 deletions(-) diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index d2ea94548df..a9888e6ed92 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -599,9 +599,11 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10: case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: + case AMDGPU::V_CNDMASK_B32_dpp_gfx10: case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10: + case AMDGPU::V_CNDMASK_B32_dpp8_gfx10: case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10: @@ -665,6 +667,7 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI, switch (MI->getOpcode()) { default: break; + case AMDGPU::V_CNDMASK_B32_sdwa_gfx10: case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10: case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10: diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index f91096b0f72..1ab0fc1ab58 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -956,13 +956,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } // End DecoderNamespace = "SDWA10" //===------------------------------ VOP2be ------------------------------===// - multiclass VOP2be_Real_gfx10 op, string opName, string asmName> { + multiclass VOP2be_Real_e32_gfx10 op, string opName, string asmName> { def _e32_gfx10 : VOP2_Real(opName#"_e32"), SIEncodingFamily.GFX10>, VOP2e(opName#"_e32").Pfl> { VOP2_Pseudo Ps = !cast(opName#"_e32"); let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); } + } + multiclass VOP2be_Real_e64_gfx10 op, string opName, string asmName> { def _e64_gfx10 : VOP3_Real(opName#"_e64"), SIEncodingFamily.GFX10>, VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, @@ -970,6 +972,8 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { VOP3_Pseudo Ps = !cast(opName#"_e64"); let AsmString = asmName # Ps.AsmOperands; } + } + multiclass VOP2be_Real_sdwa_gfx10 op, string opName, string asmName> { foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(opName#"_sdwa")>, @@ -978,6 +982,28 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); let DecoderNamespace = "SDWA10"; } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_w32_gfx10 : + Base_VOP_SDWA10_Real(opName#"_sdwa")>, + VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); + let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); + let isAsmParserOnly = 1; + let DecoderNamespace = "SDWA10"; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_w64_gfx10 : + Base_VOP_SDWA10_Real(opName#"_sdwa")>, + VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); + let AsmString = asmName # Ps.AsmOperands; + let isAsmParserOnly = 1; + let DecoderNamespace = "SDWA10"; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp_gfx10 op, string opName, string asmName> { foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx10 : VOP2_DPP16(opName#"_dpp"), asmName> { @@ -986,60 +1012,46 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let DecoderNamespace = "SDWA10"; } foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w32_gfx10 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w64_gfx10 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp8_gfx10 op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(opName#"_e32"), asmName> { string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; let AsmString = asmName # !subst(", vcc", "", AsmDPP8); let DecoderNamespace = "DPP8"; } - - let WaveSizePredicate = isWave32 in { - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in - def _sdwa_w32_gfx10 : - Base_VOP_SDWA10_Real(opName#"_sdwa")>, - VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); - let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); - let isAsmParserOnly = 1; - let DecoderNamespace = "SDWA10"; - } - def _dpp_w32_gfx10 : - Base_VOP2_DPP16(opName#"_dpp"), asmName> { - string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; - let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); - let isAsmParserOnly = 1; - } - def _dpp8_w32_gfx10 : - VOP2_DPP8(opName#"_e32"), asmName> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; - let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); - let isAsmParserOnly = 1; - } - } // End WaveSizePredicate = isWave32 - - let WaveSizePredicate = isWave64 in { - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in - def _sdwa_w64_gfx10 : - Base_VOP_SDWA10_Real(opName#"_sdwa")>, - VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); - let AsmString = asmName # Ps.AsmOperands; - let isAsmParserOnly = 1; - let DecoderNamespace = "SDWA10"; - } - def _dpp_w64_gfx10 : - Base_VOP2_DPP16(opName#"_dpp"), asmName> { - string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; - let AsmString = asmName # AsmDPP; - let isAsmParserOnly = 1; - } - def _dpp8_w64_gfx10 : - VOP2_DPP8(opName#"_e32"), asmName> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; - let AsmString = asmName # AsmDPP8; - let isAsmParserOnly = 1; - } - } // End WaveSizePredicate = isWave64 + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w32_gfx10 : + VOP2_DPP8(opName#"_e32"), asmName> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w64_gfx10 : + VOP2_DPP8(opName#"_e32"), asmName> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } } //===----------------------------- VOP3Only -----------------------------===// @@ -1060,8 +1072,19 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" -multiclass Base_VOP2_Real_gfx10 op> : - VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10; +multiclass VOP2be_Real_gfx10 op, string opName, string asmName> : + VOP2be_Real_e32_gfx10, + VOP2be_Real_e64_gfx10, + VOP2be_Real_sdwa_gfx10, + VOP2be_Real_dpp_gfx10, + VOP2be_Real_dpp8_gfx10; + +multiclass VOP2e_Real_gfx10 op, string opName, string asmName> : + VOP2_Real_e32_gfx10, + VOP2_Real_e64_gfx10, + VOP2be_Real_sdwa_gfx10, + VOP2be_Real_dpp_gfx10, + VOP2be_Real_dpp8_gfx10; multiclass VOP2_Real_gfx10 op> : VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10, @@ -1075,7 +1098,6 @@ multiclass VOP2_Real_gfx10_with_name op, string opName, VOP2_Real_dpp_gfx10_with_name, VOP2_Real_dpp8_gfx10_with_name; -defm V_CNDMASK_B32 : Base_VOP2_Real_gfx10<0x001>; defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>; defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; @@ -1108,6 +1130,9 @@ defm V_SUB_CO_CI_U32 : defm V_SUBREV_CO_CI_U32 : VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; +defm V_CNDMASK_B32 : + VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; + // VOP3 only. defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>; defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>; diff --git a/test/MC/AMDGPU/gfx10_asm_dpp8.s b/test/MC/AMDGPU/gfx10_asm_dpp8.s index 7ef2c44ad91..b148356c96d 100644 --- a/test/MC/AMDGPU/gfx10_asm_dpp8.s +++ b/test/MC/AMDGPU/gfx10_asm_dpp8.s @@ -510,6 +510,26 @@ v_min_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 v_ldexp_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 // GFX10: encoding: [0xea,0x04,0x0a,0x76,0x01,0x88,0xc6,0xfa] +v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05] +// W64-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 +// W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] +// W64-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05] +// W32-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 +// W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] +// W32-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_dpp v0, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] +// W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] + v_add_co_ci_u32_dpp v0, vcc_lo, v0, v0, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: [0xe9,0x00,0x00,0x50,0x00,0x77,0x39,0x05] // W64-ERR: error: instruction not supported on this GPU diff --git a/test/MC/AMDGPU/wave32.s b/test/MC/AMDGPU/wave32.s index d6b3d581093..f7a0835f0a1 100644 --- a/test/MC/AMDGPU/wave32.s +++ b/test/MC/AMDGPU/wave32.s @@ -63,6 +63,30 @@ v_cndmask_b32_e32 v1, v2, v3, vcc // GFX1032-ERR: error: instruction not supported on this GPU // GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02] +v_cndmask_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +// GFX1032: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] +// GFX1064: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] + +v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +// GFX1032: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] +// GFX1064-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1064: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] + +v_cndmask_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX1032: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] +// GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] + +v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX1032: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] +// GFX1064-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] + v_add_co_u32_e32 v2, vcc_lo, s0, v2 // GFX1032-ERR: error: instruction not supported on this GPU // GFX1064-ERR: error: instruction not supported on this GPU diff --git a/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt b/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt index d0bd9e0c9f7..2e6df6e283b 100644 --- a/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt +++ b/test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt @@ -316,6 +316,14 @@ # GFX10: v_mac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x3e,0x01,0x1b,0x00,0x00] 0xfa,0x04,0x0a,0x3e,0x01,0x1b,0x00,0x00 +# W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05] +0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05 + +# W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] +# W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa + # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0x1b,0x00,0x00] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0x1b,0x00,0x00] 0xfa,0x04,0x0a,0x50,0x01,0x1b,0x00,0x00 diff --git a/test/MC/Disassembler/AMDGPU/wave32.txt b/test/MC/Disassembler/AMDGPU/wave32.txt index 643cd7828da..6e1cad046ee 100644 --- a/test/MC/Disassembler/AMDGPU/wave32.txt +++ b/test/MC/Disassembler/AMDGPU/wave32.txt @@ -37,6 +37,14 @@ # GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ; 0x02,0x07,0x02,0x02 +# GFX1032: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +# GFX1064: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06 + +# GFX1032: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +# GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00 + # GFX1032: v_add_co_u32_e64 v2, vcc_lo, s0, v2 # GFX1064: v_add_co_u32_e64 v2, vcc, s0, v2 0x02,0x6a,0x0f,0xd7,0x00,0x04,0x02,0x00 -- 2.40.0