add_insn("ucomiss", "xmm_xmm32", modifiers=[0, 0x2E])
add_insn("vaddss", "xmm_xmm32", modifiers=[0xF3, 0x58, VEXL0], avx=True)
-add_insn("vcomiss", "xmm_xmm32", modifiers=[0, 0x2F, VEXL0], avx=True)
+# vcomiss and vucomiss take only two operands (see avx_xmm_xmm32 below)
add_insn("vdivss", "xmm_xmm32", modifiers=[0xF3, 0x5E, VEXL0], avx=True)
add_insn("vmaxss", "xmm_xmm32", modifiers=[0xF3, 0x5F, VEXL0], avx=True)
add_insn("vminss", "xmm_xmm32", modifiers=[0xF3, 0x5D, VEXL0], avx=True)
add_insn("vrsqrtss", "xmm_xmm32", modifiers=[0xF3, 0x52, VEXL0], avx=True)
add_insn("vsqrtss", "xmm_xmm32", modifiers=[0xF3, 0x51, VEXL0], avx=True)
add_insn("vsubss", "xmm_xmm32", modifiers=[0xF3, 0x5C, VEXL0], avx=True)
-add_insn("vucomiss", "xmm_xmm32", modifiers=[0, 0x2E, VEXL0], avx=True)
add_group("ssecmp_128",
cpu=["SSE"],
add_insn("ucomisd", "xmm_xmm64", modifiers=[0x66, 0x2E])
add_insn("vaddsd", "xmm_xmm64", modifiers=[0xF2, 0x58, VEXL0], avx=True)
-add_insn("vcomisd", "xmm_xmm64", modifiers=[0x66, 0x2F, VEXL0], avx=True)
+# vcomisd and vucomisd take only two operands (see avx_xmm_xmm64 below)
# vcvtdq2pd and vcvtps2pd can take ymm, xmm version
add_insn("vcvtsd2ss", "xmm_xmm64", modifiers=[0xF2, 0x5A, VEXL0], avx=True)
add_insn("vdivsd", "xmm_xmm64", modifiers=[0xF2, 0x5E, VEXL0], avx=True)
add_insn("vmulsd", "xmm_xmm64", modifiers=[0xF2, 0x59, VEXL0], avx=True)
add_insn("vsubsd", "xmm_xmm64", modifiers=[0xF2, 0x5C, VEXL0], avx=True)
add_insn("vsqrtsd", "xmm_xmm64", modifiers=[0xF2, 0x51, VEXL0], avx=True)
-add_insn("vucomisd", "xmm_xmm64", modifiers=[0x66, 0x2E, VEXL0], avx=True)
add_insn("addpd", "xmm_xmm128", modifiers=[0x66, 0x58], cpu=["SSE2"])
add_insn("andnpd", "xmm_xmm128", modifiers=[0x66, 0x55], cpu=["SSE2"])
add_insn("vpsignw", "ssse3", modifiers=[0x09, VEXL0], avx=True)
add_insn("vpsignd", "ssse3", modifiers=[0x0A, VEXL0], avx=True)
add_insn("vpmulhrsw", "ssse3", modifiers=[0x0B, VEXL0], avx=True)
-add_insn("vpabsb", "ssse3", modifiers=[0x1C, VEXL0], avx=True)
-add_insn("vpabsw", "ssse3", modifiers=[0x1D, VEXL0], avx=True)
-add_insn("vpabsd", "ssse3", modifiers=[0x1E, VEXL0], avx=True)
+# vpabsb/vpabsw/vpabsd take only two operands (see avx_ssse3_2op below)
add_group("ssse3imm",
cpu=["SSSE3"],
add_insn("vpackusdw", "ssse3", modifiers=[0x2B, VEXL0], avx=True)
add_insn("vpcmpeqq", "ssse3", modifiers=[0x29, VEXL0], avx=True)
add_insn("vpcmpgtq", "ssse3", modifiers=[0x37, VEXL0], avx=True)
-add_insn("vphminposuw", "ssse3", modifiers=[0x41, VEXL0], avx=True)
+# vphminposuw takes only two operands (see avx_ssse3_2op below)
add_insn("vpmaxsb", "ssse3", modifiers=[0x3C, VEXL0], avx=True)
add_insn("vpmaxsd", "ssse3", modifiers=[0x3D, VEXL0], avx=True)
add_insn("vpmaxud", "ssse3", modifiers=[0x3F, VEXL0], avx=True)
add_insn("vpminuw", "ssse3", modifiers=[0x3A, VEXL0], avx=True)
add_insn("vpmuldq", "ssse3", modifiers=[0x28, VEXL0], avx=True)
add_insn("vpmulld", "ssse3", modifiers=[0x40, VEXL0], avx=True)
-# vptest, however, uses SSE4 style (2 operand only)
+# vptest uses SSE4 style (two operands only) and also takes 256-bit operands
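+# (per the SDM it accepts "vptest xmm1, xmm2/m128" and "vptest ymm1, ymm2/m256")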
add_insn("vptest", "sse4", modifiers=[0x17, VEXL0], avx=True)
add_group("sse4imm_256",
vex=128,
prefix=0x66,
opcode=[0x0F, 0x3A, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"),
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
add_group("avx_sse4imm",
vex=128,
prefix=0x00,
opcode=[0x0F, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"),
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDReg", size=128, dest="EA")])
add_group("vmovddup",
cpu=["AVX"],
vex=128,
prefix=0x00,
opcode=[0x0F, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"),
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="Mem", size=64, relaxed=True, dest="EA")])
add_group("vmovddup",
cpu=["AVX"],
add_insn("vmovddup", "vmovddup", modifiers=[0xF2, 0x12])
+# Some xmm_xmm64 combinations only take two operands in AVX
+# (VEX.vvvv must be 1111b)
+add_group("avx_xmm_xmm64",
+ cpu=["SSE2"],
+ modifiers=["PreAdd", "Op1Add"],
+ vex=128,
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("avx_xmm_xmm64",
+ cpu=["SSE2"],
+ modifiers=["PreAdd", "Op1Add"],
+ vex=128,
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+
+add_insn("vcomisd", "avx_xmm_xmm64", modifiers=[0x66, 0x2F], avx=True)
+add_insn("vucomisd", "avx_xmm_xmm64", modifiers=[0x66, 0x2E], avx=True)
+
+# Some xmm_xmm32 combinations only take two operands in AVX
+# (VEX.vvvv must be 1111b)
+add_group("avx_xmm_xmm32",
+ cpu=["SSE"],
+ modifiers=["PreAdd", "Op1Add"],
+ vex=128,
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("avx_xmm_xmm32",
+ cpu=["SSE"],
+ modifiers=["PreAdd", "Op1Add"],
+ vex=128,
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=32, relaxed=True, dest="EA")])
+
+add_insn("vcomiss", "avx_xmm_xmm32", modifiers=[0, 0x2F], avx=True)
+add_insn("vucomiss", "avx_xmm_xmm32", modifiers=[0, 0x2E], avx=True)
+
# Some conversion functions take ymm, xmm combination
add_group("avx_cvt_xmm64",
cpu=["AVX"],
add_insn("vcvtdq2pd", "avx_cvt_xmm64", modifiers=[0xF3, 0xE6])
add_insn("vcvtps2pd", "avx_cvt_xmm64", modifiers=[0, 0x5A])
+# Some SSSE3 and SSE4.1 opcodes take only two operands in AVX
+# (VEX.vvvv must be 1111b)
+add_group("avx_ssse3_2op",
+ cpu=["AVX"],
+ modifiers=["Op2Add"],
+ vex=128,
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+
+add_insn("vpabsb", "avx_ssse3_2op", modifiers=[0x1C], avx=True)
+add_insn("vpabsw", "avx_ssse3_2op", modifiers=[0x1D], avx=True)
+add_insn("vpabsd", "avx_ssse3_2op", modifiers=[0x1E], avx=True)
+add_insn("vphminposuw", "avx_ssse3_2op", modifiers=[0x41], avx=True)
+
# Some conversion functions take xmm, ymm combination
# Need separate x and y versions for gas mode
add_group("avx_cvt_xmm128_x",