"P4", "IA64", "Hammer"]
ordered_cpu_features = [
"FPU", "Cyrix", "AMD", "MMX", "3DNow", "SMM", "SSE", "SSE2",
- "SSE3", "SVM", "PadLock", "SSSE3", "SSE41", "SSE42", "SSE4a"]
+ "SSE3", "SVM", "PadLock", "SSSE3", "SSE41", "SSE42", "SSE4a", "SSE5"]
unordered_cpu_features = ["Priv", "Prot", "Undoc", "Obs"]
def cpu_lcd(cpu1, cpu2):
else:
raise KeyError("missing opcode")
+ # DREX opcode0 field: a truthy drex_oc0 keyword becomes bit 0x08, otherwise 0
+ self.drex_oc0 = kwargs.pop("drex_oc0", 0) and 0x08 or 0
+
# Build operands string (C array initializer)
self.operands = kwargs.pop("operands")
for op in self.operands:
self.cpu.add("586")
if op.dest == "EA64":
self.cpu.add("64")
+ if op.dest == "DREX":
+ self.drex_oc0 |= 0x80
# Modifiers
self.modifiers = kwargs.pop("modifiers", [])
"%d" % (self.opersize or 0),
"%d" % (self.def_opersize_64 or 0),
self.special_prefix or "0",
+ self.drex_oc0 and
+ ("0x%02X" % self.drex_oc0) or "0",
"%d" % self.opcode_len,
opcodes_str,
"%d" % (self.spare or 0),
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+for sz in [32, 64]:
+ add_group("sse4m%dimm" % sz,
+ cpu=["SSE41"],
+ modifiers=["Op2Add"],
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+ add_group("sse4m%dimm" % sz,
+ cpu=["SSE41"],
+ modifiers=["Op2Add"],
+ prefix=0x66,
+ opcode=[0x0F, 0x3A, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=sz, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
add_insn("blendpd", "sse4imm", modifiers=[0x0D])
add_insn("blendps", "sse4imm", modifiers=[0x0C])
add_insn("pblendw", "sse4imm", modifiers=[0x0E])
add_insn("roundpd", "sse4imm", modifiers=[0x09])
add_insn("roundps", "sse4imm", modifiers=[0x08])
-add_insn("roundsd", "sse4imm", modifiers=[0x0B])
-add_insn("roundss", "sse4imm", modifiers=[0x0A])
+add_insn("roundsd", "sse4m64imm", modifiers=[0x0B])
+add_insn("roundss", "sse4m32imm", modifiers=[0x0A])
add_group("sse4xmm0",
cpu=["SSE41"],
add_insn("pinsrq", "pinsrq")
-add_group("sse4m64",
- cpu=["SSE41"],
- modifiers=["Op2Add"],
- prefix=0x66,
- opcode=[0x0F, 0x38, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="Mem", size=64, relaxed=True, dest="EA")])
-add_group("sse4m64",
- cpu=["SSE41"],
- modifiers=["Op2Add"],
- prefix=0x66,
- opcode=[0x0F, 0x38, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="SIMDReg", size=128, dest="EA")])
+for sz in [16, 32, 64]:
+ add_group("sse4m%d" % sz,
+ cpu=["SSE41"],
+ modifiers=["Op2Add"],
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=sz, relaxed=True, dest="EA")])
+ add_group("sse4m%d" % sz,
+ cpu=["SSE41"],
+ modifiers=["Op2Add"],
+ prefix=0x66,
+ opcode=[0x0F, 0x38, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA")])
add_insn("pmovsxbw", "sse4m64", modifiers=[0x20])
add_insn("pmovsxwd", "sse4m64", modifiers=[0x23])
add_insn("pmovzxwd", "sse4m64", modifiers=[0x33])
add_insn("pmovzxdq", "sse4m64", modifiers=[0x35])
-add_group("sse4m32",
- cpu=["SSE41"],
- modifiers=["Op2Add"],
- prefix=0x66,
- opcode=[0x0F, 0x38, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="Mem", size=32, relaxed=True, dest="EA")])
-add_group("sse4m32",
- cpu=["SSE41"],
- modifiers=["Op2Add"],
- prefix=0x66,
- opcode=[0x0F, 0x38, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="SIMDReg", size=128, dest="EA")])
-
add_insn("pmovsxbd", "sse4m32", modifiers=[0x21])
add_insn("pmovsxwq", "sse4m32", modifiers=[0x24])
add_insn("pmovzxbd", "sse4m32", modifiers=[0x31])
add_insn("pmovzxwq", "sse4m32", modifiers=[0x34])
-add_group("sse4m16",
- cpu=["SSE41"],
- modifiers=["Op2Add"],
- prefix=0x66,
- opcode=[0x0F, 0x38, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="Mem", size=16, relaxed=True, dest="EA")])
-add_group("sse4m16",
- cpu=["SSE41"],
- modifiers=["Op2Add"],
- prefix=0x66,
- opcode=[0x0F, 0x38, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="SIMDReg", size=128, dest="EA")])
-
add_insn("pmovsxbq", "sse4m16", modifiers=[0x22])
add_insn("pmovzxbq", "sse4m16", modifiers=[0x32])
add_insn("movntss", "movntss")
+#####################################################################
+# AMD SSE5 instructions
+#####################################################################
+
+add_group("sse5com",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x25, 0x00],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+for sz in [32, 64]:
+ add_group("sse5com%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x25, 0x00],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+ add_group("sse5com%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x25, 0x00],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=sz, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("comps", "sse5com", modifiers=[0x2C])
+add_insn("compd", "sse5com", modifiers=[0x2D])
+add_insn("comss", "sse5com32", modifiers=[0x2E])
+add_insn("comsd", "sse5com64", modifiers=[0x2F])
+
+add_insn("pcomb", "sse5com", modifiers=[0x4C])
+add_insn("pcomw", "sse5com", modifiers=[0x4D])
+add_insn("pcomd", "sse5com", modifiers=[0x4E])
+add_insn("pcomq", "sse5com", modifiers=[0x4F])
+
+add_insn("pcomub", "sse5com", modifiers=[0x6C])
+add_insn("pcomuw", "sse5com", modifiers=[0x6D])
+add_insn("pcomud", "sse5com", modifiers=[0x6E])
+add_insn("pcomuq", "sse5com", modifiers=[0x6F])
+
+add_group("cvtph2ps",
+ cpu=["SSE5"],
+ opcode=[0x0F, 0x7A, 0x30],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("cvtph2ps",
+ cpu=["SSE5"],
+ opcode=[0x0F, 0x7A, 0x30],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+
+add_insn("cvtph2ps", "cvtph2ps")
+
+add_group("cvtps2ph",
+ cpu=["SSE5"],
+ opcode=[0x0F, 0x7A, 0x31],
+ operands=[Operand(type="SIMDReg", size=128, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="Spare")])
+add_group("cvtps2ph",
+ cpu=["SSE5"],
+ opcode=[0x0F, 0x7A, 0x31],
+ operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("cvtps2ph", "cvtps2ph")
+
+add_group("sse5arith",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x00],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("sse5arith",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x00],
+ drex_oc0=1,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="Spare")])
+add_group("sse5arith",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x04],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None)])
+add_group("sse5arith",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x04],
+ drex_oc0=1,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None)])
+
+for sz in [32, 64]:
+ add_group("sse5arith%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x00],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA")])
+ add_group("sse5arith%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x00],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=sz, relaxed=True, dest="EA")])
+ add_group("sse5arith%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x00],
+ drex_oc0=1,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None),
+ Operand(type="SIMDReg", size=128, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="Spare")])
+ add_group("sse5arith%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x00],
+ drex_oc0=1,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None),
+ Operand(type="Mem", size=sz, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="Spare")])
+ add_group("sse5arith%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x04],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None)])
+ add_group("sse5arith%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x04],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=sz, relaxed=True, dest="EA"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None)])
+ add_group("sse5arith%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x04],
+ drex_oc0=1,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDReg", size=128, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None)])
+ add_group("sse5arith%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x04],
+ drex_oc0=1,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="Mem", size=sz, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None)])
+
+add_insn("fmaddps", "sse5arith", modifiers=[0x00])
+add_insn("fmaddpd", "sse5arith", modifiers=[0x01])
+add_insn("fmaddss", "sse5arith32", modifiers=[0x02])
+add_insn("fmaddsd", "sse5arith64", modifiers=[0x03])
+
+add_insn("fmsubps", "sse5arith", modifiers=[0x08])
+add_insn("fmsubpd", "sse5arith", modifiers=[0x09])
+add_insn("fmsubss", "sse5arith32", modifiers=[0x0A])
+add_insn("fmsubsd", "sse5arith64", modifiers=[0x0B])
+
+add_insn("fnmaddps", "sse5arith", modifiers=[0x10])
+add_insn("fnmaddpd", "sse5arith", modifiers=[0x11])
+add_insn("fnmaddss", "sse5arith32", modifiers=[0x12])
+add_insn("fnmaddsd", "sse5arith64", modifiers=[0x13])
+
+add_insn("fnmsubps", "sse5arith", modifiers=[0x18])
+add_insn("fnmsubpd", "sse5arith", modifiers=[0x19])
+add_insn("fnmsubss", "sse5arith32", modifiers=[0x1A])
+add_insn("fnmsubsd", "sse5arith64", modifiers=[0x1B])
+
+add_insn("pcmov", "sse5arith", modifiers=[0x22])
+
+add_insn("permps", "sse5arith", modifiers=[0x20])
+add_insn("permpd", "sse5arith", modifiers=[0x21])
+add_insn("pperm", "sse5arith", modifiers=[0x23])
+
+add_group("sse5two",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x7A, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+for sz in [32, 64]:
+ add_group("sse5two%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x7A, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDReg", size=128, dest="EA")])
+ add_group("sse5two%d" % sz,
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x7A, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=sz, relaxed=True, dest="EA")])
+
+add_insn("frczps", "sse5two", modifiers=[0x10])
+add_insn("frczpd", "sse5two", modifiers=[0x11])
+add_insn("frczss", "sse5two32", modifiers=[0x12])
+add_insn("frczsd", "sse5two64", modifiers=[0x13])
+
+add_insn("phaddbw", "sse5two", modifiers=[0x41])
+add_insn("phaddbd", "sse5two", modifiers=[0x42])
+add_insn("phaddbq", "sse5two", modifiers=[0x43])
+add_insn("phaddwd", "sse5two", modifiers=[0x46])
+add_insn("phaddwq", "sse5two", modifiers=[0x47])
+add_insn("phadddq", "sse5two", modifiers=[0x4B])
+
+add_insn("phaddubw", "sse5two", modifiers=[0x51])
+add_insn("phaddubd", "sse5two", modifiers=[0x52])
+add_insn("phaddubq", "sse5two", modifiers=[0x53])
+add_insn("phadduwd", "sse5two", modifiers=[0x56])
+add_insn("phadduwq", "sse5two", modifiers=[0x57])
+add_insn("phaddudq", "sse5two", modifiers=[0x5B])
+
+add_insn("phsubbw", "sse5two", modifiers=[0x61])
+add_insn("phsubwd", "sse5two", modifiers=[0x62])
+add_insn("phsubdq", "sse5two", modifiers=[0x63])
+
+add_group("sse5pmacs",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x00],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="SIMDRegMatch0", size=128, dest=None)])
+
+add_insn("pmacsww", "sse5pmacs", modifiers=[0x95])
+add_insn("pmacswd", "sse5pmacs", modifiers=[0x96])
+add_insn("pmacsdql", "sse5pmacs", modifiers=[0x97])
+add_insn("pmacsdd", "sse5pmacs", modifiers=[0x9E])
+add_insn("pmacsdqh", "sse5pmacs", modifiers=[0x9F])
+
+add_insn("pmacssww", "sse5pmacs", modifiers=[0x85])
+add_insn("pmacsswd", "sse5pmacs", modifiers=[0x86])
+add_insn("pmacssdql", "sse5pmacs", modifiers=[0x87])
+add_insn("pmacssdd", "sse5pmacs", modifiers=[0x8E])
+add_insn("pmacssdqh", "sse5pmacs", modifiers=[0x8F])
+
+add_insn("pmadcsswd", "sse5pmacs", modifiers=[0xA6])
+add_insn("pmadcswd", "sse5pmacs", modifiers=[0xB6])
+
+add_group("sse5prot",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x40],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("sse5prot",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x40],
+ drex_oc0=1,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="Spare")])
+add_group("sse5prot",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x7B, 0x40],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("protb", "sse5prot", modifiers=[0x00])
+add_insn("protw", "sse5prot", modifiers=[0x01])
+add_insn("protd", "sse5prot", modifiers=[0x02])
+add_insn("protq", "sse5prot", modifiers=[0x03])
+
+add_group("sse5psh",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x44],
+ drex_oc0=0,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("sse5psh",
+ cpu=["SSE5"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x24, 0x44],
+ drex_oc0=1,
+ operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+ Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("pshlb", "sse5psh", modifiers=[0x00])
+add_insn("pshlw", "sse5psh", modifiers=[0x01])
+add_insn("pshld", "sse5psh", modifiers=[0x02])
+add_insn("pshlq", "sse5psh", modifiers=[0x03])
+
+add_insn("pshab", "sse5psh", modifiers=[0x04])
+add_insn("pshaw", "sse5psh", modifiers=[0x05])
+add_insn("pshad", "sse5psh", modifiers=[0x06])
+add_insn("pshaq", "sse5psh", modifiers=[0x07])
+
+# roundps, roundpd, roundss, roundsd, and ptest are already defined under SSE4.1
+
#####################################################################
# AMD 3DNow! instructions
#####################################################################
EXTRA_DIST += modules/arch/x86/tests/sse4.hex
EXTRA_DIST += modules/arch/x86/tests/sse4-err.asm
EXTRA_DIST += modules/arch/x86/tests/sse4-err.errwarn
+EXTRA_DIST += modules/arch/x86/tests/sse5-all.asm
+EXTRA_DIST += modules/arch/x86/tests/sse5-all.hex
+EXTRA_DIST += modules/arch/x86/tests/sse5-basic.asm
+EXTRA_DIST += modules/arch/x86/tests/sse5-basic.hex
+EXTRA_DIST += modules/arch/x86/tests/sse5-err.asm
+EXTRA_DIST += modules/arch/x86/tests/sse5-err.errwarn
EXTRA_DIST += modules/arch/x86/tests/ssse3.asm
EXTRA_DIST += modules/arch/x86/tests/ssse3.c
EXTRA_DIST += modules/arch/x86/tests/ssse3.hex
--- /dev/null
+; Instructions are listed in the same order as the SSE5 databook
+; Assembled with BITS 16 to keep the encoded output short
+[bits 16]
+compd xmm1, xmm4, xmm7, 5 ; 0F 25 2D 347 10 05
+compd xmm2, xmm5, [0], byte 5 ; 0F 25 2D 056 20 00 00 05
+compd xmm3, xmm6, dqword [0], 5 ; 0F 25 2D 066 30 00 00 05
+
+comps xmm1, xmm4, xmm7, 5 ; 0F 25 2C 347 10 05
+comps xmm2, xmm5, [0], byte 5 ; 0F 25 2C 056 20 00 00 05
+comps xmm3, xmm6, dqword [0], 5 ; 0F 25 2C 066 30 00 00 05
+
+comsd xmm1, xmm4, xmm7, 5 ; 0F 25 2F 347 10 05
+comsd xmm2, xmm5, [0], byte 5 ; 0F 25 2F 056 20 00 00 05
+comsd xmm3, xmm6, qword [0], 5 ; 0F 25 2F 066 30 00 00 05
+
+comss xmm1, xmm4, xmm7, 5 ; 0F 25 2E 347 10 05
+comss xmm2, xmm5, [0], byte 5 ; 0F 25 2E 056 20 00 00 05
+comss xmm3, xmm6, dword [0], 5 ; 0F 25 2E 066 30 00 00 05
+
+cvtph2ps xmm1, xmm4 ; 0F 7A 30 314
+cvtph2ps xmm2, [0] ; 0F 7A 30 026 00 00
+cvtph2ps xmm3, qword [0] ; 0F 7A 30 036 00 00
+
+cvtps2ph xmm1, xmm4 ; 0F 7A 31 341
+cvtps2ph [0], xmm2 ; 0F 7A 31 026 00 00
+cvtps2ph qword [0], xmm3 ; 0F 7A 31 036 00 00
+
+fmaddpd xmm1, xmm1, xmm2, xmm3 ; 0F 24 01 323 10 /or/ 0F 24 01 332 18
+fmaddpd xmm1, xmm1, xmm2, [0] ; 0F 24 01 026 10 00 00
+fmaddpd xmm1, xmm1, xmm2, dqword [0] ; 0F 24 01 026 10 00 00
+fmaddpd xmm1, xmm1, [0], xmm3 ; 0F 24 01 036 18 00 00
+fmaddpd xmm1, xmm1, dqword [0], xmm3 ; 0F 24 01 036 18 00 00
+fmaddpd xmm1, xmm2, xmm3, xmm1 ; 0F 24 05 323 10 /or/ 0F 24 05 332 18
+fmaddpd xmm1, xmm2, [0], xmm1 ; 0F 24 05 026 10 00 00
+fmaddpd xmm1, xmm2, dqword [0], xmm1 ; 0F 24 05 026 10 00 00
+fmaddpd xmm1, [0], xmm3, xmm1 ; 0F 24 05 036 18 00 00
+fmaddpd xmm1, dqword [0], xmm3, xmm1 ; 0F 24 05 036 18 00 00
+
+fmaddps xmm1, xmm1, xmm2, xmm3 ; 0F 24 00 323 10 /or/ 0F 24 00 332 18
+fmaddps xmm1, xmm1, xmm2, [0] ; 0F 24 00 026 10 00 00
+fmaddps xmm1, xmm1, xmm2, dqword [0] ; 0F 24 00 026 10 00 00
+fmaddps xmm1, xmm1, [0], xmm3 ; 0F 24 00 036 18 00 00
+fmaddps xmm1, xmm1, dqword [0], xmm3 ; 0F 24 00 036 18 00 00
+fmaddps xmm1, xmm2, xmm3, xmm1 ; 0F 24 04 323 10 /or/ 0F 24 04 332 18
+fmaddps xmm1, xmm2, [0], xmm1 ; 0F 24 04 026 10 00 00
+fmaddps xmm1, xmm2, dqword [0], xmm1 ; 0F 24 04 026 10 00 00
+fmaddps xmm1, [0], xmm3, xmm1 ; 0F 24 04 036 18 00 00
+fmaddps xmm1, dqword [0], xmm3, xmm1 ; 0F 24 04 036 18 00 00
+
+fmaddsd xmm1, xmm1, xmm2, xmm3 ; 0F 24 03 323 10 /or/ 0F 24 03 332 18
+fmaddsd xmm1, xmm1, xmm2, [0] ; 0F 24 03 026 10 00 00
+fmaddsd xmm1, xmm1, xmm2, qword [0] ; 0F 24 03 026 10 00 00
+fmaddsd xmm1, xmm1, [0], xmm3 ; 0F 24 03 036 18 00 00
+fmaddsd xmm1, xmm1, qword [0], xmm3 ; 0F 24 03 036 18 00 00
+fmaddsd xmm1, xmm2, xmm3, xmm1 ; 0F 24 07 323 10 /or/ 0F 24 07 332 18
+fmaddsd xmm1, xmm2, [0], xmm1 ; 0F 24 07 026 10 00 00
+fmaddsd xmm1, xmm2, qword [0], xmm1 ; 0F 24 07 026 10 00 00
+fmaddsd xmm1, [0], xmm3, xmm1 ; 0F 24 07 036 18 00 00
+fmaddsd xmm1, qword [0], xmm3, xmm1 ; 0F 24 07 036 18 00 00
+
+fmaddss xmm1, xmm1, xmm2, xmm3 ; 0F 24 02 323 10 /or/ 0F 24 02 332 18
+fmaddss xmm1, xmm1, xmm2, [0] ; 0F 24 02 026 10 00 00
+fmaddss xmm1, xmm1, xmm2, dword [0] ; 0F 24 02 026 10 00 00
+fmaddss xmm1, xmm1, [0], xmm3 ; 0F 24 02 036 18 00 00
+fmaddss xmm1, xmm1, dword [0], xmm3 ; 0F 24 02 036 18 00 00
+fmaddss xmm1, xmm2, xmm3, xmm1 ; 0F 24 06 323 10 /or/ 0F 24 06 332 18
+fmaddss xmm1, xmm2, [0], xmm1 ; 0F 24 06 026 10 00 00
+fmaddss xmm1, xmm2, dword [0], xmm1 ; 0F 24 06 026 10 00 00
+fmaddss xmm1, [0], xmm3, xmm1 ; 0F 24 06 036 18 00 00
+fmaddss xmm1, dword [0], xmm3, xmm1 ; 0F 24 06 036 18 00 00
+
+fmsubpd xmm1, xmm1, xmm2, xmm3 ; 0F 24 09 323 10 /or/ 0F 24 09 332 18
+fmsubpd xmm1, xmm1, xmm2, [0] ; 0F 24 09 026 10 00 00
+fmsubpd xmm1, xmm1, xmm2, dqword [0] ; 0F 24 09 026 10 00 00
+fmsubpd xmm1, xmm1, [0], xmm3 ; 0F 24 09 036 18 00 00
+fmsubpd xmm1, xmm1, dqword [0], xmm3 ; 0F 24 09 036 18 00 00
+fmsubpd xmm1, xmm2, xmm3, xmm1 ; 0F 24 0D 323 10 /or/ 0F 24 0D 332 18
+fmsubpd xmm1, xmm2, [0], xmm1 ; 0F 24 0D 026 10 00 00
+fmsubpd xmm1, xmm2, dqword [0], xmm1 ; 0F 24 0D 026 10 00 00
+fmsubpd xmm1, [0], xmm3, xmm1 ; 0F 24 0D 036 18 00 00
+fmsubpd xmm1, dqword [0], xmm3, xmm1 ; 0F 24 0D 036 18 00 00
+
+fmsubps xmm1, xmm1, xmm2, xmm3 ; 0F 24 08 323 10 /or/ 0F 24 08 332 18
+fmsubps xmm1, xmm1, xmm2, [0] ; 0F 24 08 026 10 00 00
+fmsubps xmm1, xmm1, xmm2, dqword [0] ; 0F 24 08 026 10 00 00
+fmsubps xmm1, xmm1, [0], xmm3 ; 0F 24 08 036 18 00 00
+fmsubps xmm1, xmm1, dqword [0], xmm3 ; 0F 24 08 036 18 00 00
+fmsubps xmm1, xmm2, xmm3, xmm1 ; 0F 24 0C 323 10 /or/ 0F 24 0C 332 18
+fmsubps xmm1, xmm2, [0], xmm1 ; 0F 24 0C 026 10 00 00
+fmsubps xmm1, xmm2, dqword [0], xmm1 ; 0F 24 0C 026 10 00 00
+fmsubps xmm1, [0], xmm3, xmm1 ; 0F 24 0C 036 18 00 00
+fmsubps xmm1, dqword [0], xmm3, xmm1 ; 0F 24 0C 036 18 00 00
+
+fmsubsd xmm1, xmm1, xmm2, xmm3 ; 0F 24 0B 323 10 /or/ 0F 24 0B 332 18
+fmsubsd xmm1, xmm1, xmm2, [0] ; 0F 24 0B 026 10 00 00
+fmsubsd xmm1, xmm1, xmm2, qword [0] ; 0F 24 0B 026 10 00 00
+fmsubsd xmm1, xmm1, [0], xmm3 ; 0F 24 0B 036 18 00 00
+fmsubsd xmm1, xmm1, qword [0], xmm3 ; 0F 24 0B 036 18 00 00
+fmsubsd xmm1, xmm2, xmm3, xmm1 ; 0F 24 0F 323 10 /or/ 0F 24 0F 332 18
+fmsubsd xmm1, xmm2, [0], xmm1 ; 0F 24 0F 026 10 00 00
+fmsubsd xmm1, xmm2, qword [0], xmm1 ; 0F 24 0F 026 10 00 00
+fmsubsd xmm1, [0], xmm3, xmm1 ; 0F 24 0F 036 18 00 00
+fmsubsd xmm1, qword [0], xmm3, xmm1 ; 0F 24 0F 036 18 00 00
+
+fmsubss xmm1, xmm1, xmm2, xmm3 ; 0F 24 0A 323 10 /or/ 0F 24 0A 332 18
+fmsubss xmm1, xmm1, xmm2, [0] ; 0F 24 0A 026 10 00 00
+fmsubss xmm1, xmm1, xmm2, dword [0] ; 0F 24 0A 026 10 00 00
+fmsubss xmm1, xmm1, [0], xmm3 ; 0F 24 0A 036 18 00 00
+fmsubss xmm1, xmm1, dword [0], xmm3 ; 0F 24 0A 036 18 00 00
+fmsubss xmm1, xmm2, xmm3, xmm1 ; 0F 24 0E 323 10 /or/ 0F 24 0E 332 18
+fmsubss xmm1, xmm2, [0], xmm1 ; 0F 24 0E 026 10 00 00
+fmsubss xmm1, xmm2, dword [0], xmm1 ; 0F 24 0E 026 10 00 00
+fmsubss xmm1, [0], xmm3, xmm1 ; 0F 24 0E 036 18 00 00
+fmsubss xmm1, dword [0], xmm3, xmm1 ; 0F 24 0E 036 18 00 00
+
+fnmaddpd xmm1, xmm1, xmm2, xmm3 ; 0F 24 11 323 10 /or/ 0F 24 11 332 18
+fnmaddpd xmm1, xmm1, xmm2, [0] ; 0F 24 11 026 10 00 00
+fnmaddpd xmm1, xmm1, xmm2, dqword [0] ; 0F 24 11 026 10 00 00
+fnmaddpd xmm1, xmm1, [0], xmm3 ; 0F 24 11 036 18 00 00
+fnmaddpd xmm1, xmm1, dqword [0], xmm3 ; 0F 24 11 036 18 00 00
+fnmaddpd xmm1, xmm2, xmm3, xmm1 ; 0F 24 15 323 10 /or/ 0F 24 15 332 18
+fnmaddpd xmm1, xmm2, [0], xmm1 ; 0F 24 15 026 10 00 00
+fnmaddpd xmm1, xmm2, dqword [0], xmm1 ; 0F 24 15 026 10 00 00
+fnmaddpd xmm1, [0], xmm3, xmm1 ; 0F 24 15 036 18 00 00
+fnmaddpd xmm1, dqword [0], xmm3, xmm1 ; 0F 24 15 036 18 00 00
+
+fnmaddps xmm1, xmm1, xmm2, xmm3 ; 0F 24 10 323 10 /or/ 0F 24 10 332 18
+fnmaddps xmm1, xmm1, xmm2, [0] ; 0F 24 10 026 10 00 00
+fnmaddps xmm1, xmm1, xmm2, dqword [0] ; 0F 24 10 026 10 00 00
+fnmaddps xmm1, xmm1, [0], xmm3 ; 0F 24 10 036 18 00 00
+fnmaddps xmm1, xmm1, dqword [0], xmm3 ; 0F 24 10 036 18 00 00
+fnmaddps xmm1, xmm2, xmm3, xmm1 ; 0F 24 14 323 10 /or/ 0F 24 14 332 18
+fnmaddps xmm1, xmm2, [0], xmm1 ; 0F 24 14 026 10 00 00
+fnmaddps xmm1, xmm2, dqword [0], xmm1 ; 0F 24 14 026 10 00 00
+fnmaddps xmm1, [0], xmm3, xmm1 ; 0F 24 14 036 18 00 00
+fnmaddps xmm1, dqword [0], xmm3, xmm1 ; 0F 24 14 036 18 00 00
+
+fnmaddsd xmm1, xmm1, xmm2, xmm3 ; 0F 24 13 323 10 /or/ 0F 24 13 332 18
+fnmaddsd xmm1, xmm1, xmm2, [0] ; 0F 24 13 026 10 00 00
+fnmaddsd xmm1, xmm1, xmm2, qword [0] ; 0F 24 13 026 10 00 00
+fnmaddsd xmm1, xmm1, [0], xmm3 ; 0F 24 13 036 18 00 00
+fnmaddsd xmm1, xmm1, qword [0], xmm3 ; 0F 24 13 036 18 00 00
+fnmaddsd xmm1, xmm2, xmm3, xmm1 ; 0F 24 17 323 10 /or/ 0F 24 17 332 18
+fnmaddsd xmm1, xmm2, [0], xmm1 ; 0F 24 17 026 10 00 00
+fnmaddsd xmm1, xmm2, qword [0], xmm1 ; 0F 24 17 026 10 00 00
+fnmaddsd xmm1, [0], xmm3, xmm1 ; 0F 24 17 036 18 00 00
+fnmaddsd xmm1, qword [0], xmm3, xmm1 ; 0F 24 17 036 18 00 00
+
+fnmaddss xmm1, xmm1, xmm2, xmm3 ; 0F 24 12 323 10 /or/ 0F 24 12 332 18
+fnmaddss xmm1, xmm1, xmm2, [0] ; 0F 24 12 026 10 00 00
+fnmaddss xmm1, xmm1, xmm2, dword [0] ; 0F 24 12 026 10 00 00
+fnmaddss xmm1, xmm1, [0], xmm3 ; 0F 24 12 036 18 00 00
+fnmaddss xmm1, xmm1, dword [0], xmm3 ; 0F 24 12 036 18 00 00
+fnmaddss xmm1, xmm2, xmm3, xmm1 ; 0F 24 16 323 10 /or/ 0F 24 16 332 18
+fnmaddss xmm1, xmm2, [0], xmm1 ; 0F 24 16 026 10 00 00
+fnmaddss xmm1, xmm2, dword [0], xmm1 ; 0F 24 16 026 10 00 00
+fnmaddss xmm1, [0], xmm3, xmm1 ; 0F 24 16 036 18 00 00
+fnmaddss xmm1, dword [0], xmm3, xmm1 ; 0F 24 16 036 18 00 00
+
+fnmsubpd xmm1, xmm1, xmm2, xmm3 ; 0F 24 19 323 10 /or/ 0F 24 19 332 18
+fnmsubpd xmm1, xmm1, xmm2, [0] ; 0F 24 19 026 10 00 00
+fnmsubpd xmm1, xmm1, xmm2, dqword [0] ; 0F 24 19 026 10 00 00
+fnmsubpd xmm1, xmm1, [0], xmm3 ; 0F 24 19 036 18 00 00
+fnmsubpd xmm1, xmm1, dqword [0], xmm3 ; 0F 24 19 036 18 00 00
+fnmsubpd xmm1, xmm2, xmm3, xmm1 ; 0F 24 1D 323 10 /or/ 0F 24 1D 332 18
+fnmsubpd xmm1, xmm2, [0], xmm1 ; 0F 24 1D 026 10 00 00
+fnmsubpd xmm1, xmm2, dqword [0], xmm1 ; 0F 24 1D 026 10 00 00
+fnmsubpd xmm1, [0], xmm3, xmm1 ; 0F 24 1D 036 18 00 00
+fnmsubpd xmm1, dqword [0], xmm3, xmm1 ; 0F 24 1D 036 18 00 00
+
+fnmsubps xmm1, xmm1, xmm2, xmm3 ; 0F 24 18 323 10 /or/ 0F 24 18 332 18
+fnmsubps xmm1, xmm1, xmm2, [0] ; 0F 24 18 026 10 00 00
+fnmsubps xmm1, xmm1, xmm2, dqword [0] ; 0F 24 18 026 10 00 00
+fnmsubps xmm1, xmm1, [0], xmm3 ; 0F 24 18 036 18 00 00
+fnmsubps xmm1, xmm1, dqword [0], xmm3 ; 0F 24 18 036 18 00 00
+fnmsubps xmm1, xmm2, xmm3, xmm1 ; 0F 24 1C 323 10 /or/ 0F 24 1C 332 18
+fnmsubps xmm1, xmm2, [0], xmm1 ; 0F 24 1C 026 10 00 00
+fnmsubps xmm1, xmm2, dqword [0], xmm1 ; 0F 24 1C 026 10 00 00
+fnmsubps xmm1, [0], xmm3, xmm1 ; 0F 24 1C 036 18 00 00
+fnmsubps xmm1, dqword [0], xmm3, xmm1 ; 0F 24 1C 036 18 00 00
+
+fnmsubsd xmm1, xmm1, xmm2, xmm3 ; 0F 24 1B 323 10 /or/ 0F 24 1B 332 18
+fnmsubsd xmm1, xmm1, xmm2, [0] ; 0F 24 1B 026 10 00 00
+fnmsubsd xmm1, xmm1, xmm2, qword [0] ; 0F 24 1B 026 10 00 00
+fnmsubsd xmm1, xmm1, [0], xmm3 ; 0F 24 1B 036 18 00 00
+fnmsubsd xmm1, xmm1, qword [0], xmm3 ; 0F 24 1B 036 18 00 00
+fnmsubsd xmm1, xmm2, xmm3, xmm1 ; 0F 24 1F 323 10 /or/ 0F 24 1F 332 18
+fnmsubsd xmm1, xmm2, [0], xmm1 ; 0F 24 1F 026 10 00 00
+fnmsubsd xmm1, xmm2, qword [0], xmm1 ; 0F 24 1F 026 10 00 00
+fnmsubsd xmm1, [0], xmm3, xmm1 ; 0F 24 1F 036 18 00 00
+fnmsubsd xmm1, qword [0], xmm3, xmm1 ; 0F 24 1F 036 18 00 00
+
+fnmsubss xmm1, xmm1, xmm2, xmm3 ; 0F 24 1A 323 10 /or/ 0F 24 1A 332 18
+fnmsubss xmm1, xmm1, xmm2, [0] ; 0F 24 1A 026 10 00 00
+fnmsubss xmm1, xmm1, xmm2, dword [0] ; 0F 24 1A 026 10 00 00
+fnmsubss xmm1, xmm1, [0], xmm3 ; 0F 24 1A 036 18 00 00
+fnmsubss xmm1, xmm1, dword [0], xmm3 ; 0F 24 1A 036 18 00 00
+fnmsubss xmm1, xmm2, xmm3, xmm1 ; 0F 24 1E 323 10 /or/ 0F 24 1E 332 18
+fnmsubss xmm1, xmm2, [0], xmm1 ; 0F 24 1E 026 10 00 00
+fnmsubss xmm1, xmm2, dword [0], xmm1 ; 0F 24 1E 026 10 00 00
+fnmsubss xmm1, [0], xmm3, xmm1 ; 0F 24 1E 036 18 00 00
+fnmsubss xmm1, dword [0], xmm3, xmm1 ; 0F 24 1E 036 18 00 00
+
+frczpd xmm1, xmm2 ; 0F 7A 11 312
+frczpd xmm1, [0] ; 0F 7A 11 016 00 00
+frczpd xmm1, dqword [0] ; 0F 7A 11 016 00 00
+
+frczps xmm1, xmm2 ; 0F 7A 10 312
+frczps xmm1, [0] ; 0F 7A 10 016 00 00
+frczps xmm1, dqword [0] ; 0F 7A 10 016 00 00
+
+frczsd xmm1, xmm2 ; 0F 7A 13 312
+frczsd xmm1, [0] ; 0F 7A 13 016 00 00
+frczsd xmm1, qword [0] ; 0F 7A 13 016 00 00
+
+frczss xmm1, xmm2 ; 0F 7A 12 312
+frczss xmm1, [0] ; 0F 7A 12 016 00 00
+frczss xmm1, dword [0] ; 0F 7A 12 016 00 00
+
+pcmov xmm1, xmm1, xmm2, xmm3 ; 0F 24 22 323 10 /or/ 0F 24 22 332 18
+pcmov xmm1, xmm1, xmm2, [0] ; 0F 24 22 026 10 00 00
+pcmov xmm1, xmm1, xmm2, dqword [0] ; 0F 24 22 026 10 00 00
+pcmov xmm1, xmm1, [0], xmm3 ; 0F 24 22 036 18 00 00
+pcmov xmm1, xmm1, dqword [0], xmm3 ; 0F 24 22 036 18 00 00
+pcmov xmm1, xmm2, xmm3, xmm1 ; 0F 24 26 323 10 /or/ 0F 24 26 332 18
+pcmov xmm1, xmm2, [0], xmm1 ; 0F 24 26 026 10 00 00
+pcmov xmm1, xmm2, dqword [0], xmm1 ; 0F 24 26 026 10 00 00
+pcmov xmm1, [0], xmm3, xmm1 ; 0F 24 26 036 18 00 00
+pcmov xmm1, dqword [0], xmm3, xmm1 ; 0F 24 26 036 18 00 00
+
+pcomb xmm1, xmm4, xmm7, 5 ; 0F 25 4C 347 10 05
+pcomb xmm2, xmm5, [0], byte 5 ; 0F 25 4C 056 20 00 00 05
+pcomb xmm3, xmm6, dqword [0], 5 ; 0F 25 4C 066 30 00 00 05
+
+pcomd xmm1, xmm4, xmm7, 5 ; 0F 25 4E 347 10 05
+pcomd xmm2, xmm5, [0], byte 5 ; 0F 25 4E 056 20 00 00 05
+pcomd xmm3, xmm6, dqword [0], 5 ; 0F 25 4E 066 30 00 00 05
+
+pcomq xmm1, xmm4, xmm7, 5 ; 0F 25 4F 347 10 05
+pcomq xmm2, xmm5, [0], byte 5 ; 0F 25 4F 056 20 00 00 05
+pcomq xmm3, xmm6, dqword [0], 5 ; 0F 25 4F 066 30 00 00 05
+
+pcomub xmm1, xmm4, xmm7, 5 ; 0F 25 6C 347 10 05
+pcomub xmm2, xmm5, [0], byte 5 ; 0F 25 6C 056 20 00 00 05
+pcomub xmm3, xmm6, dqword [0], 5 ; 0F 25 6C 066 30 00 00 05
+
+pcomud xmm1, xmm4, xmm7, 5 ; 0F 25 6E 347 10 05
+pcomud xmm2, xmm5, [0], byte 5 ; 0F 25 6E 056 20 00 00 05
+pcomud xmm3, xmm6, dqword [0], 5 ; 0F 25 6E 066 30 00 00 05
+
+pcomuq xmm1, xmm4, xmm7, 5 ; 0F 25 6F 347 10 05
+pcomuq xmm2, xmm5, [0], byte 5 ; 0F 25 6F 056 20 00 00 05
+pcomuq xmm3, xmm6, dqword [0], 5 ; 0F 25 6F 066 30 00 00 05
+
+pcomuw xmm1, xmm4, xmm7, 5 ; 0F 25 6D 347 10 05
+pcomuw xmm2, xmm5, [0], byte 5 ; 0F 25 6D 056 20 00 00 05
+pcomuw xmm3, xmm6, dqword [0], 5 ; 0F 25 6D 066 30 00 00 05
+
+pcomw xmm1, xmm4, xmm7, 5 ; 0F 25 4D 347 10 05
+pcomw xmm2, xmm5, [0], byte 5 ; 0F 25 4D 056 20 00 00 05
+pcomw xmm3, xmm6, dqword [0], 5 ; 0F 25 4D 066 30 00 00 05
+
+permpd xmm1, xmm1, xmm2, xmm3 ; 0F 24 21 323 10 /or/ 0F 24 21 332 18
+permpd xmm1, xmm1, xmm2, [0] ; 0F 24 21 026 10 00 00
+permpd xmm1, xmm1, xmm2, dqword [0] ; 0F 24 21 026 10 00 00
+permpd xmm1, xmm1, [0], xmm3 ; 0F 24 21 036 18 00 00
+permpd xmm1, xmm1, dqword [0], xmm3 ; 0F 24 21 036 18 00 00
+permpd xmm1, xmm2, xmm3, xmm1 ; 0F 24 25 323 10 /or/ 0F 24 25 332 18
+permpd xmm1, xmm2, [0], xmm1 ; 0F 24 25 026 10 00 00
+permpd xmm1, xmm2, dqword [0], xmm1 ; 0F 24 25 026 10 00 00
+permpd xmm1, [0], xmm3, xmm1 ; 0F 24 25 036 18 00 00
+permpd xmm1, dqword [0], xmm3, xmm1 ; 0F 24 25 036 18 00 00
+
+permps xmm1, xmm1, xmm2, xmm3 ; 0F 24 20 323 10 /or/ 0F 24 20 332 18
+permps xmm1, xmm1, xmm2, [0] ; 0F 24 20 026 10 00 00
+permps xmm1, xmm1, xmm2, dqword [0] ; 0F 24 20 026 10 00 00
+permps xmm1, xmm1, [0], xmm3 ; 0F 24 20 036 18 00 00
+permps xmm1, xmm1, dqword [0], xmm3 ; 0F 24 20 036 18 00 00
+permps xmm1, xmm2, xmm3, xmm1 ; 0F 24 24 323 10 /or/ 0F 24 24 332 18
+permps xmm1, xmm2, [0], xmm1 ; 0F 24 24 026 10 00 00
+permps xmm1, xmm2, dqword [0], xmm1 ; 0F 24 24 026 10 00 00
+permps xmm1, [0], xmm3, xmm1 ; 0F 24 24 036 18 00 00
+permps xmm1, dqword [0], xmm3, xmm1 ; 0F 24 24 036 18 00 00
+
+phaddbd xmm1, xmm2 ; 0F 7A 42 312
+phaddbd xmm1, [0] ; 0F 7A 42 016 00 00
+phaddbd xmm1, dqword [0] ; 0F 7A 42 016 00 00
+
+phaddbq xmm1, xmm2 ; 0F 7A 43 312
+phaddbq xmm1, [0] ; 0F 7A 43 016 00 00
+phaddbq xmm1, dqword [0] ; 0F 7A 43 016 00 00
+
+phaddbw xmm1, xmm2 ; 0F 7A 41 312
+phaddbw xmm1, [0] ; 0F 7A 41 016 00 00
+phaddbw xmm1, dqword [0] ; 0F 7A 41 016 00 00
+
+phadddq xmm1, xmm2 ; 0F 7A 4B 312
+phadddq xmm1, [0] ; 0F 7A 4B 016 00 00
+phadddq xmm1, dqword [0] ; 0F 7A 4B 016 00 00
+
+phaddubd xmm1, xmm2 ; 0F 7A 52 312
+phaddubd xmm1, [0] ; 0F 7A 52 016 00 00
+phaddubd xmm1, dqword [0] ; 0F 7A 52 016 00 00
+
+phaddubq xmm1, xmm2 ; 0F 7A 53 312
+phaddubq xmm1, [0] ; 0F 7A 53 016 00 00
+phaddubq xmm1, dqword [0] ; 0F 7A 53 016 00 00
+
+phaddubw xmm1, xmm2 ; 0F 7A 51 312
+phaddubw xmm1, [0] ; 0F 7A 51 016 00 00
+phaddubw xmm1, dqword [0] ; 0F 7A 51 016 00 00
+
+phaddudq xmm1, xmm2 ; 0F 7A 5B 312
+phaddudq xmm1, [0] ; 0F 7A 5B 016 00 00
+phaddudq xmm1, dqword [0] ; 0F 7A 5B 016 00 00
+
+phadduwd xmm1, xmm2 ; 0F 7A 56 312
+phadduwd xmm1, [0] ; 0F 7A 56 016 00 00
+phadduwd xmm1, dqword [0] ; 0F 7A 56 016 00 00
+
+phadduwq xmm1, xmm2 ; 0F 7A 57 312
+phadduwq xmm1, [0] ; 0F 7A 57 016 00 00
+phadduwq xmm1, dqword [0] ; 0F 7A 57 016 00 00
+
+phaddwd xmm1, xmm2 ; 0F 7A 46 312
+phaddwd xmm1, [0] ; 0F 7A 46 016 00 00
+phaddwd xmm1, dqword [0] ; 0F 7A 46 016 00 00
+
+phaddwq xmm1, xmm2 ; 0F 7A 47 312
+phaddwq xmm1, [0] ; 0F 7A 47 016 00 00
+phaddwq xmm1, dqword [0] ; 0F 7A 47 016 00 00
+
+phsubbw xmm1, xmm2 ; 0F 7A 61 312
+phsubbw xmm1, [0] ; 0F 7A 61 016 00 00
+phsubbw xmm1, dqword [0] ; 0F 7A 61 016 00 00
+
+phsubdq xmm1, xmm2 ; 0F 7A 63 312
+phsubdq xmm1, [0] ; 0F 7A 63 016 00 00
+phsubdq xmm1, dqword [0] ; 0F 7A 63 016 00 00
+
+phsubwd xmm1, xmm2 ; 0F 7A 62 312
+phsubwd xmm1, [0] ; 0F 7A 62 016 00 00
+phsubwd xmm1, dqword [0] ; 0F 7A 62 016 00 00
+
+pmacsdd xmm1, xmm4, xmm7, xmm1 ; 0F 24 9E 347 10
+pmacsdd xmm2, xmm5, [0], xmm2 ; 0F 24 9E 056 20 00 00
+pmacsdd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 9E 066 30 00 00
+
+pmacsdqh xmm1, xmm4, xmm7, xmm1 ; 0F 24 9F 347 10
+pmacsdqh xmm2, xmm5, [0], xmm2 ; 0F 24 9F 056 20 00 00
+pmacsdqh xmm3, xmm6, dqword [0], xmm3 ; 0F 24 9F 066 30 00 00
+
+pmacsdql xmm1, xmm4, xmm7, xmm1 ; 0F 24 97 347 10
+pmacsdql xmm2, xmm5, [0], xmm2 ; 0F 24 97 056 20 00 00
+pmacsdql xmm3, xmm6, dqword [0], xmm3 ; 0F 24 97 066 30 00 00
+
+pmacssdd xmm1, xmm4, xmm7, xmm1 ; 0F 24 8E 347 10
+pmacssdd xmm2, xmm5, [0], xmm2 ; 0F 24 8E 056 20 00 00
+pmacssdd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 8E 066 30 00 00
+
+pmacssdqh xmm1, xmm4, xmm7, xmm1 ; 0F 24 8F 347 10
+pmacssdqh xmm2, xmm5, [0], xmm2 ; 0F 24 8F 056 20 00 00
+pmacssdqh xmm3, xmm6, dqword [0], xmm3 ; 0F 24 8F 066 30 00 00
+
+pmacssdql xmm1, xmm4, xmm7, xmm1 ; 0F 24 87 347 10
+pmacssdql xmm2, xmm5, [0], xmm2 ; 0F 24 87 056 20 00 00
+pmacssdql xmm3, xmm6, dqword [0], xmm3 ; 0F 24 87 066 30 00 00
+
+pmacsswd xmm1, xmm4, xmm7, xmm1 ; 0F 24 86 347 10
+pmacsswd xmm2, xmm5, [0], xmm2 ; 0F 24 86 056 20 00 00
+pmacsswd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 86 066 30 00 00
+
+pmacssww xmm1, xmm4, xmm7, xmm1 ; 0F 24 85 347 10
+pmacssww xmm2, xmm5, [0], xmm2 ; 0F 24 85 056 20 00 00
+pmacssww xmm3, xmm6, dqword [0], xmm3 ; 0F 24 85 066 30 00 00
+
+pmacswd xmm1, xmm4, xmm7, xmm1 ; 0F 24 96 347 10
+pmacswd xmm2, xmm5, [0], xmm2 ; 0F 24 96 056 20 00 00
+pmacswd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 96 066 30 00 00
+
+pmacsww xmm1, xmm4, xmm7, xmm1 ; 0F 24 95 347 10
+pmacsww xmm2, xmm5, [0], xmm2 ; 0F 24 95 056 20 00 00
+pmacsww xmm3, xmm6, dqword [0], xmm3 ; 0F 24 95 066 30 00 00
+
+pmadcsswd xmm1, xmm4, xmm7, xmm1 ; 0F 24 A6 347 10
+pmadcsswd xmm2, xmm5, [0], xmm2 ; 0F 24 A6 056 20 00 00
+pmadcsswd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 A6 066 30 00 00
+
+pmadcswd xmm1, xmm4, xmm7, xmm1 ; 0F 24 B6 347 10
+pmadcswd xmm2, xmm5, [0], xmm2 ; 0F 24 B6 056 20 00 00
+pmadcswd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 B6 066 30 00 00
+
+pperm xmm1, xmm1, xmm2, xmm3 ; 0F 24 23 323 10 /or/ 0F 24 23 332 18
+pperm xmm1, xmm1, xmm2, [0] ; 0F 24 23 026 10 00 00
+pperm xmm1, xmm1, xmm2, dqword [0] ; 0F 24 23 026 10 00 00
+pperm xmm1, xmm1, [0], xmm3 ; 0F 24 23 036 18 00 00
+pperm xmm1, xmm1, dqword [0], xmm3 ; 0F 24 23 036 18 00 00
+pperm xmm1, xmm2, xmm3, xmm1 ; 0F 24 27 323 10 /or/ 0F 24 27 332 18
+pperm xmm1, xmm2, [0], xmm1 ; 0F 24 27 026 10 00 00
+pperm xmm1, xmm2, dqword [0], xmm1 ; 0F 24 27 026 10 00 00
+pperm xmm1, [0], xmm3, xmm1 ; 0F 24 27 036 18 00 00
+pperm xmm1, dqword [0], xmm3, xmm1 ; 0F 24 27 036 18 00 00
+
+protb xmm1, xmm2, xmm3 ; 0F 24 40 323 10 /or/ 0F 24 40 332 18
+protb xmm1, xmm2, [0] ; 0F 24 40 026 10 00 00
+protb xmm1, xmm2, dqword [0] ; 0F 24 40 026 10 00 00
+protb xmm1, [0], xmm3 ; 0F 24 40 036 18 00 00
+protb xmm1, dqword [0], xmm3 ; 0F 24 40 036 18 00 00
+protb xmm1, xmm2, byte 5 ; 0F 7B 40 312 05
+protb xmm1, [0], byte 5 ; 0F 7B 40 016 00 00 05
+protb xmm1, dqword [0], 5 ; 0F 7B 40 016 00 00 05
+
+protd xmm1, xmm2, xmm3 ; 0F 24 42 323 10 /or/ 0F 24 42 332 18
+protd xmm1, xmm2, [0] ; 0F 24 42 026 10 00 00
+protd xmm1, xmm2, dqword [0] ; 0F 24 42 026 10 00 00
+protd xmm1, [0], xmm3 ; 0F 24 42 036 18 00 00
+protd xmm1, dqword [0], xmm3 ; 0F 24 42 036 18 00 00
+protd xmm1, xmm2, byte 5 ; 0F 7B 42 312 05
+protd xmm1, [0], byte 5 ; 0F 7B 42 016 00 00 05
+protd xmm1, dqword [0], 5 ; 0F 7B 42 016 00 00 05
+
+protq xmm1, xmm2, xmm3 ; 0F 24 43 323 10 /or/ 0F 24 43 332 18
+protq xmm1, xmm2, [0] ; 0F 24 43 026 10 00 00
+protq xmm1, xmm2, dqword [0] ; 0F 24 43 026 10 00 00
+protq xmm1, [0], xmm3 ; 0F 24 43 036 18 00 00
+protq xmm1, dqword [0], xmm3 ; 0F 24 43 036 18 00 00
+protq xmm1, xmm2, byte 5 ; 0F 7B 43 312 05
+protq xmm1, [0], byte 5 ; 0F 7B 43 016 00 00 05
+protq xmm1, dqword [0], 5 ; 0F 7B 43 016 00 00 05
+
+protw xmm1, xmm2, xmm3 ; 0F 24 41 323 10 /or/ 0F 24 41 332 18
+protw xmm1, xmm2, [0] ; 0F 24 41 026 10 00 00
+protw xmm1, xmm2, dqword [0] ; 0F 24 41 026 10 00 00
+protw xmm1, [0], xmm3 ; 0F 24 41 036 18 00 00
+protw xmm1, dqword [0], xmm3 ; 0F 24 41 036 18 00 00
+protw xmm1, xmm2, byte 5 ; 0F 7B 41 312 05
+protw xmm1, [0], byte 5 ; 0F 7B 41 016 00 00 05
+protw xmm1, dqword [0], 5 ; 0F 7B 41 016 00 00 05
+
+pshab xmm1, xmm2, xmm3 ; 0F 24 48 323 10 /or/ 0F 24 48 332 18
+pshab xmm1, xmm2, [0] ; 0F 24 48 026 10 00 00
+pshab xmm1, xmm2, dqword [0] ; 0F 24 48 026 10 00 00
+pshab xmm1, [0], xmm3 ; 0F 24 48 036 18 00 00
+pshab xmm1, dqword [0], xmm3 ; 0F 24 48 036 18 00 00
+
+pshad xmm1, xmm2, xmm3 ; 0F 24 4A 323 10 /or/ 0F 24 4A 332 18
+pshad xmm1, xmm2, [0] ; 0F 24 4A 026 10 00 00
+pshad xmm1, xmm2, dqword [0] ; 0F 24 4A 026 10 00 00
+pshad xmm1, [0], xmm3 ; 0F 24 4A 036 18 00 00
+pshad xmm1, dqword [0], xmm3 ; 0F 24 4A 036 18 00 00
+
+pshaq xmm1, xmm2, xmm3 ; 0F 24 4B 323 10 /or/ 0F 24 4B 332 18
+pshaq xmm1, xmm2, [0] ; 0F 24 4B 026 10 00 00
+pshaq xmm1, xmm2, dqword [0] ; 0F 24 4B 026 10 00 00
+pshaq xmm1, [0], xmm3 ; 0F 24 4B 036 18 00 00
+pshaq xmm1, dqword [0], xmm3 ; 0F 24 4B 036 18 00 00
+
+pshaw xmm1, xmm2, xmm3 ; 0F 24 49 323 10 /or/ 0F 24 49 332 18
+pshaw xmm1, xmm2, [0] ; 0F 24 49 026 10 00 00
+pshaw xmm1, xmm2, dqword [0] ; 0F 24 49 026 10 00 00
+pshaw xmm1, [0], xmm3 ; 0F 24 49 036 18 00 00
+pshaw xmm1, dqword [0], xmm3 ; 0F 24 49 036 18 00 00
+
+pshlb xmm1, xmm2, xmm3 ; 0F 24 44 323 10 /or/ 0F 24 44 332 18
+pshlb xmm1, xmm2, [0] ; 0F 24 44 026 10 00 00
+pshlb xmm1, xmm2, dqword [0] ; 0F 24 44 026 10 00 00
+pshlb xmm1, [0], xmm3 ; 0F 24 44 036 18 00 00
+pshlb xmm1, dqword [0], xmm3 ; 0F 24 44 036 18 00 00
+
+pshld xmm1, xmm2, xmm3 ; 0F 24 46 323 10 /or/ 0F 24 46 332 18
+pshld xmm1, xmm2, [0] ; 0F 24 46 026 10 00 00
+pshld xmm1, xmm2, dqword [0] ; 0F 24 46 026 10 00 00
+pshld xmm1, [0], xmm3 ; 0F 24 46 036 18 00 00
+pshld xmm1, dqword [0], xmm3 ; 0F 24 46 036 18 00 00
+
+pshlq xmm1, xmm2, xmm3 ; 0F 24 47 323 10 /or/ 0F 24 47 332 18
+pshlq xmm1, xmm2, [0] ; 0F 24 47 026 10 00 00
+pshlq xmm1, xmm2, dqword [0] ; 0F 24 47 026 10 00 00
+pshlq xmm1, [0], xmm3 ; 0F 24 47 036 18 00 00
+pshlq xmm1, dqword [0], xmm3 ; 0F 24 47 036 18 00 00
+
+pshlw xmm1, xmm2, xmm3 ; 0F 24 45 323 10 /or/ 0F 24 45 332 18
+pshlw xmm1, xmm2, [0] ; 0F 24 45 026 10 00 00
+pshlw xmm1, xmm2, dqword [0] ; 0F 24 45 026 10 00 00
+pshlw xmm1, [0], xmm3 ; 0F 24 45 036 18 00 00
+pshlw xmm1, dqword [0], xmm3 ; 0F 24 45 036 18 00 00
+
+; SSE5 instructions that are also SSE4.1 instructions
+
+ptest xmm1, xmm2 ; 66 0F 38 17 312
+ptest xmm1, [0] ; 66 0F 38 17 016 00 00
+ptest xmm1, dqword [0] ; 66 0F 38 17 016 00 00
+
+roundpd xmm1, xmm2, 5 ; 66 0F 3A 09 312 05
+roundpd xmm1, [0], byte 5 ; 66 0F 3A 09 016 00 00 05
+roundpd xmm1, dqword [0], 5 ; 66 0F 3A 09 016 00 00 05
+
+roundps xmm1, xmm2, 5 ; 66 0F 3A 08 312 05
+roundps xmm1, [0], byte 5 ; 66 0F 3A 08 016 00 00 05
+roundps xmm1, dqword [0], 5 ; 66 0F 3A 08 016 00 00 05
+
+roundsd xmm1, xmm2, 5 ; 66 0F 3A 0B 312 05
+roundsd xmm1, [0], byte 5 ; 66 0F 3A 0B 016 00 00 05
+roundsd xmm1, qword [0], 5 ; 66 0F 3A 0B 016 00 00 05
+
+roundss xmm1, xmm2, 5 ; 66 0F 3A 0A 312 05
+roundss xmm1, [0], byte 5 ; 66 0F 3A 0A 016 00 00 05
+roundss xmm1, dword [0], 5 ; 66 0F 3A 0A 016 00 00 05
+
--- /dev/null
+0f
+25
+2d
+e7
+10
+05
+0f
+25
+2d
+2e
+20
+00
+00
+05
+0f
+25
+2d
+36
+30
+00
+00
+05
+0f
+25
+2c
+e7
+10
+05
+0f
+25
+2c
+2e
+20
+00
+00
+05
+0f
+25
+2c
+36
+30
+00
+00
+05
+0f
+25
+2f
+e7
+10
+05
+0f
+25
+2f
+2e
+20
+00
+00
+05
+0f
+25
+2f
+36
+30
+00
+00
+05
+0f
+25
+2e
+e7
+10
+05
+0f
+25
+2e
+2e
+20
+00
+00
+05
+0f
+25
+2e
+36
+30
+00
+00
+05
+0f
+7a
+30
+cc
+0f
+7a
+30
+16
+00
+00
+0f
+7a
+30
+1e
+00
+00
+0f
+7a
+31
+e1
+0f
+7a
+31
+16
+00
+00
+0f
+7a
+31
+1e
+00
+00
+0f
+24
+01
+d3
+10
+0f
+24
+01
+16
+10
+00
+00
+0f
+24
+01
+16
+10
+00
+00
+0f
+24
+01
+1e
+18
+00
+00
+0f
+24
+01
+1e
+18
+00
+00
+0f
+24
+05
+d3
+10
+0f
+24
+05
+16
+10
+00
+00
+0f
+24
+05
+16
+10
+00
+00
+0f
+24
+05
+1e
+18
+00
+00
+0f
+24
+05
+1e
+18
+00
+00
+0f
+24
+00
+d3
+10
+0f
+24
+00
+16
+10
+00
+00
+0f
+24
+00
+16
+10
+00
+00
+0f
+24
+00
+1e
+18
+00
+00
+0f
+24
+00
+1e
+18
+00
+00
+0f
+24
+04
+d3
+10
+0f
+24
+04
+16
+10
+00
+00
+0f
+24
+04
+16
+10
+00
+00
+0f
+24
+04
+1e
+18
+00
+00
+0f
+24
+04
+1e
+18
+00
+00
+0f
+24
+03
+d3
+10
+0f
+24
+03
+16
+10
+00
+00
+0f
+24
+03
+16
+10
+00
+00
+0f
+24
+03
+1e
+18
+00
+00
+0f
+24
+03
+1e
+18
+00
+00
+0f
+24
+07
+d3
+10
+0f
+24
+07
+16
+10
+00
+00
+0f
+24
+07
+16
+10
+00
+00
+0f
+24
+07
+1e
+18
+00
+00
+0f
+24
+07
+1e
+18
+00
+00
+0f
+24
+02
+d3
+10
+0f
+24
+02
+16
+10
+00
+00
+0f
+24
+02
+16
+10
+00
+00
+0f
+24
+02
+1e
+18
+00
+00
+0f
+24
+02
+1e
+18
+00
+00
+0f
+24
+06
+d3
+10
+0f
+24
+06
+16
+10
+00
+00
+0f
+24
+06
+16
+10
+00
+00
+0f
+24
+06
+1e
+18
+00
+00
+0f
+24
+06
+1e
+18
+00
+00
+0f
+24
+09
+d3
+10
+0f
+24
+09
+16
+10
+00
+00
+0f
+24
+09
+16
+10
+00
+00
+0f
+24
+09
+1e
+18
+00
+00
+0f
+24
+09
+1e
+18
+00
+00
+0f
+24
+0d
+d3
+10
+0f
+24
+0d
+16
+10
+00
+00
+0f
+24
+0d
+16
+10
+00
+00
+0f
+24
+0d
+1e
+18
+00
+00
+0f
+24
+0d
+1e
+18
+00
+00
+0f
+24
+08
+d3
+10
+0f
+24
+08
+16
+10
+00
+00
+0f
+24
+08
+16
+10
+00
+00
+0f
+24
+08
+1e
+18
+00
+00
+0f
+24
+08
+1e
+18
+00
+00
+0f
+24
+0c
+d3
+10
+0f
+24
+0c
+16
+10
+00
+00
+0f
+24
+0c
+16
+10
+00
+00
+0f
+24
+0c
+1e
+18
+00
+00
+0f
+24
+0c
+1e
+18
+00
+00
+0f
+24
+0b
+d3
+10
+0f
+24
+0b
+16
+10
+00
+00
+0f
+24
+0b
+16
+10
+00
+00
+0f
+24
+0b
+1e
+18
+00
+00
+0f
+24
+0b
+1e
+18
+00
+00
+0f
+24
+0f
+d3
+10
+0f
+24
+0f
+16
+10
+00
+00
+0f
+24
+0f
+16
+10
+00
+00
+0f
+24
+0f
+1e
+18
+00
+00
+0f
+24
+0f
+1e
+18
+00
+00
+0f
+24
+0a
+d3
+10
+0f
+24
+0a
+16
+10
+00
+00
+0f
+24
+0a
+16
+10
+00
+00
+0f
+24
+0a
+1e
+18
+00
+00
+0f
+24
+0a
+1e
+18
+00
+00
+0f
+24
+0e
+d3
+10
+0f
+24
+0e
+16
+10
+00
+00
+0f
+24
+0e
+16
+10
+00
+00
+0f
+24
+0e
+1e
+18
+00
+00
+0f
+24
+0e
+1e
+18
+00
+00
+0f
+24
+11
+d3
+10
+0f
+24
+11
+16
+10
+00
+00
+0f
+24
+11
+16
+10
+00
+00
+0f
+24
+11
+1e
+18
+00
+00
+0f
+24
+11
+1e
+18
+00
+00
+0f
+24
+15
+d3
+10
+0f
+24
+15
+16
+10
+00
+00
+0f
+24
+15
+16
+10
+00
+00
+0f
+24
+15
+1e
+18
+00
+00
+0f
+24
+15
+1e
+18
+00
+00
+0f
+24
+10
+d3
+10
+0f
+24
+10
+16
+10
+00
+00
+0f
+24
+10
+16
+10
+00
+00
+0f
+24
+10
+1e
+18
+00
+00
+0f
+24
+10
+1e
+18
+00
+00
+0f
+24
+14
+d3
+10
+0f
+24
+14
+16
+10
+00
+00
+0f
+24
+14
+16
+10
+00
+00
+0f
+24
+14
+1e
+18
+00
+00
+0f
+24
+14
+1e
+18
+00
+00
+0f
+24
+13
+d3
+10
+0f
+24
+13
+16
+10
+00
+00
+0f
+24
+13
+16
+10
+00
+00
+0f
+24
+13
+1e
+18
+00
+00
+0f
+24
+13
+1e
+18
+00
+00
+0f
+24
+17
+d3
+10
+0f
+24
+17
+16
+10
+00
+00
+0f
+24
+17
+16
+10
+00
+00
+0f
+24
+17
+1e
+18
+00
+00
+0f
+24
+17
+1e
+18
+00
+00
+0f
+24
+12
+d3
+10
+0f
+24
+12
+16
+10
+00
+00
+0f
+24
+12
+16
+10
+00
+00
+0f
+24
+12
+1e
+18
+00
+00
+0f
+24
+12
+1e
+18
+00
+00
+0f
+24
+16
+d3
+10
+0f
+24
+16
+16
+10
+00
+00
+0f
+24
+16
+16
+10
+00
+00
+0f
+24
+16
+1e
+18
+00
+00
+0f
+24
+16
+1e
+18
+00
+00
+0f
+24
+19
+d3
+10
+0f
+24
+19
+16
+10
+00
+00
+0f
+24
+19
+16
+10
+00
+00
+0f
+24
+19
+1e
+18
+00
+00
+0f
+24
+19
+1e
+18
+00
+00
+0f
+24
+1d
+d3
+10
+0f
+24
+1d
+16
+10
+00
+00
+0f
+24
+1d
+16
+10
+00
+00
+0f
+24
+1d
+1e
+18
+00
+00
+0f
+24
+1d
+1e
+18
+00
+00
+0f
+24
+18
+d3
+10
+0f
+24
+18
+16
+10
+00
+00
+0f
+24
+18
+16
+10
+00
+00
+0f
+24
+18
+1e
+18
+00
+00
+0f
+24
+18
+1e
+18
+00
+00
+0f
+24
+1c
+d3
+10
+0f
+24
+1c
+16
+10
+00
+00
+0f
+24
+1c
+16
+10
+00
+00
+0f
+24
+1c
+1e
+18
+00
+00
+0f
+24
+1c
+1e
+18
+00
+00
+0f
+24
+1b
+d3
+10
+0f
+24
+1b
+16
+10
+00
+00
+0f
+24
+1b
+16
+10
+00
+00
+0f
+24
+1b
+1e
+18
+00
+00
+0f
+24
+1b
+1e
+18
+00
+00
+0f
+24
+1f
+d3
+10
+0f
+24
+1f
+16
+10
+00
+00
+0f
+24
+1f
+16
+10
+00
+00
+0f
+24
+1f
+1e
+18
+00
+00
+0f
+24
+1f
+1e
+18
+00
+00
+0f
+24
+1a
+d3
+10
+0f
+24
+1a
+16
+10
+00
+00
+0f
+24
+1a
+16
+10
+00
+00
+0f
+24
+1a
+1e
+18
+00
+00
+0f
+24
+1a
+1e
+18
+00
+00
+0f
+24
+1e
+d3
+10
+0f
+24
+1e
+16
+10
+00
+00
+0f
+24
+1e
+16
+10
+00
+00
+0f
+24
+1e
+1e
+18
+00
+00
+0f
+24
+1e
+1e
+18
+00
+00
+0f
+7a
+11
+ca
+0f
+7a
+11
+0e
+00
+00
+0f
+7a
+11
+0e
+00
+00
+0f
+7a
+10
+ca
+0f
+7a
+10
+0e
+00
+00
+0f
+7a
+10
+0e
+00
+00
+0f
+7a
+13
+ca
+0f
+7a
+13
+0e
+00
+00
+0f
+7a
+13
+0e
+00
+00
+0f
+7a
+12
+ca
+0f
+7a
+12
+0e
+00
+00
+0f
+7a
+12
+0e
+00
+00
+0f
+24
+22
+d3
+10
+0f
+24
+22
+16
+10
+00
+00
+0f
+24
+22
+16
+10
+00
+00
+0f
+24
+22
+1e
+18
+00
+00
+0f
+24
+22
+1e
+18
+00
+00
+0f
+24
+26
+d3
+10
+0f
+24
+26
+16
+10
+00
+00
+0f
+24
+26
+16
+10
+00
+00
+0f
+24
+26
+1e
+18
+00
+00
+0f
+24
+26
+1e
+18
+00
+00
+0f
+25
+4c
+e7
+10
+05
+0f
+25
+4c
+2e
+20
+00
+00
+05
+0f
+25
+4c
+36
+30
+00
+00
+05
+0f
+25
+4e
+e7
+10
+05
+0f
+25
+4e
+2e
+20
+00
+00
+05
+0f
+25
+4e
+36
+30
+00
+00
+05
+0f
+25
+4f
+e7
+10
+05
+0f
+25
+4f
+2e
+20
+00
+00
+05
+0f
+25
+4f
+36
+30
+00
+00
+05
+0f
+25
+6c
+e7
+10
+05
+0f
+25
+6c
+2e
+20
+00
+00
+05
+0f
+25
+6c
+36
+30
+00
+00
+05
+0f
+25
+6e
+e7
+10
+05
+0f
+25
+6e
+2e
+20
+00
+00
+05
+0f
+25
+6e
+36
+30
+00
+00
+05
+0f
+25
+6f
+e7
+10
+05
+0f
+25
+6f
+2e
+20
+00
+00
+05
+0f
+25
+6f
+36
+30
+00
+00
+05
+0f
+25
+6d
+e7
+10
+05
+0f
+25
+6d
+2e
+20
+00
+00
+05
+0f
+25
+6d
+36
+30
+00
+00
+05
+0f
+25
+4d
+e7
+10
+05
+0f
+25
+4d
+2e
+20
+00
+00
+05
+0f
+25
+4d
+36
+30
+00
+00
+05
+0f
+24
+21
+d3
+10
+0f
+24
+21
+16
+10
+00
+00
+0f
+24
+21
+16
+10
+00
+00
+0f
+24
+21
+1e
+18
+00
+00
+0f
+24
+21
+1e
+18
+00
+00
+0f
+24
+25
+d3
+10
+0f
+24
+25
+16
+10
+00
+00
+0f
+24
+25
+16
+10
+00
+00
+0f
+24
+25
+1e
+18
+00
+00
+0f
+24
+25
+1e
+18
+00
+00
+0f
+24
+20
+d3
+10
+0f
+24
+20
+16
+10
+00
+00
+0f
+24
+20
+16
+10
+00
+00
+0f
+24
+20
+1e
+18
+00
+00
+0f
+24
+20
+1e
+18
+00
+00
+0f
+24
+24
+d3
+10
+0f
+24
+24
+16
+10
+00
+00
+0f
+24
+24
+16
+10
+00
+00
+0f
+24
+24
+1e
+18
+00
+00
+0f
+24
+24
+1e
+18
+00
+00
+0f
+7a
+42
+ca
+0f
+7a
+42
+0e
+00
+00
+0f
+7a
+42
+0e
+00
+00
+0f
+7a
+43
+ca
+0f
+7a
+43
+0e
+00
+00
+0f
+7a
+43
+0e
+00
+00
+0f
+7a
+41
+ca
+0f
+7a
+41
+0e
+00
+00
+0f
+7a
+41
+0e
+00
+00
+0f
+7a
+4b
+ca
+0f
+7a
+4b
+0e
+00
+00
+0f
+7a
+4b
+0e
+00
+00
+0f
+7a
+52
+ca
+0f
+7a
+52
+0e
+00
+00
+0f
+7a
+52
+0e
+00
+00
+0f
+7a
+53
+ca
+0f
+7a
+53
+0e
+00
+00
+0f
+7a
+53
+0e
+00
+00
+0f
+7a
+51
+ca
+0f
+7a
+51
+0e
+00
+00
+0f
+7a
+51
+0e
+00
+00
+0f
+7a
+5b
+ca
+0f
+7a
+5b
+0e
+00
+00
+0f
+7a
+5b
+0e
+00
+00
+0f
+7a
+56
+ca
+0f
+7a
+56
+0e
+00
+00
+0f
+7a
+56
+0e
+00
+00
+0f
+7a
+57
+ca
+0f
+7a
+57
+0e
+00
+00
+0f
+7a
+57
+0e
+00
+00
+0f
+7a
+46
+ca
+0f
+7a
+46
+0e
+00
+00
+0f
+7a
+46
+0e
+00
+00
+0f
+7a
+47
+ca
+0f
+7a
+47
+0e
+00
+00
+0f
+7a
+47
+0e
+00
+00
+0f
+7a
+61
+ca
+0f
+7a
+61
+0e
+00
+00
+0f
+7a
+61
+0e
+00
+00
+0f
+7a
+63
+ca
+0f
+7a
+63
+0e
+00
+00
+0f
+7a
+63
+0e
+00
+00
+0f
+7a
+62
+ca
+0f
+7a
+62
+0e
+00
+00
+0f
+7a
+62
+0e
+00
+00
+0f
+24
+9e
+e7
+10
+0f
+24
+9e
+2e
+20
+00
+00
+0f
+24
+9e
+36
+30
+00
+00
+0f
+24
+9f
+e7
+10
+0f
+24
+9f
+2e
+20
+00
+00
+0f
+24
+9f
+36
+30
+00
+00
+0f
+24
+97
+e7
+10
+0f
+24
+97
+2e
+20
+00
+00
+0f
+24
+97
+36
+30
+00
+00
+0f
+24
+8e
+e7
+10
+0f
+24
+8e
+2e
+20
+00
+00
+0f
+24
+8e
+36
+30
+00
+00
+0f
+24
+8f
+e7
+10
+0f
+24
+8f
+2e
+20
+00
+00
+0f
+24
+8f
+36
+30
+00
+00
+0f
+24
+87
+e7
+10
+0f
+24
+87
+2e
+20
+00
+00
+0f
+24
+87
+36
+30
+00
+00
+0f
+24
+86
+e7
+10
+0f
+24
+86
+2e
+20
+00
+00
+0f
+24
+86
+36
+30
+00
+00
+0f
+24
+85
+e7
+10
+0f
+24
+85
+2e
+20
+00
+00
+0f
+24
+85
+36
+30
+00
+00
+0f
+24
+96
+e7
+10
+0f
+24
+96
+2e
+20
+00
+00
+0f
+24
+96
+36
+30
+00
+00
+0f
+24
+95
+e7
+10
+0f
+24
+95
+2e
+20
+00
+00
+0f
+24
+95
+36
+30
+00
+00
+0f
+24
+a6
+e7
+10
+0f
+24
+a6
+2e
+20
+00
+00
+0f
+24
+a6
+36
+30
+00
+00
+0f
+24
+b6
+e7
+10
+0f
+24
+b6
+2e
+20
+00
+00
+0f
+24
+b6
+36
+30
+00
+00
+0f
+24
+23
+d3
+10
+0f
+24
+23
+16
+10
+00
+00
+0f
+24
+23
+16
+10
+00
+00
+0f
+24
+23
+1e
+18
+00
+00
+0f
+24
+23
+1e
+18
+00
+00
+0f
+24
+27
+d3
+10
+0f
+24
+27
+16
+10
+00
+00
+0f
+24
+27
+16
+10
+00
+00
+0f
+24
+27
+1e
+18
+00
+00
+0f
+24
+27
+1e
+18
+00
+00
+0f
+24
+40
+d3
+10
+0f
+24
+40
+16
+10
+00
+00
+0f
+24
+40
+16
+10
+00
+00
+0f
+24
+40
+1e
+18
+00
+00
+0f
+24
+40
+1e
+18
+00
+00
+0f
+7b
+40
+ca
+05
+0f
+7b
+40
+0e
+00
+00
+05
+0f
+7b
+40
+0e
+00
+00
+05
+0f
+24
+42
+d3
+10
+0f
+24
+42
+16
+10
+00
+00
+0f
+24
+42
+16
+10
+00
+00
+0f
+24
+42
+1e
+18
+00
+00
+0f
+24
+42
+1e
+18
+00
+00
+0f
+7b
+42
+ca
+05
+0f
+7b
+42
+0e
+00
+00
+05
+0f
+7b
+42
+0e
+00
+00
+05
+0f
+24
+43
+d3
+10
+0f
+24
+43
+16
+10
+00
+00
+0f
+24
+43
+16
+10
+00
+00
+0f
+24
+43
+1e
+18
+00
+00
+0f
+24
+43
+1e
+18
+00
+00
+0f
+7b
+43
+ca
+05
+0f
+7b
+43
+0e
+00
+00
+05
+0f
+7b
+43
+0e
+00
+00
+05
+0f
+24
+41
+d3
+10
+0f
+24
+41
+16
+10
+00
+00
+0f
+24
+41
+16
+10
+00
+00
+0f
+24
+41
+1e
+18
+00
+00
+0f
+24
+41
+1e
+18
+00
+00
+0f
+7b
+41
+ca
+05
+0f
+7b
+41
+0e
+00
+00
+05
+0f
+7b
+41
+0e
+00
+00
+05
+0f
+24
+48
+d3
+10
+0f
+24
+48
+16
+10
+00
+00
+0f
+24
+48
+16
+10
+00
+00
+0f
+24
+48
+1e
+18
+00
+00
+0f
+24
+48
+1e
+18
+00
+00
+0f
+24
+4a
+d3
+10
+0f
+24
+4a
+16
+10
+00
+00
+0f
+24
+4a
+16
+10
+00
+00
+0f
+24
+4a
+1e
+18
+00
+00
+0f
+24
+4a
+1e
+18
+00
+00
+0f
+24
+4b
+d3
+10
+0f
+24
+4b
+16
+10
+00
+00
+0f
+24
+4b
+16
+10
+00
+00
+0f
+24
+4b
+1e
+18
+00
+00
+0f
+24
+4b
+1e
+18
+00
+00
+0f
+24
+49
+d3
+10
+0f
+24
+49
+16
+10
+00
+00
+0f
+24
+49
+16
+10
+00
+00
+0f
+24
+49
+1e
+18
+00
+00
+0f
+24
+49
+1e
+18
+00
+00
+0f
+24
+44
+d3
+10
+0f
+24
+44
+16
+10
+00
+00
+0f
+24
+44
+16
+10
+00
+00
+0f
+24
+44
+1e
+18
+00
+00
+0f
+24
+44
+1e
+18
+00
+00
+0f
+24
+46
+d3
+10
+0f
+24
+46
+16
+10
+00
+00
+0f
+24
+46
+16
+10
+00
+00
+0f
+24
+46
+1e
+18
+00
+00
+0f
+24
+46
+1e
+18
+00
+00
+0f
+24
+47
+d3
+10
+0f
+24
+47
+16
+10
+00
+00
+0f
+24
+47
+16
+10
+00
+00
+0f
+24
+47
+1e
+18
+00
+00
+0f
+24
+47
+1e
+18
+00
+00
+0f
+24
+45
+d3
+10
+0f
+24
+45
+16
+10
+00
+00
+0f
+24
+45
+16
+10
+00
+00
+0f
+24
+45
+1e
+18
+00
+00
+0f
+24
+45
+1e
+18
+00
+00
+66
+0f
+38
+17
+ca
+66
+0f
+38
+17
+0e
+00
+00
+66
+0f
+38
+17
+0e
+00
+00
+66
+0f
+3a
+09
+ca
+05
+66
+0f
+3a
+09
+0e
+00
+00
+05
+66
+0f
+3a
+09
+0e
+00
+00
+05
+66
+0f
+3a
+08
+ca
+05
+66
+0f
+3a
+08
+0e
+00
+00
+05
+66
+0f
+3a
+08
+0e
+00
+00
+05
+66
+0f
+3a
+0b
+ca
+05
+66
+0f
+3a
+0b
+0e
+00
+00
+05
+66
+0f
+3a
+0b
+0e
+00
+00
+05
+66
+0f
+3a
+0a
+ca
+05
+66
+0f
+3a
+0a
+0e
+00
+00
+05
+66
+0f
+3a
+0a
+0e
+00
+00
+05
--- /dev/null
+[bits 32] ; expected-byte comments below: ModRM/SIB bytes are in octal, all other bytes in hex
+compd xmm1, xmm4, xmm7, 5 ; 0F 25 2D 347 10 05
+compd xmm2, xmm5, [0], byte 5 ; 0F 25 2D 055 20 00 00 00 00 05
+compd xmm3, xmm6, dqword [ebx+ecx*4], byte 5 ; 0F 25 2D 064 213 30 05
+
+[bits 64] ; 64-bit mode: same operand forms, now using extended registers (xmm8 and up, r8, r9)
+compd xmm8, xmm11, xmm3, 5 ; 0F 25 2D 333 84 05
+compd xmm12, xmm4, xmm14, 5 ; 0F 25 2D 346 C1 05
+compd xmm9, xmm12, [0], byte 5 ; 0F 25 2D 044 045 94 00 00 00 00 05
+compd xmm9, xmm12, [r8], byte 5 ; 0F 25 2D 040 95 05
+compd xmm10, xmm13, dqword [rbx+r9*4], 5 ; 0F 25 2D 054 213 A6 05
+
--- /dev/null
+0f
+25
+2d
+e7
+10
+05
+0f
+25
+2d
+2d
+20
+00
+00
+00
+00
+05
+0f
+25
+2d
+34
+8b
+30
+05
+0f
+25
+2d
+db
+84
+05
+0f
+25
+2d
+e6
+c1
+05
+0f
+25
+2d
+24
+25
+94
+00
+00
+00
+00
+05
+0f
+25
+2d
+20
+95
+05
+0f
+25
+2d
+2c
+8b
+a6
+05
--- /dev/null
+fmaddpd xmm1, xmm2, xmm1, xmm3 ; illegal
+fmaddpd xmm1, xmm2, xmm3, xmm3 ; illegal
+fmaddpd xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fmaddps xmm1, xmm2, xmm1, xmm3 ; illegal
+fmaddps xmm1, xmm2, xmm3, xmm3 ; illegal
+fmaddps xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fmaddsd xmm1, xmm2, xmm1, xmm3 ; illegal
+fmaddsd xmm1, xmm2, xmm3, xmm3 ; illegal
+fmaddsd xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fmaddss xmm1, xmm2, xmm1, xmm3 ; illegal
+fmaddss xmm1, xmm2, xmm3, xmm3 ; illegal
+fmaddss xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fmsubpd xmm1, xmm2, xmm1, xmm3 ; illegal
+fmsubpd xmm1, xmm2, xmm3, xmm3 ; illegal
+fmsubpd xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fmsubps xmm1, xmm2, xmm1, xmm3 ; illegal
+fmsubps xmm1, xmm2, xmm3, xmm3 ; illegal
+fmsubps xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fmsubsd xmm1, xmm2, xmm1, xmm3 ; illegal
+fmsubsd xmm1, xmm2, xmm3, xmm3 ; illegal
+fmsubsd xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fmsubss xmm1, xmm2, xmm1, xmm3 ; illegal
+fmsubss xmm1, xmm2, xmm3, xmm3 ; illegal
+fmsubss xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fnmaddpd xmm1, xmm2, xmm1, xmm3 ; illegal
+fnmaddpd xmm1, xmm2, xmm3, xmm3 ; illegal
+fnmaddpd xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fnmaddps xmm1, xmm2, xmm1, xmm3 ; illegal
+fnmaddps xmm1, xmm2, xmm3, xmm3 ; illegal
+fnmaddps xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fnmaddsd xmm1, xmm2, xmm1, xmm3 ; illegal
+fnmaddsd xmm1, xmm2, xmm3, xmm3 ; illegal
+fnmaddsd xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fnmaddss xmm1, xmm2, xmm1, xmm3 ; illegal
+fnmaddss xmm1, xmm2, xmm3, xmm3 ; illegal
+fnmaddss xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fnmsubpd xmm1, xmm2, xmm1, xmm3 ; illegal
+fnmsubpd xmm1, xmm2, xmm3, xmm3 ; illegal
+fnmsubpd xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fnmsubps xmm1, xmm2, xmm1, xmm3 ; illegal
+fnmsubps xmm1, xmm2, xmm3, xmm3 ; illegal
+fnmsubps xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fnmsubsd xmm1, xmm2, xmm1, xmm3 ; illegal
+fnmsubsd xmm1, xmm2, xmm3, xmm3 ; illegal
+fnmsubsd xmm1, xmm2, xmm2, xmm3 ; illegal
+
+fnmsubss xmm1, xmm2, xmm1, xmm3 ; illegal
+fnmsubss xmm1, xmm2, xmm3, xmm3 ; illegal
+fnmsubss xmm1, xmm2, xmm2, xmm3 ; illegal
+
+pcmov xmm1, xmm2, xmm1, xmm3 ; illegal
+pcmov xmm1, xmm2, xmm3, xmm3 ; illegal
+pcmov xmm1, xmm2, xmm2, xmm3 ; illegal
+
+permpd xmm1, xmm2, xmm1, xmm3 ; illegal
+permpd xmm1, xmm2, xmm3, xmm3 ; illegal
+permpd xmm1, xmm2, xmm2, xmm3 ; illegal
+
+permps xmm1, xmm2, xmm1, xmm3 ; illegal
+permps xmm1, xmm2, xmm3, xmm3 ; illegal
+permps xmm1, xmm2, xmm2, xmm3 ; illegal
+
+pmacsdd xmm1, xmm2, xmm1, xmm3 ; illegal
+pmacsdd xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmacsdqh xmm1, xmm2, xmm1, xmm3 ; illegal
+pmacsdqh xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmacsdql xmm1, xmm2, xmm1, xmm3 ; illegal
+pmacsdql xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmacssdd xmm1, xmm2, xmm1, xmm3 ; illegal
+pmacssdd xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmacssdqh xmm1, xmm2, xmm1, xmm3 ; illegal
+pmacssdqh xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmacssdql xmm1, xmm2, xmm1, xmm3 ; illegal
+pmacssdql xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmacsswd xmm1, xmm2, xmm1, xmm3 ; illegal
+pmacsswd xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmacssww xmm1, xmm2, xmm1, xmm3 ; illegal
+pmacssww xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmacswd xmm1, xmm2, xmm1, xmm3 ; illegal
+pmacswd xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmacsww xmm1, xmm2, xmm1, xmm3 ; illegal
+pmacsww xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmadcsswd xmm1, xmm2, xmm1, xmm3 ; illegal
+pmadcsswd xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pmadcswd xmm1, xmm2, xmm1, xmm3 ; illegal
+pmadcswd xmm1, xmm1, xmm2, xmm3 ; illegal - better message?
+
+pperm xmm1, xmm2, xmm1, xmm3 ; illegal
+pperm xmm1, xmm2, xmm3, xmm3 ; illegal
+pperm xmm1, xmm2, xmm2, xmm3 ; illegal
+
--- /dev/null
+-:1: one of source operand 1 or 3 must match dest operand
+-:2: one of source operand 1 or 3 must match dest operand
+-:3: one of source operand 1 or 3 must match dest operand
+-:5: one of source operand 1 or 3 must match dest operand
+-:6: one of source operand 1 or 3 must match dest operand
+-:7: one of source operand 1 or 3 must match dest operand
+-:9: one of source operand 1 or 3 must match dest operand
+-:10: one of source operand 1 or 3 must match dest operand
+-:11: one of source operand 1 or 3 must match dest operand
+-:13: one of source operand 1 or 3 must match dest operand
+-:14: one of source operand 1 or 3 must match dest operand
+-:15: one of source operand 1 or 3 must match dest operand
+-:17: one of source operand 1 or 3 must match dest operand
+-:18: one of source operand 1 or 3 must match dest operand
+-:19: one of source operand 1 or 3 must match dest operand
+-:21: one of source operand 1 or 3 must match dest operand
+-:22: one of source operand 1 or 3 must match dest operand
+-:23: one of source operand 1 or 3 must match dest operand
+-:25: one of source operand 1 or 3 must match dest operand
+-:26: one of source operand 1 or 3 must match dest operand
+-:27: one of source operand 1 or 3 must match dest operand
+-:29: one of source operand 1 or 3 must match dest operand
+-:30: one of source operand 1 or 3 must match dest operand
+-:31: one of source operand 1 or 3 must match dest operand
+-:33: one of source operand 1 or 3 must match dest operand
+-:34: one of source operand 1 or 3 must match dest operand
+-:35: one of source operand 1 or 3 must match dest operand
+-:37: one of source operand 1 or 3 must match dest operand
+-:38: one of source operand 1 or 3 must match dest operand
+-:39: one of source operand 1 or 3 must match dest operand
+-:41: one of source operand 1 or 3 must match dest operand
+-:42: one of source operand 1 or 3 must match dest operand
+-:43: one of source operand 1 or 3 must match dest operand
+-:45: one of source operand 1 or 3 must match dest operand
+-:46: one of source operand 1 or 3 must match dest operand
+-:47: one of source operand 1 or 3 must match dest operand
+-:49: one of source operand 1 or 3 must match dest operand
+-:50: one of source operand 1 or 3 must match dest operand
+-:51: one of source operand 1 or 3 must match dest operand
+-:53: one of source operand 1 or 3 must match dest operand
+-:54: one of source operand 1 or 3 must match dest operand
+-:55: one of source operand 1 or 3 must match dest operand
+-:57: one of source operand 1 or 3 must match dest operand
+-:58: one of source operand 1 or 3 must match dest operand
+-:59: one of source operand 1 or 3 must match dest operand
+-:61: one of source operand 1 or 3 must match dest operand
+-:62: one of source operand 1 or 3 must match dest operand
+-:63: one of source operand 1 or 3 must match dest operand
+-:65: one of source operand 1 or 3 must match dest operand
+-:66: one of source operand 1 or 3 must match dest operand
+-:67: one of source operand 1 or 3 must match dest operand
+-:69: one of source operand 1 or 3 must match dest operand
+-:70: one of source operand 1 or 3 must match dest operand
+-:71: one of source operand 1 or 3 must match dest operand
+-:73: one of source operand 1 or 3 must match dest operand
+-:74: one of source operand 1 or 3 must match dest operand
+-:75: one of source operand 1 or 3 must match dest operand
+-:77: one of source operand 1 or 3 must match dest operand
+-:78: one of source operand 1 or 3 must match dest operand
+-:80: one of source operand 1 or 3 must match dest operand
+-:81: one of source operand 1 or 3 must match dest operand
+-:83: one of source operand 1 or 3 must match dest operand
+-:84: one of source operand 1 or 3 must match dest operand
+-:86: one of source operand 1 or 3 must match dest operand
+-:87: one of source operand 1 or 3 must match dest operand
+-:89: one of source operand 1 or 3 must match dest operand
+-:90: one of source operand 1 or 3 must match dest operand
+-:92: one of source operand 1 or 3 must match dest operand
+-:93: one of source operand 1 or 3 must match dest operand
+-:95: one of source operand 1 or 3 must match dest operand
+-:96: one of source operand 1 or 3 must match dest operand
+-:98: one of source operand 1 or 3 must match dest operand
+-:99: one of source operand 1 or 3 must match dest operand
+-:101: one of source operand 1 or 3 must match dest operand
+-:102: one of source operand 1 or 3 must match dest operand
+-:104: one of source operand 1 or 3 must match dest operand
+-:105: one of source operand 1 or 3 must match dest operand
+-:107: one of source operand 1 or 3 must match dest operand
+-:108: one of source operand 1 or 3 must match dest operand
+-:110: one of source operand 1 or 3 must match dest operand
+-:111: one of source operand 1 or 3 must match dest operand
+-:113: one of source operand 1 or 3 must match dest operand
+-:114: one of source operand 1 or 3 must match dest operand
+-:115: one of source operand 1 or 3 must match dest operand
#define CPU_SSE41 30 /* Streaming SIMD extensions 4.1 required */
#define CPU_SSE42 31 /* Streaming SIMD extensions 4.2 required */
#define CPU_SSE4a 32 /* AMD Streaming SIMD extensions 4a required */
+#define CPU_SSE5 33 /* AMD Streaming SIMD extensions 5 required */
/* Technically not CPU capabilities, but they do affect what instructions are
* available. These are tested against BITS==64.
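* indicates bit of REX to use if REX is needed. Will not modify REX if not
* in 64-bit mode or if it wasn't needed to express reg.
* If drex is non-NULL, bit 3 of the register number is ORed into *drex (at
* the rexbit position) instead of into *rex.
*/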
-int yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *low3,
- uintptr_t reg, unsigned int bits,
- x86_rex_bit_pos rexbit);
+int yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *drex,
+ unsigned char *low3, uintptr_t reg,
+ unsigned int bits, x86_rex_bit_pos rexbit);
/* Effective address type */
typedef struct x86_effaddr {
unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */
unsigned char need_sib; /* 1 if SIB byte needed, 0 if not,
0xff if unknown */
+
+ unsigned char drex; /* DREX SSE5 extension byte */
+ unsigned char need_drex; /* 1 if DREX byte needed, 0 if not */
} x86_effaddr;
void yasm_x86__ea_init(x86_effaddr *x86_ea, unsigned int spare,
+ unsigned int drex, unsigned int need_drex,
yasm_bytecode *precbc);
void yasm_x86__ea_set_disponly(x86_effaddr *x86_ea);
x86_effaddr *yasm_x86__ea_create_reg(x86_effaddr *x86_ea, unsigned long reg,
- unsigned char *rex, unsigned int bits);
+ unsigned char *rex, unsigned char *drex,
+ unsigned int bits);
x86_effaddr *yasm_x86__ea_create_imm
(x86_effaddr *x86_ea, /*@keep@*/ yasm_expr *imm, unsigned int im_len);
yasm_effaddr *yasm_x86__ea_create_expr(yasm_arch *arch,
};
int
-yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *low3,
- uintptr_t reg, unsigned int bits,
- x86_rex_bit_pos rexbit)
+yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *drex,
+ unsigned char *low3, uintptr_t reg,
+ unsigned int bits, x86_rex_bit_pos rexbit) /* drex: NULL selects the REX path below */
{
*low3 = (unsigned char)(reg&7);
x86_expritem_reg_size size = (x86_expritem_reg_size)(reg & ~0xFUL);
if (size == X86_REG8X || (reg & 0xF) >= 8) {
- /* Check to make sure we can set it */
- if (*rex == 0xff) {
- yasm_error_set(YASM_ERROR_TYPE,
- N_("cannot use A/B/C/DH with instruction needing REX"));
- return 1;
+ if (drex) { /* caller supplied a DREX byte: record the extension bit there; *rex is not modified in this branch */
+ *drex |= ((reg & 8) >> 3) << rexbit; /* bit 3 of reg at position rexbit; no 0x40 marker and no 0xff check, unlike the REX path */
+ } else {
+ /* Check to make sure we can set it */
+ if (*rex == 0xff) {
+ yasm_error_set(YASM_ERROR_TYPE,
+ N_("cannot use A/B/C/DH with instruction needing REX"));
+ return 1;
+ }
+ *rex |= 0x40 | (((reg & 8) >> 3) << rexbit);
}
- *rex |= 0x40 | (((reg & 8) >> 3) << rexbit);
} else if (size == X86_REG8 && (reg & 7) >= 4) {
/* AH/BH/CH/DH, so no REX allowed */
if (*rex != 0 && *rex != 0xff) {
}
void
-yasm_x86__ea_init(x86_effaddr *x86_ea, unsigned int spare,
- yasm_bytecode *precbc)
+yasm_x86__ea_init(x86_effaddr *x86_ea, unsigned int spare, unsigned int drex,
+ unsigned int need_drex, yasm_bytecode *precbc) /* drex: DREX byte value to store; need_drex: nonzero if that byte must be present */
{
if (yasm_value_finalize(&x86_ea->ea.disp, precbc))
yasm_error_set(YASM_ERROR_TOO_COMPLEX,
N_("effective address too complex"));
x86_ea->modrm &= 0xC7; /* zero spare/reg bits */
x86_ea->modrm |= (spare << 3) & 0x38; /* plug in provided bits */
+ x86_ea->drex = (unsigned char)drex; /* keep the caller-built DREX byte with the effective address */
+ x86_ea->need_drex = (unsigned char)need_drex; /* stored as a truth flag; any nonzero value that fits in a byte means "needed" */
}
void
x86_ea->need_modrm = 0;
x86_ea->valid_sib = 0;
x86_ea->need_sib = 0;
+ x86_ea->need_drex = 0;
}
static x86_effaddr *
x86_ea->sib = 0;
x86_ea->valid_sib = 0;
x86_ea->need_sib = 0;
+ x86_ea->drex = 0;
+ x86_ea->need_drex = 0;
return x86_ea;
}
x86_effaddr *
yasm_x86__ea_create_reg(x86_effaddr *x86_ea, unsigned long reg,
- unsigned char *rex, unsigned int bits)
+ unsigned char *rex, unsigned char *drex,
+ unsigned int bits)
{
unsigned char rm;
- if (yasm_x86__set_rex_from_reg(rex, &rm, reg, bits, X86_REX_B))
+ if (yasm_x86__set_rex_from_reg(rex, drex, &rm, reg, bits, X86_REX_B))
return NULL;
if (!x86_ea)
/* Compute length of ea and add to total */
bc->len += x86_ea->need_modrm + (x86_ea->need_sib ? 1:0);
+ bc->len += x86_ea->need_drex ? 1:0;
bc->len += (x86_ea->ea.segreg != 0) ? 1 : 0;
}
YASM_WRITE_8(*bufp, x86_ea->sib);
}
+ if (x86_ea->need_drex)
+ YASM_WRITE_8(*bufp, x86_ea->drex);
+
if (x86_ea->ea.need_disp) {
unsigned int disp_len = x86_ea->ea.disp.size/8;
BitVector_Bit_On(cpu, CPU_086);
}
+#define PROC_bulldozer 11
#define PROC_k10 10
#define PROC_venice 9
#define PROC_hammer 8
BitVector_Bit_On(cpu, CPU_Prot);
BitVector_Bit_On(cpu, CPU_SMM);
BitVector_Bit_On(cpu, CPU_3DNow);
+ if (data >= PROC_bulldozer)
+ BitVector_Bit_On(cpu, CPU_SSE5);
if (data >= PROC_k10)
BitVector_Bit_On(cpu, CPU_SSE4a);
if (data >= PROC_venice)
athlon-64, x86_cpu_amd, PROC_hammer
venice, x86_cpu_amd, PROC_venice
k10, x86_cpu_amd, PROC_k10
+bulldozer, x86_cpu_amd, PROC_bulldozer
prescott, x86_cpu_intel, PROC_prescott
conroe, x86_cpu_intel, PROC_conroe
penryn, x86_cpu_intel, PROC_penryn
nosse4a, x86_cpu_clear, CPU_SSE4a
sse4, x86_cpu_set_sse4, 0
nosse4, x86_cpu_clear_sse4, 0
+sse5, x86_cpu_set, CPU_SSE5
+nosse5, x86_cpu_clear, CPU_SSE5
%%
void
yasm_bytecode *bc)
{
int retval;
+ unsigned char *drex = x86_ea->need_drex ? &x86_ea->drex : NULL;
if (*addrsize == 0) {
/* we need to figure out the address size from what we know about:
* of register basereg is, as x86_set_rex_from_reg doesn't pay
* much attention.
*/
- if (yasm_x86__set_rex_from_reg(rex, &low3,
+ if (yasm_x86__set_rex_from_reg(rex, drex, &low3,
(unsigned int)(X86_REG64 | basereg),
bits, X86_REX_B))
return 1;
if (basereg == REG3264_NONE)
x86_ea->sib |= 5;
else {
- if (yasm_x86__set_rex_from_reg(rex, &low3, (unsigned int)
+ if (yasm_x86__set_rex_from_reg(rex, drex, &low3, (unsigned int)
(X86_REG64 | basereg), bits,
X86_REX_B))
return 1;
x86_ea->sib |= 040;
/* Any scale field is valid, just leave at 0. */
else {
- if (yasm_x86__set_rex_from_reg(rex, &low3, (unsigned int)
+ if (yasm_x86__set_rex_from_reg(rex, drex, &low3, (unsigned int)
(X86_REG64 | indexreg), bits,
X86_REX_X))
return 1;
*/
OPT_MemrAX = 25,
/* EAX memory operand only (EA) [special case for SVM skinit opcode] */
- OPT_MemEAX = 26
+ OPT_MemEAX = 26,
+ /* SIMDReg with value equal to operand 0 SIMDReg */
+ OPT_SIMDRegMatch0 = 27
};
enum x86_operand_size {
/* far jump (outputs a farjmp instead of normal insn) */
OPA_JmpFar = 10,
/* ea operand only sets address size (no actual ea field) */
- OPA_AdSizeEA = 11
+ OPA_AdSizeEA = 11,
+ OPA_DREX = 12 /* operand data goes into DREX "dest" field */
};
enum x86_operand_post_action {
*/
unsigned char special_prefix;
+ /* DREX flags for this instruction form; this is not the full DREX byte.
+ * Only the OC0 bit (0x08) is copied into the DREX byte that is built.
+ * The MSB (0x80) is just a marker that the DREX byte needs to be present
+ * in the instruction; it is masked off and never becomes part of the byte.
+ */
+#define NEED_DREX_MASK 0x80
+#define DREX_OC0_MASK 0x08
+ unsigned char drex_oc0;
+
/* The length of the basic opcode */
unsigned char opcode_len;
cpu1 = CPU_Any;
if (cpu2 == CPU_64 || cpu2 == CPU_Not64)
cpu2 = CPU_Any;
- if (bypass != 7 && (!BitVector_bit_test(id_insn->cpu_enabled, cpu0) ||
+ if (bypass != 8 && (!BitVector_bit_test(id_insn->cpu_enabled, cpu0) ||
!BitVector_bit_test(id_insn->cpu_enabled, cpu1) ||
!BitVector_bit_test(id_insn->cpu_enabled, cpu2)))
continue;
if (op->type == YASM_INSN__OPERAND_MEMORY)
break;
/*@fallthrough@*/
+ case OPT_SIMDRegMatch0:
case OPT_SIMDReg:
if (op->type != YASM_INSN__OPERAND_REG)
mismatch = 1;
break;
}
}
+ if (!mismatch && info_ops[i].type == OPT_SIMDRegMatch0 &&
+ bypass != 7 && op->data.reg != use_ops[0]->data.reg)
+ mismatch = 1;
break;
case OPT_SegReg:
if (op->type != YASM_INSN__OPERAND_SEGREG)
return;
}
- for (bypass=1; bypass<8; bypass++) {
+ for (bypass=1; bypass<9; bypass++) {
i = x86_find_match(id_insn, ops, rev_ops, size_lookup, bypass);
if (i)
break;
N_("invalid size for operand %d"), 3);
break;
case 7:
+ yasm_error_set(YASM_ERROR_TYPE,
+ N_("one of source operand 1 or 3 must match dest operand"));
+ break;
+ case 8:
{
unsigned int cpu0 = i->cpu0, cpu1 = i->cpu1, cpu2 = i->cpu2;
yasm_error_set(YASM_ERROR_TYPE,
unsigned char im_len;
unsigned char im_sign;
unsigned char spare;
+ unsigned char drex;
+ unsigned char *pdrex;
unsigned int i;
unsigned int size_lookup[] = {0, 8, 16, 32, 64, 80, 128, 0};
unsigned long do_postop = 0;
insn->def_opersize_64 = info->def_opersize_64;
insn->special_prefix = info->special_prefix;
spare = info->spare;
+ drex = info->drex_oc0 & DREX_OC0_MASK;
im_len = 0;
im_sign = 0;
insn->postop = X86_POSTOP_NONE;
insn->rex = 0;
+ pdrex = (info->drex_oc0 & NEED_DREX_MASK) ? &drex : NULL;
/* Apply modifiers */
for (i=0; i<NELEMS(info->modifiers); i++) {
insn->x86_ea =
yasm_x86__ea_create_reg(insn->x86_ea,
(unsigned long)op->data.reg, &insn->rex,
- mode_bits);
+ pdrex, mode_bits);
break;
case YASM_INSN__OPERAND_SEGREG:
yasm_internal_error(
if (op->type == YASM_INSN__OPERAND_SEGREG)
spare = (unsigned char)(op->data.reg&7);
else if (op->type == YASM_INSN__OPERAND_REG) {
- if (yasm_x86__set_rex_from_reg(&insn->rex, &spare,
- op->data.reg, mode_bits, X86_REX_R))
+ if (yasm_x86__set_rex_from_reg(&insn->rex, pdrex,
+ &spare, op->data.reg, mode_bits, X86_REX_R))
return;
} else
yasm_internal_error(N_("invalid operand conversion"));
case OPA_Op0Add:
if (op->type == YASM_INSN__OPERAND_REG) {
unsigned char opadd;
- if (yasm_x86__set_rex_from_reg(&insn->rex, &opadd,
- op->data.reg, mode_bits, X86_REX_B))
+ if (yasm_x86__set_rex_from_reg(&insn->rex, pdrex,
+ &opadd, op->data.reg, mode_bits, X86_REX_B))
return;
insn->opcode.opcode[0] += opadd;
} else
case OPA_Op1Add:
if (op->type == YASM_INSN__OPERAND_REG) {
unsigned char opadd;
- if (yasm_x86__set_rex_from_reg(&insn->rex, &opadd,
- op->data.reg, mode_bits, X86_REX_B))
+ if (yasm_x86__set_rex_from_reg(&insn->rex, pdrex,
+ &opadd, op->data.reg, mode_bits, X86_REX_B))
return;
insn->opcode.opcode[1] += opadd;
} else
if (op->type == YASM_INSN__OPERAND_REG) {
insn->x86_ea =
yasm_x86__ea_create_reg(insn->x86_ea,
- (unsigned long)op->data.reg,
- &insn->rex, mode_bits);
+ (unsigned long)op->data.reg, &insn->rex,
+ pdrex, mode_bits);
if (!insn->x86_ea ||
- yasm_x86__set_rex_from_reg(&insn->rex, &spare,
- op->data.reg, mode_bits, X86_REX_R)) {
+ yasm_x86__set_rex_from_reg(&insn->rex, pdrex,
+ &spare, op->data.reg, mode_bits, X86_REX_R)) {
if (insn->x86_ea)
yasm_xfree(insn->x86_ea);
yasm_xfree(insn);
yasm_x86__ea_destroy(op->data.ea);
break;
}
+ case OPA_DREX:
+ drex &= 0x0F;
+ drex |= (op->data.reg << 4) & 0xF0;
+ break;
default:
yasm_internal_error(N_("unknown operand action"));
}
}
if (insn->x86_ea) {
- yasm_x86__ea_init(insn->x86_ea, spare, prev_bc);
+ yasm_x86__ea_init(insn->x86_ea, spare, drex,
+ (unsigned int)(info->drex_oc0 & NEED_DREX_MASK),
+ prev_bc);
for (i=0; i<id_insn->insn.num_segregs; i++)
yasm_ea_set_segreg(&insn->x86_ea->ea, id_insn->insn.segregs[i]);
} else if (id_insn->insn.num_segregs > 0 && insn->special_prefix == 0) {
* opcode 0 being a mov instruction!
*/
insn->x86_ea = yasm_x86__ea_create_reg(insn->x86_ea,
- (unsigned long)insn->opcode.opcode[0]-0xB8, &rex_temp, 64);
+ (unsigned long)insn->opcode.opcode[0]-0xB8, &rex_temp,
+ NULL, 64);
/* Make the imm32s form permanent. */
insn->opcode.opcode[0] = insn->opcode.opcode[1];