arch_x86->mode_bits = 0;
arch_x86->force_strict = 0;
arch_x86->default_rel = 0;
+ arch_x86->nop = X86_NOP_BASIC;
if (yasm__strcasecmp(parser, "nasm") == 0)
arch_x86->parser = X86_PARSER_NASM;
static const unsigned char fill32_1[1] =
{0x90}; /* 1 - nop */
static const unsigned char fill32_2[2] =
- {0x89, 0xf6}; /* 2 - mov esi, esi */
+ {0x66, 0x90}; /* 2 - xchg ax, ax (o16 nop) */
static const unsigned char fill32_3[3] =
{0x8d, 0x76, 0x00}; /* 3 - lea esi, [esi+byte 0] */
static const unsigned char fill32_4[4] =
fill32_12, fill32_13, fill32_14, fill32_15
};
- static const unsigned char fill64_1[1] =
- {0x90}; /* 1 - nop */
- static const unsigned char fill64_2[2] =
- {0x66, 0x90}; /* 2 - o16; nop */
-#if 1
- /* recommmended padding for AMD K8 processors */
- static const unsigned char fill64_3[3] =
- {0x66, 0x66, 0x90}; /* 3 - o16; o16; nop */
- static const unsigned char fill64_4[4] =
- {0x66, 0x66, 0x66, 0x90}; /* 4 - o16; o16; o16; nop */
- static const unsigned char fill64_5[5] =
- {0x66, 0x66, 0x90, 0x66, 0x90}; /* 5 */
- static const unsigned char fill64_6[6] =
- {0x66, 0x66, 0x90, 0x66, 0x66, 0x90};/* 6 */
- static const unsigned char fill64_7[7] =
- {0x66, 0x66, 0x66, 0x90, 0x66, 0x66, 0x90}; /* 7 */
- static const unsigned char fill64_8[8] =
- {0x66, 0x66, 0x66, 0x90, 0x66, 0x66, 0x66, /* 8 */
- 0x90};
- static const unsigned char fill64_9[9] =
- {0x66, 0x66, 0x90, 0x66, 0x66, 0x90, 0x66, /* 9 */
- 0x66, 0x90};
- static const unsigned char fill64_10[10] =
- {0x66, 0x66, 0x66, 0x90, 0x66, 0x66, 0x90, /* 10 */
- 0x66, 0x66, 0x90};
- static const unsigned char fill64_11[11] =
- {0x66, 0x66, 0x66, 0x90, 0x66, 0x66, 0x66, /* 11 */
- 0x90, 0x66, 0x66, 0x90};
- static const unsigned char fill64_12[12] =
- {0x66, 0x66, 0x66, 0x90, 0x66, 0x66, 0x66, /* 12 */
- 0x90, 0x66, 0x66, 0x66, 0x90};
- static const unsigned char fill64_13[13] =
- {0x66, 0x66, 0x66, 0x90, 0x66, 0x66, 0x90, /* 13 */
- 0x66, 0x66, 0x90, 0x66, 0x66, 0x90};
- static const unsigned char fill64_14[14] =
- {0x66, 0x66, 0x66, 0x90, 0x66, 0x66, 0x66, /* 14 */
- 0x90, 0x66, 0x66, 0x90, 0x66, 0x66, 0x90};
- static const unsigned char fill64_15[15] =
- {0x66, 0x66, 0x66, 0x90, 0x66, 0x66, 0x66, /* 15 */
- 0x90, 0x66, 0x66, 0x66, 0x90, 0x66, 0x66, 0x90};
-#else
- /* from Software Optimisation Guide for AMD Family 10h */
- /* Processors 40546 revision 3.10 February 2009 */
- static const unsigned char fill64_3[3] =
- {0x0f, 0x1f, 0x00}; /* 3 */
- static const unsigned char fill64_4[4] =
- {0x0f, 0x1f, 0x40, 0x00}; /* 4 */
- static const unsigned char fill64_5[5] =
- {0x0f, 0x1f, 0x44, 0x00, 0x00}; /* 5 */
- static const unsigned char fill64_6[6] =
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}; /* 6 */
- static const unsigned char fill64_7[7] =
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}; /* 7 */
- static const unsigned char fill64_8[8] =
- {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, /* 8 */
+ /* Long form nops available on more recent Intel and AMD processors */
+ static const unsigned char fill32new_3[3] =
+ {0x0f, 0x1f, 0x00}; /* 3 - nop(3) */
+ static const unsigned char fill32new_4[4] =
+ {0x0f, 0x1f, 0x40, 0x00}; /* 4 - nop(4) */
+ static const unsigned char fill32new_5[5] =
+ {0x0f, 0x1f, 0x44, 0x00, 0x00}; /* 5 - nop(5) */
+ static const unsigned char fill32new_6[6] =
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}; /* 6 - nop(6) */
+ static const unsigned char fill32new_7[7] =
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}; /* 7 - nop(7) */
+ static const unsigned char fill32new_8[8] =
+ {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, /* 8 - nop(8) */
0x00};
- static const unsigned char fill64_9[9] =
- {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, /* 9 */
+ static const unsigned char fill32new_9[9] =
+ {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, /* 9 - nop(9) */
0x00, 0x00};
- static const unsigned char fill64_10[10] =
- {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, /* 10 */
+
+ /* Longer forms preferred by Intel use repeated o16 prefixes */
+ static const unsigned char fill32intel_10[10] =
+ {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, /* 10 - o16; cs; nop */
0x00, 0x00, 0x00};
- static const unsigned char fill64_11[11] =
- {0x0f, 0x1f, 0x44, 0x00, 0x00, 0x66, 0x0f, /* 11 */
- 0x1f, 0x44, 0x00, 0x00};
- static const unsigned char fill64_12[12] =
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, 0x66, /* 12 */
- 0x0f, 0x1f, 0x44, 0x00, 0x00};
- static const unsigned char fill64_13[13] =
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, 0x0f, /* 13 */
- 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00};
- static const unsigned char fill64_14[14] =
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, /* 14 */
- 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00};
- static const unsigned char fill64_15[15] =
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, /* 15 */
+ static const unsigned char fill32intel_11[11] =
+ {0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, /* 11 - 2x o16; cs; nop */
+ 0x00, 0x00, 0x00, 0x00};
+ static const unsigned char fill32intel_12[12] =
+ {0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, /* 12 - 3x o16; cs; nop */
+ 0x00, 0x00, 0x00, 0x00, 0x00};
+ static const unsigned char fill32intel_13[13] =
+ {0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, /* 13 - 4x o16; cs; nop */
+ 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
+ static const unsigned char fill32intel_14[14] =
+ {0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, /* 14 - 5x o16; cs; nop */
+ 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
+ static const unsigned char fill32intel_15[15] =
+ {0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, /* 15 - 6x o16; cs; nop */
0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
-#endif
- static const unsigned char *fill64[16] =
+
+ /* Longer forms preferred by AMD use fewer o16 prefixes and no CS prefix;
+ * Source: Software Optimization Guide for AMD Family 10h Processors,
+ * publication 40546, revision 3.10, February 2009
+ */
+ static const unsigned char fill32amd_10[10] =
+ {0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, /* 10 - nop(10) */
+ 0x00, 0x00, 0x00};
+ static const unsigned char fill32amd_11[11] =
+ {0x0f, 0x1f, 0x44, 0x00, 0x00, /* 11 - nop(5) */
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}; /* nop(6) */
+ static const unsigned char fill32amd_12[12] =
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, /* 12 - nop(6) */
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}; /* nop(6) */
+ static const unsigned char fill32amd_13[13] =
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, /* 13 - nop(6) */
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}; /* nop(7) */
+ static const unsigned char fill32amd_14[14] =
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, /* 14 - nop(7) */
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}; /* nop(7) */
+ static const unsigned char fill32amd_15[15] =
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, /* 15 - nop(7) */
+ 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}; /* nop(8) */
+
+ static const unsigned char *fill32_intel[16] =
+ {
+ NULL, fill32_1, fill32_2, fill32new_3,
+ fill32new_4, fill32new_5, fill32new_6, fill32new_7,
+ fill32new_8, fill32new_9, fill32intel_10, fill32intel_11,
+ fill32intel_12, fill32intel_13, fill32intel_14, fill32intel_15
+ };
+ static const unsigned char *fill32_amd[16] =
{
- NULL, fill64_1, fill64_2, fill64_3,
- fill64_4, fill64_5, fill64_6, fill64_7,
- fill64_8, fill64_9, fill64_10, fill64_11,
- fill64_12, fill64_13, fill64_14, fill64_15
+ NULL, fill32_1, fill32_2, fill32new_3,
+ fill32new_4, fill32new_5, fill32new_6, fill32new_7,
+ fill32new_8, fill32new_9, fill32amd_10, fill32amd_11,
+ fill32amd_12, fill32amd_13, fill32amd_14, fill32amd_15
};
switch (arch_x86->mode_bits) {
case 16:
return fill16;
case 32:
- return fill32;
+ if (arch_x86->nop == X86_NOP_INTEL)
+ return fill32_intel;
+ else if (arch_x86->nop == X86_NOP_AMD)
+ return fill32_amd;
+ else
+ return fill32;
case 64:
- return fill64;
+ /* We know long nops are available in 64-bit mode; default to Intel
+ * ones if unspecified (to match GAS behavior).
+ */
+ if (arch_x86->nop == X86_NOP_AMD)
+ return fill32_amd;
+ else
+ return fill32_intel;
default:
yasm_error_set(YASM_ERROR_VALUE,
N_("Invalid mode_bits in x86_get_fill"));
#define PROC_sandybridge 15
static void
-x86_cpu_intel(wordptr cpu, unsigned int data)
+x86_cpu_intel(wordptr cpu, yasm_arch_x86 *arch_x86, unsigned int data)
{
BitVector_Empty(cpu);
if (data >= PROC_186)
BitVector_Bit_On(cpu, CPU_186);
BitVector_Bit_On(cpu, CPU_086);
+
+ /* Use Intel long NOPs if 686 or better */
+ if (data >= PROC_686)
+ arch_x86->nop = X86_NOP_INTEL;
+ else
+ arch_x86->nop = X86_NOP_BASIC;
}
static void
-x86_cpu_ia64(wordptr cpu, unsigned int data)
+x86_cpu_ia64(wordptr cpu, yasm_arch_x86 *arch_x86, unsigned int data)
{
BitVector_Empty(cpu);
BitVector_Bit_On(cpu, CPU_Priv);
#define PROC_k6 6
static void
-x86_cpu_amd(wordptr cpu, unsigned int data)
+x86_cpu_amd(wordptr cpu, yasm_arch_x86 *arch_x86, unsigned int data)
{
BitVector_Empty(cpu);
BitVector_Bit_On(cpu, CPU_286);
BitVector_Bit_On(cpu, CPU_186);
BitVector_Bit_On(cpu, CPU_086);
+
+ /* Use AMD long NOPs if k6 or better */
+ if (data >= PROC_k6)
+ arch_x86->nop = X86_NOP_AMD;
+ else
+ arch_x86->nop = X86_NOP_BASIC;
}
/* CPU keyword handler: turns on the single bit numbered `data` in the
 * `cpu` bit vector.  `arch_x86` is accepted only so the function matches
 * the common handler signature; it is not used here.
 */
static void
-x86_cpu_set(wordptr cpu, unsigned int data)
+x86_cpu_set(wordptr cpu, yasm_arch_x86 *arch_x86, unsigned int data)
{
BitVector_Bit_On(cpu, data);
}
/* CPU keyword handler: turns off the single bit numbered `data` in the
 * `cpu` bit vector (the counterpart of x86_cpu_set).  `arch_x86` is not
 * used here.
 */
static void
-x86_cpu_clear(wordptr cpu, unsigned int data)
+x86_cpu_clear(wordptr cpu, yasm_arch_x86 *arch_x86, unsigned int data)
{
BitVector_Bit_Off(cpu, data);
}
/* CPU keyword handler: turns on both the CPU_SSE41 and CPU_SSE42 bits in
 * `cpu` in one step.  Neither `arch_x86` nor `data` is used here.
 */
static void
-x86_cpu_set_sse4(wordptr cpu, unsigned int data)
+x86_cpu_set_sse4(wordptr cpu, yasm_arch_x86 *arch_x86, unsigned int data)
{
BitVector_Bit_On(cpu, CPU_SSE41);
BitVector_Bit_On(cpu, CPU_SSE42);
}
/* CPU keyword handler: turns off both the CPU_SSE41 and CPU_SSE42 bits in
 * `cpu` in one step.  Neither `arch_x86` nor `data` is used here.
 */
static void
-x86_cpu_clear_sse4(wordptr cpu, unsigned int data)
+x86_cpu_clear_sse4(wordptr cpu, yasm_arch_x86 *arch_x86, unsigned int data)
{
BitVector_Bit_Off(cpu, CPU_SSE41);
BitVector_Bit_Off(cpu, CPU_SSE42);
}
/* Keyword handler for the basicnop/intelnop/amdnop keywords: stores the
 * X86_NOP_* value passed in `data` into arch_x86->nop.  The `cpu` bit
 * vector is left untouched.
 */
+static void
+x86_nop(wordptr cpu, yasm_arch_x86 *arch_x86, unsigned int data)
+{
+ arch_x86->nop = data;
+}
+
%}
%ignore-case
%language=ANSI-C
%define lookup-function-name cpu_find
struct cpu_parse_data {
/* Keyword text matched by the cpu_find lookup function. */
const char *name;
/* Callback run for a matched keyword; it receives the CPU bit vector
 * being built, the architecture state, and the `data` field below.
 */
- void (*handler) (wordptr cpu, unsigned int data);
+ void (*handler) (wordptr cpu, yasm_arch_x86 *arch_x86, unsigned int data);
/* Per-keyword argument passed to `handler` (e.g. a PROC_*, CPU_* or
 * X86_NOP_* constant, as listed in the keyword table).
 */
unsigned int data;
};
%%
athlon-64, x86_cpu_amd, PROC_hammer
venice, x86_cpu_amd, PROC_venice
k10, x86_cpu_amd, PROC_k10
+phenom, x86_cpu_amd, PROC_k10
+family10h, x86_cpu_amd, PROC_k10
bulldozer, x86_cpu_amd, PROC_bulldozer
prescott, x86_cpu_intel, PROC_prescott
conroe, x86_cpu_intel, PROC_conroe
core2, x86_cpu_intel, PROC_conroe
penryn, x86_cpu_intel, PROC_penryn
nehalem, x86_cpu_intel, PROC_nehalem
+corei7, x86_cpu_intel, PROC_nehalem
westmere, x86_cpu_intel, PROC_westmere
sandybridge, x86_cpu_intel, PROC_sandybridge
#
nopclmulqdq, x86_cpu_clear, CPU_CLMUL
movbe, x86_cpu_set, CPU_MOVBE
nomovbe, x86_cpu_clear, CPU_MOVBE
+# Change NOP patterns
+basicnop, x86_nop, X86_NOP_BASIC
+intelnop, x86_nop, X86_NOP_INTEL
+amdnop, x86_nop, X86_NOP_AMD
%%
void
}
new_cpu = BitVector_Clone(arch_x86->cpu_enables[arch_x86->active_cpu]);
- pdata->handler(new_cpu, pdata->data);
+ pdata->handler(new_cpu, arch_x86, pdata->data);
/* try to find an existing match in the CPU table first */
for (i=0; i<arch_x86->cpu_enables_size; i++) {