From: Peter Johnson Date: Thu, 21 Sep 2006 05:18:20 +0000 (-0000) Subject: Add SSE4 (aka SSSE3) instructions. X-Git-Tag: v0.6.0~150 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b30d40b1d44c736b701d5480a907fb432d6a83b0;p=yasm Add SSE4 (aka SSSE3) instructions. Patch contributed by: Mathieu Monnier svn path=/trunk/yasm/; revision=1629 --- diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc index 469f0b86..3f94a449 100644 --- a/modules/arch/x86/tests/Makefile.inc +++ b/modules/arch/x86/tests/Makefile.inc @@ -134,6 +134,9 @@ EXTRA_DIST += modules/arch/x86/tests/simd64-2.asm EXTRA_DIST += modules/arch/x86/tests/simd64-2.hex EXTRA_DIST += modules/arch/x86/tests/sse3.asm EXTRA_DIST += modules/arch/x86/tests/sse3.hex +EXTRA_DIST += modules/arch/x86/tests/sse4.asm +EXTRA_DIST += modules/arch/x86/tests/sse4.c +EXTRA_DIST += modules/arch/x86/tests/sse4.hex EXTRA_DIST += modules/arch/x86/tests/stos.asm EXTRA_DIST += modules/arch/x86/tests/stos.hex EXTRA_DIST += modules/arch/x86/tests/str.asm diff --git a/modules/arch/x86/tests/sse4.asm b/modules/arch/x86/tests/sse4.asm new file mode 100644 index 00000000..a9e8578d --- /dev/null +++ b/modules/arch/x86/tests/sse4.asm @@ -0,0 +1,71 @@ +%MACRO TEST_GENERIC 5 +;global _test_ %+ %1 %+ _ %+ %4 +;global test_ %+ %1 %+ _ %+ %4 +_test_ %+ %1 %+ _ %+ %4: +test_ %+ %1 %+ _ %+ %4: + mov edx, [ esp + 4 ] + mov eax, [ esp + 8 ] + %2 %3, [ edx ] + %2 %5, [ eax ] + %1 %3, %5 + %2 [ edx ], %3 + ret +%ENDMACRO + +TEST_GENERIC pabsb, movq, mm0, mmx, mm1 +TEST_GENERIC pabsw, movq, mm0, mmx, mm1 +TEST_GENERIC pabsd, movq, mm0, mmx, mm1 + +TEST_GENERIC pabsb, movdqu, xmm0, xmm, xmm1 +TEST_GENERIC pabsw, movdqu, xmm0, xmm, xmm1 +TEST_GENERIC pabsd, movdqu, xmm0, xmm, xmm1 + +TEST_GENERIC psignb, movq, mm0, mmx, mm1 +TEST_GENERIC psignw, movq, mm0, mmx, mm1 +TEST_GENERIC psignd, movq, mm0, mmx, mm1 + +TEST_GENERIC psignb, movdqu, xmm0, xmm, xmm1 +TEST_GENERIC psignw, movdqu, xmm0, xmm, xmm1 
+TEST_GENERIC psignd, movdqu, xmm0, xmm, xmm1 + +TEST_GENERIC phaddw, movq, mm0, mmx, mm1 +TEST_GENERIC phaddsw, movq, mm0, mmx, mm1 +TEST_GENERIC phaddd, movq, mm0, mmx, mm1 + +TEST_GENERIC phaddw, movdqu, xmm0, xmm, xmm1 +TEST_GENERIC phaddsw, movdqu, xmm0, xmm, xmm1 +TEST_GENERIC phaddd, movdqu, xmm0, xmm, xmm1 + +TEST_GENERIC phsubw, movq, mm0, mmx, mm1 +TEST_GENERIC phsubsw, movq, mm0, mmx, mm1 +TEST_GENERIC phsubd, movq, mm0, mmx, mm1 + +TEST_GENERIC phsubw, movdqu, xmm0, xmm, xmm1 +TEST_GENERIC phsubsw, movdqu, xmm0, xmm, xmm1 +TEST_GENERIC phsubd, movdqu, xmm0, xmm, xmm1 + +TEST_GENERIC pmulhrsw, movq, mm0, mmx, mm1 +TEST_GENERIC pmulhrsw, movdqu, xmm0, xmm, xmm1 + +TEST_GENERIC pmaddubsw, movq, mm0, mmx, mm1 +TEST_GENERIC pmaddubsw, movdqu, xmm0, xmm, xmm1 + +TEST_GENERIC pshufb, movq, mm0, mmx, mm1 +TEST_GENERIC pshufb, movdqu, xmm0, xmm, xmm1 + +%MACRO TEST_ALIGNR 5 +;global _test_ %+ %1 %+ _ %+ %4 +;global test_ %+ %1 %+ _ %+ %4 +_test_ %+ %1 %+ _ %+ %4: +test_ %+ %1 %+ _ %+ %4: + mov edx, [ esp + 4 ] + mov eax, [ esp + 8 ] + %2 %3, [ edx ] + %2 %5, [ eax ] + %1 %3, %5, 3 + %2 [ edx ], %3 + ret +%ENDMACRO + +TEST_ALIGNR palignr, movq, mm0, mmx, mm1 +TEST_ALIGNR palignr, movdqu, xmm0, xmm, xmm1 diff --git a/modules/arch/x86/tests/sse4.c b/modules/arch/x86/tests/sse4.c new file mode 100644 index 00000000..b8803699 --- /dev/null +++ b/modules/arch/x86/tests/sse4.c @@ -0,0 +1,223 @@ +#include +#include +#include + +#define SAT(x) (((x) < -32768) ? -32768 : (((x) > 32767) ? 32767 : (x))) + +static void test_pabsb_c(char *pDst, char *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (8 << xmm); i++ ) + pDst[ i ] = pSrc[ i ] > 0 ? pSrc[i ] : -pSrc[ i ]; +} + +static void test_pabsw_c(short *pDst, short *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (4 << xmm); i++ ) + pDst[ i ] = pSrc[ i ] > 0 ? pSrc[i ] : -pSrc[ i ]; +} + +static void test_pabsd_c(int *pDst, int *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (2 << xmm); i++ ) + pDst[ i ] = pSrc[ i ] > 0 ? 
pSrc[i ] : -pSrc[ i ]; +} + +static void test_psignb_c(char *pDst, char *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (8 << xmm); i++ ) + pDst[ i ] = pSrc[i] ? ( pSrc[ i ] >= 0 ? pDst[i ] : -pDst[ i ] ) : 0; +} + +static void test_psignw_c(short *pDst, short *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (4 << xmm); i++ ) + pDst[ i ] = pSrc[i] ? ( pSrc[ i ] >= 0 ? pDst[i ] : -pDst[ i ] ) : 0; +} + +static void test_psignd_c(int *pDst, int *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (2 << xmm); i++ ) + pDst[ i ] = pSrc[i] ? ( pSrc[ i ] >= 0 ? pDst[i ] : -pDst[ i ] ) : 0; +} + +static void test_phaddw_c(unsigned short *pDst,unsigned short *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (2 << xmm); i++ ) + pDst[ i ] = pDst[ i * 2 ] + pDst[ i * 2 + 1 ]; + + for ( i = 0; i < (2 << xmm); i++ ) + pDst[ i + (2 << xmm) ] = pSrc[ i * 2 ] + pSrc[ i * 2 + 1 ]; +} + +static void test_phaddsw_c(short *pDst, short *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (2 << xmm); i++ ) + pDst[ i ] = SAT( pDst[ i * 2 ] + pDst[ i * 2 + 1 ] ); + + for ( i = 0; i < (2 << xmm); i++ ) + pDst[ i + (2 << xmm) ] = SAT( pSrc[ i * 2 ] + pSrc[ i * 2 + 1 ] ); +} + +static void test_phaddd_c(unsigned int *pDst, unsigned int *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (1 << xmm); i++ ) + pDst[ i ] = pDst[ i * 2 ] + pDst[ i * 2 + 1 ]; + + for ( i = 0; i < (1 << xmm); i++ ) + pDst[ i + (1 << xmm) ] = pSrc[ i * 2 ] + pSrc[ i * 2 + 1 ]; +} + +static void test_phsubw_c(unsigned short *pDst,unsigned short *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (2 << xmm); i++ ) + pDst[ i ] = pDst[ i * 2 ] - pDst[ i * 2 + 1 ]; + + for ( i = 0; i < (2 << xmm); i++ ) + pDst[ i + (2 << xmm) ] = pSrc[ i * 2 ] - pSrc[ i * 2 + 1 ]; +} + +static void test_phsubsw_c(short *pDst, short *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (2 << xmm); i++ ) + pDst[ i ] = SAT( pDst[ i * 2 ] - pDst[ i * 2 + 1 ] ); + + for ( i = 0; i < (2 << xmm); i++ ) + pDst[ i + (2 << xmm) ] = SAT( pSrc[ i * 2 ] - pSrc[ i * 2 + 1 
] ); +} + +static void test_phsubd_c(unsigned int *pDst, unsigned int *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (1 << xmm); i++ ) + pDst[ i ] = pDst[ i * 2 ] - pDst[ i * 2 + 1 ]; + + for ( i = 0; i < (1 << xmm); i++ ) + pDst[ i + (1 << xmm) ] = pSrc[ i * 2 ] - pSrc[ i * 2 + 1 ]; +} + +static void test_pmulhrsw_c(short *pDst, short *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (4 << xmm); i++ ) + { + int a = pSrc[ i ] * pDst[ i ]; + pDst[i] = (short)(((a >> 14) + 1) >> 1); + } +} + +static void test_pmaddubsw_c(unsigned char *pDst, signed char *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < (4 << xmm); i++ ) + { + int a = pSrc[ 2 * i ] * pDst[ 2 * i ] + pSrc[ 2 * i + 1 ] * pDst[ 2 * i + 1]; + ((signed short *)pDst)[i] = SAT(a); + } +} + +static void test_pshufb_c(unsigned char *pDst, unsigned char *pSrc, int xmm) +{ + unsigned char bla[16]; + int i; + + memcpy( bla, pDst, ( 8 << xmm ) ); + + for ( i = 0; i < (8 << xmm); i++ ) + pDst[ i ] = (pSrc[ i ] >= 0x80) ? 0 : bla[ pSrc[ i ] & ((1 << (xmm + 3)) - 1) ]; +} + +static void test_palignr_c(unsigned char *pDst, unsigned char *pSrc, int xmm) +{ + int i; + + for ( i = 0; i < 3; i++ ) + pDst[ i + (8 << xmm) - 3 ] = pDst[ i ]; + + for ( i = 3; i < (8 << xmm); i++ ) + pDst[ i - 3 ] = pSrc[ i ]; +} + +static void randomize_args(unsigned char *pDst, unsigned char *pSrc) +{ + int j; + for ( j = 0; j < 16; j++ ) + { + pDst[ j ] = rand() % 256; + pSrc[ j ] = rand() % 256; + } +} + +#define CHECK_FUNCTION(instruction, extension, additionnal, pDst, pSrc) \ + do { \ + unsigned char temp_dst[16]; \ + unsigned char temp_src[16]; \ + randomize_args( pDst, pSrc ); \ + memcpy( temp_dst, pDst, 16 ); \ + memcpy( temp_src, pSrc, 16 ); \ + test_##instruction##_c( pDst, pSrc, additionnal ); \ + test_##instruction##_##extension( temp_dst, temp_src ); \ + assert( !memcmp( pDst, temp_dst, (8 << additionnal) ) ); \ + } while( 0 ) + +#define CHECK_FUNCTIONS(instruction) \ + CHECK_FUNCTION(instruction, mmx, 0, pDst, pSrc); \ + 
CHECK_FUNCTION(instruction, xmm, 1, pDst, pSrc) + + +void main(int nArgC, char *pArgv[]) +{ + void *pSrc = malloc(16); + void *pDst = malloc(16); + int nIter = atoi( pArgv[ 1 ] ); + int i; + + for ( i = 0; i < nIter; i++ ) + { + CHECK_FUNCTIONS( psignb ); + CHECK_FUNCTIONS( psignw ); + CHECK_FUNCTIONS( psignd ); + + CHECK_FUNCTIONS( pabsb ); + CHECK_FUNCTIONS( pabsw ); + CHECK_FUNCTIONS( pabsd ); + + CHECK_FUNCTIONS( phaddw ); + CHECK_FUNCTIONS( phaddsw ); + CHECK_FUNCTIONS( phaddd ); + + CHECK_FUNCTIONS( phsubw ); + CHECK_FUNCTIONS( phsubsw ); + CHECK_FUNCTIONS( phsubd ); + + CHECK_FUNCTIONS( pmulhrsw ); + CHECK_FUNCTIONS( pmaddubsw ); + + CHECK_FUNCTIONS( pshufb ); + CHECK_FUNCTIONS( palignr ); + } +} \ No newline at end of file diff --git a/modules/arch/x86/tests/sse4.hex b/modules/arch/x86/tests/sse4.hex new file mode 100644 index 00000000..eaa26c65 --- /dev/null +++ b/modules/arch/x86/tests/sse4.hex @@ -0,0 +1,994 @@ +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +1c +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +1d +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +1e +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +1c +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +1d +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +1e +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +08 +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +09 +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 
+0a +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +08 +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +09 +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +0a +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +01 +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +03 +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +02 +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +01 +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +03 +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +02 +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +05 +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +07 +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +06 +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +05 +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +07 +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +06 +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 
+0b +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +0b +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +04 +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +04 +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +38 +00 +c1 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +38 +00 +c1 +f3 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +67 +0f +6f +02 +67 +0f +6f +08 +0f +3a +0f +c1 +03 +67 +0f +7f +02 +c3 +66 +67 +8b +54 +24 +04 +66 +67 +8b +44 +24 +08 +f3 +67 +0f +6f +02 +f3 +67 +0f +6f +08 +66 +0f +3a +0f +c1 +03 +f3 +67 +0f +7f +02 +c3 diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h index 46267b0c..328e270a 100644 --- a/modules/arch/x86/x86arch.h +++ b/modules/arch/x86/x86arch.h @@ -58,6 +58,7 @@ #define CPU_SVM (1UL<<25) /* Secure Virtual Machine instruction */ #define CPU_PadLock (1UL<<25) /* VIA PadLock instruction */ #define CPU_EM64T (1UL<<26) /* Intel EM64T or better */ +#define CPU_SSE4 (1UL<<27) /* Streaming SIMD extensions 4 required */ /* Technically not CPU capabilities, they do affect what instructions are * available. These are tested against BITS==64. 
diff --git a/modules/arch/x86/x86id.c b/modules/arch/x86/x86id.c index d1ffac0e..a0275a2c 100644 --- a/modules/arch/x86/x86id.c +++ b/modules/arch/x86/x86id.c @@ -1948,6 +1948,25 @@ static const x86_insn_info lddqu_insn[] = { {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_Any|OPA_EA, 0} } }; +/* SSE4 instructions */ +static const x86_insn_info sse4_insn[] = { + { CPU_SSE4, MOD_Op2Add, 0, 0, 0, 3, {0x0F, 0x38, 0x00}, 0, 2, + {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0} + }, + { CPU_SSE4, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 2, + {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0} + } +}; + +static const x86_insn_info palignr_insn[] = { + { CPU_SSE4, MOD_Op2Add, 0, 0, 0, 3, {0x0F, 0x3A, 0x00}, 0, 3, + {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} + }, + { CPU_SSE4, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x3A, 0x00}, 0, 3, + {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} + } +}; + /* AMD 3DNow! 
instructions */ static const x86_insn_info now3d_insn[] = { { CPU_3DNow, MOD_Imm8, 0, 0, 0, 2, {0x0F, 0x0F, 0}, 0, 2, diff --git a/modules/arch/x86/x86parse.gap b/modules/arch/x86/x86parse.gap index ce4e4f97..4741ec69 100644 --- a/modules/arch/x86/x86parse.gap +++ b/modules/arch/x86/x86parse.gap @@ -716,6 +716,23 @@ INSN - movddup NONE cvt_xmm_xmm64_ss 0xF212 CPU_SSE3 INSN - movshdup NONE ssess 0xF316 CPU_SSE3 INSN - movsldup NONE ssess 0xF312 CPU_SSE3 INSN - mwait NONE threebyte 0x0F01C9 CPU_SSE3 +# SSE4 / TNI (Tejas New Instructions) instructions +INSN - pshufb NONE sse4 0x00 CPU_SSE4 +INSN - phaddw NONE sse4 0x01 CPU_SSE4 +INSN - phaddd NONE sse4 0x02 CPU_SSE4 +INSN - phaddsw NONE sse4 0x03 CPU_SSE4 +INSN - pmaddubsw NONE sse4 0x04 CPU_SSE4 +INSN - phsubw NONE sse4 0x05 CPU_SSE4 +INSN - phsubd NONE sse4 0x06 CPU_SSE4 +INSN - phsubsw NONE sse4 0x07 CPU_SSE4 +INSN - psignb NONE sse4 0x08 CPU_SSE4 +INSN - psignw NONE sse4 0x09 CPU_SSE4 +INSN - psignd NONE sse4 0x0A CPU_SSE4 +INSN - pmulhrsw NONE sse4 0x0B CPU_SSE4 +INSN - pabsb NONE sse4 0x1C CPU_SSE4 +INSN - pabsw NONE sse4 0x1D CPU_SSE4 +INSN - pabsd NONE sse4 0x1E CPU_SSE4 +INSN - palignr NONE palignr 0x0F CPU_SSE4 # AMD 3DNow! instructions INSN - prefetch NONE twobytemem 0x000F0D CPU_3DNow INSN - prefetchw NONE twobytemem 0x010F0D CPU_3DNow @@ -888,6 +905,10 @@ CPU prescott CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\ CPU_K6|CPU_Athlon|CPU_Hammer|CPU_EM64T|CPU_FPU|CPU_MMX|\ CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_3DNow|CPU_SMM|\ CPU_Prot|CPU_Priv +CPU conroe CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\ + CPU_EM64T|CPU_FPU|CPU_MMX|\ + CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSE4|CPU_SMM|\ + CPU_Prot|CPU_Priv # Features have "no" versions to disable them, and only set/reset the # specific feature being changed. All other bits are left alone. 
@@ -909,6 +930,8 @@ CPU_FEATURE priv CPU_Priv CPU_FEATURE svm CPU_SVM CPU_FEATURE padlock CPU_PadLock CPU_FEATURE em64t CPU_EM64T +CPU_FEATURE sse4 CPU_SSE4 +CPU_FEATURE ssse3 CPU_SSE4 # TARGETMOD parameters: