From 4139febfe8acad10fb759b9d5a8992ed8cad6234 Mon Sep 17 00:00:00 2001 From: Sam Hocevar Date: Thu, 2 Mar 2006 20:48:08 +0000 Subject: [PATCH] * common/i386/*.asm: don't use the "GLOBAL" reserved word, some versions of NASM complain about it. Replaced it with "GOT_ebx". git-svn-id: svn://svn.videolan.org/x264/trunk@451 df754926-b1dd-0310-bc7b-ec298dee348c --- common/i386/dct-a.asm | 24 ++++++++++++------------ common/i386/deblock-a.asm | 20 ++++++++++---------- common/i386/i386inc.asm | 23 ++++++++++++++++------- common/i386/mc-a.asm | 12 ++++++------ common/i386/mc-a2.asm | 16 ++++++++-------- common/i386/pixel-sse2.asm | 2 +- common/i386/predict-a.asm | 12 ++++++------ common/i386/quant-a.asm | 2 +- 8 files changed, 60 insertions(+), 51 deletions(-) diff --git a/common/i386/dct-a.asm b/common/i386/dct-a.asm index 0dc3edc6..801ba764 100644 --- a/common/i386/dct-a.asm +++ b/common/i386/dct-a.asm @@ -169,7 +169,7 @@ x264_dct4x4dc_mmxext: MMX_TRANSPOSE mm0, mm2, mm3, mm4, mm1 ; in: mm0, mm2, mm3, mm4 out: mm0, mm4, mm1, mm3 - movq mm6, [x264_mmx_1 GLOBAL] + movq mm6, [x264_mmx_1 GOT_ebx] paddw mm0, mm6 paddw mm4, mm6 psraw mm0, 1 @@ -300,7 +300,7 @@ x264_add4x4_idct_mmxext: MMX_SUMSUB_BADC mm2, mm3, mm4, mm1 ; mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 MMX_ZERO mm7 - movq mm6, [x264_mmx_32 GLOBAL] + movq mm6, [x264_mmx_32 GOT_ebx] MMX_STORE_DIFF_4P mm2, mm0, mm6, mm7, [eax] MMX_STORE_DIFF_4P mm4, mm0, mm6, mm7, [eax+ecx] @@ -397,10 +397,10 @@ x264_xdct8_mmxext: picpush ebx picgetgot ebx - movq mm5, [x264_mmx_PPNN GLOBAL] - movq mm6, [x264_mmx_PNNP GLOBAL] - movq mm4, [x264_mmx_PPPN GLOBAL] - movq mm7, [x264_mmx_PPNP GLOBAL] + movq mm5, [x264_mmx_PPNN GOT_ebx] + movq mm6, [x264_mmx_PNNP GOT_ebx] + movq mm4, [x264_mmx_PPPN GOT_ebx] + movq mm7, [x264_mmx_PPNP GOT_ebx] ;------------------------------------------------------------------------- ; horizontal dct ( compute 1 row at a time -> 8 loops ) @@ -434,7 +434,7 @@ x264_xdct8_mmxext: pshufw mm2, mm0, 11001001b ; 
(low)a1/a3/a0/a2(high) pshufw mm0, mm0, 10011100b ; (low)a0/a2/a1/a3(high) - pmullw mm2, [x264_mmx_2121 GLOBAL] + pmullw mm2, [x264_mmx_2121 GOT_ebx] pmullw mm0, mm5 ; (low)a0/a2/-a1/-a3(high) psraw mm2, 1 ; (low)a1/a3>>1/a0/a2>>1(high) paddw mm0, mm2 ; (low)dst0/dst2/dst4/dst6(high) @@ -554,10 +554,10 @@ x264_xidct8_mmxext: picpush ebx picgetgot ebx - movq mm4, [x264_mmx_PPNN GLOBAL] - movq mm5, [x264_mmx_PNPN GLOBAL] - movq mm6, [x264_mmx_PPNP GLOBAL] - movq mm7, [x264_mmx_PPPN GLOBAL] + movq mm4, [x264_mmx_PPNN GOT_ebx] + movq mm5, [x264_mmx_PNPN GOT_ebx] + movq mm6, [x264_mmx_PPNP GOT_ebx] + movq mm7, [x264_mmx_PPPN GOT_ebx] ;------------------------------------------------------------------------- ; horizontal idct ( compute 1 row at a time -> 8 loops ) @@ -573,7 +573,7 @@ x264_xidct8_mmxext: punpckhwd mm1, mm2 ; (low)d1,d5,d3,d7(high) pshufw mm2, mm0, 10110001b ; (low)d4,d0,d6,d2(high) - pmullw mm0, [x264_mmx_p2n2p1p1 GLOBAL]; (low)2*d0,-2*d4,d2,d6(high) + pmullw mm0, [x264_mmx_p2n2p1p1 GOT_ebx]; (low)2*d0,-2*d4,d2,d6(high) pmullw mm2, mm6 ; (low)d4,d0,-d6,d2(high) psraw mm0, 1 ; (low)d0,-d4,d2>>1,d6>>1(high) paddw mm0, mm2 ; (low)e0,e2,e4,e6(high) diff --git a/common/i386/deblock-a.asm b/common/i386/deblock-a.asm index dc3fc2fd..7904c5c0 100644 --- a/common/i386/deblock-a.asm +++ b/common/i386/deblock-a.asm @@ -185,19 +185,19 @@ cglobal x264_deblock_h_chroma_intra_mmxext pxor mm4, mm2 ; b = p0^(q1>>2) psrlw mm3, 2 - pand mm3, [pb_3f GLOBAL] + pand mm3, [pb_3f GOT_ebx] movq mm5, mm1 pxor mm5, mm3 ; c = q0^(p1>>2) psrlw mm0, 2 - pand mm0, [pb_3f GLOBAL] + pand mm0, [pb_3f GOT_ebx] movq mm6, mm2 pxor mm6, mm0 ; d = (c^b) & ~(b^a) & 1 pxor mm6, mm5 pxor mm5, mm4 pandn mm5, mm6 - pand mm5, [pb_01 GLOBAL] + pand mm5, [pb_01 GOT_ebx] ; delta = (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3 ; = (avg(q0, p1>>2) + (d&a)) ; - (avg(p0, q1>>2) + (d^(d&a))) @@ -227,10 +227,10 @@ cglobal x264_deblock_h_chroma_intra_mmxext %macro LUMA_Q1_MMX 6 movq %6, mm1 pavgb %6, mm2 - pavgb 
%2, %6 ; avg(p2,avg(p0,q0)) + pavgb %2, %6 ; avg(p2,avg(p0,q0)) pxor %6, %3 - pand %6, [pb_01 GLOBAL] ; (p2^avg(p0,q0))&1 - psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1 + pand %6, [pb_01 GOT_ebx] ; (p2^avg(p0,q0))&1 + psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1 movq %6, %1 psubusb %6, %5 paddusb %5, %1 @@ -274,7 +274,7 @@ x264_deblock_v8_luma_mmxext: punpcklbw mm4, mm4 punpcklbw mm4, mm4 ; tc = 4x tc0[1], 4x tc0[0] movq [esp+8], mm4 ; tc - pcmpgtb mm4, [pb_ff GLOBAL] + pcmpgtb mm4, [pb_ff GOT_ebx] pand mm4, mm7 movq [esp+0], mm4 ; mask @@ -284,7 +284,7 @@ x264_deblock_v8_luma_mmxext: pcmpeqb mm6, mm4 pand mm6, mm4 pand mm4, [esp+8] ; tc - movq mm7, [pb_01 GLOBAL] + movq mm7, [pb_01 GOT_ebx] pand mm7, mm6 pand mm6, mm4 paddb mm7, mm4 @@ -298,7 +298,7 @@ x264_deblock_v8_luma_mmxext: pand mm6, mm5 movq mm5, [esp+8] ; tc pand mm5, mm6 - pand mm6, [pb_01 GLOBAL] + pand mm6, [pb_01 GOT_ebx] paddb mm7, mm6 movq mm3, [edi+esi] LUMA_Q1_MMX mm3, mm4, [edi+2*esi], [edi+esi], mm5, mm6 @@ -476,7 +476,7 @@ x264_deblock_h_chroma_mmxext: %macro CHROMA_INTRA_P0 3 movq mm4, %1 pxor mm4, %3 - pand mm4, [pb_01 GLOBAL] ; mm4 = (p0^q1)&1 + pand mm4, [pb_01 GOT_ebx] ; mm4 = (p0^q1)&1 pavgb %1, %3 psubusb %1, mm4 pavgb %1, %2 ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1)) diff --git a/common/i386/i386inc.asm b/common/i386/i386inc.asm index 190d6352..888fdab5 100644 --- a/common/i386/i386inc.asm +++ b/common/i386/i386inc.asm @@ -54,13 +54,14 @@ BITS 32 ; and let you load non-shared .so objects (Linux, Win32...). However, OS X ; requires PIC code in its .dylib objects. ; -; - GLOBAL should be used as a suffix for global addressing, eg. -; mov eax, [foo GLOBAL] +; - GOT_* should be used as a suffix for global addressing, eg. +; picgetgot ebx +; mov eax, [foo GOT_ebx] ; instead of ; mov eax, [foo] ; ; - picgetgot computes the GOT address into the given register in PIC -; mode, otherwise does nothing. You need to do this before using GLOBAL. +; mode, otherwise does nothing. 
You need to do this before using GOT_*. ; ; - picpush and picpop respectively push and pop the given register ; in PIC mode, otherwise do nothing. You should always use them around @@ -81,7 +82,10 @@ BITS 32 %ifidn __OUTPUT_FORMAT__,macho ; There is no real global offset table on OS X, but we still ; need to reference our variables by offset. - %define GLOBAL + ebx + %define GOT_eax + eax + %define GOT_ebx + ebx + %define GOT_ecx + ecx + %define GOT_edx + edx %macro picgetgot 1 call %%getgot %%getgot: @@ -95,8 +99,10 @@ BITS 32 %define GOT __GLOBAL_OFFSET_TABLE_ %endif extern GOT - ; FIXME: find an elegant way to use registers other than ebx - %define GLOBAL + ebx wrt ..gotoff + %define GOT_eax + eax wrt ..gotoff + %define GOT_ebx + ebx wrt ..gotoff + %define GOT_ecx + ecx wrt ..gotoff + %define GOT_edx + edx wrt ..gotoff %macro picgetgot 1 call %%getgot %%getgot: @@ -112,7 +118,10 @@ BITS 32 %endmacro %define picesp esp+4 %else - %define GLOBAL + %define GOT_eax + %define GOT_ebx + %define GOT_ecx + %define GOT_edx %macro picgetgot 1 %endmacro %macro picpush 1 diff --git a/common/i386/mc-a.asm b/common/i386/mc-a.asm index 72d7a2f6..9f58bf86 100644 --- a/common/i386/mc-a.asm +++ b/common/i386/mc-a.asm @@ -268,10 +268,10 @@ ALIGN 4 mov edx, [picesp+20] ; src mov ecx, [picesp+24] ; i_src - pshufw mm4, [picesp+28], 0 ; weight_dst - movq mm5, [pw_64 GLOBAL] - psubw mm5, mm4 ; weight_src - movq mm6, [pw_32 GLOBAL] ; rounding + pshufw mm4, [picesp+28], 0 ; weight_dst + movq mm5, [pw_64 GOT_ebx] + psubw mm5, mm4 ; weight_src + movq mm6, [pw_32 GOT_ebx] ; rounding pxor mm7, mm7 %endmacro %macro BIWEIGHT_END_MMX 0 @@ -516,7 +516,7 @@ x264_mc_chroma_mmxext: pshufw mm5, [picesp+20], 0 ; mm5 = dx pshufw mm6, [picesp+24], 0 ; mm6 = dy - movq mm4, [pw_8 GLOBAL] + movq mm4, [pw_8 GOT_ebx] movq mm0, mm4 psubw mm4, mm5 ; mm4 = 8-dx @@ -552,7 +552,7 @@ ALIGN 4 punpcklbw mm2, mm3 punpcklbw mm1, mm3 - paddw mm0, [pw_32 GLOBAL] + paddw mm0, [pw_32 GOT_ebx] pmullw mm2, mm5 ; line * cB 
pmullw mm1, mm7 ; line * cD diff --git a/common/i386/mc-a2.asm b/common/i386/mc-a2.asm index a688f719..49ba566a 100644 --- a/common/i386/mc-a2.asm +++ b/common/i386/mc-a2.asm @@ -180,7 +180,7 @@ loopcy: mov edi, [picesp + tdst1] lea ebp, [picesp + tbuffer] mov esi, [picesp + tsrc] - movq mm7, [mmx_dw_one GLOBAL] + movq mm7, [mmx_dw_one GOT_ebx] picpop ebx @@ -249,15 +249,15 @@ loopcx2: paddw mm3, mm4 paddw mm1, mm6 - movq mm5, [mmx_dw_20 GLOBAL] - movq mm4, [mmx_dw_5 GLOBAL] + movq mm5, [mmx_dw_20 GOT_ebx] + movq mm4, [mmx_dw_5 GOT_ebx] movq mm6, mm1 pxor mm7, mm7 punpckhwd mm5, mm2 punpcklwd mm4, mm3 - punpcklwd mm2, [mmx_dw_20 GLOBAL] - punpckhwd mm3, [mmx_dw_5 GLOBAL] + punpcklwd mm2, [mmx_dw_20 GOT_ebx] + punpckhwd mm3, [mmx_dw_5 GOT_ebx] pcmpgtw mm7, mm1 @@ -270,8 +270,8 @@ loopcx2: paddd mm2, mm1 paddd mm3, mm6 - paddd mm2, [mmx_dd_one GLOBAL] - paddd mm3, [mmx_dd_one GLOBAL] + paddd mm2, [mmx_dd_one GOT_ebx] + paddd mm3, [mmx_dd_one GOT_ebx] psrad mm2, 10 psrad mm3, 10 @@ -323,7 +323,7 @@ x264_horizontal_filter_mmxext : pxor mm0, mm0 picpush ebx picgetgot ebx - movq mm7, [mmx_dw_one GLOBAL] + movq mm7, [mmx_dw_one GOT_ebx] picpop ebx mov ecx, [esp + 32] ; height diff --git a/common/i386/pixel-sse2.asm b/common/i386/pixel-sse2.asm index a7a37087..e89c3895 100644 --- a/common/i386/pixel-sse2.asm +++ b/common/i386/pixel-sse2.asm @@ -404,7 +404,7 @@ x264_pixel_ssd_16x8_sse2: movdqa %2, %1 psrldq %1, 2 paddusw %1, %2 - pand %1, [pd_0000ffff GLOBAL] + pand %1, [pd_0000ffff GOT_ebx] movdqa %2, %1 psrldq %1, 4 paddd %1, %2 diff --git a/common/i386/predict-a.asm b/common/i386/predict-a.asm index d3539251..a834fb13 100644 --- a/common/i386/predict-a.asm +++ b/common/i386/predict-a.asm @@ -85,7 +85,7 @@ cglobal predict_16x16_dc_top_mmxext pavgb mm1, mm2 pxor mm2, mm3 movq %1 , %2 - pand mm2, [pb_1 GLOBAL] + pand mm2, [pb_1 GOT_ebx] psubusb mm1, mm2 pavgb %1 , mm1 ; %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2 %endmacro @@ -157,7 +157,7 @@ predict_8x8_dc_core_mmxext: pxor 
mm1, mm1 psadbw mm0, mm1 psadbw mm4, mm1 - paddw mm0, [pw_8 GLOBAL] + paddw mm0, [pw_8 GOT_ebx] paddw mm0, mm4 psrlw mm0, 4 pshufw mm0, mm0, 0 @@ -212,7 +212,7 @@ predict_8x8c_dc_core_mmxext: paddw mm0, [picesp + 8] pshufw mm2, [picesp + 12], 0 psrlw mm0, 3 - paddw mm1, [pw_2 GLOBAL] + paddw mm1, [pw_2 GOT_ebx] movq mm3, mm2 pshufw mm1, mm1, 0 pshufw mm0, mm0, 0 ; dc0 (w) @@ -246,7 +246,7 @@ predict_8x8c_p_core_mmx: pshufw mm2, [picesp +12], 0 pshufw mm4, [picesp +16], 0 movq mm1, mm2 - pmullw mm2, [pw_3210 GLOBAL] + pmullw mm2, [pw_3210 GOT_ebx] psllw mm1, 2 paddsw mm0, mm2 ; mm0 = {i+0*b, i+1*b, i+2*b, i+3*b} paddsw mm1, mm0 ; mm1 = {i+4*b, i+5*b, i+6*b, i+7*b} @@ -293,7 +293,7 @@ predict_16x16_p_core_mmx: pshufw mm4, [picesp +16], 0 movq mm5, mm2 movq mm1, mm2 - pmullw mm5, [pw_3210 GLOBAL] + pmullw mm5, [pw_3210 GOT_ebx] psllw mm2, 3 psllw mm1, 2 movq mm3, mm2 @@ -421,7 +421,7 @@ ALIGN 16 predict_16x16_dc_top_mmxext: picpush ebx picgetgot ebx - PRED16x16_DC [pw_8 GLOBAL], 4, picesp + PRED16x16_DC [pw_8 GOT_ebx], 4, picesp picpop ebx ret diff --git a/common/i386/quant-a.asm b/common/i386/quant-a.asm index 7b68626d..6435c2f5 100644 --- a/common/i386/quant-a.asm +++ b/common/i386/quant-a.asm @@ -480,7 +480,7 @@ ALIGN 16 neg eax picpush ebx picgetgot ebx - movq mm6, [pd_1 GLOBAL] + movq mm6, [pd_1 GOT_ebx] picpop ebx movd mm5, eax pxor mm7, mm7 -- 2.40.0