MMX_SUMSUB_BADC mm2, mm3, mm0, mm4 ; mm2=s01 mm3=d01 mm0=s23 mm4=d23
MMX_SUMSUB_BADC mm0, mm2, mm4, mm3 ; mm0=s01+s23 mm2=s01-s23 mm4=d01+d23 mm3=d01-d23
- movq mm6, [pw_1 GOT_edx]
+ movq mm6, [pw_1 GLOBAL]
paddw mm0, mm6
paddw mm2, mm6
psraw mm0, 1
MMX_SUMSUB_BADC mm2, mm3, mm4, mm1 ; mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13
MMX_ZERO mm7
- movq mm6, [pw_32 GOT_edx]
+ movq mm6, [pw_32 GLOBAL]
MMX_STORE_DIFF_4P mm2, mm0, mm6, mm7, [eax+0*FDEC_STRIDE]
MMX_STORE_DIFF_4P mm4, mm0, mm6, mm7, [eax+1*FDEC_STRIDE]
IDCT8_1D xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
TRANSPOSE8 xmm4, xmm1, xmm7, xmm3, xmm5, xmm0, xmm2, xmm6, eax
picgetgot edx
- paddw xmm4, [pw_32 GOT_edx]
+ paddw xmm4, [pw_32 GLOBAL]
movdqa [eax+0x00], xmm4
movdqa [eax+0x40], xmm2
IDCT8_1D xmm4, xmm0, xmm6, xmm3, xmm2, xmm5, xmm7, xmm1
%macro DEBLOCK_P0_Q0_MMX 0
movq mm5, mm1
pxor mm5, mm2 ; p0^q0
- pand mm5, [pb_01 GOT_ebx] ; (p0^q0)&1
+ pand mm5, [pb_01 GLOBAL] ; (p0^q0)&1
pcmpeqb mm4, mm4
pxor mm3, mm4
pavgb mm3, mm0 ; (p1 - q1 + 256)>>1
- pavgb mm3, [pb_03 GOT_ebx] ; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
+ pavgb mm3, [pb_03 GLOBAL] ; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
pxor mm4, mm1
pavgb mm4, mm2 ; (q0 - p0 + 256)>>1
pavgb mm3, mm5
paddusb mm3, mm4 ; d+128+33
- movq mm6, [pb_a1 GOT_ebx]
+ movq mm6, [pb_a1 GLOBAL]
psubusb mm6, mm3
- psubusb mm3, [pb_a1 GOT_ebx]
+ psubusb mm3, [pb_a1 GLOBAL]
pminub mm6, mm7
pminub mm3, mm7
psubusb mm1, mm6
pavgb %6, mm2
pavgb %2, %6 ; avg(p2,avg(p0,q0))
pxor %6, %3
- pand %6, [pb_01 GOT_ebx] ; (p2^avg(p0,q0))&1
+ pand %6, [pb_01 GLOBAL] ; (p2^avg(p0,q0))&1
psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
movq %6, %1
psubusb %6, %5
movq mm3, [edi+esi]
LUMA_Q1_MMX mm3, mm4, [edi+2*esi], [edi+esi], mm5, mm6
- DEBLOCK_P0_Q0_MMX ; XXX: make sure ebx has the GOT in PIC mode
+ DEBLOCK_P0_Q0_MMX
movq [eax+2*esi], mm1
movq [edi], mm2
punpcklbw mm6, mm6
pand mm7, mm6
picgetgot ebx ; no need to push ebx, it's already been done
- DEBLOCK_P0_Q0_MMX ; XXX: make sure ebx has the GOT in PIC mode
+ DEBLOCK_P0_Q0_MMX
movq [eax+esi], mm1
movq [edi], mm2
punpcklbw mm6, mm6
pand mm7, mm6
picgetgot ebx ; no need to push ebx, it's already been done
- DEBLOCK_P0_Q0_MMX ; XXX: make sure ebx has the GOT in PIC mode
+ DEBLOCK_P0_Q0_MMX
movq mm0, [esp+8]
movq mm3, [esp+0]
%macro CHROMA_INTRA_P0 3
movq mm4, %1
pxor mm4, %3
- pand mm4, [pb_01 GOT_ebx] ; mm4 = (p0^q1)&1
+ pand mm4, [pb_01 GLOBAL] ; mm4 = (p0^q1)&1
pavgb %1, %3
psubusb %1, mm4
pavgb %1, %2 ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
movq mm1, [eax+esi]
movq mm2, [edi]
movq mm3, [edi+esi]
- CHROMA_INTRA_BODY ; XXX: make sure ebx has the GOT in PIC mode
+ CHROMA_INTRA_BODY
movq [eax+esi], mm1
movq [edi], mm2
picpop ebx
picpush ebx
picgetgot ebx
TRANSPOSE4x8_LOAD PASS8ROWS(eax, edi, esi, ebp)
- CHROMA_INTRA_BODY ; XXX: make sure ebx has the GOT in PIC mode
+ CHROMA_INTRA_BODY
TRANSPOSE8x4_STORE PASS8ROWS(eax, edi, esi, ebp)
picpop ebx
pop ebp ; needed because of CHROMA_H_START
; and let you load non-shared .so objects (Linux, Win32...). However, OS X
; requires PIC code in its .dylib objects.
;
-; - GOT_* should be used as a suffix for global addressing, eg.
+; - GLOBAL should be used as a suffix for global addressing, eg.
; picgetgot ebx
-; mov eax, [foo GOT_ebx]
+; mov eax, [foo GLOBAL]
; instead of
; mov eax, [foo]
;
; - picgetgot computes the GOT address into the given register in PIC
-; mode, otherwise does nothing. You need to do this before using GOT_*.
+; mode, otherwise does nothing. You need to do this before using GLOBAL:
+; earlier in execution order (so the register holds the GOT address) and
+; earlier in source order (so GLOBAL knows which register the GOT is in).
;
; - picpush and picpop respectively push and pop the given register
; in PIC mode, otherwise do nothing. You should always use them around
%ifidn __OUTPUT_FORMAT__,macho
; There is no real global offset table on OS X, but we still
; need to reference our variables by offset.
- %define GOT_eax - fakegot + eax
- %define GOT_ebx - fakegot + ebx
- %define GOT_ecx - fakegot + ecx
- %define GOT_edx - fakegot + edx
+ %define GOT_reg(x) - fakegot + x
; picgetgot reg  (Mach-O variant)
; Loads the base used for global addressing into register %1, then rebinds
; GLOBAL to GOT_reg(%1) so that later "[sym GLOBAL]" operands in the source
; are addressed through that register.
; After the add, %1 holds the runtime address of $$ (start of the current
; section). NOTE(review): this presumably equals the address of "fakegot",
; which GOT_reg subtracts -- confirm where fakegot is defined.
; Clobbers: %1. Uses one stack slot transiently (call pushes, pop removes).
%macro picgetgot 1
call %%getgot ; pushes the address of %%getgot as the return address
%%getgot:
pop %1 ; %1 = runtime address of %%getgot
add %1, $$ - %%getgot ; rebase %1 from %%getgot to $$
+ %undef GLOBAL
+ %define GLOBAL GOT_reg(%1)
%endmacro
%else
%ifidn __OUTPUT_FORMAT__,elf
%define GOT __GLOBAL_OFFSET_TABLE_
%endif
extern GOT
- %define GOT_eax + eax wrt ..gotoff
- %define GOT_ebx + ebx wrt ..gotoff
- %define GOT_ecx + ecx wrt ..gotoff
- %define GOT_edx + edx wrt ..gotoff
+ %define GOT_reg(x) + x wrt ..gotoff
; picgetgot reg  (variant using the real GOT symbol declared by "extern GOT")
; Computes the GOT address into register %1 with a call/pop pair followed by
; a "wrt ..gotpc" fixup, then rebinds GLOBAL to GOT_reg(%1) so that later
; "[sym GLOBAL]" operands are addressed relative to that register
; ("+ reg wrt ..gotoff").
; Clobbers: %1. Uses one stack slot transiently (call pushes, pop removes).
%macro picgetgot 1
call %%getgot ; pushes the address of %%getgot as the return address
%%getgot:
pop %1 ; %1 = runtime address of %%getgot
add %1, GOT + $$ - %%getgot wrt ..gotpc ; %1 = address of the GOT
+ %undef GLOBAL
+ %define GLOBAL GOT_reg(%1)
%endmacro
%endif
%macro picpush 1
%endmacro
%define picesp esp+4
%else
- %define GOT_eax
- %define GOT_ebx
- %define GOT_ecx
- %define GOT_edx
+ %define GLOBAL
; picgetgot reg  (empty variant)
; Expands to nothing: the register argument is accepted and ignored, so call
; sites can invoke picgetgot unconditionally. Per the file's header comment,
; picgetgot does nothing outside PIC mode.
%macro picgetgot 1
%endmacro
%macro picpush 1
push edi
push esi
picgetgot ecx
- movq mm5, [pw_64 GOT_ecx]
- movq mm6, [pw_32 GOT_ecx] ; rounding
+ movq mm5, [pw_64 GLOBAL]
+ movq mm6, [pw_32 GLOBAL] ; rounding
mov edi, [esp+12] ; dst
mov esi, [esp+16] ; i_dst
mov edx, [esp+20] ; src
pshufw mm5, mm5, 0 ; mm5 = dx&7
pshufw mm6, mm6, 0 ; mm6 = dy&7
- movq mm4, [pw_8 GOT_ebx]
+ movq mm4, [pw_8 GLOBAL]
movq mm0, mm4
psubw mm4, mm5 ; mm4 = 8-dx
punpcklbw mm2, mm3
punpcklbw mm1, mm3
- paddw mm0, [pw_32 GOT_ebx]
+ paddw mm0, [pw_32 GLOBAL]
pmullw mm2, mm5 ; line * cB
pmullw mm1, mm7 ; line * cD
pxor mm0, mm0
; mov globals onto the stack, to free up ebx
- movq mm1, [pw_1 GOT_ebx]
- movq mm2, [pw_16 GOT_ebx]
- movq mm3, [pw_32 GOT_ebx]
+ movq mm1, [pw_1 GLOBAL]
+ movq mm2, [pw_16 GLOBAL]
+ movq mm3, [pw_32 GLOBAL]
movq [tpw_1], mm1
movq [tpw_16], mm2
movq [tpw_32], mm3
%macro HADDW 2 ; sum junk
; ebx is no longer used at this point, so no push needed
picgetgot ebx
- pmaddwd %1, [pw_1 GOT_ebx]
+ pmaddwd %1, [pw_1 GLOBAL]
movhlps %2, %1
paddd %1, %2
pshuflw %2, %1, 0xE
shl eax, 4
%endif
picgetgot ebx
- lea edi, [sad_w16_align1_%1 + (sad_w16_align1_%1 - sad_w16_align2_%1) + eax GOT_ebx]
+ lea edi, [sad_w16_align1_%1 + (sad_w16_align1_%1 - sad_w16_align2_%1) + eax GLOBAL]
mov eax, [esp+16]
mov ebx, [esp+20]
mov ecx, [esp+24]
; PHADDD xmm3, xmm4
mov eax, [esp+24]
picgetgot ebx
- movdqa xmm7, [pw_1 GOT_ebx]
+ movdqa xmm7, [pw_1 GLOBAL]
pshufd xmm5, xmm3, 0xB1
pmaddwd xmm1, xmm7
pmaddwd xmm2, xmm7
paddd xmm1, xmm2
paddd xmm2, xmm3
paddd xmm3, xmm4
- movdqa xmm5, [ssim_c1 GOT_ebx]
- movdqa xmm6, [ssim_c2 GOT_ebx]
+ movdqa xmm5, [ssim_c1 GLOBAL]
+ movdqa xmm6, [ssim_c2 GLOBAL]
TRANSPOSE4x4D xmm0, xmm1, xmm2, xmm3, xmm4
; s1=mm0, s2=mm3, ss=mm4, s12=mm2
divps xmm1, xmm0 ; ssim
neg edx
- movdqu xmm3, [mask_ff + edx*4 + 16 GOT_ebx]
+ movdqu xmm3, [mask_ff + edx*4 + 16 GLOBAL]
pand xmm1, xmm3
movhlps xmm0, xmm1
addps xmm0, xmm1
pavgb %2, %3
pxor %3, %5
mov%6 %1, %4
- pand %3, [pb_1 GOT_ecx]
+ pand %3, [pb_1 GLOBAL]
psubusb %2, %3
pavgb %1, %2
%endmacro
movq mm3, [eax - FDEC_STRIDE ]
movq mm1, [eax - FDEC_STRIDE - 1]
movq mm2, mm3
- movq mm4, [pb_0s_ff GOT_ecx]
+ movq mm4, [pb_0s_ff GLOBAL]
psrlq mm2, 8
pand mm4, mm3
por mm2, mm4
pxor mm1, mm1
psadbw mm0, [eax+7]
psadbw mm1, [eax+16]
- paddw mm0, [pw_8 GOT_ecx]
+ paddw mm0, [pw_8 GLOBAL]
paddw mm0, mm1
psrlw mm0, 4
pshufw mm0, mm0, 0
mov edx, [esp + 4]
pxor mm0, mm0
psadbw mm0, [eax+%2]
- paddw mm0, [pw_4 GOT_ecx]
+ paddw mm0, [pw_4 GLOBAL]
psrlw mm0, 3
pshufw mm0, mm0, 0
packuswb mm0, mm0
paddw mm0, [esp + 8]
pshufw mm2, [esp + 12], 0
psrlw mm0, 3
- paddw mm1, [pw_2 GOT_ecx]
+ paddw mm1, [pw_2 GLOBAL]
movq mm3, mm2
pshufw mm1, mm1, 0
pshufw mm0, mm0, 0 ; dc0 (w)
pshufw mm2, [esp +12], 0
pshufw mm4, [esp +16], 0
movq mm1, mm2
- pmullw mm2, [pw_3210 GOT_ecx]
+ pmullw mm2, [pw_3210 GLOBAL]
psllw mm1, 2
paddsw mm0, mm2 ; mm0 = {i+0*b, i+1*b, i+2*b, i+3*b}
paddsw mm1, mm0 ; mm1 = {i+4*b, i+5*b, i+6*b, i+7*b}
pshufw mm4, [esp +16], 0
movq mm5, mm2
movq mm1, mm2
- pmullw mm5, [pw_3210 GOT_ecx]
+ pmullw mm5, [pw_3210 GLOBAL]
psllw mm2, 3
psllw mm1, 2
movq mm3, mm2
punpcklqdq xmm1, xmm1
punpcklqdq xmm2, xmm2
movdqa xmm3, xmm1
- pmullw xmm3, [pw_76543210 GOT_ecx]
+ pmullw xmm3, [pw_76543210 GLOBAL]
psllw xmm1, 3
paddsw xmm0, xmm3 ; xmm0 = {i+ 0*b, i+ 1*b, i+ 2*b, i+ 3*b, i+ 4*b, i+ 5*b, i+ 6*b, i+ 7*b}
paddsw xmm1, xmm0 ; xmm1 = {i+ 8*b, i+ 9*b, i+10*b, i+11*b, i+12*b, i+13*b, i+14*b, i+15*b}
; predict_16x16_dc_top_mmxext
; Thin wrapper: expands PRED16x16_DC with the pw_8 constant as a memory
; operand and the immediate 4, then returns.
; NOTE(review): the meaning of the two PRED16x16_DC arguments is not visible
; here -- confirm against the macro definition.
cglobal predict_16x16_dc_top_mmxext
picgetgot ecx ; GOT base into ecx (PIC mode only); must precede the GLOBAL use below
- PRED16x16_DC [pw_8 GOT_ecx], 4
+ PRED16x16_DC [pw_8 GLOBAL], 4
ret
;-----------------------------------------------------------------------------
; predict_16x16_dc_top_sse2
; Thin wrapper: expands PRED16x16_DC_SSE2 with the pw_8 constant as a memory
; operand and the immediate 4, then returns.
; NOTE(review): the meaning of the two PRED16x16_DC_SSE2 arguments is not
; visible here -- confirm against the macro definition.
cglobal predict_16x16_dc_top_sse2
picgetgot ecx ; GOT base into ecx (PIC mode only); must precede the GLOBAL use below
- PRED16x16_DC_SSE2 [pw_8 GOT_ecx], 4
+ PRED16x16_DC_SSE2 [pw_8 GLOBAL], 4
ret
;-----------------------------------------------------------------------------
neg eax
movd mm5, eax
picgetgot eax
- movq mm6, [pd_1 GOT_eax]
+ movq mm6, [pd_1 GLOBAL]
pxor mm7, mm7
pslld mm6, mm5
psrld mm6, 1