From: Jan Kratochvil Date: Mon, 4 Oct 2010 21:18:58 +0000 (+0200) Subject: nasm: address labels 'rel label' vice 'wrt rip' X-Git-Tag: v0.9.5~63 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5cdc3a4c29c5da43a4c346d57932c1c46068abec;p=libvpx nasm: address labels 'rel label' vice 'wrt rip' nasm does not support `label wrt rip', it requires `rel label'. It is still fully compatible with yasm. Provide nasm compatibility. No binary change by this patch with yasm on {x86_64,i686}-fedora13-linux-gnu. Few longer opcodes with nasm on {x86_64,i686}-fedora13-linux-gnu have been checked as safe. Change-Id: I488773a4e930a56e43b0cc72d867ee5291215f50 --- diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm index 99e09a50e..43735bc4b 100644 --- a/vp8/common/x86/idctllm_mmx.asm +++ b/vp8/common/x86/idctllm_mmx.asm @@ -58,11 +58,11 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL] ; + pmulhw mm5, [GLOBAL(x_s1sqr2)] ; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL] ; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -70,10 +70,10 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 @@ -113,11 +113,11 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL] ; + pmulhw mm5, [GLOBAL(x_s1sqr2)] ; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL] ; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -125,16 +125,16 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 - paddw mm0, [fours GLOBAL] + paddw mm0, [GLOBAL(fours)] - paddw mm2, [fours GLOBAL] + paddw mm2, [GLOBAL(fours)] movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 @@ -196,7 +196,7 @@ sym(vp8_short_idct4x4llm_1_mmx): mov rax, arg(0) ;input movd mm0, [rax] - paddw mm0, [fours GLOBAL] + paddw mm0, [GLOBAL(fours)] mov rdx, arg(1) ;output psraw mm0, 3 @@ -239,7 +239,7 @@ sym(vp8_dc_only_idct_add_mmx): movd mm5, arg(0) ;input_dc - paddw mm5, [fours GLOBAL] + paddw mm5, [GLOBAL(fours)] psraw mm5, 3 diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm index ac941851b..edee1578e 100644 --- a/vp8/common/x86/idctllm_sse2.asm +++ b/vp8/common/x86/idctllm_sse2.asm @@ -51,7 +51,7 @@ sym(idct_dequant_0_2x_sse2): pshufhw xmm4, xmm4, 00000000b mov rax, arg(2) ; pre - paddw xmm4, [fours GLOBAL] + paddw xmm4, [GLOBAL(fours)] movsxd rcx, dword ptr arg(5) ; blk_stride psraw xmm4, 3 @@ -160,11 +160,11 @@ sym(idct_dequant_full_2x_sse2): movdqa xmm5, xmm1 paddw xmm2, xmm0 ; a1 = 0+2 - pmulhw xmm5, [x_s1sqr2 GLOBAL] + pmulhw xmm5, [GLOBAL(x_s1sqr2)] paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) movdqa xmm7, xmm3 - pmulhw xmm7, [x_c1sqr2less1 GLOBAL] + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) psubw xmm7, xmm5 ; c1 @@ -172,10 +172,10 @@ sym(idct_dequant_full_2x_sse2): movdqa xmm5, xmm1 movdqa xmm4, xmm3 - pmulhw xmm5, [x_c1sqr2less1 GLOBAL] + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] paddw xmm5, xmm1 - pmulhw xmm3, [x_s1sqr2 GLOBAL] + pmulhw xmm3, [GLOBAL(x_s1sqr2)] paddw xmm3, xmm4 paddw xmm3, xmm5 ; d1 @@ -229,11 +229,11 @@ sym(idct_dequant_full_2x_sse2): movdqa xmm5, xmm1 paddw xmm2, xmm0 ; a1 = 0+2 - pmulhw xmm5, [x_s1sqr2 GLOBAL] + pmulhw xmm5, [GLOBAL(x_s1sqr2)] paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) movdqa xmm7, xmm3 - pmulhw xmm7, [x_c1sqr2less1 GLOBAL] + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) psubw xmm7, xmm5 ; c1 @@ -241,16 +241,16 @@ sym(idct_dequant_full_2x_sse2): movdqa xmm5, xmm1 movdqa xmm4, xmm3 - pmulhw xmm5, [x_c1sqr2less1 GLOBAL] + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] paddw xmm5, xmm1 - pmulhw xmm3, [x_s1sqr2 GLOBAL] + pmulhw xmm3, [GLOBAL(x_s1sqr2)] paddw xmm3, xmm4 paddw xmm3, xmm5 ; d1 - paddw xmm0, [fours GLOBAL] + paddw xmm0, [GLOBAL(fours)] - paddw xmm2, [fours GLOBAL] + paddw xmm2, [GLOBAL(fours)] movdqa xmm6, xmm2 ; a1 movdqa xmm4, xmm0 ; b1 @@ -394,7 +394,7 @@ sym(idct_dequant_dc_0_2x_sse2): punpckldq xmm4, xmm4 ; Rounding to dequant and downshift - paddw xmm4, [fours GLOBAL] + paddw xmm4, [GLOBAL(fours)] psraw xmm4, 3 ; Predict buffer needs to be expanded from bytes to words @@ -505,11 +505,11 @@ sym(idct_dequant_dc_full_2x_sse2): movdqa xmm5, xmm1 paddw xmm2, xmm0 ; a1 = 0+2 - pmulhw xmm5, [x_s1sqr2 GLOBAL] + pmulhw xmm5, [GLOBAL(x_s1sqr2)] paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) movdqa xmm7, xmm3 - pmulhw xmm7, [x_c1sqr2less1 GLOBAL] + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) psubw xmm7, xmm5 ; c1 @@ -517,10 +517,10 @@ sym(idct_dequant_dc_full_2x_sse2): movdqa xmm5, xmm1 movdqa xmm4, xmm3 - pmulhw xmm5, [x_c1sqr2less1 GLOBAL] + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] paddw xmm5, xmm1 - pmulhw xmm3, [x_s1sqr2 GLOBAL] + pmulhw xmm3, [GLOBAL(x_s1sqr2)] paddw xmm3, xmm4 paddw xmm3, xmm5 ; d1 @@ -574,11 +574,11 @@ sym(idct_dequant_dc_full_2x_sse2): movdqa xmm5, xmm1 paddw xmm2, xmm0 ; a1 = 0+2 - pmulhw xmm5, [x_s1sqr2 GLOBAL] + pmulhw xmm5, [GLOBAL(x_s1sqr2)] paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) movdqa xmm7, xmm3 - pmulhw xmm7, [x_c1sqr2less1 GLOBAL] + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) psubw xmm7, xmm5 ; c1 @@ -586,16 +586,16 @@ sym(idct_dequant_dc_full_2x_sse2): movdqa xmm5, xmm1 movdqa xmm4, xmm3 - pmulhw xmm5, [x_c1sqr2less1 GLOBAL] + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] paddw xmm5, xmm1 - pmulhw xmm3, [x_s1sqr2 GLOBAL] + pmulhw xmm3, [GLOBAL(x_s1sqr2)] paddw xmm3, xmm4 paddw xmm3, xmm5 ; d1 - paddw xmm0, [fours GLOBAL] + paddw xmm0, [GLOBAL(fours)] - paddw xmm2, [fours GLOBAL] + paddw xmm2, [GLOBAL(fours)] movdqa xmm6, xmm2 ; a1 movdqa xmm4, xmm0 ; b1 diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm index 0b39e627d..c6c215c3c 100644 --- a/vp8/common/x86/loopfilter_mmx.asm +++ b/vp8/common/x86/loopfilter_mmx.asm @@ -111,7 +111,7 @@ next8_h: psubusb mm3, mm2 ; q1-=p1 psubusb mm2, mm4 ; p1-=q1 por mm2, mm3 ; abs(p1-q1) - pand mm2, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm2, 1 ; abs(p1-q1)/2 movq mm6, mm5 ; p0 @@ -150,12 +150,12 @@ next8_h: ; start work on filters movq mm2, [rsi+2*rax] ; p1 movq mm7, [rdi] ; q1 - pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm2, mm7 ; p1 - q1 pand mm2, mm4 ; high var mask (hvm)(p1 - q1) - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values - pxor mm0, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm0 ; q0 psubsb mm0, mm6 ; q0 - p0 paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1) @@ -163,8 +163,8 @@ next8_h: paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1) pand mm1, mm2 ; mask filter values we don't care about movq mm2, mm1 - paddsb mm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb mm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 + paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 + paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 pxor mm0, mm0 ; pxor mm5, mm5 @@ -185,29 +185,29 @@ next8_h: movq mm5, mm0 ; save results packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw mm5, [ones GLOBAL] - paddsw mm1, [ones GLOBAL] + paddsw mm5, [GLOBAL(ones)] + paddsw mm1, [GLOBAL(ones)] psraw mm5, 1 ; partial shifted one more time for 2nd tap psraw mm1, 1 ; partial shifted one more time for 2nd tap packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 pandn mm4, mm5 ; high edge variance additive paddsb mm6, mm2 ; p0+= p0 add - pxor mm6, [t80 GLOBAL] ; unoffset + pxor mm6, [GLOBAL(t80)] ; unoffset movq [rsi+rax], mm6 ; write back movq mm6, [rsi+2*rax] ; p1 - pxor mm6, [t80 GLOBAL] ; reoffset + pxor mm6, [GLOBAL(t80)] ; reoffset paddsb mm6, mm4 ; p1+= p1 add - pxor mm6, [t80 GLOBAL] ; unoffset + pxor mm6, [GLOBAL(t80)] ; unoffset movq [rsi+2*rax], mm6 ; write back psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [t80 GLOBAL] ; unoffset + pxor mm3, [GLOBAL(t80)] ; unoffset movq [rsi], mm3 ; write back psubsb mm7, mm4 ; q1-= q1 add - pxor mm7, [t80 GLOBAL] ; unoffset + pxor mm7, [GLOBAL(t80)] ; unoffset movq [rdi], mm7 ; write back add rsi,8 @@ -403,7 +403,7 @@ next8_v: psubusb mm5, mm1 ; q1-=p1 psubusb mm1, mm2 ; p1-=q1 por mm5, mm1 ; abs(p1-q1) - pand mm5, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm5, 1 ; abs(p1-q1)/2 mov rdx, arg(2) ;flimit ; @@ -455,14 +455,14 @@ next8_v: movq mm6, [rdx+8] ; p0 movq mm0, [rdx+16] ; q0 - pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm2, mm7 ; p1 - q1 pand mm2, mm4 ; high var mask (hvm)(p1 - q1) - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values - pxor mm0, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm0 ; q0 psubsb mm0, mm6 ; q0 - p0 @@ -474,9 +474,9 @@ next8_v: pand mm1, mm2 ; mask filter values we don't care about movq mm2, mm1 - paddsb mm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 + paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb mm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 + paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 pxor mm0, mm0 ; pxor mm5, mm5 @@ -503,9 +503,9 @@ next8_v: movq mm5, mm0 ; save results packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw mm5, [ones GLOBAL] + paddsw mm5, [GLOBAL(ones)] - paddsw mm1, [ones GLOBAL] + paddsw mm1, [GLOBAL(ones)] psraw mm5, 1 ; partial shifted one more time for 2nd tap psraw mm1, 1 ; partial shifted one more time for 2nd tap @@ -514,22 +514,22 @@ next8_v: pandn mm4, mm5 ; high edge variance additive paddsb mm6, mm2 ; p0+= p0 add - pxor mm6, [t80 GLOBAL] ; unoffset + pxor mm6, [GLOBAL(t80)] ; unoffset ; mm6=p0 ; movq mm1, [rdx] ; p1 - pxor mm1, [t80 GLOBAL] ; reoffset + pxor mm1, [GLOBAL(t80)] ; reoffset paddsb mm1, mm4 ; p1+= p1 add - pxor mm1, [t80 GLOBAL] ; unoffset + pxor mm1, [GLOBAL(t80)] ; unoffset ; mm6 = p0 mm1 = p1 psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [t80 GLOBAL] ; unoffset + pxor mm3, [GLOBAL(t80)] ; unoffset ; mm3 = q0 psubsb mm7, mm4 ; q1-= q1 add - pxor mm7, [t80 GLOBAL] ; unoffset + pxor mm7, [GLOBAL(t80)] ; unoffset ; mm7 = q1 ; tranpose and write back @@ -708,7 +708,7 @@ next8_mbh: psubusb mm3, mm2 ; q1-=p1 psubusb mm2, mm4 ; p1-=q1 por mm2, mm3 ; abs(p1-q1) - pand mm2, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm2, 1 ; abs(p1-q1)/2 movq mm6, mm5 ; p0 @@ -753,12 +753,12 @@ next8_mbh: ; start work on filters movq mm2, [rsi+2*rax] ; p1 movq mm7, [rdi] ; q1 - pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm2, mm7 ; p1 - q1 - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values - pxor mm0, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm0 ; q0 psubsb mm0, mm6 ; q0 - p0 paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1) @@ -772,7 +772,7 @@ next8_mbh: pand mm2, mm4; ; Filter2 = vp8_filter & hev movq mm5, mm2 ; - paddsb mm5, [t3 GLOBAL]; + paddsb mm5, [GLOBAL(t3)]; pxor mm0, mm0 ; 0 pxor mm7, mm7 ; 0 @@ -785,7 +785,7 @@ next8_mbh: movq mm5, mm0 ; Filter2 - paddsb mm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4) + paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) pxor mm0, mm0 ; 0 pxor mm7, mm7 ; 0 @@ -818,10 +818,10 @@ next8_mbh: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s27 GLOBAL] - pmulhw mm2, [s27 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s27)] + pmulhw mm2, [GLOBAL(s27)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 @@ -829,8 +829,8 @@ next8_mbh: psubsb mm3, mm1 paddsb mm6, mm1 - pxor mm3, [t80 GLOBAL] - pxor mm6, [t80 GLOBAL] + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] movq [rsi+rax], mm6 movq [rsi], mm3 @@ -844,10 +844,10 @@ next8_mbh: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s18 GLOBAL] - pmulhw mm2, [s18 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s18)] + pmulhw mm2, [GLOBAL(s18)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 @@ -855,14 +855,14 @@ next8_mbh: movq mm3, [rdi] movq mm6, [rsi+rax*2] ; p1 - pxor mm3, [t80 GLOBAL] - pxor mm6, [t80 GLOBAL] + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] paddsb mm6, mm1 psubsb mm3, mm1 - pxor mm6, [t80 GLOBAL] - pxor mm3, [t80 GLOBAL] + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] movq [rdi], mm3 movq [rsi+rax*2], mm6 @@ -876,10 +876,10 @@ next8_mbh: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s9 GLOBAL] - pmulhw mm2, [s9 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s9)] + pmulhw mm2, [GLOBAL(s9)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 @@ -889,14 +889,14 @@ next8_mbh: neg rax movq mm3, [rdi+rax ] - pxor mm6, [t80 GLOBAL] - pxor mm3, [t80 GLOBAL] + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] paddsb mm6, mm1 psubsb mm3, mm1 - pxor mm6, [t80 GLOBAL] - pxor mm3, [t80 GLOBAL] + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] movq [rdi+rax ], mm3 neg rax movq [rdi+rax*4], mm6 @@ -1105,7 +1105,7 @@ next8_mbv: psubusb mm5, mm1 ; q1-=p1 psubusb mm1, mm2 ; p1-=q1 por mm5, mm1 ; abs(p1-q1) - pand mm5, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm5, 1 ; abs(p1-q1)/2 mov rdx, arg(2) ;flimit ; @@ -1155,14 +1155,14 @@ next8_mbv: ; start work on filters movq mm2, [rdx+16] ; p1 movq mm7, [rdx+40] ; q1 - pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm2, mm7 ; p1 - q1 movq mm6, [rdx+24] ; p0 movq mm0, [rdx+32] ; q0 - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values - pxor mm0, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm0 ; q0 psubsb mm0, mm6 ; q0 - p0 @@ -1176,7 +1176,7 @@ next8_mbv: pand mm2, mm4; ; Filter2 = vp8_filter & hev movq mm5, mm2 ; - paddsb mm5, [t3 GLOBAL]; + paddsb mm5, [GLOBAL(t3)]; pxor mm0, mm0 ; 0 pxor mm7, mm7 ; 0 @@ -1189,7 +1189,7 @@ next8_mbv: movq mm5, mm0 ; Filter2 - paddsb mm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4) + paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) pxor mm0, mm0 ; 0 pxor mm7, mm7 ; 0 @@ -1222,10 +1222,10 @@ next8_mbv: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s27 GLOBAL] - pmulhw mm2, [s27 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s27)] + pmulhw mm2, [GLOBAL(s27)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 @@ -1233,8 +1233,8 @@ next8_mbv: psubsb mm3, mm1 paddsb mm6, mm1 - pxor mm3, [t80 GLOBAL] - pxor mm6, [t80 GLOBAL] + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] movq [rdx+24], mm6 movq [rdx+32], mm3 @@ -1248,24 +1248,24 @@ next8_mbv: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s18 GLOBAL] - pmulhw mm2, [s18 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s18)] + pmulhw mm2, [GLOBAL(s18)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 movq mm3, [rdx + 40] movq mm6, [rdx + 16] ; p1 - pxor mm3, [t80 GLOBAL] - pxor mm6, [t80 GLOBAL] + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] paddsb mm6, mm1 psubsb mm3, mm1 - pxor mm6, [t80 GLOBAL] - pxor mm3, [t80 GLOBAL] + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] movq [rdx + 40], mm3 movq [rdx + 16], mm6 @@ -1279,10 +1279,10 @@ next8_mbv: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s9 GLOBAL] - pmulhw mm2, [s9 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s9)] + pmulhw mm2, [GLOBAL(s9)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 @@ -1290,14 +1290,14 @@ next8_mbv: movq mm6, [rdx+ 8] movq mm3, [rdx+48] - pxor mm6, [t80 GLOBAL] - pxor mm3, [t80 GLOBAL] + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] paddsb mm6, mm1 psubsb mm3, mm1 - pxor mm6, [t80 GLOBAL] ; mm6 = 71 61 51 41 31 21 11 01 - pxor mm3, [t80 GLOBAL] ; mm3 = 76 66 56 46 36 26 15 06 + pxor mm6, [GLOBAL(t80)] ; mm6 = 71 61 51 41 31 21 11 01 + pxor mm3, [GLOBAL(t80)] ; mm3 = 76 66 56 46 36 26 15 06 ; tranpose and write back movq mm0, [rdx] ; mm0 = 70 60 50 40 30 20 10 00 @@ -1432,7 +1432,7 @@ nexts8_h: psubusb mm0, mm1 ; q1-=p1 psubusb mm1, mm4 ; p1-=q1 por mm1, mm0 ; abs(p1-q1) - pand mm1, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm1, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm1, 1 ; abs(p1-q1)/2 movq mm5, [rsi+rax] ; p0 @@ -1450,12 +1450,12 @@ nexts8_h: pcmpeqb mm5, mm3 ; start work on filters - pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm2, mm7 ; p1 - q1 - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values - pxor mm0, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm0 ; q0 psubsb mm0, mm6 ; q0 - p0 paddsb mm2, mm0 ; p1 - q1 + 1 * (q0 - p0) @@ -1464,7 +1464,7 @@ nexts8_h: pand mm5, mm2 ; mask filter values we don't care about ; do + 4 side - paddsb mm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4 + paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 movq mm0, mm5 ; get a copy of filters psllw mm0, 8 ; shift left 8 @@ -1477,12 +1477,12 @@ nexts8_h: por mm0, mm1 ; put the two together to get result psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [t80 GLOBAL] ; unoffset + pxor mm3, [GLOBAL(t80)] ; unoffset movq [rsi], mm3 ; write back ; now do +3 side - psubsb mm5, [t1s GLOBAL] ; +3 instead of +4 + psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 movq mm0, mm5 ; get a copy of filters psllw mm0, 8 ; shift left 8 @@ -1494,7 +1494,7 @@ nexts8_h: paddsb mm6, mm0 ; p0+= p0 add - pxor mm6, [t80 GLOBAL] ; unoffset + pxor mm6, [GLOBAL(t80)] ; unoffset movq [rsi+rax], mm6 ; write back add rsi,8 @@ -1589,7 +1589,7 @@ nexts8_v: psubusb mm7, mm6 ; q1-=p1 psubusb mm6, mm3 ; p1-=q1 por mm6, mm7 ; abs(p1-q1) - pand mm6, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm6, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm6, 1 ; abs(p1-q1)/2 movq mm5, mm1 ; p0 @@ -1617,16 +1617,16 @@ nexts8_v: movq t0, mm0 movq t1, mm3 - pxor mm0, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm3, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm3, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm0, mm3 ; p1 - q1 movq mm6, mm1 ; p0 movq mm7, mm2 ; q0 - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm7 ; offseted ; q0 psubsb mm7, mm6 ; q0 - p0 @@ -1637,7 +1637,7 @@ nexts8_v: pand mm5, mm0 ; mask filter values we don't care about - paddsb mm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4 + paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 movq mm0, mm5 ; get a copy of filters psllw mm0, 8 ; shift left 8 @@ -1651,10 +1651,10 @@ nexts8_v: por mm0, mm7 ; put the two together to get result psubsb mm3, mm0 ; q0-= q0sz add - pxor mm3, [t80 GLOBAL] ; unoffset + pxor mm3, [GLOBAL(t80)] ; unoffset ; now do +3 side - psubsb mm5, [t1s GLOBAL] ; +3 instead of +4 + psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 movq mm0, mm5 ; get a copy of filters psllw mm0, 8 ; shift left 8 @@ -1666,7 +1666,7 @@ nexts8_v: por mm0, mm5 ; put the two together to get result paddsb mm6, mm0 ; p0+= p0 add - pxor mm6, [t80 GLOBAL] ; unoffset + pxor mm6, [GLOBAL(t80)] ; unoffset movq mm0, t0 diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm index dc8167d4d..1ab6d0f39 100644 --- a/vp8/common/x86/loopfilter_sse2.asm +++ b/vp8/common/x86/loopfilter_sse2.asm @@ -126,7 +126,7 @@ movdqa xmm4, XMMWORD PTR [rdx] ; flimit movdqa xmm3, xmm0 ; q0 - pand xmm2, [tfe GLOBAL] ; set lsb of each byte to zero + pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero mov rdx, arg(4) ; hev get thresh @@ -182,14 +182,14 @@ movdqa xmm0, [rdx+32] ; q0 %endif - pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb xmm2, xmm7 ; p1 - q1 - pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values + pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1) - pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values + pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values movdqa xmm3, xmm0 ; q0 psubsb xmm0, xmm6 ; q0 - p0 @@ -204,8 +204,8 @@ movdqa xmm2, xmm1 - paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 + paddsb xmm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 + paddsb xmm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 punpckhbw xmm5, xmm2 ; axbxcxdx punpcklbw xmm2, xmm2 ; exfxgxhx @@ -223,9 +223,9 @@ movdqa xmm5, xmm0 ; save results packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw xmm5, [ones GLOBAL] + paddsw xmm5, [GLOBAL(ones)] - paddsw xmm1, [ones GLOBAL] + paddsw xmm1, [GLOBAL(ones)] psraw xmm5, 1 ; partial shifted one more time for 2nd tap psraw xmm1, 1 ; partial shifted one more time for 2nd tap @@ -241,18 +241,18 @@ movdqa xmm1, [rdx] ; p1 %endif pandn xmm4, xmm5 ; high edge variance additive - pxor xmm6, [t80 GLOBAL] ; unoffset + pxor xmm6, [GLOBAL(t80)] ; unoffset - pxor xmm1, [t80 GLOBAL] ; reoffset + pxor xmm1, [GLOBAL(t80)] ; reoffset psubsb xmm3, xmm0 ; q0-= q0 add paddsb xmm1, xmm4 ; p1+= p1 add - pxor xmm3, [t80 GLOBAL] ; unoffset + pxor xmm3, [GLOBAL(t80)] ; unoffset - pxor xmm1, [t80 GLOBAL] ; unoffset + pxor xmm1, [GLOBAL(t80)] ; unoffset psubsb xmm7, xmm4 ; q1-= q1 add - pxor xmm7, [t80 GLOBAL] ; unoffset + pxor xmm7, [GLOBAL(t80)] ; unoffset %if %1 == 0 lea rsi, [rsi + rcx*2] lea rdi, [rdi + rcx*2] @@ -401,10 +401,10 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): movdqa xmm0, [rdx+64] ; q0 %endif - pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values - pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values - pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values + pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values + pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values psubsb xmm2, xmm7 ; p1 - q1 movdqa xmm3, xmm0 ; q0 @@ -431,14 +431,14 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): movdqa xmm5, xmm2 punpckhbw xmm1, xmm4 ; Filter 2 (lo) - paddsb xmm5, [t3 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 3) + paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3) - pmulhw xmm1, [s9 GLOBAL] ; Filter 2 (lo) * 9 + pmulhw xmm1, [GLOBAL(s9)] ; Filter 2 (lo) * 9 - pmulhw xmm0, [s9 GLOBAL] ; Filter 2 (hi) * 9 + pmulhw xmm0, [GLOBAL(s9)] ; Filter 2 (hi) * 9 punpckhbw xmm7, xmm5 ; axbxcxdx - paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4) + paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) punpcklbw xmm5, xmm5 ; exfxgxhx psraw xmm7, 11 ; sign extended shift right by 3 @@ -462,9 +462,9 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): movdqa xmm5, xmm0 movdqa xmm2, xmm5 - paddw xmm0, [s63 GLOBAL] ; Filter 2 (hi) * 9 + 63 + paddw xmm0, [GLOBAL(s63)] ; Filter 2 (hi) * 9 + 63 - paddw xmm1, [s63 GLOBAL] ; Filter 2 (lo) * 9 + 63 + paddw xmm1, [GLOBAL(s63)] ; Filter 2 (lo) * 9 + 63 paddw xmm5, xmm5 ; Filter 2 (hi) * 18 paddw xmm7, xmm7 ; Filter 2 (lo) * 18 @@ -510,26 +510,26 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): movdqa xmm7, XMMWORD PTR [rdx+16] ; p2 %endif - pxor xmm3, [t80 GLOBAL] ; *oq0 = sq^0x80 - pxor xmm6, [t80 GLOBAL] ; *oq0 = sp^0x80 + pxor xmm3, [GLOBAL(t80)] ; *oq0 = sq^0x80 + pxor xmm6, [GLOBAL(t80)] ; *oq0 = sp^0x80 - pxor xmm1, [t80 GLOBAL] - pxor xmm4, [t80 GLOBAL] + pxor xmm1, [GLOBAL(t80)] + pxor xmm4, [GLOBAL(t80)] psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2) paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 - u2) - pxor xmm1, [t80 GLOBAL] ; *oq1 = sq^0x80; - pxor xmm4, [t80 GLOBAL] ; *op1 = sp^0x80; + pxor xmm1, [GLOBAL(t80)] ; *oq1 = sq^0x80; + pxor xmm4, [GLOBAL(t80)] ; *op1 = sp^0x80; - pxor xmm7, [t80 GLOBAL] - pxor xmm5, [t80 GLOBAL] + pxor xmm7, [GLOBAL(t80)] + pxor xmm5, [GLOBAL(t80)] paddsb xmm7, xmm0 ; sp = vp8_signed_char_clamp(ps2 - u) psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u) - pxor xmm7, [t80 GLOBAL] ; *op2 = sp^0x80; - pxor xmm5, [t80 GLOBAL] ; *oq2 = sq^0x80; + pxor xmm7, [GLOBAL(t80)] ; *op2 = sp^0x80; + pxor xmm5, [GLOBAL(t80)] ; *oq2 = sq^0x80; %if %1 == 0 lea rsi, [rsi+rcx*2] @@ -915,7 +915,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): por xmm5, xmm1 ; abs(p1-q1) movdqa xmm1, xmm3 ; p0 - pand xmm5, [tfe GLOBAL] ; set lsb of each byte to zero + pand xmm5, [GLOBAL(tfe)] ; set lsb of each byte to zero psubusb xmm1, xmm6 ; p0-q0 psrlw xmm5, 1 ; abs(p1-q1)/2 @@ -1415,7 +1415,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): psubusb xmm0, xmm1 ; q1-=p1 psubusb xmm1, xmm4 ; p1-=q1 por xmm1, xmm0 ; abs(p1-q1) - pand xmm1, [tfe GLOBAL] ; set lsb of each byte to zero + pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw xmm1, 1 ; abs(p1-q1)/2 movdqu xmm5, [rsi+rax] ; p0 @@ -1433,12 +1433,12 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): pcmpeqb xmm5, xmm3 ; start work on filters - pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb xmm2, xmm7 ; p1 - q1 - pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values - pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values + pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values movdqa xmm3, xmm0 ; q0 psubsb xmm0, xmm6 ; q0 - p0 paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0) @@ -1447,7 +1447,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): pand xmm5, xmm2 ; mask filter values we don't care about ; do + 4 side - paddsb xmm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4 + paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 movdqa xmm0, xmm5 ; get a copy of filters psllw xmm0, 8 ; shift left 8 @@ -1460,11 +1460,11 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): por xmm0, xmm1 ; put the two together to get result psubsb xmm3, xmm0 ; q0-= q0 add - pxor xmm3, [t80 GLOBAL] ; unoffset + pxor xmm3, [GLOBAL(t80)] ; unoffset movdqu [rsi], xmm3 ; write back ; now do +3 side - psubsb xmm5, [t1s GLOBAL] ; +3 instead of +4 + psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 movdqa xmm0, xmm5 ; get a copy of filters psllw xmm0, 8 ; shift left 8 @@ -1476,7 +1476,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): paddsb xmm6, xmm0 ; p0+= p0 add - pxor xmm6, [t80 GLOBAL] ; unoffset + pxor xmm6, [GLOBAL(t80)] ; unoffset movdqu [rsi+rax], xmm6 ; write back ; begin epilog @@ -1596,7 +1596,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): psubusb xmm7, xmm0 ; q1-=p1 psubusb xmm6, xmm3 ; p1-=q1 por xmm6, xmm7 ; abs(p1-q1) - pand xmm6, [tfe GLOBAL] ; set lsb of each byte to zero + pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw xmm6, 1 ; abs(p1-q1)/2 movdqa xmm5, xmm1 ; p0 @@ -1622,16 +1622,16 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): movdqa t0, xmm0 movdqa t1, xmm3 - pxor xmm0, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor xmm3, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb xmm0, xmm3 ; p1 - q1 movdqa xmm6, xmm1 ; p0 movdqa xmm7, xmm2 ; q0 - pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values + pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor xmm7, [t80 GLOBAL] ; offset to convert to signed values + pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values movdqa xmm3, xmm7 ; offseted ; q0 psubsb xmm7, xmm6 ; q0 - p0 @@ -1643,7 +1643,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): pand xmm5, xmm0 ; mask filter values we don't care about - paddsb xmm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4 + paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 movdqa xmm0, xmm5 ; get a copy of filters psllw xmm0, 8 ; shift left 8 @@ -1658,10 +1658,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): por xmm0, xmm7 ; put the two together to get result psubsb xmm3, xmm0 ; q0-= q0sz add - pxor xmm3, [t80 GLOBAL] ; unoffset q0 + pxor xmm3, [GLOBAL(t80)] ; unoffset q0 ; now do +3 side - psubsb xmm5, [t1s GLOBAL] ; +3 instead of +4 + psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 movdqa xmm0, xmm5 ; get a copy of filters psllw xmm0, 8 ; shift left 8 @@ -1674,7 +1674,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): por xmm0, xmm5 ; put the two together to get result paddsb xmm6, xmm0 ; p0+= p0 add - pxor xmm6, [t80 GLOBAL] ; unoffset p0 + pxor xmm6, [GLOBAL(t80)] ; unoffset p0 movdqa xmm0, t0 ; p1 movdqa xmm4, t1 ; q1 diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm index 349ac0d3b..787e83268 100644 --- a/vp8/common/x86/postproc_mmx.asm +++ b/vp8/common/x86/postproc_mmx.asm @@ -37,16 +37,16 @@ sym(vp8_post_proc_down_and_across_mmx): %if ABI_IS_32BIT=1 && CONFIG_PIC=1 ; move the global rd onto the stack, since we don't have enough registers ; to do PIC addressing - movq mm0, [rd GLOBAL] + movq mm0, [GLOBAL(rd)] sub rsp, 8 movq [rsp], mm0 %define RD [rsp] %else -%define RD [rd GLOBAL] +%define RD [GLOBAL(rd)] %endif push rbx - lea rbx, [Blur GLOBAL] + lea rbx, [GLOBAL(Blur)] movd mm2, dword ptr arg(6) ;flimit punpcklwd mm2, mm2 punpckldq mm2, mm2 @@ -286,7 +286,7 @@ sym(vp8_mbpost_proc_down_mmx): %define flimit2 [rsp+128] %if ABI_IS_32BIT=0 - lea r8, [sym(vp8_rv) GLOBAL] + lea r8, [GLOBAL(sym(vp8_rv))] %endif ;rows +=8; @@ -404,7 +404,7 @@ loop_row: and rcx, 127 %if ABI_IS_32BIT=1 && CONFIG_PIC=1 push rax - lea rax, [sym(vp8_rv) GLOBAL] + lea rax, [GLOBAL(sym(vp8_rv))] movq mm4, [rax + rcx*2] ;vp8_rv[rcx*2] pop rax %elif ABI_IS_32BIT=0 diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm index 276f208ff..30b4bf53a 100644 --- a/vp8/common/x86/postproc_sse2.asm +++ b/vp8/common/x86/postproc_sse2.asm @@ -36,12 +36,12 @@ sym(vp8_post_proc_down_and_across_xmm): ALIGN_STACK 16, rax ; move the global rd onto the stack, since we don't have enough registers ; to do PIC addressing - movdqa xmm0, [rd42 GLOBAL] + movdqa xmm0, [GLOBAL(rd42)] sub rsp, 16 movdqa [rsp], xmm0 %define RD42 [rsp] %else -%define RD42 [rd42 GLOBAL] +%define RD42 [GLOBAL(rd42)] %endif @@ -275,7 +275,7 @@ sym(vp8_mbpost_proc_down_xmm): %define flimit4 [rsp+128] %if ABI_IS_32BIT=0 - lea r8, [sym(vp8_rv) GLOBAL] + lea r8, [GLOBAL(sym(vp8_rv))] %endif ;rows +=8; @@ -393,7 +393,7 @@ loop_row: and rcx, 127 %if ABI_IS_32BIT=1 && CONFIG_PIC=1 push rax - lea rax, [sym(vp8_rv) GLOBAL] + lea rax, [GLOBAL(sym(vp8_rv))] movdqu xmm4, [rax + rcx*2] ;vp8_rv[rcx*2] pop rax %elif ABI_IS_32BIT=0 @@ -579,7 +579,7 @@ nextcol4: punpcklwd xmm1, xmm0 paddd xmm1, xmm6 - paddd xmm1, [four8s GLOBAL] + paddd xmm1, [GLOBAL(four8s)] psrad xmm1, 4 packssdw xmm1, xmm0 diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm index 06db0c6a0..23ed4e208 100644 --- a/vp8/common/x86/subpixel_mmx.asm +++ b/vp8/common/x86/subpixel_mmx.asm @@ -84,7 +84,7 @@ nextrow: pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers paddsw mm3, mm5 ; mm3 += mm5 - paddsw mm3, [rd GLOBAL] ; mm3 += round value + paddsw mm3, [GLOBAL(rd)] ; mm3 += round value psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 packuswb mm3, mm0 ; pack and unpack to saturate punpcklbw mm3, mm0 ; @@ -136,7 +136,7 @@ sym(vp8_filter_block1d_v6_mmx): push rdi ; end prolog - movq mm5, [rd GLOBAL] + movq mm5, [GLOBAL(rd)] push rbx mov rbx, arg(6) ;vp8_filter movq mm1, [rbx + 16] ; do both the negative taps first!!! @@ -225,7 +225,7 @@ sym(vp8_filter_block1dc_v6_mmx): push rdi ; end prolog - movq mm5, [rd GLOBAL] + movq mm5, [GLOBAL(rd)] push rbx mov rbx, arg(7) ;vp8_filter movq mm1, [rbx + 16] ; do both the negative taps first!!! @@ -320,7 +320,7 @@ sym(vp8_bilinear_predict8x8_mmx): mov rdi, arg(4) ;dst_ptr ; shl rax, 5 ; offset * 32 - lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] add rax, rcx ; HFilter mov rsi, arg(0) ;src_ptr ; @@ -363,10 +363,10 @@ sym(vp8_bilinear_predict8x8_mmx): paddw mm3, mm5 ; paddw mm4, mm6 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; movq mm7, mm3 ; @@ -404,10 +404,10 @@ next_row_8x8: pmullw mm5, [rax] ; pmullw mm6, [rax] ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; movq mm7, mm3 ; @@ -421,10 +421,10 @@ next_row_8x8: paddw mm4, mm6 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; packuswb mm3, mm4 @@ -476,7 +476,7 @@ sym(vp8_bilinear_predict8x4_mmx): movsxd rax, dword ptr arg(2) ;xoffset mov rdi, arg(4) ;dst_ptr ; - lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] shl rax, 5 mov rsi, arg(0) ;src_ptr ; @@ -518,10 +518,10 @@ sym(vp8_bilinear_predict8x4_mmx): paddw mm3, mm5 ; paddw mm4, mm6 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; movq mm7, mm3 ; @@ -559,10 +559,10 @@ next_row_8x4: pmullw mm5, [rax] ; pmullw mm6, [rax] ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; movq mm7, mm3 ; @@ -576,10 +576,10 @@ next_row_8x4: paddw mm4, mm6 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; packuswb mm3, mm4 @@ -631,7 +631,7 @@ sym(vp8_bilinear_predict4x4_mmx): movsxd rax, dword ptr arg(2) ;xoffset mov rdi, arg(4) ;dst_ptr ; - lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] shl rax, 5 add rax, rcx ; HFilter @@ -662,7 +662,7 @@ sym(vp8_bilinear_predict4x4_mmx): pmullw mm5, mm2 ; paddw mm3, mm5 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 @@ -686,7 +686,7 @@ next_row_4x4: punpcklbw mm5, mm0 ; pmullw mm5, [rax] ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 movq mm7, mm3 ; @@ -697,7 +697,7 @@ next_row_4x4: paddw mm3, mm5 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 packuswb mm3, mm0 diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm index 2385abfd0..b87cad259 100644 --- a/vp8/common/x86/subpixel_sse2.asm +++ b/vp8/common/x86/subpixel_sse2.asm @@ -107,7 +107,7 @@ filter_block1d8_h6_rowloop: paddsw xmm4, xmm6 paddsw xmm4, xmm1 - paddsw xmm4, [rd GLOBAL] + paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 @@ -231,7 +231,7 @@ filter_block1d16_h6_sse2_rowloop: paddsw xmm4, xmm6 paddsw xmm4, xmm1 - paddsw xmm4, [rd GLOBAL] + paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 @@ -284,7 +284,7 @@ filter_block1d16_h6_sse2_rowloop: paddsw xmm4, xmm6 paddsw xmm4, xmm2 - paddsw xmm4, [rd GLOBAL] + paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 @@ -351,7 +351,7 @@ sym(vp8_filter_block1d8_v6_sse2): movsxd rcx, DWORD PTR arg(5) ;[output_height] pxor xmm0, xmm0 ; clear xmm0 - movdqa xmm7, XMMWORD PTR [rd GLOBAL] + movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(2) ; dst_ptich %endif @@ -489,7 +489,7 @@ vp8_filter_block1d16_v6_sse2_loop: pmullw xmm5, [rax + 80] pmullw xmm6, [rax + 80] - movdqa xmm7, XMMWORD PTR [rd GLOBAL] + movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] pxor xmm0, xmm0 ; clear xmm0 paddsw xmm1, xmm3 @@ -608,7 +608,7 @@ filter_block1d8_h6_only_rowloop: paddsw xmm4, xmm6 paddsw xmm4, xmm1 - paddsw xmm4, [rd GLOBAL] + paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 @@ -723,7 +723,7 @@ filter_block1d16_h6_only_sse2_rowloop: paddsw xmm4, xmm6 paddsw xmm4, xmm1 - paddsw xmm4, [rd GLOBAL] + paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 @@ -773,7 +773,7 @@ filter_block1d16_h6_only_sse2_rowloop: paddsw xmm4, xmm6 paddsw xmm4, xmm2 - paddsw xmm4, [rd GLOBAL] + paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 @@ -832,7 +832,7 @@ sym(vp8_filter_block1d8_v6_only_sse2): pxor xmm0, xmm0 ; clear xmm0 - movdqa xmm7, XMMWORD PTR [rd GLOBAL] + movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(3) ; dst_ptich %endif @@ -978,7 +978,7 @@ sym(vp8_bilinear_predict16x16_sse2): ;const short *HFilter = bilinear_filters_mmx[xoffset] ;const short *VFilter = bilinear_filters_mmx[yoffset] - lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] movsxd rax, dword ptr arg(2) ;xoffset cmp rax, 0 ;skip first_pass filter if xoffset=0 @@ -1033,10 +1033,10 @@ sym(vp8_bilinear_predict16x16_sse2): paddw xmm3, xmm5 paddw xmm4, xmm6 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT movdqa xmm7, xmm3 @@ -1074,10 +1074,10 @@ next_row: pmullw xmm5, [rax] pmullw xmm6, [rax] - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT movdqa xmm7, xmm3 @@ -1089,10 +1089,10 @@ next_row: paddw xmm3, xmm5 paddw xmm4, xmm6 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT packuswb xmm3, xmm4 @@ -1154,10 +1154,10 @@ next_row_spo: paddw xmm3, xmm5 paddw xmm4, xmm6 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT packuswb xmm3, xmm4 @@ -1198,10 +1198,10 @@ next_row_fpo: paddw xmm3, xmm5 paddw xmm4, xmm6 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT packuswb xmm3, xmm4 @@ -1249,7 +1249,7 @@ sym(vp8_bilinear_predict8x8_sse2): ;const short *HFilter = bilinear_filters_mmx[xoffset] ;const short *VFilter = bilinear_filters_mmx[yoffset] - lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] mov rsi, arg(0) ;src_ptr movsxd rdx, dword ptr arg(1) ;src_pixels_per_line @@ -1315,7 +1315,7 @@ sym(vp8_bilinear_predict8x8_sse2): paddw xmm3, xmm4 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 movdqa xmm7, xmm3 @@ -1334,7 +1334,7 @@ next_row8x8: paddw xmm3, xmm4 pmullw xmm7, xmm5 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 movdqa xmm4, xmm3 @@ -1344,7 +1344,7 @@ next_row8x8: movdqa xmm7, xmm4 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 packuswb xmm3, xmm0 diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm index fe921c019..7f6fd93e4 100644 --- a/vp8/common/x86/subpixel_ssse3.asm +++ b/vp8/common/x86/subpixel_ssse3.asm @@ -48,9 +48,9 @@ sym(vp8_filter_block1d8_h6_ssse3): xor rsi, rsi shl rdx, 4 - movdqa xmm7, [rd GLOBAL] + movdqa xmm7, [GLOBAL(rd)] - lea rax, [k0_k5 GLOBAL] + lea rax, [GLOBAL(k0_k5)] add rax, rdx mov rdi, arg(2) ;output_ptr @@ -80,9 +80,9 @@ filter_block1d8_h6_rowloop_ssse3: pmaddubsw xmm0, xmm4 movdqa xmm2, xmm1 - pshufb xmm1, [shuf2bfrom1 GLOBAL] + pshufb xmm1, [GLOBAL(shuf2bfrom1)] - pshufb xmm2, [shuf3bfrom1 GLOBAL] + pshufb xmm2, [GLOBAL(shuf3bfrom1)] pmaddubsw xmm1, xmm5 lea rdi, [rdi + rdx] @@ -115,8 +115,8 @@ vp8_filter_block1d8_h4_ssse3: movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - movdqa xmm3, XMMWORD PTR [shuf2bfrom1 GLOBAL] - movdqa xmm4, XMMWORD PTR [shuf3bfrom1 GLOBAL] + movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)] + movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)] mov rsi, arg(0) ;src_ptr @@ -189,7 +189,7 @@ sym(vp8_filter_block1d16_h6_ssse3): xor rsi, rsi shl rdx, 4 ; - lea rax, [k0_k5 GLOBAL] + lea rax, [GLOBAL(k0_k5)] add rax, rdx mov rdi, arg(2) ;output_ptr @@ -219,9 +219,9 @@ filter_block1d16_h6_rowloop_ssse3: pmaddubsw xmm0, xmm4 movdqa xmm2, xmm1 - pshufb xmm1, [shuf2bfrom1 GLOBAL] + pshufb xmm1, [GLOBAL(shuf2bfrom1)] - pshufb xmm2, [shuf3bfrom1 GLOBAL] + pshufb xmm2, [GLOBAL(shuf3bfrom1)] movq xmm3, MMWORD PTR [rsi + 6] pmaddubsw xmm1, xmm5 @@ -237,10 +237,10 @@ filter_block1d16_h6_rowloop_ssse3: paddsw xmm0, xmm2 movdqa xmm2, xmm1 - paddsw xmm0, [rd GLOBAL] + paddsw xmm0, [GLOBAL(rd)] - pshufb xmm1, [shuf2bfrom1 GLOBAL] - pshufb xmm2, [shuf3bfrom1 GLOBAL] + pshufb xmm1, [GLOBAL(shuf2bfrom1)] + pshufb xmm2, [GLOBAL(shuf3bfrom1)] psraw xmm0, 7 pmaddubsw xmm1, xmm5 @@ -253,7 +253,7 @@ filter_block1d16_h6_rowloop_ssse3: paddsw xmm3, xmm2 - paddsw xmm3, [rd GLOBAL] + paddsw xmm3, [GLOBAL(rd)] psraw xmm3, 7 @@ -288,18 +288,18 @@ filter_block1d16_h4_rowloop_ssse3: movdqu xmm1, XMMWORD PTR [rsi - 2] movdqa xmm2, xmm1 - pshufb xmm1, [shuf2b GLOBAL] - pshufb xmm2, [shuf3b GLOBAL] + pshufb xmm1, [GLOBAL(shuf2b)] + pshufb xmm2, [GLOBAL(shuf3b)] pmaddubsw xmm1, xmm5 movdqu xmm3, XMMWORD PTR [rsi + 6] pmaddubsw xmm2, xmm6 movdqa xmm0, xmm3 - pshufb xmm3, [shuf3b GLOBAL] - pshufb xmm0, [shuf2b GLOBAL] + pshufb xmm3, [GLOBAL(shuf3b)] + pshufb xmm0, [GLOBAL(shuf2b)] - paddsw xmm1, [rd GLOBAL] + paddsw xmm1, [GLOBAL(rd)] paddsw xmm1, xmm2 pmaddubsw xmm0, xmm5 @@ -309,7 +309,7 @@ filter_block1d16_h4_rowloop_ssse3: packuswb xmm1, xmm1 lea rsi, [rsi + rax] paddsw xmm3, xmm0 - paddsw xmm3, [rd GLOBAL] + paddsw xmm3, [GLOBAL(rd)] psraw xmm3, 7 packuswb xmm3, xmm3 @@ -353,9 +353,9 @@ sym(vp8_filter_block1d4_h6_ssse3): xor rsi, rsi shl rdx, 4 ; - lea rax, [k0_k5 GLOBAL] + lea rax, [GLOBAL(k0_k5)] add rax, rdx - movdqa xmm7, [rd GLOBAL] + movdqa xmm7, [GLOBAL(rd)] cmp esi, DWORD PTR [rax] je vp8_filter_block1d4_h4_ssse3 @@ -376,12 +376,12 @@ filter_block1d4_h6_rowloop_ssse3: movdqu xmm0, XMMWORD PTR [rsi - 2] movdqa xmm1, xmm0 - pshufb xmm0, [shuf1b GLOBAL] + pshufb xmm0, [GLOBAL(shuf1b)] movdqa xmm2, xmm1 - pshufb xmm1, [shuf2b GLOBAL] + pshufb xmm1, [GLOBAL(shuf2b)] pmaddubsw xmm0, xmm4 - pshufb xmm2, [shuf3b GLOBAL] + pshufb xmm2, [GLOBAL(shuf3b)] pmaddubsw xmm1, xmm5 ;-- @@ -413,8 +413,8 @@ filter_block1d4_h6_rowloop_ssse3: vp8_filter_block1d4_h4_ssse3: movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - movdqa xmm0, XMMWORD PTR [shuf2b GLOBAL] - movdqa xmm3, XMMWORD PTR [shuf3b GLOBAL] + movdqa xmm0, XMMWORD PTR [GLOBAL(shuf2b)] + movdqa xmm3, XMMWORD PTR [GLOBAL(shuf3b)] mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr @@ -427,8 +427,8 @@ filter_block1d4_h4_rowloop_ssse3: movdqu xmm1, XMMWORD PTR [rsi - 2] movdqa xmm2, xmm1 - pshufb xmm1, xmm0 ;;[shuf2b GLOBAL] - pshufb xmm2, xmm3 ;;[shuf3b GLOBAL] + pshufb xmm1, xmm0 ;;[GLOBAL(shuf2b)] + pshufb xmm2, xmm3 ;;[GLOBAL(shuf3b)] pmaddubsw xmm1, xmm5 ;-- @@ -480,7 +480,7 @@ sym(vp8_filter_block1d16_v6_ssse3): xor rsi, rsi shl rdx, 4 ; - lea rax, [k0_k5 GLOBAL] + lea rax, [GLOBAL(k0_k5)] add rax, rdx cmp esi, DWORD PTR [rax] @@ -521,7 +521,7 @@ vp8_filter_block1d16_v6_ssse3_loop: paddsw xmm2, xmm3 paddsw xmm2, xmm1 - paddsw xmm2, [rd GLOBAL] + paddsw xmm2, [GLOBAL(rd)] psraw xmm2, 7 packuswb xmm2, xmm2 @@ -548,7 +548,7 @@ vp8_filter_block1d16_v6_ssse3_loop: ;-- paddsw xmm2, xmm3 paddsw xmm2, xmm1 - paddsw xmm2, [rd GLOBAL] + paddsw xmm2, [GLOBAL(rd)] psraw xmm2, 7 packuswb xmm2, xmm2 @@ -601,7 +601,7 @@ vp8_filter_block1d16_v4_ssse3_loop: movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E - paddsw xmm2, [rd GLOBAL] + paddsw xmm2, [GLOBAL(rd)] paddsw xmm2, xmm3 psraw xmm2, 7 packuswb xmm2, xmm2 @@ -612,7 +612,7 @@ vp8_filter_block1d16_v4_ssse3_loop: pmaddubsw xmm1, xmm6 pmaddubsw xmm5, xmm7 - movdqa xmm4, [rd GLOBAL] + movdqa xmm4, [GLOBAL(rd)] add rsi, rdx add rax, rdx ;-- @@ -665,7 +665,7 @@ sym(vp8_filter_block1d8_v6_ssse3): xor rsi, rsi shl rdx, 4 ; - lea rax, [k0_k5 GLOBAL] + lea rax, [GLOBAL(k0_k5)] add rax, rdx movsxd rdx, DWORD PTR arg(1) ;pixels_per_line @@ -698,7 +698,7 @@ vp8_filter_block1d8_v6_ssse3_loop: punpcklbw xmm3, xmm0 ;C E movq xmm0, MMWORD PTR [rax + rdx * 4] ;F - movdqa xmm4, [rd GLOBAL] + movdqa xmm4, [GLOBAL(rd)] pmaddubsw xmm3, xmm6 punpcklbw xmm1, xmm0 ;A F @@ -735,7 +735,7 @@ vp8_filter_block1d8_v6_ssse3_loop: vp8_filter_block1d8_v4_ssse3: movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 - movdqa xmm5, [rd GLOBAL] + movdqa xmm5, [GLOBAL(rd)] mov rsi, arg(0) ;src_ptr @@ -802,7 +802,7 @@ sym(vp8_filter_block1d4_v6_ssse3): xor rsi, rsi shl rdx, 4 ; - lea rax, [k0_k5 GLOBAL] + lea rax, [GLOBAL(k0_k5)] add rax, rdx movsxd rdx, DWORD PTR arg(1) ;pixels_per_line @@ -836,7 +836,7 @@ vp8_filter_block1d4_v6_ssse3_loop: movd mm0, DWORD PTR [rax + rdx * 4] ;F - movq mm4, [rd GLOBAL] + movq mm4, [GLOBAL(rd)] pmaddubsw mm3, mm6 punpcklbw mm1, mm0 ;A F @@ -873,7 +873,7 @@ vp8_filter_block1d4_v6_ssse3_loop: vp8_filter_block1d4_v4_ssse3: movq mm6, MMWORD PTR [rax+256] ;k2_k4 movq mm7, MMWORD PTR [rax+128] ;k1_k3 - movq mm5, MMWORD PTR [rd GLOBAL] + movq mm5, MMWORD PTR [GLOBAL(rd)] mov rsi, arg(0) ;src_ptr @@ -938,7 +938,7 @@ sym(vp8_bilinear_predict16x16_ssse3): push rdi ; end prolog - lea rcx, [vp8_bilinear_filters_ssse3 GLOBAL] + lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] movsxd rax, dword ptr arg(2) ; xoffset cmp rax, 0 ; skip first_pass filter if xoffset=0 @@ -985,10 +985,10 @@ sym(vp8_bilinear_predict16x16_ssse3): punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16 pmaddubsw xmm4, xmm1 ; 01 03 05 07 09 11 13 15 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] ; xmm4 += round value + paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 movdqa xmm7, xmm3 @@ -1009,10 +1009,10 @@ sym(vp8_bilinear_predict16x16_ssse3): punpcklbw xmm4, xmm5 pmaddubsw xmm4, xmm1 - paddw xmm6, [rd GLOBAL] ; xmm6 += round value + paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 - paddw xmm4, [rd GLOBAL] ; xmm4 += round value + paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 packuswb xmm6, xmm4 @@ -1024,10 +1024,10 @@ sym(vp8_bilinear_predict16x16_ssse3): punpckhbw xmm7, xmm6 pmaddubsw xmm7, xmm2 - paddw xmm5, [rd GLOBAL] ; xmm5 += round value + paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value psraw xmm5, VP8_FILTER_SHIFT ; xmm5 /= 128 - paddw xmm7, [rd GLOBAL] ; xmm7 += round value + paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 packuswb xmm5, xmm7 @@ -1082,19 +1082,19 @@ b16x16_sp_only: punpcklbw xmm5, xmm6 pmaddubsw xmm3, xmm1 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] pmaddubsw xmm5, xmm1 - paddw xmm2, [rd GLOBAL] + paddw xmm2, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT psraw xmm2, VP8_FILTER_SHIFT packuswb xmm4, xmm2 - paddw xmm3, [rd GLOBAL] + paddw xmm3, [GLOBAL(rd)] movdqa [rdi], xmm4 ; store row 0 - paddw xmm5, [rd GLOBAL] + paddw xmm5, [GLOBAL(rd)] psraw xmm3, VP8_FILTER_SHIFT psraw xmm5, VP8_FILTER_SHIFT @@ -1134,7 +1134,7 @@ b16x16_fp_only: pmaddubsw xmm3, xmm1 movq xmm5, [rsi] - paddw xmm2, [rd GLOBAL] + paddw xmm2, [GLOBAL(rd)] movq xmm7, [rsi+1] movq xmm6, [rsi+8] @@ -1143,7 +1143,7 @@ b16x16_fp_only: punpcklbw xmm5, xmm7 movq xmm7, [rsi+9] - paddw xmm3, [rd GLOBAL] + paddw xmm3, [GLOBAL(rd)] pmaddubsw xmm5, xmm1 psraw xmm3, VP8_FILTER_SHIFT @@ -1153,12 +1153,12 @@ b16x16_fp_only: pmaddubsw xmm6, xmm1 movdqa [rdi], xmm2 ; store the results in the destination - paddw xmm5, [rd GLOBAL] + paddw xmm5, [GLOBAL(rd)] lea rdi, [rdi + rdx] ; dst_pitch psraw xmm5, VP8_FILTER_SHIFT - paddw xmm6, [rd GLOBAL] + paddw xmm6, [GLOBAL(rd)] psraw xmm6, VP8_FILTER_SHIFT packuswb xmm5, xmm6 @@ -1204,7 +1204,7 @@ sym(vp8_bilinear_predict8x8_ssse3): ALIGN_STACK 16, rax sub rsp, 144 ; reserve 144 bytes - lea rcx, [vp8_bilinear_filters_ssse3 GLOBAL] + lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] mov rsi, arg(0) ;src_ptr movsxd rdx, dword ptr arg(1) ;src_pixels_per_line @@ -1269,7 +1269,7 @@ sym(vp8_bilinear_predict8x8_ssse3): punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 pmaddubsw xmm3, xmm0 ; 00 02 04 06 08 10 12 14 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 movdqa xmm7, xmm3 @@ -1286,7 +1286,7 @@ sym(vp8_bilinear_predict8x8_ssse3): punpcklbw xmm6, xmm5 pmaddubsw xmm6, xmm0 - paddw xmm6, [rd GLOBAL] ; xmm6 += round value + paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 packuswb xmm6, xmm6 @@ -1294,7 +1294,7 @@ sym(vp8_bilinear_predict8x8_ssse3): punpcklbw xmm7, xmm6 pmaddubsw xmm7, xmm1 - paddw xmm7, [rd GLOBAL] ; xmm7 += round value + paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 packuswb xmm7, xmm7 @@ -1347,21 +1347,21 @@ b8x8_sp_only: punpcklbw xmm6, xmm7 pmaddubsw xmm6, xmm0 - paddw xmm1, [rd GLOBAL] + paddw xmm1, [GLOBAL(rd)] - paddw xmm2, [rd GLOBAL] + paddw xmm2, [GLOBAL(rd)] psraw xmm1, VP8_FILTER_SHIFT - paddw xmm3, [rd GLOBAL] + paddw xmm3, [GLOBAL(rd)] psraw xmm2, VP8_FILTER_SHIFT - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm3, VP8_FILTER_SHIFT - paddw xmm5, [rd GLOBAL] + paddw xmm5, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT - paddw xmm6, [rd GLOBAL] + paddw xmm6, [GLOBAL(rd)] psraw xmm5, VP8_FILTER_SHIFT psraw xmm6, VP8_FILTER_SHIFT @@ -1395,10 +1395,10 @@ b8x8_sp_only: punpcklbw xmm1, xmm2 pmaddubsw xmm1, xmm0 - paddw xmm7, [rd GLOBAL] + paddw xmm7, [GLOBAL(rd)] psraw xmm7, VP8_FILTER_SHIFT - paddw xmm1, [rd GLOBAL] + paddw xmm1, [GLOBAL(rd)] psraw xmm1, VP8_FILTER_SHIFT packuswb xmm7, xmm7 @@ -1447,16 +1447,16 @@ b8x8_fp_only: punpcklbw xmm7, xmm2 pmaddubsw xmm7, xmm0 - paddw xmm1, [rd GLOBAL] + paddw xmm1, [GLOBAL(rd)] psraw xmm1, VP8_FILTER_SHIFT - paddw xmm3, [rd GLOBAL] + paddw xmm3, [GLOBAL(rd)] psraw xmm3, VP8_FILTER_SHIFT - paddw xmm5, [rd GLOBAL] + paddw xmm5, [GLOBAL(rd)] psraw xmm5, VP8_FILTER_SHIFT - paddw xmm7, [rd GLOBAL] + paddw xmm7, [GLOBAL(rd)] psraw xmm7, VP8_FILTER_SHIFT packuswb xmm1, xmm1 diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm index eb9d1f8aa..0d6133a46 100644 --- a/vp8/decoder/x86/dequantize_mmx.asm +++ b/vp8/decoder/x86/dequantize_mmx.asm @@ -98,11 +98,11 @@ sym(vp8_dequant_idct_add_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL]; + pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL]; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -110,10 +110,10 @@ sym(vp8_dequant_idct_add_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 @@ -153,11 +153,11 @@ sym(vp8_dequant_idct_add_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL]; + pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL]; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -165,16 +165,16 @@ sym(vp8_dequant_idct_add_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 - paddw mm0, [fours GLOBAL] + paddw mm0, [GLOBAL(fours)] - paddw mm2, [fours GLOBAL] + paddw mm2, [GLOBAL(fours)] movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 @@ -300,11 +300,11 @@ sym(vp8_dequant_dc_idct_add_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL]; + pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL]; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -312,10 +312,10 @@ sym(vp8_dequant_dc_idct_add_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 @@ -355,11 +355,11 @@ sym(vp8_dequant_dc_idct_add_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL]; + pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL]; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -367,16 +367,16 @@ sym(vp8_dequant_dc_idct_add_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 - paddw mm0, [fours GLOBAL] + paddw mm0, [GLOBAL(fours)] - paddw mm2, [fours GLOBAL] + paddw mm2, [GLOBAL(fours)] movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm index b6cfc5ce0..5acaca875 100644 --- a/vp8/encoder/x86/dct_mmx.asm +++ b/vp8/encoder/x86/dct_mmx.asm @@ -35,7 +35,7 @@ sym(vp8_short_fdct4x4_mmx): mov rsi, arg(0) ;input mov rdi, arg(1) ;output - lea rdx, [dct_const_mmx GLOBAL] + lea rdx, [GLOBAL(dct_const_mmx)] movsxd rax, dword ptr arg(2) ;pitch lea rcx, [rsi + rax*2] @@ -243,7 +243,7 @@ sym(vp8_short_fdct8x4_wmt): mov rsi, arg(0) ;input mov rdi, arg(1) ;output - lea rdx, [dct_const_xmm GLOBAL] + lea rdx, [GLOBAL(dct_const_xmm)] movsxd rax, dword ptr arg(2) ;pitch lea rcx, [rsi + rax*2] diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm index f7a18432d..723a78d76 100644 --- a/vp8/encoder/x86/dct_sse2.asm +++ b/vp8/encoder/x86/dct_sse2.asm @@ -52,14 +52,14 @@ sym(vp8_short_fdct4x4_sse2): psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3 psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3 movdqa xmm1, xmm0 - pmaddwd xmm0, XMMWORD PTR[_mult_add GLOBAL] ;a1 + b1 - pmaddwd xmm1, XMMWORD PTR[_mult_sub GLOBAL] ;a1 - b1 + pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 + pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 movdqa xmm4, xmm3 - pmaddwd xmm3, XMMWORD PTR[_5352_2217 GLOBAL] ;c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[_2217_neg5352 GLOBAL] ;d1*2217 - c1*5352 + pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 + pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352 - paddd xmm3, XMMWORD PTR[_14500 GLOBAL] - paddd xmm4, XMMWORD PTR[_7500 GLOBAL] + paddd xmm3, XMMWORD PTR[GLOBAL(_14500)] + paddd xmm4, XMMWORD PTR[GLOBAL(_7500)] psrad xmm3, 12 ;(c1 * 2217 + d1 * 5352 + 14500)>>12 psrad xmm4, 12 ;(d1 * 2217 - c1 * 5352 + 7500)>>12 @@ -80,7 +80,7 @@ sym(vp8_short_fdct4x4_sse2): punpcklwd xmm0, xmm3 ;13 12 11 10 03 02 01 00 punpckhwd xmm2, xmm3 ;33 32 31 30 23 22 21 20 - movdqa xmm5, XMMWORD PTR[_7 GLOBAL] + movdqa xmm5, XMMWORD PTR[GLOBAL(_7)] pshufd xmm2, xmm2, 04eh movdqa xmm3, xmm0 paddw xmm0, xmm2 ;b1 b1 b1 b1 a1 a1 a1 a1 @@ -94,8 +94,8 @@ sym(vp8_short_fdct4x4_sse2): pshufhw xmm0, xmm0, 0d8h ;b1 a1 b1 a1 b1 a1 b1 a1 pshufhw xmm3, xmm3, 0d8h ;c1 d1 c1 d1 c1 d1 c1 d1 movdqa xmm1, xmm0 - pmaddwd xmm0, XMMWORD PTR[_mult_add GLOBAL] ;a1 + b1 - pmaddwd xmm1, XMMWORD PTR[_mult_sub GLOBAL] ;a1 - b1 + pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 + pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 pxor xmm4, xmm4 ;zero out for compare paddd xmm0, xmm5 @@ -103,14 +103,14 @@ sym(vp8_short_fdct4x4_sse2): pcmpeqw xmm2, xmm4 psrad xmm0, 4 ;(a1 + b1 + 7)>>4 psrad xmm1, 4 ;(a1 - b1 + 7)>>4 - pandn xmm2, XMMWORD PTR[_cmp_mask GLOBAL] ;clear upper, - ;and keep bit 0 of lower + pandn xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper, + ;and keep bit 0 of lower movdqa xmm4, xmm3 - pmaddwd xmm3, XMMWORD PTR[_5352_2217 GLOBAL] ;c1*2217 + d1*5352 - pmaddwd xmm4, XMMWORD PTR[_2217_neg5352 GLOBAL] ;d1*2217 - c1*5352 - paddd xmm3, XMMWORD PTR[_12000 GLOBAL] - paddd xmm4, XMMWORD PTR[_51000 GLOBAL] + pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 + pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - c1*5352 + paddd xmm3, XMMWORD PTR[GLOBAL(_12000)] + paddd xmm4, XMMWORD PTR[GLOBAL(_51000)] packssdw xmm0, xmm1 ;op[8] op[0] psrad xmm3, 16 ;(c1 * 2217 + d1 * 5352 + 12000)>>16 psrad xmm4, 16 ;(d1 * 2217 - c1 * 5352 + 51000)>>16 diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm index d4ec63bd6..67a9b4d3e 100644 --- a/vp8/encoder/x86/variance_impl_mmx.asm +++ b/vp8/encoder/x86/variance_impl_mmx.asm @@ -556,7 +556,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx): pmullw mm3, [rax+8] ; paddw mm1, mm3 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; movq mm5, mm1 @@ -580,7 +580,7 @@ filter_block2d_bil4x4_var_mmx_loop: pmullw mm3, [rax+8] ; paddw mm1, mm3 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; movq mm3, mm5 ; @@ -592,7 +592,7 @@ filter_block2d_bil4x4_var_mmx_loop: paddw mm1, mm3 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; movd mm3, [rdi] ; @@ -710,10 +710,10 @@ sym(vp8_filter_block2d_bil_var_mmx): paddw mm1, mm3 ; paddw mm2, mm4 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; - paddw mm2, [mmx_bi_rd GLOBAL] ; + paddw mm2, [GLOBAL(mmx_bi_rd)] ; psraw mm2, mmx_filter_shift ; movq mm5, mm1 @@ -749,10 +749,10 @@ filter_block2d_bil_var_mmx_loop: paddw mm1, mm3 ; paddw mm2, mm4 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; - paddw mm2, [mmx_bi_rd GLOBAL] ; + paddw mm2, [GLOBAL(mmx_bi_rd)] ; psraw mm2, mmx_filter_shift ; movq mm3, mm5 ; @@ -773,8 +773,8 @@ filter_block2d_bil_var_mmx_loop: paddw mm1, mm3 ; paddw mm2, mm4 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; - paddw mm2, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + paddw mm2, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; psraw mm2, mmx_filter_shift ; diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm index 38b3f33ee..cefa0a956 100644 --- a/vp8/encoder/x86/variance_impl_sse2.asm +++ b/vp8/encoder/x86/variance_impl_sse2.asm @@ -532,7 +532,7 @@ sym(vp8_filter_block2d_bil_var_sse2): pmullw xmm3, [rax+16] ; paddw xmm1, xmm3 ; - paddw xmm1, [xmm_bi_rd GLOBAL] ; + paddw xmm1, [GLOBAL(xmm_bi_rd)] ; psraw xmm1, xmm_filter_shift ; movdqa xmm5, xmm1 @@ -554,7 +554,7 @@ filter_block2d_bil_var_sse2_loop: pmullw xmm3, [rax+16] ; paddw xmm1, xmm3 ; - paddw xmm1, [xmm_bi_rd GLOBAL] ; + paddw xmm1, [GLOBAL(xmm_bi_rd)] ; psraw xmm1, xmm_filter_shift ; movdqa xmm3, xmm5 ; @@ -565,7 +565,7 @@ filter_block2d_bil_var_sse2_loop: pmullw xmm1, [rdx+16] ; paddw xmm1, xmm3 ; - paddw xmm1, [xmm_bi_rd GLOBAL] ; + paddw xmm1, [GLOBAL(xmm_bi_rd)] ; psraw xmm1, xmm_filter_shift ; movq xmm3, QWORD PTR [rdi] ; diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm index 470c58a6d..a872b280e 100644 --- a/vpx_ports/x86_abi_support.asm +++ b/vpx_ports/x86_abi_support.asm @@ -160,7 +160,7 @@ ret %%exitGG: %undef GLOBAL - %define GLOBAL + %1 wrt ..gotoff + %define GLOBAL(x) x + %1 wrt ..gotoff %undef RESTORE_GOT %define RESTORE_GOT pop %1 %endmacro @@ -176,7 +176,7 @@ ret %%exitGG: %undef GLOBAL - %define GLOBAL + %1 - fake_got + %define GLOBAL(x) x + %1 - fake_got %undef RESTORE_GOT %define RESTORE_GOT pop %1 %endmacro @@ -186,7 +186,7 @@ %else %macro GET_GOT 1 %endmacro - %define GLOBAL wrt rip + %define GLOBAL(x) rel x %ifidn __OUTPUT_FORMAT__,elf64 %define WRT_PLT wrt ..plt %define HIDDEN_DATA(x) x:data hidden @@ -197,7 +197,7 @@ %ifnmacro GET_GOT %macro GET_GOT 1 %endmacro - %define GLOBAL + %define GLOBAL(x) x %endif %ifndef RESTORE_GOT %define RESTORE_GOT