From: Yunqing Wang Date: Wed, 18 Sep 2013 17:36:21 +0000 (-0700) Subject: Fix x86inc.asm to build PIC code correctly X-Git-Tag: v1.3.0~433^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9d901217c689c1c5b38eb7cfe7db7f161827eb7a;p=libvpx Fix x86inc.asm to build PIC code correctly The current x86inc.asm did not handle 32-bit PIC builds properly: TEXTRELs were seen in the built library. The PIC macros from libvpx's x86_abi_support.asm were used to fix this problem, and the assembly code was modified to use the macros. Notes: This fix is needed for building the decoder. Functions in the encoder will be fixed later. Change-Id: Ifa548d37b1d0bc7d0528db75009cc18cd5eb1838 --- diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm index a66a96bba..b9f483023 100644 --- a/third_party/x86inc/x86inc.asm +++ b/third_party/x86inc/x86inc.asm @@ -97,21 +97,91 @@ %endif %endmacro -%if WIN64 +; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC" +; from original code is added in for 64bit. 
+%ifidn __OUTPUT_FORMAT__,elf32 +%define ABI_IS_32BIT 1 +%elifidn __OUTPUT_FORMAT__,macho32 +%define ABI_IS_32BIT 1 +%elifidn __OUTPUT_FORMAT__,win32 +%define ABI_IS_32BIT 1 +%elifidn __OUTPUT_FORMAT__,aout +%define ABI_IS_32BIT 1 +%else +%define ABI_IS_32BIT 0 +%endif + +%if ABI_IS_32BIT + %if CONFIG_PIC=1 + %ifidn __OUTPUT_FORMAT__,elf32 + %define GET_GOT_SAVE_ARG 1 + %define WRT_PLT wrt ..plt + %macro GET_GOT 1 + extern _GLOBAL_OFFSET_TABLE_ + push %1 + call %%get_got + %%sub_offset: + jmp %%exitGG + %%get_got: + mov %1, [esp] + add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc + ret + %%exitGG: + %undef GLOBAL + %define GLOBAL(x) x + %1 wrt ..gotoff + %undef RESTORE_GOT + %define RESTORE_GOT pop %1 + %endmacro + %elifidn __OUTPUT_FORMAT__,macho32 + %define GET_GOT_SAVE_ARG 1 + %macro GET_GOT 1 + push %1 + call %%get_got + %%get_got: + pop %1 + %undef GLOBAL + %define GLOBAL(x) x + %1 - %%get_got + %undef RESTORE_GOT + %define RESTORE_GOT pop %1 + %endmacro + %endif + %endif + + %if ARCH_X86_64 == 0 + %undef PIC + %endif + +%else + %macro GET_GOT 1 + %endmacro + %define GLOBAL(x) rel x + %define WRT_PLT wrt ..plt + + %if WIN64 %define PIC -%elifidn __OUTPUT_FORMAT__,macho64 + %elifidn __OUTPUT_FORMAT__,macho64 %define PIC -%elif ARCH_X86_64 == 0 -; x86_32 doesn't require PIC. -; Some distros prefer shared objects to be PIC, but nothing breaks if -; the code contains a few textrels, so we'll skip that complexity. 
- %undef PIC -%elif CONFIG_PIC + %elif CONFIG_PIC %define PIC + %endif +%endif + +%ifnmacro GET_GOT + %macro GET_GOT 1 + %endmacro + %define GLOBAL(x) x +%endif +%ifndef RESTORE_GOT +%define RESTORE_GOT %endif +%ifndef WRT_PLT +%define WRT_PLT +%endif + %ifdef PIC default rel %endif +; Done with PIC macros ; Always use long nops (reduces 0x90 spam in disassembly on x86_32) %ifndef __NASM_VER__ diff --git a/vp9/common/x86/vp9_intrapred_sse2.asm b/vp9/common/x86/vp9_intrapred_sse2.asm index 980b8b984..69b07f645 100644 --- a/vp9/common/x86/vp9_intrapred_sse2.asm +++ b/vp9/common/x86/vp9_intrapred_sse2.asm @@ -19,12 +19,14 @@ pw_32: times 8 dw 32 SECTION .text INIT_MMX sse -cglobal dc_predictor_4x4, 4, 4, 2, dst, stride, above, left +cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset + GET_GOT goffsetq + pxor m1, m1 movd m0, [aboveq] punpckldq m0, [leftq] psadbw m0, m1 - paddw m0, [pw_4] + paddw m0, [GLOBAL(pw_4)] psraw m0, 3 pshufw m0, m0, 0x0 packuswb m0, m0 @@ -33,10 +35,14 @@ cglobal dc_predictor_4x4, 4, 4, 2, dst, stride, above, left lea dstq, [dstq+strideq*2] movd [dstq ], m0 movd [dstq+strideq], m0 + + RESTORE_GOT RET INIT_MMX sse -cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left +cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + pxor m1, m1 movq m0, [aboveq] movq m2, [leftq] @@ -45,7 +51,7 @@ cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left psadbw m0, m1 psadbw m2, m1 paddw m0, m2 - paddw m0, [pw_8] + paddw m0, [GLOBAL(pw_8)] psraw m0, 4 pshufw m0, m0, 0x0 packuswb m0, m0 @@ -58,10 +64,14 @@ cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 + + RESTORE_GOT RET INIT_XMM sse2 -cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left +cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset + GET_GOT goffsetq + pxor m1, m1 mova m0, [aboveq] mova m2, [leftq] @@ -73,7 +83,7 @@ 
cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left paddw m0, m2 movhlps m2, m0 paddw m0, m2 - paddw m0, [pw_16] + paddw m0, [GLOBAL(pw_16)] psraw m0, 5 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 @@ -86,10 +96,14 @@ cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left lea dstq, [dstq+strideq*4] dec lines4d jnz .loop + + RESTORE_GOT REP_RET INIT_XMM sse2 -cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left +cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset + GET_GOT goffsetq + pxor m1, m1 mova m0, [aboveq] mova m2, [aboveq+16] @@ -107,7 +121,7 @@ cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left paddw m0, m4 movhlps m2, m0 paddw m0, m2 - paddw m0, [pw_32] + paddw m0, [GLOBAL(pw_32)] psraw m0, 6 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 @@ -124,6 +138,8 @@ cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left lea dstq, [dstq+strideq*4] dec lines4d jnz .loop + + RESTORE_GOT REP_RET INIT_MMX sse diff --git a/vp9/common/x86/vp9_intrapred_ssse3.asm b/vp9/common/x86/vp9_intrapred_ssse3.asm index 8ba26f310..67c8ab03a 100644 --- a/vp9/common/x86/vp9_intrapred_ssse3.asm +++ b/vp9/common/x86/vp9_intrapred_ssse3.asm @@ -112,14 +112,16 @@ cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left REP_RET INIT_MMX ssse3 -cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above +cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset + GET_GOT goffsetq + movq m0, [aboveq] - pshufb m2, m0, [sh_b23456777] - pshufb m1, m0, [sh_b01234577] - pshufb m0, [sh_b12345677] + pshufb m2, m0, [GLOBAL(sh_b23456777)] + pshufb m1, m0, [GLOBAL(sh_b01234577)] + pshufb m0, [GLOBAL(sh_b12345677)] pavgb m3, m2, m1 pxor m2, m1 - pand m2, [pb_1] + pand m2, [GLOBAL(pb_1)] psubb m3, m2 pavgb m0, m3 @@ -132,19 +134,23 @@ cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above movd [dstq ], m0 psrlq m0, 8 movd [dstq+strideq], m0 + + RESTORE_GOT RET INIT_MMX ssse3 -cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above +cglobal 
d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset + GET_GOT goffsetq + movq m0, [aboveq] - mova m1, [sh_b12345677] - DEFINE_ARGS dst, stride, stride3, line + mova m1, [GLOBAL(sh_b12345677)] + DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] - pshufb m2, m0, [sh_b23456777] + pshufb m2, m0, [GLOBAL(sh_b23456777)] pavgb m3, m2, m0 pxor m2, m0 pshufb m0, m1 - pand m2, [pb_1] + pand m2, [GLOBAL(pb_1)] psubb m3, m2 pavgb m0, m3 @@ -167,20 +173,24 @@ cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above movq [dstq+strideq*2], m0 pshufb m0, m1 movq [dstq+stride3q ], m0 + + RESTORE_GOT RET INIT_XMM ssse3 -cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line +cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset + GET_GOT goffsetq + mova m0, [aboveq] DEFINE_ARGS dst, stride, stride3, dst8, line lea stride3q, [strideq*3] lea dst8q, [dstq+strideq*8] - mova m1, [sh_b123456789abcdeff] - pshufb m2, m0, [sh_b23456789abcdefff] + mova m1, [GLOBAL(sh_b123456789abcdeff)] + pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] pavgb m3, m2, m0 pxor m2, m0 pshufb m0, m1 - pand m2, [pb_1] + pand m2, [GLOBAL(pb_1)] psubb m3, m2 pavgb m0, m3 @@ -214,29 +224,33 @@ cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line movhps [dstq+strideq +8], m0 movhps [dstq+strideq*2+8], m0 movhps [dstq+stride3q +8], m0 + + RESTORE_GOT RET INIT_XMM ssse3 -cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line +cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, line, goffset + GET_GOT goffsetq + mova m0, [aboveq] mova m4, [aboveq+16] DEFINE_ARGS dst, stride, stride3, dst16, line lea stride3q, [strideq*3] lea dst16q, [dstq +strideq*8] lea dst16q, [dst16q+strideq*8] - mova m1, [sh_b123456789abcdeff] - pshufb m2, m4, [sh_b23456789abcdefff] + mova m1, [GLOBAL(sh_b123456789abcdeff)] + pshufb m2, m4, [GLOBAL(sh_b23456789abcdefff)] pavgb m3, m2, m4 pxor m2, m4 palignr m5, m4, m0, 1 palignr m6, m4, m0, 2 pshufb m4, m1 - 
pand m2, [pb_1] + pand m2, [GLOBAL(pb_1)] psubb m3, m2 pavgb m4, m3 pavgb m3, m0, m6 pxor m0, m6 - pand m0, [pb_1] + pand m0, [GLOBAL(pb_1)] psubb m3, m0 pavgb m5, m3 @@ -288,4 +302,6 @@ cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line mova [dstq +strideq +16], m4 mova [dstq +strideq*2+16], m4 mova [dstq +stride3q +16], m4 + + RESTORE_GOT RET