%xdefine %%funccpu2 %3 ; last64
%xdefine %%funccpu3 %4 ; last15/last16
coeff_last_%1:
- %ifdef PIC
- %xdefine %%base coeff_last_%1 ; offset relative to the start of the table
- %else
- %xdefine %%base 0 ; absolute address
- %endif
+ %xdefine %%base coeff_last_%1 ; entries are now always stored as offsets relative to the table start
%rep 14
%ifidn %5, 4
dd mangle(private_prefix %+ _coeff_last%5_ %+ %%funccpu1) - %%base
endstruc
%macro LOAD_GLOBAL 3-5 0 ; dst, base, off1, off2, tmp
-%ifdef PIC
- %ifidn %4, 0
- movzx %1, byte [%2+%3+r7-$$]
- %else
- lea %5, [r7+%4]
- movzx %1, byte [%2+%3+%5-$$]
- %endif
-%else
+%if ARCH_X86_64 == 0 ; 32-bit: plain absolute addressing is always available
movzx %1, byte [%2+%3+%4]
+%elifidn %4, 0 ; x86-64 is unconditionally PIC now: r7 holds $$, address relative to it
+ movzx %1, byte [%2+%3+r7-$$]
+%else
+ lea %5, [r7+%4] ; fold the extra offset into the tmp reg first (no extra index slot left)
+ movzx %1, byte [%2+%3+%5-$$]
%endif
%endmacro
shr t5d, 6
movifnidn t2d, r2m
%if WIN64
- PUSH r7
+ PUSH r7 ; r7 is loaded with the PIC base below, so it must be saved/restored on Win64
%endif
-%ifdef PIC
+%if ARCH_X86_64 ; 64-bit always sets up r7 = $$ as the PIC base now
lea r7, [$$]
%endif
LOAD_GLOBAL t5d, cabac_range_lps-4, t5, t4*2, t4
shl t6d, t3b
%endif
%if WIN64
- POP r7
+ POP r7 ; restore the r7 saved above
%endif
mov [t0+cb.range], t4d
add t3d, [t0+cb.queue]
CABAC asm
CABAC bmi2
+%if ARCH_X86_64 ; NOTE(review): new guard opened here — confirm it pairs with the %endif removed further down in the full file
; %1 = label name
; %2 = node_ctx init?
%macro COEFF_ABS_LEVEL_GT1 2
%endmacro
%macro COEFF_LAST 2 ; table, ctx_block_cat
-%ifdef PIC
+; The table now always stores base-relative entries, so compute the runtime
+; base in r1 and add it unconditionally; the absolute-address fallback is gone.
lea r1, [%1 GLOBAL]
movsxd r6, [r1+4*%2]
add r6, r1
-%else
- movsxd r6, [%1+4*%2]
-%endif
call r6
%endmacro
%define dct r4
%endif
-%ifdef PIC
- cglobal func, 4,13,6,-maxcoeffs*SIZEOF_DCTCOEF
+cglobal func, 4,13,6,-maxcoeffs*SIZEOF_DCTCOEF ; 13 GPRs: r12 is now always reserved as the PIC base
lea r12, [$$]
%define GLOBAL +r12-$$
-%else
- cglobal func, 4,12,6,-maxcoeffs*SIZEOF_DCTCOEF
- %define GLOBAL
-%endif
-
shl r1d, 4 ; MB_INTERLACED*16
%if %1
lea r4, [significant_coeff_flag_offset_8x8+r1*4 GLOBAL] ; r12 = sig offset 8x8
RET
%endmacro
-%if ARCH_X86_64
+; NOTE(review): this ARCH_X86_64 guard (and its %endif below) is dropped on the
+; assumption the surrounding section is already 64-bit-only — verify %if/%endif
+; pairing against the full file.
INIT_XMM sse2
CABAC_RESIDUAL_RD 0, coeff_last_sse2
CABAC_RESIDUAL_RD 1, coeff_last_sse2
CABAC_RESIDUAL_RD 0, coeff_last_avx512
INIT_ZMM avx512
CABAC_RESIDUAL_RD 1, coeff_last_avx512
-%endif
;-----------------------------------------------------------------------------
; void x264_cabac_block_residual_internal_sse2 ( dctcoef *l, int b_interlaced,
%macro CABAC_RESIDUAL 1
cglobal cabac_block_residual_internal, 4,15,0,-4*64
-%ifdef PIC
+; PIC setup is unconditional now; a consequence is that lastm always lives on
+; the stack instead of in r7d, since r7 is permanently the PIC base.
; if we use the same r7 as in cabac_encode_decision, we can cheat and save a register.
lea r7, [$$]
%define lastm [rsp+4*1]
%define GLOBAL +r7-$$
-%else
- %define lastm r7d
- %define GLOBAL
-%endif
shl r1d, 4
%define sigoffq r8
RET
%endmacro
-%if ARCH_X86_64
INIT_XMM sse2
CABAC_RESIDUAL coeff_last_sse2
INIT_XMM lzcnt
sub r4, r2
shl r6, 4 ;jump = (offset + align*2)*48
%define avg_w16_addr avg_w16_align1_1_ssse3-(avg_w16_align2_2_ssse3-avg_w16_align1_1_ssse3)
-%ifdef PIC
+%if ARCH_X86_64 ; PIC is implied on x86-64: compute the jump target through a register
lea r7, [avg_w16_addr]
add r6, r7
%else
%if cpuflag(cache64)
mov t0d, r3d
and t0d, 7
-%ifdef PIC
+%if ARCH_X86_64 ; same %ifdef PIC -> %if ARCH_X86_64 substitution
lea t1, [ch_shuf_adj]
movddup xm5, [t1 + t0*4]
%else
; output the predicted samples
mov r3d, eax
shr r3d, 16
-%ifdef PIC
+%if ARCH_X86_64 ; same %ifdef PIC -> %if ARCH_X86_64 substitution
lea r2, [%2_lut]
movzx r2d, byte [r2+r3]
%else
je .skip ; faster only if this is the common case; remove branch if we use ssim on a macroblock level
neg r2
-%ifdef PIC
+%if ARCH_X86_64 ; PIC is implied on x86-64
lea r3, [mask_ff + 16]
%xdefine %%mask r3
%else
add r5, r6
xor r0d, r0d ; nmv
mov [r5], r0d
-%ifdef PIC
+%if ARCH_X86_64 ; PIC is implied on x86-64
lea r1, [$$]
%define GLOBAL +r1-$$
%else
je .fix_lt_2
.do_top:
and r2d, 4
-%ifdef PIC
+%if ARCH_X86_64 ; PIC is implied on x86-64
lea r3, [shuf_fixtr]
pshufb m3, [r3+r2*4]
%else
sub t2d, t0d
sub t2d, t1d ; i_mf = i_qp % 6
shl t2d, %2
-%ifdef PIC
+%if ARCH_X86_64 ; PIC is implied on x86-64
lea r1, [dequant%1_scale]
add r1, t2
%else
sub t2d, t1d ; i_mf = i_qp % 6
shl t2d, %1
%if %2
-%ifdef PIC
+%if ARCH_X86_64 ; PIC is implied on x86-64
%define dmf r1+t2
lea r1, [dequant8_scale]
%else
shr edx, 1
%endif
%endif
-%ifdef PIC
+%if ARCH_X86_64 ; PIC is implied on x86-64
lea r4, [decimate_mask_table4]
%define mask_table r4
%else
add eax, r3d
jnz .ret9
%endif
-%ifdef PIC
- lea r4, [decimate_table8]
- %define table r4
-%else
- %define table decimate_table8
-%endif
+ lea r4, [decimate_table8] ; always go through a register base (this path uses 64-bit regs: see tzcnt rcx below)
mov al, -6
.loop:
tzcnt rcx, r1
- add al, byte [table + rcx]
+ add al, byte [r4 + rcx] ; the `table` alias was removed along with the non-PIC branch
jge .ret9
shr r1, 1
SHRX r1, rcx
%macro COEFF_LEVELRUN_LUT 1
cglobal coeff_level_run%1,2,4+(%1/9)
-%ifdef PIC
+%if ARCH_X86_64 ; PIC is implied on x86-64: r5 = $$ becomes the global base
lea r5, [$$]
%define GLOBAL +r5-$$
%else
shl r4d, 4 ; code size = 80
%endif
%define sad_w16_addr (sad_w16_align1_%1 + (sad_w16_align1_%1 - sad_w16_align2_%1))
-%ifdef PIC
+%if ARCH_X86_64 ; PIC is implied on x86-64
lea r5, [sad_w16_addr]
add r5, r4
%else
paddd m6, m6
%define unquant_mf m6
%endif
-%ifdef PIC
+; The PIC base setup below is unconditional now; its non-PIC %else branch
+; (empty GLOBAL) is removed.
%if dc == 0
mov unquant_mfm, unquant_mfq
%endif
; (Any address in .text would work, this one was just convenient.)
lea r0, [$$]
%define GLOBAL +r0-$$
-%else
- %define GLOBAL
-%endif
TRELLIS_LOOP 0 ; node_ctx 0..3
TRELLIS_LOOP 1 ; node_ctx 1..7
mov r10, cabac_state_sigm
%if num_coefs == 64
mov r6d, b_interlacedm
-%ifdef PIC
+; Always use the GLOBAL-relative lookup; the absolute [table + iiq] form is gone.
add r6d, iid
movzx r6d, byte [significant_coeff_flag_offset_8x8 + r6 GLOBAL]
-%else
- movzx r6d, byte [significant_coeff_flag_offset_8x8 + r6 + iiq]
-%endif
movzx r10, byte [r10 + r6]
%elif num_coefs == 8
movzx r13, byte [coeff_flag_offset_chroma_422_dc + iiq GLOBAL]
%if dc
pmuludq m0, unquant_mf
%else
-%ifdef PIC
+; Always reload the unquant pointer from its memory slot; the direct
+; unquant_mfq-register path is removed.
mov r10, unquant_mfm
LOAD_DUP m3, [r10 + zigzagiq*4]
-%else
- LOAD_DUP m3, [unquant_mfq + zigzagiq*4]
-%endif
pmuludq m0, m3
%endif
paddd m0, [pq_128]
ARCH="X86_64"
AS="${AS-nasm}"
AS_EXT=".asm"
- ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/"
+ # x86-64 asm is now always built position-independent: define PIC here once.
+ ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -DPIC -I\$(SRCPATH)/common/x86/"
stack_alignment=16
[ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS"
if [ "$SYS" = MACOSX ]; then
- ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX"
+ # -DPIC is already appended by the x86-64 arch setup above.
+ ASFLAGS="$ASFLAGS -f macho64 -DPREFIX"
if cc_check '' "-arch x86_64"; then
CFLAGS="$CFLAGS -arch x86_64"
LDFLAGS="$LDFLAGS -arch x86_64"
if [ "$pic" = "yes" ] ; then
[ "$SYS" != WINDOWS -a "$SYS" != CYGWIN ] && CFLAGS="$CFLAGS -fPIC"
- ASFLAGS="$ASFLAGS -DPIC"
+ # Skip the define when the x86-64 arch setup already appended -DPIC.
+ # A POSIX `case` pattern match is used instead of bash-only [[ ]] so the
+ # script stays portable to plain /bin/sh.
+ case "$ASFLAGS" in
+     *-DPIC*) ;;
+     *) ASFLAGS="$ASFLAGS -DPIC" ;;
+ esac
# resolve textrels in the x86 asm
cc_check stdio.h "-shared -Wl,-Bsymbolic" && SOFLAGS="$SOFLAGS -Wl,-Bsymbolic"
[ $SYS = SunOS -a "$ARCH" = "X86" ] && SOFLAGS="$SOFLAGS -mimpure-text"