From: Janne Grunau
Date: Tue, 1 Apr 2014 20:11:43 +0000 (+0200)
Subject: arm: do not export every asm function
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b2e9ca30f1e9ac25df1f592db04ff0d91faf42d4;p=libx264

arm: do not export every asm function

Based on Libav's libavutil/arm/asm.S. Also prevents emitting the same
label twice for every function on systems not defining EXTERN_ASM;
Clang's integrated assembler rejects the duplicate labels.
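
Usage sketch (illustration only; the function names below are made up,
not symbols from this patch): internal helpers are now declared with
export=0 and called by their plain label, while same-file references to
exported functions go through X() so they pick up the EXTERN_ASM
prefix:

    // exported (default export=1): the label is EXTERN_ASM-prefixed
    function x264_foo_neon
        bl          foo_helper_neon      // internal helper: plain label
        b           X(x264_bar_neon)     // exported symbol: X() adds the
                                         // EXTERN_ASM prefix to the name
    .endfunc

    // internal helper: no .global, the label stays hidden
    function foo_helper_neon, export=0
        bx          lr
    .endfunc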
---

diff --git a/common/arm/asm.S b/common/arm/asm.S
index 3a6f6213..273a79c9 100644
--- a/common/arm/asm.S
+++ b/common/arm/asm.S
@@ -48,14 +48,20 @@ ELF     .eabi_attribute 24, \val
 ELF     .eabi_attribute 25, \val
 .endm
 
-.macro function name
-    .global EXTERN_ASM\name
+.macro function name, export=1
     .align  2
+.if \export == 1
+    .global EXTERN_ASM\name
+ELF     .hidden EXTERN_ASM\name
+ELF     .type   EXTERN_ASM\name, %function
+    .func   EXTERN_ASM\name
 EXTERN_ASM\name:
+.else
 ELF     .hidden \name
 ELF     .type   \name, %function
     .func   \name
 \name:
+.endif
 .endm
 
 .macro movrel rd, val
@@ -78,6 +84,10 @@ ELF     .type   \name, %function
 #endif
 .endm
 
+#define GLUE(a, b) a ## b
+#define JOIN(a, b) GLUE(a, b)
+#define X(s) JOIN(EXTERN_ASM, s)
+
 #define FENC_STRIDE 16
 #define FDEC_STRIDE 32
 
diff --git a/common/arm/cpu-a.S b/common/arm/cpu-a.S
index a254551a..9ae6b14e 100644
--- a/common/arm/cpu-a.S
+++ b/common/arm/cpu-a.S
@@ -38,7 +38,7 @@ function x264_cpu_neon_test
 // return: 0 on success
 //         1 if counters were already enabled
 //         9 if lo-res counters were already enabled
-function x264_cpu_enable_armv7_counter
+function x264_cpu_enable_armv7_counter, export=0
     mrc         p15, 0, r2, c9, c12, 0      // read PMNC
     ands        r0, r2, #1
     andne       r0, r2, #9
@@ -51,7 +51,7 @@ function x264_cpu_enable_armv7_counter
     bx          lr
 .endfunc
 
-function x264_cpu_disable_armv7_counter
+function x264_cpu_disable_armv7_counter, export=0
     mrc         p15, 0, r0, c9, c12, 0      // read PMNC
     bic         r0, r0, #1                  // disable counters
     mcr         p15, 0, r0, c9, c12, 0      // write PMNC
diff --git a/common/arm/dct-a.S b/common/arm/dct-a.S
index df12aeb0..c5490bdb 100644
--- a/common/arm/dct-a.S
+++ b/common/arm/dct-a.S
@@ -131,7 +131,7 @@ function x264_sub4x4_dct_neon
     bx          lr
 .endfunc
 
-function x264_sub8x4_dct_neon
+function x264_sub8x4_dct_neon, export=0
     vld1.64     {d0}, [r1,:64], r3
     vld1.64     {d1}, [r2,:64], ip
     vsubl.u8    q8, d0, d1
@@ -283,17 +283,17 @@ function x264_sub8x8_dct8_neon
 
 function x264_sub16x16_dct8_neon
     push        {lr}
-    bl          x264_sub8x8_dct8_neon
+    bl          X(x264_sub8x8_dct8_neon)
     sub         r1, r1, #FENC_STRIDE*8 - 8
     sub         r2, r2, #FDEC_STRIDE*8 - 8
-    bl          x264_sub8x8_dct8_neon
+    bl          X(x264_sub8x8_dct8_neon)
     sub         r1, r1, #8
     sub         r2, r2, #8
-    bl          x264_sub8x8_dct8_neon
+    bl          X(x264_sub8x8_dct8_neon)
     pop         {lr}
     sub         r1, r1, #FENC_STRIDE*8 - 8
     sub         r2, r2, #FDEC_STRIDE*8 - 8
-    b           x264_sub8x8_dct8_neon
+    b           X(x264_sub8x8_dct8_neon)
 .endfunc
 
 
@@ -338,7 +338,7 @@ function x264_add4x4_idct_neon
     bx          lr
 .endfunc
 
-function x264_add8x4_idct_neon
+function x264_add8x4_idct_neon, export=0
     vld1.64     {d0-d3}, [r1,:128]!
     IDCT_1D     d16, d18, d20, d22, d0, d1, d2, d3
     vld1.64     {d4-d7}, [r1,:128]!
@@ -502,14 +502,14 @@ function x264_add8x8_idct8_neon
 
 function x264_add16x16_idct8_neon
     mov         ip, lr
-    bl          x264_add8x8_idct8_neon
+    bl          X(x264_add8x8_idct8_neon)
     sub         r0, r0, #8*FDEC_STRIDE-8
-    bl          x264_add8x8_idct8_neon
+    bl          X(x264_add8x8_idct8_neon)
     sub         r0, r0, #8
-    bl          x264_add8x8_idct8_neon
+    bl          X(x264_add8x8_idct8_neon)
     sub         r0, r0, #8*FDEC_STRIDE-8
     mov         lr, ip
-    b           x264_add8x8_idct8_neon
+    b           X(x264_add8x8_idct8_neon)
 .endfunc
 
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 58cf542c..330b8529 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -88,7 +88,7 @@ function x264_memcpy_aligned_neon
 .endfunc
 
 .macro MEMCPY_ALIGNED srcalign dstalign
-function memcpy_aligned_\dstalign\()_\srcalign\()_neon
+function memcpy_aligned_\dstalign\()_\srcalign\()_neon, export=0
     mov         r3, r0
 .if \srcalign == 8 && \dstalign == 8
     sub         r2, #16
@@ -239,7 +239,7 @@ AVGH 16, 16
 .endm
 
 .macro AVG_WEIGHT ext
-function x264_pixel_avg_weight_w4_\ext\()_neon
+function x264_pixel_avg_weight_w4_\ext\()_neon, export=0
     load_weights_\ext
 1:  // height loop
     subs        lr, lr, #2
@@ -255,7 +255,7 @@ function x264_pixel_avg_weight_w4_\ext\()_neon
     pop         {r4-r6,pc}
 .endfunc
 
-function x264_pixel_avg_weight_w8_\ext\()_neon
+function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
     load_weights_\ext
 1:  // height loop
     subs        lr, lr, #4
@@ -279,7 +279,7 @@ function x264_pixel_avg_weight_w8_\ext\()_neon
     pop         {r4-r6,pc}
 .endfunc
 
-function x264_pixel_avg_weight_w16_\ext\()_neon
+function x264_pixel_avg_weight_w16_\ext\()_neon, export=0
     load_weights_\ext
 1:  // height loop
     subs        lr, lr, #2
@@ -304,7 +304,7 @@ AVG_WEIGHT add_add
 AVG_WEIGHT add_sub
 AVG_WEIGHT sub_add
 
-function x264_pixel_avg_w4_neon
+function x264_pixel_avg_w4_neon, export=0
     subs        lr, lr, #2
     vld1.32     {d0[]}, [r2], r3
     vld1.32     {d2[]}, [r4], r5
@@ -318,7 +318,7 @@ function x264_pixel_avg_w4_neon
     pop         {r4-r6,pc}
 .endfunc
 
-function x264_pixel_avg_w8_neon
+function x264_pixel_avg_w8_neon, export=0
     subs        lr, lr, #4
     vld1.64     {d0}, [r2], r3
     vld1.64     {d2}, [r4], r5
@@ -340,7 +340,7 @@ function x264_pixel_avg_w8_neon
     pop         {r4-r6,pc}
 .endfunc
 
-function x264_pixel_avg_w16_neon
+function x264_pixel_avg_w16_neon, export=0
     subs        lr, lr, #4
     vld1.64     {d0-d1}, [r2], r3
     vld1.64     {d2-d3}, [r4], r5
diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index 0b996a85..ddf396d4 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -575,7 +575,7 @@ var16_loop:
     bgt         var16_loop
 .endfunc
 
-function x264_var_end
+function x264_var_end, export=0
     vpaddl.u16  q8, q14
     vpaddl.u16  q9, q15
     vadd.u32    q1, q1, q8
@@ -760,7 +760,7 @@ function x264_pixel_satd_8x4_neon
     SUMSUB_AB   q10, q11, q2, q3
 .endfunc
 
-function x264_satd_4x8_8x4_end_neon
+function x264_satd_4x8_8x4_end_neon, export=0
     vadd.s16    q0, q8, q10
     vadd.s16    q1, q9, q11
     vsub.s16    q2, q8, q10
@@ -823,7 +823,7 @@ function x264_pixel_satd_8x16_neon
     bx          lr
 .endfunc
 
-function x264_satd_8x8_neon
+function x264_satd_8x8_neon, export=0
     LOAD_DIFF_8x4 q8, q9, q10, q11
     vld1.64     {d7}, [r2], r3
     SUMSUB_AB   q0, q1, q8, q9
@@ -844,7 +844,7 @@ function x264_satd_8x8_neon
 .endfunc
 
 // one vertical hadamard pass and two horizontal
-function x264_satd_8x4v_8x8h_neon
+function x264_satd_8x4v_8x8h_neon, export=0
     SUMSUB_ABCD q0, q1, q2, q3, q12, q13, q14, q15
     vtrn.16     q8, q9
     SUMSUB_AB   q12, q14, q0, q2
@@ -928,7 +928,7 @@ function x264_pixel_satd_16x16_neon
     bx          lr
 .endfunc
 
-function x264_satd_16x4_neon
+function x264_satd_16x4_neon, export=0
     vld1.64     {d2-d3}, [r2], r3
     vld1.64     {d0-d1}, [r0,:128], r1
     vsubl.u8    q8, d0, d2
@@ -1002,7 +1002,7 @@ function x264_pixel_sa8d_16x16_neon
     SUMSUB_ABCD \r1, \r3, \r2, \r4, \t1, \t3, \t2, \t4
 .endm
 
-function x264_sa8d_8x8_neon
+function x264_sa8d_8x8_neon, export=0
     LOAD_DIFF_8x4 q8, q9, q10, q11
     vld1.64     {d7}, [r2], r3
     SUMSUB_AB   q0, q1, q8, q9
@@ -1103,7 +1103,7 @@ HADAMARD_AC 16, 8
 HADAMARD_AC 16, 16
 
 // q4: satd  q5: sa8d  q6: mask_ac4  q7: mask_ac8
-function x264_hadamard_ac_8x8_neon
+function x264_hadamard_ac_8x8_neon, export=0
     vld1.64     {d2}, [r0,:64], r1
     vld1.64     {d3}, [r0,:64], r1
     vaddl.u8    q0, d2, d3
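
For reference, the call-site wrapper added to asm.S is a standard
two-level token paste; a sketch of the expansion follows (the exact
definition of EXTERN_ASM, e.g. empty on ELF or "_" on targets that
prefix C symbols, lives elsewhere in asm.S and is not part of this
patch):

    #define GLUE(a, b) a ## b
    #define JOIN(a, b) GLUE(a, b)
    #define X(s) JOIN(EXTERN_ASM, s)

    // With EXTERN_ASM defined as _, X(x264_sub8x8_dct8_neon) expands
    // to _x264_sub8x8_dct8_neon; with an empty EXTERN_ASM it expands
    // to x264_sub8x8_dct8_neon. The JOIN indirection forces EXTERN_ASM
    // to be macro-expanded before GLUE pastes the tokens; pasting
    // directly would glue the literal text EXTERN_ASM onto the name.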