From: Steve Clark
Date: Wed, 20 Nov 2013 17:40:23 +0000 (+0400)
Subject: Fix ARM asm compilation with Apple assembler
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=91481419e3acc4bb601600cf32e46e7f93ae02ab;p=libx264

Fix ARM asm compilation with Apple assembler
---

diff --git a/common/arm/asm.S b/common/arm/asm.S
index 053328a0..43d7259c 100644
--- a/common/arm/asm.S
+++ b/common/arm/asm.S
@@ -26,6 +26,8 @@
 
 #include "config.h"
 
+.syntax unified
+
 #ifdef PREFIX
 # define EXTERN_ASM _
 #else
diff --git a/common/arm/cpu-a.S b/common/arm/cpu-a.S
index 7a392908..3d9cca9d 100644
--- a/common/arm/cpu-a.S
+++ b/common/arm/cpu-a.S
@@ -26,7 +26,7 @@
 #include "asm.S"
 
 .fpu neon
-.align
+.align 2
 
 // done in gas because .fpu neon overrides the refusal to assemble
 // instructions the selected -march/-mcpu doesn't support
@@ -95,7 +95,7 @@ average_loop:
     sub r2, r2, r1
     cmpgt r2, #30 << 3 // assume context switch if it took over 30 cycles
     addle r3, r3, r2
-    subles ip, ip, #1
+    subsle ip, ip, #1
     bgt average_loop
 
     // disable counters if we enabled them
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 8a37e957..6e84e3cd 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -167,7 +167,7 @@ function x264_pixel_avg_\w\()x\h\()_neon
     ldr ip, [sp, #8]
     push {r4-r6,lr}
     cmp ip, #32
-    ldrd r4, [sp, #16]
+    ldrd r4, r5, [sp, #16]
     mov lr, #\h
     beq x264_pixel_avg_w\w\()_neon
     rsbs r6, ip, #64
@@ -447,7 +447,7 @@ avg2_w20_loop:
 .ifc \type, full
     ldr lr, [r4, #32] // denom
 .endif
-    ldrd r4, [r4, #32+4] // scale, offset
+    ldrd r4, r5, [r4, #32+4] // scale, offset
     vdup.16 q0, r4
     vdup.16 q1, r5
 .ifc \type, full
@@ -818,8 +818,8 @@ copy_w16_aligned_loop:
 function x264_mc_chroma_neon
     push {r4-r8, lr}
     vpush {d8-d11}
-    ldrd r4, [sp, #56]
-    ldrd r6, [sp, #64]
+    ldrd r4, r5, [sp, #56]
+    ldrd r6, r7, [sp, #64]
 
     asr lr, r6, #3
     mul lr, r4, lr
@@ -1380,8 +1380,8 @@ filter_h_loop:
 function x264_frame_init_lowres_core_neon
     push {r4-r10,lr}
     vpush {d8-d15}
-    ldrd r4, [sp, #96]
-    ldrd r6, [sp, #104]
+    ldrd r4, r5, [sp, #96]
+    ldrd r6, r7, [sp, #104]
     ldr lr, [sp, #112]
     sub r10, r6, r7 // dst_stride - width
     and r10, r10, #~15
diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index 7294ad0e..717a771f 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -328,9 +328,9 @@ SAD_FUNC_DUAL 16, 16
 function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
     push {r6-r7,lr}
 .if \x == 3
-    ldrd r6, [sp, #12]
+    ldrd r6, r7, [sp, #12]
 .else
-    ldrd r6, [sp, #16]
+    ldrd r6, r7, [sp, #16]
     ldr r12, [sp, #12]
 .endif
     mov lr, #FENC_STRIDE
@@ -596,7 +596,7 @@ function x264_pixel_var2_8x8_neon
     vadd.s32 d1, d2, d3
     vpadd.s32 d0, d0, d1
 
-    vmov.32 r0, r1, d0
+    vmov r0, r1, d0
     vst1.32 {d0[1]}, [ip,:32]
     mul r0, r0, r0
     sub r0, r1, r0, lsr #6
diff --git a/common/arm/predict-a.S b/common/arm/predict-a.S
index 6d652f2c..c2bf0817 100644
--- a/common/arm/predict-a.S
+++ b/common/arm/predict-a.S
@@ -181,9 +181,9 @@ function x264_predict_4x4_ddl_neon
 
 function x264_predict_8x8_dc_neon
     mov ip, #0
-    ldrd r2, [r1, #8]
+    ldrd r2, r3, [r1, #8]
     push {r4-r5,lr}
-    ldrd r4, [r1, #16]
+    ldrd r4, r5, [r1, #16]
     lsl r3, r3, #8
     ldrb lr, [r1, #7]
     usad8 r2, r2, ip
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 0ccf112a..38045bc6 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -312,7 +312,7 @@ dequant_4x4_dc_rshift:
 
 // int coeff_last( int16_t *l )
 function x264_coeff_last4_arm
-    ldrd r2, [r0]
+    ldrd r2, r3, [r0]
     subs r0, r3, #0
     movne r0, #2
     movne r2, r3
@@ -341,7 +341,7 @@ function x264_coeff_last\size\()_neon
 
     subs r1, ip, r1, lsr #2
     addge r0, r1, #\size - 8
-    sublts r0, r3, r0, lsr #2
+    subslt r0, r3, r0, lsr #2
     movlt r0, #0
     bx lr
 .endfunc
@@ -390,7 +390,7 @@ function x264_coeff_last64_neon
 
     subs r1, ip, r1
     addge r0, r1, #32
-    sublts r0, ip, r0
+    subslt r0, ip, r0
     movlt r0, #0
     bx lr
 .endfunc