From aa48c1fbb74308fecfe5f7eceee63076479f32dd Mon Sep 17 00:00:00 2001 From: David Conrad Date: Sat, 9 Jan 2010 01:52:33 -0500 Subject: [PATCH] Fix x264 compilation on Apple GCC Apple's GCC stupidly ignores the ARM ABI and doesn't give any stack alignment beyond 4. --- common/arm/mc-c.c | 3 +++ common/macroblock.c | 2 +- common/osdep.h | 21 +++++++++++++++++---- encoder/analyse.c | 6 +++--- encoder/me.c | 4 ++-- encoder/slicetype.c | 6 +++--- 6 files changed, 29 insertions(+), 13 deletions(-) diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c index 167b11bb..20cf1517 100644 --- a/common/arm/mc-c.c +++ b/common/arm/mc-c.c @@ -182,7 +182,10 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf ) pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_neon; pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_neon; +// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs +#ifndef SYS_MACOSX pf->memcpy_aligned = x264_memcpy_aligned_neon; +#endif pf->memzero_aligned = x264_memzero_aligned_neon; pf->mc_chroma = x264_mc_chroma_neon; diff --git a/common/macroblock.c b/common/macroblock.c index ba428fbe..6143c53d 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -221,7 +221,7 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h ) static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h ) { int ref[2]; - ALIGNED_8( int16_t mv[2][2] ); + ALIGNED_ARRAY_8( int16_t, mv,[2],[2] ); int i_list; int i8; const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ]; diff --git a/common/osdep.h b/common/osdep.h index 0c8623ec..abae9ac2 100644 --- a/common/osdep.h +++ b/common/osdep.h @@ -54,12 +54,25 @@ #define ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 ) #define ALIGNED_4( var ) DECLARE_ALIGNED( var, 4 ) -// current arm compilers only maintain 8-byte stack alignment -// and cannot align stack variables to more than 8-bytes +// ARM compilers don't reliably align stack variables +// - EABI requires only 8 byte stack alignment to be maintained +// 
- gcc can't align stack variables to more than that even if the stack were to be correctly aligned outside the function +// - armcc can't either, but is nice enough to actually tell you so +// - Apple gcc only maintains 4 byte alignment +// - llvm can align the stack, but only in svn and (unrelated) it exposes bugs in all released GNU binutils... +#if defined(ARCH_ARM) && defined(SYS_MACOSX) +#define ALIGNED_ARRAY_8( type, name, sub1, ... )\ + uint8_t name##_u [sizeof(type sub1 __VA_ARGS__) + 7]; \ + type (*name) __VA_ARGS__ = (void*)((intptr_t)(name##_u+7) & ~7) +#else +#define ALIGNED_ARRAY_8( type, name, sub1, ... )\ + ALIGNED_8( type name sub1 __VA_ARGS__ ) +#endif + #ifdef ARCH_ARM #define ALIGNED_ARRAY_16( type, name, sub1, ... )\ - ALIGNED_8( uint8_t name##_8 [sizeof(type sub1 __VA_ARGS__) + 8] );\ - type (*name) __VA_ARGS__ = (void*)(name##_8 + ((intptr_t)name##_8 & 8)) + uint8_t name##_u [sizeof(type sub1 __VA_ARGS__) + 15];\ + type (*name) __VA_ARGS__ = (void*)((intptr_t)(name##_u+15) & ~15) #else #define ALIGNED_ARRAY_16( type, name, sub1, ... 
)\ ALIGNED_16( type name sub1 __VA_ARGS__ ) diff --git a/encoder/analyse.c b/encoder/analyse.c index 7366546d..e55b6b31 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -1585,7 +1585,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a ) static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel ) { - ALIGNED_8( uint8_t pix1[16*8] ); + ALIGNED_ARRAY_8( uint8_t, pix1,[16*8] ); uint8_t *pix2 = pix1+8; const int i_stride = h->mb.pic.i_stride[1]; const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride; @@ -1956,7 +1956,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a ) uint8_t **p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.p_fref[1][a->l1.i_ref] }; - ALIGNED_8( uint8_t pix[2][8*8] ); + ALIGNED_ARRAY_8( uint8_t, pix,[2],[8*8] ); int i, l; /* XXX Needed for x264_mb_predict_mv */ @@ -2089,7 +2089,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a ) uint8_t **p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.p_fref[1][a->l1.i_ref] }; - ALIGNED_8( uint8_t pix[2][8*16] ); + ALIGNED_ARRAY_8( uint8_t, pix,[2],[8*16] ); ALIGNED_4( int16_t mvc[2][2] ); int i, l; diff --git a/encoder/me.c b/encoder/me.c index 0d98f910..70848ae5 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -903,8 +903,8 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m const uint16_t *p_cost_m1x = m1->p_cost_mv - m1->mvp[0]; const uint16_t *p_cost_m1y = m1->p_cost_mv - m1->mvp[1]; ALIGNED_ARRAY_16( uint8_t, pixy_buf,[2],[9][16*16] ); - ALIGNED_8( uint8_t pixu_buf[2][9][8*8] ); - ALIGNED_8( uint8_t pixv_buf[2][9][8*8] ); + ALIGNED_ARRAY_8( uint8_t, pixu_buf,[2],[9][8*8] ); + ALIGNED_ARRAY_8( uint8_t, pixv_buf,[2],[9][8*8] ); uint8_t *src0[9]; uint8_t *src1[9]; uint8_t *pix = &h->mb.pic.p_fdec[0][(i8>>1)*8*FDEC_STRIDE+(i8&1)*8]; diff --git a/encoder/slicetype.c b/encoder/slicetype.c index e6af3973..18309e4a 100644 --- 
a/encoder/slicetype.c +++ b/encoder/slicetype.c @@ -122,7 +122,7 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, ui int i_lines = fenc->i_lines_lowres; int i_width = fenc->i_width_lowres; uint8_t *fenc_plane = fenc->lowres[0]; - ALIGNED_8( uint8_t buf[8*8] ); + ALIGNED_ARRAY_8( uint8_t, buf,[8*8] ); int pixoff = 0; int i_mb = 0; @@ -259,7 +259,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, int16_t (*fenc_mvs[2])[2] = { &frames[b]->lowres_mvs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mvs[1][p1-b-1][i_mb_xy] }; int (*fenc_costs[2]) = { &frames[b]->lowres_mv_costs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mv_costs[1][p1-b-1][i_mb_xy] }; - ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] ); + ALIGNED_ARRAY_8( uint8_t, pix1,[9*FDEC_STRIDE] ); uint8_t *pix2 = pix1+8; x264_me_t m[2]; int i_bcost = COST_MAX; @@ -340,7 +340,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, if( b_bidir ) { int16_t *mvr = fref1->lowres_mvs[0][p1-p0-1][i_mb_xy]; - ALIGNED_8( int16_t dmv[2][2] ); + ALIGNED_ARRAY_8( int16_t, dmv,[2],[2] ); m[1].i_pixel = PIXEL_8x8; m[1].p_cost_mv = a->p_cost_mv; -- 2.40.0