Fix x264 compilation on Apple GCC

author David Conrad <lessen42@gmail.com>

Sat, 9 Jan 2010 06:52:33 +0000 (01:52 -0500)

committer Fiona Glaser <fiona@x264.com>

Thu, 14 Jan 2010 04:47:02 +0000 (23:47 -0500)
author David Conrad <lessen42@gmail.com>
Sat, 9 Jan 2010 06:52:33 +0000 (01:52 -0500)
committer Fiona Glaser <fiona@x264.com>
Thu, 14 Jan 2010 04:47:02 +0000 (23:47 -0500)
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c

index 167b11bb48a5db07b2fc89f35753075f24a7fa3e..20cf1517fd9cd7ae9781a3b198faba6121a88a7f 100644 (file)
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -182,7 +182,10 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
      pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_neon;
      pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_neon;
  
+// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
+#ifndef SYS_MACOSX
      pf->memcpy_aligned  = x264_memcpy_aligned_neon;
+#endif
      pf->memzero_aligned = x264_memzero_aligned_neon;
  
      pf->mc_chroma = x264_mc_chroma_neon;
diff --git a/common/macroblock.c b/common/macroblock.c

index ba428fbea35ba93e6a3a17e75bd010ba2d3f0d33..6143c53dead0086a1c4958a8886da2c90ac16638 100644 (file)
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -221,7 +221,7 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
  static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
  {
      int ref[2];
-    ALIGNED_8( int16_t mv[2][2] );
+    ALIGNED_ARRAY_8( int16_t, mv,[2],[2] );
      int i_list;
      int i8;
      const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ];
diff --git a/common/osdep.h b/common/osdep.h

index 0c8623ec947fa4b882761384fd28f6261b19f52c..abae9ac28c9d89920f58a9a8ed414b3c027124ce 100644 (file)
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -54,12 +54,25 @@
  #define ALIGNED_8( var )  DECLARE_ALIGNED( var, 8 )
  #define ALIGNED_4( var )  DECLARE_ALIGNED( var, 4 )
  
-// current arm compilers only maintain 8-byte stack alignment
-// and cannot align stack variables to more than 8-bytes
+// ARM compilers don't reliably align stack variables
+// - EABI requires only 8 byte stack alignment to be maintained
+// - gcc can't align stack variables to more than that, even if the stack were correctly aligned outside the function
+// - armcc can't either, but is nice enough to actually tell you so
+// - Apple gcc only maintains 4 byte alignment
+// - llvm can align the stack, but only in svn and (unrelated) it exposes bugs in all released GNU binutils...
+#if defined(ARCH_ARM) && defined(SYS_MACOSX)
+#define ALIGNED_ARRAY_8( type, name, sub1, ... )\
+    uint8_t name##_u [sizeof(type sub1 __VA_ARGS__) + 7]; \
+    type (*name) __VA_ARGS__ = (void*)((intptr_t)(name##_u+7) & ~7)
+#else
+#define ALIGNED_ARRAY_8( type, name, sub1, ... )\
+    ALIGNED_8( type name sub1 __VA_ARGS__ )
+#endif
+
  #ifdef ARCH_ARM
  #define ALIGNED_ARRAY_16( type, name, sub1, ... )\
-    ALIGNED_8( uint8_t name##_8 [sizeof(type sub1 __VA_ARGS__) + 8] );\
-    type (*name) __VA_ARGS__ = (void*)(name##_8 + ((intptr_t)name##_8 & 8))
+    uint8_t name##_u [sizeof(type sub1 __VA_ARGS__) + 15];\
+    type (*name) __VA_ARGS__ = (void*)((intptr_t)(name##_u+15) & ~15)
  #else
  #define ALIGNED_ARRAY_16( type, name, sub1, ... )\
      ALIGNED_16( type name sub1 __VA_ARGS__ )
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 7366546d245937944f3807c3168255d74ab10649..e55b6b31d1354476c186a541385fdef5ab69aa13 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -1585,7 +1585,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
  
  static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
  {
-    ALIGNED_8( uint8_t pix1[16*8] );
+    ALIGNED_ARRAY_8( uint8_t, pix1,[16*8] );
      uint8_t *pix2 = pix1+8;
      const int i_stride = h->mb.pic.i_stride[1];
      const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
@@ -1956,7 +1956,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
      uint8_t **p_fref[2] =
          { h->mb.pic.p_fref[0][a->l0.i_ref],
            h->mb.pic.p_fref[1][a->l1.i_ref] };
-    ALIGNED_8( uint8_t pix[2][8*8] );
+    ALIGNED_ARRAY_8( uint8_t, pix,[2],[8*8] );
      int i, l;
  
      /* XXX Needed for x264_mb_predict_mv */
@@ -2089,7 +2089,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
      uint8_t **p_fref[2] =
          { h->mb.pic.p_fref[0][a->l0.i_ref],
            h->mb.pic.p_fref[1][a->l1.i_ref] };
-    ALIGNED_8( uint8_t pix[2][8*16] );
+    ALIGNED_ARRAY_8( uint8_t, pix,[2],[8*16] );
      ALIGNED_4( int16_t mvc[2][2] );
      int i, l;
  
diff --git a/encoder/me.c b/encoder/me.c

index 0d98f910950f93e054627d4de139224c594b9476..70848ae51fe893f4501e0f5b32ae9f0ce2be6600 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -903,8 +903,8 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m
      const uint16_t *p_cost_m1x = m1->p_cost_mv - m1->mvp[0];
      const uint16_t *p_cost_m1y = m1->p_cost_mv - m1->mvp[1];
      ALIGNED_ARRAY_16( uint8_t, pixy_buf,[2],[9][16*16] );
-    ALIGNED_8( uint8_t pixu_buf[2][9][8*8] );
-    ALIGNED_8( uint8_t pixv_buf[2][9][8*8] );
+    ALIGNED_ARRAY_8( uint8_t, pixu_buf,[2],[9][8*8] );
+    ALIGNED_ARRAY_8( uint8_t, pixv_buf,[2],[9][8*8] );
      uint8_t *src0[9];
      uint8_t *src1[9];
      uint8_t *pix  = &h->mb.pic.p_fdec[0][(i8>>1)*8*FDEC_STRIDE+(i8&1)*8];
diff --git a/encoder/slicetype.c b/encoder/slicetype.c

index e6af39737d8ca2d27d70c5498c06525795365e32..18309e4a63f5cf139add074500d3b860ea33a9b0 100644 (file)
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -122,7 +122,7 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, ui
      int i_lines = fenc->i_lines_lowres;
      int i_width = fenc->i_width_lowres;
      uint8_t *fenc_plane = fenc->lowres[0];
-    ALIGNED_8( uint8_t buf[8*8] );
+    ALIGNED_ARRAY_8( uint8_t, buf,[8*8] );
      int pixoff = 0;
      int i_mb = 0;
  
@@ -259,7 +259,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
      int16_t (*fenc_mvs[2])[2] = { &frames[b]->lowres_mvs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mvs[1][p1-b-1][i_mb_xy] };
      int (*fenc_costs[2]) = { &frames[b]->lowres_mv_costs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mv_costs[1][p1-b-1][i_mb_xy] };
  
-    ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] );
+    ALIGNED_ARRAY_8( uint8_t, pix1,[9*FDEC_STRIDE] );
      uint8_t *pix2 = pix1+8;
      x264_me_t m[2];
      int i_bcost = COST_MAX;
@@ -340,7 +340,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
      if( b_bidir )
      {
          int16_t *mvr = fref1->lowres_mvs[0][p1-p0-1][i_mb_xy];
-        ALIGNED_8( int16_t dmv[2][2] );
+        ALIGNED_ARRAY_8( int16_t, dmv,[2],[2] );
  
          m[1].i_pixel = PIXEL_8x8;
          m[1].p_cost_mv = a->p_cost_mv;
author	David Conrad <lessen42@gmail.com>
	Sat, 9 Jan 2010 06:52:33 +0000 (01:52 -0500)
committer	Fiona Glaser <fiona@x264.com>
	Thu, 14 Jan 2010 04:47:02 +0000 (23:47 -0500)
common/arm/mc-c.c		patch \| blob \| history
common/macroblock.c		patch \| blob \| history
common/osdep.h		patch \| blob \| history
encoder/analyse.c		patch \| blob \| history
encoder/me.c		patch \| blob \| history
encoder/slicetype.c		patch \| blob \| history