New type-punning macros perform write/read combining without aliasing violations, as permitted by the second-to-last bullet of 6.5 paragraph 7 in the C99 specification.
GCC 4.4, however, doesn't seem to have read this part of the spec and still warns as if the accesses were violations.
Regardless, it seems to fix all known aliasing miscompilations, so perhaps the GCC warning generator is just broken.
As such, add -Wno-strict-aliasing to CFLAGS.
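
For context, a minimal standalone sketch of the pattern (not part of the patch): the union type and the M32/CP32 macros mirror the definitions added below, while main() and the local variables are purely illustrative. Because every access goes through a union type that lists uint8_t and uint32_t among its members, GCC's aliasing analysis treats the 32-bit store and the byte accesses as may-alias, which is why the write-combining is safe in practice even though GCC 4.4 still warns. Note the pointers must already be suitably aligned; that is also why CPn is not implemented with memcpy.

    #include <stdint.h>
    #include <stdio.h>

    /* Same shape as the union added in this patch (standalone copy for illustration). */
    typedef union { uint32_t i; uint16_t b[2]; uint8_t c[4]; } x264_union32_t;
    #define M32(src)      (((x264_union32_t*)(src))->i)
    #define CP32(dst,src) (M32(dst) = M32(src))

    int main(void)
    {
        /* uint32_t backing storage guarantees the 4-byte alignment Mn/CPn require. */
        uint32_t src_w = 0, dst_w = 0;
        uint8_t *src = (uint8_t*)&src_w, *dst = (uint8_t*)&dst_w;
        int i;

        for( i = 0; i < 4; i++ )
            src[i] = i + 1;   /* byte-wise writes... */
        CP32( dst, src );     /* ...copied with one aligned 32-bit load + store */

        printf( "%d %d %d %d (match: %d)\n",
                dst[0], dst[1], dst[2], dst[3], M32( dst ) == M32( src ) );
        return 0;
    }
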
/* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32-bit aligned. */
static inline void bs_flush( bs_t *s )
{
- *(uint32_t*)s->p = endian_fix32( s->cur_bits << (s->i_left&31) );
+ M32( s->p ) = endian_fix32( s->cur_bits << (s->i_left&31) );
s->p += WORD_SIZE - s->i_left / 8;
s->i_left = WORD_SIZE*8;
}
if( s->i_left <= 32 )
{
#ifdef WORDS_BIGENDIAN
- *(uint32_t*)s->p = s->cur_bits >> (32 - s->i_left);
+ M32( s->p ) = s->cur_bits >> (32 - s->i_left);
#else
- *(uint32_t*)s->p = endian_fix( s->cur_bits << s->i_left );
+ M32( s->p ) = endian_fix( s->cur_bits << s->i_left );
#endif
s->i_left += 32;
s->p += 4;
{
i_count -= s->i_left;
s->cur_bits = (s->cur_bits << s->i_left) | (i_bits >> i_count);
- *(uint32_t*)s->p = endian_fix( s->cur_bits );
+ M32( s->p ) = endian_fix( s->cur_bits );
s->p += 4;
s->cur_bits = i_bits;
s->i_left = 32 - i_count;
s->i_left--;
if( s->i_left == WORD_SIZE*8-32 )
{
- *(uint32_t*)s->p = endian_fix32( s->cur_bits );
+ M32( s->p ) = endian_fix32( s->cur_bits );
s->p += 4;
s->i_left = WORD_SIZE*8;
}
#include <string.h>
#include <assert.h>
#include <limits.h>
+
+/* Unions for type-punning without aliasing violations.
+ * Mn: load or store n bits, aligned, native-endian
+ * CPn: copy n bits, aligned, native-endian
+ * we don't use memcpy for CPn because memcpy's args aren't assumed to be aligned */
+typedef union { uint16_t i; uint8_t c[2]; } x264_union16_t;
+typedef union { uint32_t i; uint16_t b[2]; uint8_t c[4]; } x264_union32_t;
+typedef union { uint64_t i; uint32_t a[2]; uint16_t b[4]; uint8_t c[8]; } x264_union64_t;
+#define M16(src) (((x264_union16_t*)(src))->i)
+#define M32(src) (((x264_union32_t*)(src))->i)
+#define M64(src) (((x264_union64_t*)(src))->i)
+#define CP16(dst,src) M16(dst) = M16(src)
+#define CP32(dst,src) M32(dst) = M32(src)
+#define CP64(dst,src) M64(dst) = M64(src)
+
#include "x264.h"
#include "bs.h"
#include "set.h"
static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[16] )
{
- *(uint32_t*)level = *(uint32_t*)dct;
+ CP32( level, dct );
ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1)
- *(uint32_t*)(level+6) = *(uint32_t*)(dct+6);
- *(uint64_t*)(level+8) = *(uint64_t*)(dct+8);
- *(uint64_t*)(level+12) = *(uint64_t*)(dct+12);
+ CP32( level+6, dct+6 );
+ CP64( level+8, dct+8 );
+ CP64( level+12, dct+12 );
}
#undef ZIG
nz |= level[i];\
}
#define COPY4x4\
- *(uint32_t*)(p_dst+0*FDEC_STRIDE) = *(uint32_t*)(p_src+0*FENC_STRIDE);\
- *(uint32_t*)(p_dst+1*FDEC_STRIDE) = *(uint32_t*)(p_src+1*FENC_STRIDE);\
- *(uint32_t*)(p_dst+2*FDEC_STRIDE) = *(uint32_t*)(p_src+2*FENC_STRIDE);\
- *(uint32_t*)(p_dst+3*FDEC_STRIDE) = *(uint32_t*)(p_src+3*FENC_STRIDE);
+ CP32( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
+ CP32( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
+ CP32( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
+ CP32( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );
#define COPY8x8\
- *(uint64_t*)(p_dst+0*FDEC_STRIDE) = *(uint64_t*)(p_src+0*FENC_STRIDE);\
- *(uint64_t*)(p_dst+1*FDEC_STRIDE) = *(uint64_t*)(p_src+1*FENC_STRIDE);\
- *(uint64_t*)(p_dst+2*FDEC_STRIDE) = *(uint64_t*)(p_src+2*FENC_STRIDE);\
- *(uint64_t*)(p_dst+3*FDEC_STRIDE) = *(uint64_t*)(p_src+3*FENC_STRIDE);\
- *(uint64_t*)(p_dst+4*FDEC_STRIDE) = *(uint64_t*)(p_src+4*FENC_STRIDE);\
- *(uint64_t*)(p_dst+5*FDEC_STRIDE) = *(uint64_t*)(p_src+5*FENC_STRIDE);\
- *(uint64_t*)(p_dst+6*FDEC_STRIDE) = *(uint64_t*)(p_src+6*FENC_STRIDE);\
- *(uint64_t*)(p_dst+7*FDEC_STRIDE) = *(uint64_t*)(p_src+7*FENC_STRIDE);
+ CP64( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
+ CP64( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
+ CP64( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
+ CP64( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );\
+ CP64( p_dst+4*FDEC_STRIDE, p_src+4*FENC_STRIDE );\
+ CP64( p_dst+5*FDEC_STRIDE, p_src+5*FENC_STRIDE );\
+ CP64( p_dst+6*FDEC_STRIDE, p_src+6*FENC_STRIDE );\
+ CP64( p_dst+7*FDEC_STRIDE, p_src+7*FENC_STRIDE );
static int zigzag_sub_4x4_frame( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
{
{\
/* *** Get bS for each 4px for the current edge *** */\
if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
- *(uint32_t*)bS = 0x03030303;\
+ M32( bS ) = 0x03030303;\
else\
{\
- *(uint32_t*)bS = 0x00000000;\
+ M32( bS ) = 0x00000000;\
for( i = 0; i < 4; i++ )\
{\
int x = i_dir == 0 ? i_edge : i;\
goto end##i_dir;\
}\
DEBLOCK_STRENGTH(i_dir);\
- if( *(uint32_t*)bS )\
+ if( M32( bS ) )\
FILTER_DIR( , i_dir);\
end##i_dir:\
i_edge += b_8x8_transform+1;\
for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
{\
DEBLOCK_STRENGTH(i_dir);\
- if( *(uint32_t*)bS )\
+ if( M32( bS ) )\
FILTER_DIR( , i_dir);\
}\
}
{
if( i_refb == i_ref )
{
- *(uint32_t*)mvp = *(uint32_t*)mv_b;
+ CP32( mvp, mv_b );
return;
}
}
{
if( i_refa == i_ref )
{
- *(uint32_t*)mvp = *(uint32_t*)mv_a;
+ CP32( mvp, mv_a );
return;
}
}
{
if( i_refa == i_ref )
{
- *(uint32_t*)mvp = *(uint32_t*)mv_a;
+ CP32( mvp, mv_a );
return;
}
}
{
if( i_refc == i_ref )
{
- *(uint32_t*)mvp = *(uint32_t*)mv_c;
+ CP32( mvp, mv_c );
return;
}
}
else if( i_count == 1 )
{
if( i_refa == i_ref )
- *(uint32_t*)mvp = *(uint32_t*)mv_a;
+ CP32( mvp, mv_a );
else if( i_refb == i_ref )
- *(uint32_t*)mvp = *(uint32_t*)mv_b;
+ CP32( mvp, mv_b );
else
- *(uint32_t*)mvp = *(uint32_t*)mv_c;
+ CP32( mvp, mv_c );
}
else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
- *(uint32_t*)mvp = *(uint32_t*)mv_a;
+ CP32( mvp, mv_a );
else
goto median;
}
else if( i_count == 1 )
{
if( i_refa == i_ref )
- *(uint32_t*)mvp = *(uint32_t*)mv_a;
+ CP32( mvp, mv_a );
else if( i_refb == i_ref )
- *(uint32_t*)mvp = *(uint32_t*)mv_b;
+ CP32( mvp, mv_b );
else
- *(uint32_t*)mvp = *(uint32_t*)mv_c;
+ CP32( mvp, mv_c );
}
else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
- *(uint32_t*)mvp = *(uint32_t*)mv_a;
+ CP32( mvp, mv_a );
else
goto median;
}
int16_t *mv_b = h->mb.cache.mv[0][X264_SCAN8_0 - 8];
if( i_refa == -2 || i_refb == -2 ||
- !( i_refa | *(uint32_t*)mv_a ) ||
- !( i_refb | *(uint32_t*)mv_b ) )
+ !( i_refa | M32( mv_a ) ) ||
+ !( i_refb | M32( mv_b ) ) )
{
- *(uint32_t*)mv = 0;
+ M32( mv ) = 0;
}
else
{
if( ref[0] >= 0 )
x264_mb_predict_mv_16x16( h, 0, ref[0], mv[0] );
else
- {
- mv[0][0] = 0;
- mv[0][1] = 0;
- }
+ M32( mv[0] ) = 0;
+
if( ref[1] >= 0 )
x264_mb_predict_mv_16x16( h, 1, ref[1], mv[1] );
else
- {
- mv[1][0] = 0;
- mv[1][1] = 0;
- }
+ M32( mv[1] ) = 0;
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] );
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] );
{
*b_changed = h->mb.cache.direct_ref[0][0] != h->mb.cache.ref[0][X264_SCAN8_0]
|| h->mb.cache.direct_ref[1][0] != h->mb.cache.ref[1][X264_SCAN8_0]
- || *(uint32_t*)h->mb.cache.direct_mv[0][X264_SCAN8_0] != *(uint32_t*)h->mb.cache.mv[0][X264_SCAN8_0]
- || *(uint32_t*)h->mb.cache.direct_mv[1][X264_SCAN8_0] != *(uint32_t*)h->mb.cache.mv[1][X264_SCAN8_0];
+ || M32( h->mb.cache.direct_mv[0][X264_SCAN8_0] ) != M32( h->mb.cache.mv[0][X264_SCAN8_0] )
+ || M32( h->mb.cache.direct_mv[1][X264_SCAN8_0] ) != M32( h->mb.cache.mv[1][X264_SCAN8_0] );
}
else
{
const int y = 2*(idx/2);
x264_macroblock_cache_ref( h, x, y, 2, 2, 0, h->mb.cache.direct_ref[0][idx] );
x264_macroblock_cache_ref( h, x, y, 2, 2, 1, h->mb.cache.direct_ref[1][idx] );
- *(uint64_t*)h->mb.cache.mv[0][x264_scan8[idx*4]] =
- *(uint64_t*)h->mb.cache.direct_mv[0][x264_scan8[idx*4]];
- *(uint64_t*)h->mb.cache.mv[0][x264_scan8[idx*4]+8] =
- *(uint64_t*)h->mb.cache.direct_mv[0][x264_scan8[idx*4]+8];
- *(uint64_t*)h->mb.cache.mv[1][x264_scan8[idx*4]] =
- *(uint64_t*)h->mb.cache.direct_mv[1][x264_scan8[idx*4]];
- *(uint64_t*)h->mb.cache.mv[1][x264_scan8[idx*4]+8] =
- *(uint64_t*)h->mb.cache.direct_mv[1][x264_scan8[idx*4]+8];
+ CP64( h->mb.cache.mv[0][x264_scan8[idx*4]+0], h->mb.cache.direct_mv[0][x264_scan8[idx*4]+0] );
+ CP64( h->mb.cache.mv[0][x264_scan8[idx*4]+8], h->mb.cache.direct_mv[0][x264_scan8[idx*4]+8] );
+ CP64( h->mb.cache.mv[1][x264_scan8[idx*4]+0], h->mb.cache.direct_mv[1][x264_scan8[idx*4]+0] );
+ CP64( h->mb.cache.mv[1][x264_scan8[idx*4]+8], h->mb.cache.direct_mv[1][x264_scan8[idx*4]+8] );
}
/* This just improves encoder performance, it's not part of the spec */
int i = 0;
#define SET_MVP(mvp) { \
- *(uint32_t*)mvc[i] = *(uint32_t*)mvp; \
+ CP32( mvc[i], mvp ); \
i++; \
}
{
int16_t (*lowres_mv)[2] = i_list ? h->fenc->lowres_mvs[1][h->fref1[0]->i_frame-h->fenc->i_frame-1]
: h->fenc->lowres_mvs[0][h->fenc->i_frame-h->fref0[0]->i_frame-1];
- if( lowres_mv[0][0] != 0x7fff ) *(uint32_t*)mvc[i++] = (*(uint32_t*)lowres_mv[h->mb.i_mb_xy]*2)&0xfffeffff;
+ if( lowres_mv[0][0] != 0x7fff )
+ {
+ M32( mvc[i] ) = (M32( lowres_mv[h->mb.i_mb_xy] )*2)&0xfffeffff;
+ i++;
+ }
}
/* spatial predictors */
h->mb.i_neighbour_intra |= MB_TOP;
/* load intra4x4 */
- *(uint32_t*)&h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] = *(uint32_t*)&h->mb.intra4x4_pred_mode[i_top_xy][0];
+ CP32( &h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8], &h->mb.intra4x4_pred_mode[i_top_xy][0] );
/* load non_zero_count */
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[0] - 8] = *(uint32_t*)&h->mb.non_zero_count[i_top_xy][12];
+ CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &h->mb.non_zero_count[i_top_xy][12] );
/* shift because x264_scan8[16] is misaligned */
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16] - 9] = *(uint16_t*)&h->mb.non_zero_count[i_top_xy][18] << 8;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] = *(uint16_t*)&h->mb.non_zero_count[i_top_xy][22] << 8;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16+0] - 9] ) = M16( &h->mb.non_zero_count[i_top_xy][18] ) << 8;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] ) = M16( &h->mb.non_zero_count[i_top_xy][22] ) << 8;
}
else
{
h->mb.cache.i_cbp_top = -1;
/* load intra4x4 */
- *(uint32_t*)&h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] = 0xFFFFFFFFU;
+ M32( &h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] ) = 0xFFFFFFFFU;
/* load non_zero_count */
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[0] - 8] =
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16+0] - 9] =
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] = 0x80808080U;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 0] - 8] ) = 0x80808080U;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16+0] - 9] ) = 0x80808080U;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] ) = 0x80808080U;
}
if( i_mb_x > 0 && i_mb_xy > h->sh.i_first_mb )
const int ir = i_top_8x8 - 1;
const int iv = i_top_4x4 - 1;
h->mb.cache.ref[i_list][i8] = h->mb.ref[i_list][ir];
- *(uint32_t*)h->mb.cache.mv[i_list][i8] = *(uint32_t*)h->mb.mv[i_list][iv];
+ CP32( h->mb.cache.mv[i_list][i8], h->mb.mv[i_list][iv] );
}
else
{
const int i8 = x264_scan8[0] - 1 - 1*8;
h->mb.cache.ref[i_list][i8] = -2;
- *(uint32_t*)h->mb.cache.mv[i_list][i8] = 0;
+ M32( h->mb.cache.mv[i_list][i8] ) = 0;
}
if( h->mb.i_neighbour & MB_TOP )
h->mb.cache.ref[i_list][i8+1] = h->mb.ref[i_list][ir + 0];
h->mb.cache.ref[i_list][i8+2] =
h->mb.cache.ref[i_list][i8+3] = h->mb.ref[i_list][ir + 1];
- *(uint64_t*)h->mb.cache.mv[i_list][i8+0] = *(uint64_t*)h->mb.mv[i_list][iv+0];
- *(uint64_t*)h->mb.cache.mv[i_list][i8+2] = *(uint64_t*)h->mb.mv[i_list][iv+2];
+ CP64( h->mb.cache.mv[i_list][i8+0], h->mb.mv[i_list][iv+0] );
+ CP64( h->mb.cache.mv[i_list][i8+2], h->mb.mv[i_list][iv+2] );
}
else
{
const int i8 = x264_scan8[0] - 8;
- *(uint64_t*)h->mb.cache.mv[i_list][i8+0] = 0;
- *(uint64_t*)h->mb.cache.mv[i_list][i8+2] = 0;
- *(uint32_t*)&h->mb.cache.ref[i_list][i8] = (uint8_t)(-2) * 0x01010101U;
+ M64( h->mb.cache.mv[i_list][i8+0] ) = 0;
+ M64( h->mb.cache.mv[i_list][i8+2] ) = 0;
+ M32( &h->mb.cache.ref[i_list][i8] ) = (uint8_t)(-2) * 0x01010101U;
}
if( h->mb.i_neighbour & MB_TOPRIGHT )
const int ir = i_top_8x8 + 2;
const int iv = i_top_4x4 + 4;
h->mb.cache.ref[i_list][i8] = h->mb.ref[i_list][ir];
- *(uint32_t*)h->mb.cache.mv[i_list][i8] = *(uint32_t*)h->mb.mv[i_list][iv];
+ CP32( h->mb.cache.mv[i_list][i8], h->mb.mv[i_list][iv] );
}
else
{
const int i8 = x264_scan8[0] + 4 - 1*8;
h->mb.cache.ref[i_list][i8] = -2;
- *(uint32_t*)h->mb.cache.mv[i_list][i8] = 0;
+ M32( h->mb.cache.mv[i_list][i8] ) = 0;
}
if( h->mb.i_neighbour & MB_LEFT )
h->mb.cache.ref[i_list][i8+2*8] =
h->mb.cache.ref[i_list][i8+3*8] = h->mb.ref[i_list][ir + 1*s8x8];
- *(uint32_t*)h->mb.cache.mv[i_list][i8+0*8] = *(uint32_t*)h->mb.mv[i_list][iv + 0*s4x4];
- *(uint32_t*)h->mb.cache.mv[i_list][i8+1*8] = *(uint32_t*)h->mb.mv[i_list][iv + 1*s4x4];
- *(uint32_t*)h->mb.cache.mv[i_list][i8+2*8] = *(uint32_t*)h->mb.mv[i_list][iv + 2*s4x4];
- *(uint32_t*)h->mb.cache.mv[i_list][i8+3*8] = *(uint32_t*)h->mb.mv[i_list][iv + 3*s4x4];
+ CP32( h->mb.cache.mv[i_list][i8+0*8], h->mb.mv[i_list][iv + 0*s4x4] );
+ CP32( h->mb.cache.mv[i_list][i8+1*8], h->mb.mv[i_list][iv + 1*s4x4] );
+ CP32( h->mb.cache.mv[i_list][i8+2*8], h->mb.mv[i_list][iv + 2*s4x4] );
+ CP32( h->mb.cache.mv[i_list][i8+3*8], h->mb.mv[i_list][iv + 3*s4x4] );
}
else
{
for( i = 0; i < 4; i++ )
{
h->mb.cache.ref[i_list][i8+i*8] = -2;
- *(uint32_t*)h->mb.cache.mv[i_list][i8+i*8] = 0;
+ M32( h->mb.cache.mv[i_list][i8+i*8] ) = 0;
}
}
{
const int i8 = x264_scan8[0] - 8;
const int iv = i_top_4x4;
- *(uint64_t*)h->mb.cache.mvd[i_list][i8+0] = *(uint64_t*)h->mb.mvd[i_list][iv+0];
- *(uint64_t*)h->mb.cache.mvd[i_list][i8+2] = *(uint64_t*)h->mb.mvd[i_list][iv+2];
+ CP64( h->mb.cache.mvd[i_list][i8+0], h->mb.mvd[i_list][iv+0] );
+ CP64( h->mb.cache.mvd[i_list][i8+2], h->mb.mvd[i_list][iv+2] );
}
else
{
const int i8 = x264_scan8[0] - 8;
- *(uint64_t*)h->mb.cache.mvd[i_list][i8+0] =
- *(uint64_t*)h->mb.cache.mvd[i_list][i8+2] = 0;
+ M64( h->mb.cache.mvd[i_list][i8+0] ) = 0;
+ M64( h->mb.cache.mvd[i_list][i8+2] ) = 0;
}
if( i_left_type >= 0 )
{
const int i8 = x264_scan8[0] - 1;
const int iv = i_mb_4x4 - 1;
- *(uint32_t*)h->mb.cache.mvd[i_list][i8+0*8] = *(uint32_t*)h->mb.mvd[i_list][iv + 0*s4x4];
- *(uint32_t*)h->mb.cache.mvd[i_list][i8+1*8] = *(uint32_t*)h->mb.mvd[i_list][iv + 1*s4x4];
- *(uint32_t*)h->mb.cache.mvd[i_list][i8+2*8] = *(uint32_t*)h->mb.mvd[i_list][iv + 2*s4x4];
- *(uint32_t*)h->mb.cache.mvd[i_list][i8+3*8] = *(uint32_t*)h->mb.mvd[i_list][iv + 3*s4x4];
+ CP32( h->mb.cache.mvd[i_list][i8+0*8], h->mb.mvd[i_list][iv + 0*s4x4] );
+ CP32( h->mb.cache.mvd[i_list][i8+1*8], h->mb.mvd[i_list][iv + 1*s4x4] );
+ CP32( h->mb.cache.mvd[i_list][i8+2*8], h->mb.mvd[i_list][iv + 2*s4x4] );
+ CP32( h->mb.cache.mvd[i_list][i8+3*8], h->mb.mvd[i_list][iv + 3*s4x4] );
}
else
{
const int i8 = x264_scan8[0] - 1;
for( i = 0; i < 4; i++ )
- *(uint32_t*)h->mb.cache.mvd[i_list][i8+i*8] = 0;
+ M32( h->mb.cache.mvd[i_list][i8+i*8] ) = 0;
}
}
}
/* save intra4x4 */
if( i_mb_type == I_4x4 )
{
- *(uint32_t*)&intra4x4_pred_mode[0] = *(uint32_t*)&h->mb.cache.intra4x4_pred_mode[x264_scan8[10] ];
- *(uint32_t*)&intra4x4_pred_mode[4] = pack8to32(h->mb.cache.intra4x4_pred_mode[x264_scan8[5] ],
- h->mb.cache.intra4x4_pred_mode[x264_scan8[7] ],
- h->mb.cache.intra4x4_pred_mode[x264_scan8[13] ], 0);
+ CP32( &intra4x4_pred_mode[0], &h->mb.cache.intra4x4_pred_mode[x264_scan8[10]] );
+ M32( &intra4x4_pred_mode[4] ) = pack8to32(h->mb.cache.intra4x4_pred_mode[x264_scan8[5] ],
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[7] ],
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[13] ], 0);
}
else if( !h->param.b_constrained_intra || IS_INTRA(i_mb_type) )
- *(uint64_t*)intra4x4_pred_mode = I_PRED_4x4_DC * 0x0101010101010101ULL;
+ M64( intra4x4_pred_mode ) = I_PRED_4x4_DC * 0x0101010101010101ULL;
else
- *(uint64_t*)intra4x4_pred_mode = (uint8_t)(-1) * 0x0101010101010101ULL;
+ M64( intra4x4_pred_mode ) = (uint8_t)(-1) * 0x0101010101010101ULL;
if( i_mb_type == I_PCM )
else
{
/* save non zero count */
- *(uint32_t*)&non_zero_count[0*4] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[0]+0*8];
- *(uint32_t*)&non_zero_count[1*4] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[0]+1*8];
- *(uint32_t*)&non_zero_count[2*4] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[0]+2*8];
- *(uint32_t*)&non_zero_count[3*4] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[0]+3*8];
- *(uint16_t*)&non_zero_count[16+0*2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16+0*2]-1] >> 8;
- *(uint16_t*)&non_zero_count[16+1*2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16+1*2]-1] >> 8;
- *(uint16_t*)&non_zero_count[16+2*2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16+2*2]-1] >> 8;
- *(uint16_t*)&non_zero_count[16+3*2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[16+3*2]-1] >> 8;
+ CP32( &non_zero_count[0*4], &h->mb.cache.non_zero_count[x264_scan8[0]+0*8] );
+ CP32( &non_zero_count[1*4], &h->mb.cache.non_zero_count[x264_scan8[0]+1*8] );
+ CP32( &non_zero_count[2*4], &h->mb.cache.non_zero_count[x264_scan8[0]+2*8] );
+ CP32( &non_zero_count[3*4], &h->mb.cache.non_zero_count[x264_scan8[0]+3*8] );
+ M16( &non_zero_count[16+0*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+0*2]-1] ) >> 8;
+ M16( &non_zero_count[16+1*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+1*2]-1] ) >> 8;
+ M16( &non_zero_count[16+2*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+2*2]-1] ) >> 8;
+ M16( &non_zero_count[16+3*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+3*2]-1] ) >> 8;
if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
h->mb.i_qp = h->mb.i_last_qp;
h->mb.ref[0][i_mb_8x8+1+1*s8x8] = h->mb.cache.ref[0][x264_scan8[12]];
for( y = 0; y < 4; y++ )
{
- *(uint64_t*)h->mb.mv[0][i_mb_4x4+y*s4x4+0] = *(uint64_t*)h->mb.cache.mv[0][x264_scan8[0]+8*y+0];
- *(uint64_t*)h->mb.mv[0][i_mb_4x4+y*s4x4+2] = *(uint64_t*)h->mb.cache.mv[0][x264_scan8[0]+8*y+2];
+ CP64( h->mb.mv[0][i_mb_4x4+y*s4x4+0], h->mb.cache.mv[0][x264_scan8[0]+8*y+0] );
+ CP64( h->mb.mv[0][i_mb_4x4+y*s4x4+2], h->mb.cache.mv[0][x264_scan8[0]+8*y+2] );
}
if( h->sh.i_type == SLICE_TYPE_B )
{
h->mb.ref[1][i_mb_8x8+1+1*s8x8] = h->mb.cache.ref[1][x264_scan8[12]];
for( y = 0; y < 4; y++ )
{
- *(uint64_t*)h->mb.mv[1][i_mb_4x4+y*s4x4+0] = *(uint64_t*)h->mb.cache.mv[1][x264_scan8[0]+8*y+0];
- *(uint64_t*)h->mb.mv[1][i_mb_4x4+y*s4x4+2] = *(uint64_t*)h->mb.cache.mv[1][x264_scan8[0]+8*y+2];
+ CP64( h->mb.mv[1][i_mb_4x4+y*s4x4+0], h->mb.cache.mv[1][x264_scan8[0]+8*y+0] );
+ CP64( h->mb.mv[1][i_mb_4x4+y*s4x4+2], h->mb.cache.mv[1][x264_scan8[0]+8*y+2] );
}
}
}
int i_list;
for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2 : 1 ); i_list++ )
{
- *(uint16_t*)&h->mb.ref[i_list][i_mb_8x8+0*s8x8] = (uint8_t)(-1) * 0x0101;
- *(uint16_t*)&h->mb.ref[i_list][i_mb_8x8+1*s8x8] = (uint8_t)(-1) * 0x0101;
+ M16( &h->mb.ref[i_list][i_mb_8x8+0*s8x8] ) = (uint8_t)(-1) * 0x0101;
+ M16( &h->mb.ref[i_list][i_mb_8x8+1*s8x8] ) = (uint8_t)(-1) * 0x0101;
for( y = 0; y < 4; y++ )
{
- *(uint64_t*)h->mb.mv[i_list][i_mb_4x4+y*s4x4+0] = 0;
- *(uint64_t*)h->mb.mv[i_list][i_mb_4x4+y*s4x4+2] = 0;
+ M64( h->mb.mv[i_list][i_mb_4x4+y*s4x4+0] ) = 0;
+ M64( h->mb.mv[i_list][i_mb_4x4+y*s4x4+2] ) = 0;
}
}
}
{
for( y = 0; y < 4; y++ )
{
- *(uint64_t*)h->mb.mvd[0][i_mb_4x4+y*s4x4+0] = *(uint64_t*)h->mb.cache.mvd[0][x264_scan8[0]+8*y+0];
- *(uint64_t*)h->mb.mvd[0][i_mb_4x4+y*s4x4+2] = *(uint64_t*)h->mb.cache.mvd[0][x264_scan8[0]+8*y+2];
+ CP64( h->mb.mvd[0][i_mb_4x4+y*s4x4+0], h->mb.cache.mvd[0][x264_scan8[0]+8*y+0] );
+ CP64( h->mb.mvd[0][i_mb_4x4+y*s4x4+2], h->mb.cache.mvd[0][x264_scan8[0]+8*y+2] );
}
if( h->sh.i_type == SLICE_TYPE_B )
for( y = 0; y < 4; y++ )
{
- *(uint64_t*)h->mb.mvd[1][i_mb_4x4+y*s4x4+0] = *(uint64_t*)h->mb.cache.mvd[1][x264_scan8[0]+8*y+0];
- *(uint64_t*)h->mb.mvd[1][i_mb_4x4+y*s4x4+2] = *(uint64_t*)h->mb.cache.mvd[1][x264_scan8[0]+8*y+2];
+ CP64( h->mb.mvd[1][i_mb_4x4+y*s4x4+0], h->mb.cache.mvd[1][x264_scan8[0]+8*y+0] );
+ CP64( h->mb.mvd[1][i_mb_4x4+y*s4x4+2], h->mb.cache.mvd[1][x264_scan8[0]+8*y+2] );
}
}
else
{
for( y = 0; y < 4; y++ )
{
- *(uint64_t*)h->mb.mvd[0][i_mb_4x4+y*s4x4+0] = 0;
- *(uint64_t*)h->mb.mvd[0][i_mb_4x4+y*s4x4+2] = 0;
+ M64( h->mb.mvd[0][i_mb_4x4+y*s4x4+0] ) = 0;
+ M64( h->mb.mvd[0][i_mb_4x4+y*s4x4+2] ) = 0;
}
if( h->sh.i_type == SLICE_TYPE_B )
for( y = 0; y < 4; y++ )
{
- *(uint64_t*)h->mb.mvd[1][i_mb_4x4+y*s4x4+0] = 0;
- *(uint64_t*)h->mb.mvd[1][i_mb_4x4+y*s4x4+2] = 0;
+ M64( h->mb.mvd[1][i_mb_4x4+y*s4x4+0] ) = 0;
+ M64( h->mb.mvd[1][i_mb_4x4+y*s4x4+2] ) = 0;
}
}
}
static ALWAYS_INLINE void x264_macroblock_cache_rect1( void *dst, int width, int height, uint8_t val )
{
+ uint32_t *d = dst;
if( width == 4 )
{
uint32_t val2 = val * 0x01010101;
- ((uint32_t*)dst)[0] = val2;
- if( height >= 2 ) ((uint32_t*)dst)[2] = val2;
- if( height == 4 ) ((uint32_t*)dst)[4] = val2;
- if( height == 4 ) ((uint32_t*)dst)[6] = val2;
+ M32( d+0 ) = val2;
+ if( height >= 2 ) M32( d+2 ) = val2;
+ if( height == 4 ) M32( d+4 ) = val2;
+ if( height == 4 ) M32( d+6 ) = val2;
}
else // 2
{
uint32_t val2 = val * 0x0101;
- ((uint16_t*)dst)[ 0] = val2;
- if( height >= 2 ) ((uint16_t*)dst)[ 4] = val2;
- if( height == 4 ) ((uint16_t*)dst)[ 8] = val2;
- if( height == 4 ) ((uint16_t*)dst)[12] = val2;
+ M16( d+0 ) = val2;
+ if( height >= 2 ) M16( d+2 ) = val2;
+ if( height == 4 ) M16( d+4 ) = val2;
+ if( height == 4 ) M16( d+6 ) = val2;
}
}
static ALWAYS_INLINE void x264_macroblock_cache_rect4( void *dst, int width, int height, uint32_t val )
int dy;
if( width == 1 || WORD_SIZE < 8 )
{
+ uint32_t *d = dst;
for( dy = 0; dy < height; dy++ )
{
- ((uint32_t*)dst)[8*dy+0] = val;
- if( width >= 2 ) ((uint32_t*)dst)[8*dy+1] = val;
- if( width == 4 ) ((uint32_t*)dst)[8*dy+2] = val;
- if( width == 4 ) ((uint32_t*)dst)[8*dy+3] = val;
+ M32( d+8*dy+0 ) = val;
+ if( width >= 2 ) M32( d+8*dy+1 ) = val;
+ if( width == 4 ) M32( d+8*dy+2 ) = val;
+ if( width == 4 ) M32( d+8*dy+3 ) = val;
}
}
else
{
uint64_t val64 = val + ((uint64_t)val<<32);
+ uint64_t *d = dst;
for( dy = 0; dy < height; dy++ )
{
- ((uint64_t*)dst)[4*dy+0] = val64;
- if( width == 4 ) ((uint64_t*)dst)[4*dy+1] = val64;
+ M64( d+4*dy+0 ) = val64;
+ if( width == 4 ) M64( d+4*dy+1 ) = val64;
}
}
}
-#define x264_macroblock_cache_mv_ptr(a,x,y,w,h,l,mv) x264_macroblock_cache_mv(a,x,y,w,h,l,*(uint32_t*)mv)
+#define x264_macroblock_cache_mv_ptr( a, x, y, w, h, l, mv ) x264_macroblock_cache_mv( a, x, y, w, h, l, M32( mv ) )
static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv )
{
x264_macroblock_cache_rect4( &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y], width, height, mv );
return 0;
if( h->mb.i_type != P_8x8 )
return partition_tab[h->mb.i_type];
- return *(uint32_t*)h->mb.i_sub_partition == D_L0_8x8*0x01010101;
+ return M32( h->mb.i_sub_partition ) == D_L0_8x8*0x01010101;
}
#endif
#define PREDICT_16x16_DC(v) \
for( i = 0; i < 16; i++ )\
{\
- uint32_t *p = (uint32_t*)src;\
- *p++ = v;\
- *p++ = v;\
- *p++ = v;\
- *p++ = v;\
+ M32( src+ 0 ) = v;\
+ M32( src+ 4 ) = v;\
+ M32( src+ 8 ) = v;\
+ M32( src+12 ) = v;\
src += FDEC_STRIDE;\
}
for( i = 0; i < 16; i++ )
{
const uint32_t v = 0x01010101 * src[-1];
- uint32_t *p = (uint32_t*)src;
-
- *p++ = v;
- *p++ = v;
- *p++ = v;
- *p++ = v;
-
+ M32( src+ 0 ) = v;
+ M32( src+ 4 ) = v;
+ M32( src+ 8 ) = v;
+ M32( src+12 ) = v;
src += FDEC_STRIDE;
}
}
static void predict_16x16_v( uint8_t *src )
{
- uint32_t v0 = *(uint32_t*)&src[ 0-FDEC_STRIDE];
- uint32_t v1 = *(uint32_t*)&src[ 4-FDEC_STRIDE];
- uint32_t v2 = *(uint32_t*)&src[ 8-FDEC_STRIDE];
- uint32_t v3 = *(uint32_t*)&src[12-FDEC_STRIDE];
+ uint32_t v0 = M32( &src[ 0-FDEC_STRIDE] );
+ uint32_t v1 = M32( &src[ 4-FDEC_STRIDE] );
+ uint32_t v2 = M32( &src[ 8-FDEC_STRIDE] );
+ uint32_t v3 = M32( &src[12-FDEC_STRIDE] );
int i;
for( i = 0; i < 16; i++ )
{
- uint32_t *p = (uint32_t*)src;
- *p++ = v0;
- *p++ = v1;
- *p++ = v2;
- *p++ = v3;
+ M32( src+ 0 ) = v0;
+ M32( src+ 4 ) = v1;
+ M32( src+ 8 ) = v2;
+ M32( src+12 ) = v3;
src += FDEC_STRIDE;
}
}
for( y = 0; y < 8; y++ )
{
- uint32_t *p = (uint32_t*)src;
- *p++ = 0x80808080;
- *p++ = 0x80808080;
+ M32( src+0 ) = 0x80808080;
+ M32( src+4 ) = 0x80808080;
src += FDEC_STRIDE;
}
}
for( y = 0; y < 4; y++ )
{
- uint32_t *p = (uint32_t*)src;
- *p++ = dc0;
- *p++ = dc0;
+ M32( src+0 ) = dc0;
+ M32( src+4 ) = dc0;
src += FDEC_STRIDE;
}
for( y = 0; y < 4; y++ )
{
- uint32_t *p = (uint32_t*)src;
- *p++ = dc1;
- *p++ = dc1;
+ M32( src+0 ) = dc1;
+ M32( src+4 ) = dc1;
src += FDEC_STRIDE;
}
for( y = 0; y < 8; y++ )
{
- uint32_t *p = (uint32_t*)src;
- *p++ = dc0;
- *p++ = dc1;
+ M32( src+0 ) = dc0;
+ M32( src+4 ) = dc1;
src += FDEC_STRIDE;
}
}
for( y = 0; y < 4; y++ )
{
- uint32_t *p = (uint32_t*)src;
- *p++ = dc0;
- *p++ = dc1;
+ M32( src+0 ) = dc0;
+ M32( src+4 ) = dc1;
src += FDEC_STRIDE;
}
for( y = 0; y < 4; y++ )
{
- uint32_t *p = (uint32_t*)src;
- *p++ = dc2;
- *p++ = dc3;
+ M32( src+0 ) = dc2;
+ M32( src+4 ) = dc3;
src += FDEC_STRIDE;
}
}
for( i = 0; i < 8; i++ )
{
uint32_t v = 0x01010101 * src[-1];
- uint32_t *p = (uint32_t*)src;
- *p++ = v;
- *p++ = v;
+ M32( src+0 ) = v;
+ M32( src+4 ) = v;
src += FDEC_STRIDE;
}
}
static void predict_8x8c_v( uint8_t *src )
{
- uint32_t v0 = *(uint32_t*)&src[0-FDEC_STRIDE];
- uint32_t v1 = *(uint32_t*)&src[4-FDEC_STRIDE];
+ uint32_t v0 = M32( src+0-FDEC_STRIDE );
+ uint32_t v1 = M32( src+4-FDEC_STRIDE );
int i;
for( i = 0; i < 8; i++ )
{
- uint32_t *p = (uint32_t*)src;
- *p++ = v0;
- *p++ = v1;
+ M32( src+0 ) = v0;
+ M32( src+4 ) = v1;
src += FDEC_STRIDE;
}
}
****************************************************************************/
#define SRC(x,y) src[(x)+(y)*FDEC_STRIDE]
-#define SRC32(x,y) *(uint32_t*)&SRC(x,y)
+#define SRC32(x,y) M32( &SRC(x,y) )
#define PREDICT_4x4_DC(v)\
SRC32(0,0) = SRC32(0,1) = SRC32(0,2) = SRC32(0,3) = v;
}
else
{
- *(uint64_t*)(edge+24) = SRC(7,-1) * 0x0101010101010101ULL;
+ M64( edge+24 ) = SRC(7,-1) * 0x0101010101010101ULL;
edge[32] = SRC(7,-1);
}
}
#define PREDICT_8x8_DC(v) \
int y; \
for( y = 0; y < 8; y++ ) { \
- ((uint32_t*)src)[0] = \
- ((uint32_t*)src)[1] = v; \
+ M32( src+0 ) = v; \
+ M32( src+4 ) = v; \
src += FDEC_STRIDE; \
}
static void predict_8x8_h( uint8_t *src, uint8_t edge[33] )
{
PREDICT_8x8_LOAD_LEFT
-#define ROW(y) ((uint32_t*)(src+y*FDEC_STRIDE))[0] =\
- ((uint32_t*)(src+y*FDEC_STRIDE))[1] = 0x01010101U * l##y
+#define ROW(y) M32( src+y*FDEC_STRIDE+0 ) =\
+ M32( src+y*FDEC_STRIDE+4 ) = 0x01010101U * l##y;
ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
#undef ROW
}
static void predict_8x8_v( uint8_t *src, uint8_t edge[33] )
{
- const uint64_t top = *(uint64_t*)(edge+16);
+ const uint64_t top = M64( edge+16 );
int y;
for( y = 0; y < 8; y++ )
- *(uint64_t*)(src+y*FDEC_STRIDE) = top;
+ M64( src+y*FDEC_STRIDE ) = top;
}
static void predict_8x8_ddl( uint8_t *src, uint8_t edge[33] )
{
int idx = i_max - 1;
/* Yes, dct[idx-1] is guaranteed to be 32-bit aligned. idx>=0 instead of 1 works correctly for the same reason */
- while( idx >= 0 && *(uint32_t*)&dct[idx-1] == 0 )
+ while( idx >= 0 && M32( &dct[idx-1] ) == 0 )
idx -= 2;
if( idx >= 0 && dct[idx] == 0 )
idx--;
{
int i_last;
for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
- if( *(uint64_t*)(l+i_last-3) )
+ if( M64( l+i_last-3 ) )
break;
while( i_last >= 0 && l[i_last] == 0 )
i_last--;
for( y = 0; y < 4; y++ )
{
- *(uint64_t*)src = dc0;
+ M64( src ) = dc0;
src += FDEC_STRIDE;
}
for( y = 0; y < 4; y++ )
{
- *(uint64_t*)src = dc1;
+ M64( src ) = dc1;
src += FDEC_STRIDE;
}
#define PREDICT_8x8_DC(v) \
int y; \
for( y = 0; y < 8; y++ ) { \
- ((uint32_t*)src)[0] = \
- ((uint32_t*)src)[1] = v; \
+ M32( src+0 ) = v; \
+ M32( src+4 ) = v; \
src += FDEC_STRIDE; \
}
"pminsw %%mm2, %%mm0 \n"
"pmaxsw %%mm1, %%mm0 \n"
"movd %%mm0, %0 \n"
- :"=m"(*(uint32_t*)dst)
- :"m"(*(uint32_t*)a), "m"(*(uint32_t*)b), "m"(*(uint32_t*)c)
+ :"=m"(*(x264_union32_t*)dst)
+ :"m"(M32( a )), "m"(M32( b )), "m"(M32( c ))
);
}
#define x264_predictor_difference x264_predictor_difference_mmxext
"jg 1b \n"
"movq %%mm4, %0 \n"
:"=m"(output), "+r"(i_mvc)
- :"r"(mvc), "m"(*(struct {int16_t x[4];} *)mvc)
+ :"r"(mvc), "m"(M64( mvc ))
);
sum += output[0] + output[1] + output[2] + output[3];
return sum;
"pminsw %5, %%mm0 \n"
"movd %%mm0, %0 \n"
:"=r"(amvd)
- :"m"(*(uint32_t*)mvdleft),"m"(*(uint32_t*)mvdtop),
+ :"m"(M32( mvdleft )),"m"(M32( mvdtop )),
"m"(pw_28),"m"(pw_2184),"m"(pw_2)
);
return amvd;
if( h->mb.i_skip_intra )
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.i8x8_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
- h->mb.pic.i8x8_nnz_buf[0] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]];
- h->mb.pic.i8x8_nnz_buf[1] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]];
- h->mb.pic.i8x8_nnz_buf[2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]];
- h->mb.pic.i8x8_nnz_buf[3] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]];
+ h->mb.pic.i8x8_nnz_buf[0] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] );
+ h->mb.pic.i8x8_nnz_buf[1] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
+ h->mb.pic.i8x8_nnz_buf[2] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] );
+ h->mb.pic.i8x8_nnz_buf[3] = M32( &h->mb.cache.non_zero_count[x264_scan8[10]] );
h->mb.pic.i8x8_cbp = h->mb.i_cbp_luma;
if( h->mb.i_skip_intra == 2 )
h->mc.memcpy_aligned( h->mb.pic.i8x8_dct_buf, h->dct.luma8x8, sizeof(h->mb.pic.i8x8_dct_buf) );
if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
/* emulate missing topright samples */
- *(uint32_t*) &p_dst_by[4 - FDEC_STRIDE] = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;
+ M32( &p_dst_by[4 - FDEC_STRIDE] ) = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;
if( b_merged_satd && i_max >= 6 )
{
if( h->mb.i_skip_intra )
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
- h->mb.pic.i4x4_nnz_buf[0] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]];
- h->mb.pic.i4x4_nnz_buf[1] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]];
- h->mb.pic.i4x4_nnz_buf[2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]];
- h->mb.pic.i4x4_nnz_buf[3] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]];
+ h->mb.pic.i4x4_nnz_buf[0] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] );
+ h->mb.pic.i4x4_nnz_buf[1] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
+ h->mb.pic.i4x4_nnz_buf[2] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] );
+ h->mb.pic.i4x4_nnz_buf[3] = M32( &h->mb.cache.non_zero_count[x264_scan8[10]] );
h->mb.pic.i4x4_cbp = h->mb.i_cbp_luma;
if( h->mb.i_skip_intra == 2 )
h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.luma4x4, sizeof(h->mb.pic.i4x4_dct_buf) );
if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
/* emulate missing topright samples */
- *(uint32_t*) &p_dst_by[4 - FDEC_STRIDE] = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;
+ M32( &p_dst_by[4 - FDEC_STRIDE] ) = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;
for( i = 0; i < i_max; i++ )
{
{
a->i_predict4x4[idx] = i_mode;
i_best = i_satd;
- pels[0] = *(uint32_t*)(p_dst_by+0*FDEC_STRIDE);
- pels[1] = *(uint32_t*)(p_dst_by+1*FDEC_STRIDE);
- pels[2] = *(uint32_t*)(p_dst_by+2*FDEC_STRIDE);
- pels[3] = *(uint32_t*)(p_dst_by+3*FDEC_STRIDE);
+ pels[0] = M32( p_dst_by+0*FDEC_STRIDE );
+ pels[1] = M32( p_dst_by+1*FDEC_STRIDE );
+ pels[2] = M32( p_dst_by+2*FDEC_STRIDE );
+ pels[3] = M32( p_dst_by+3*FDEC_STRIDE );
i_nnz = h->mb.cache.non_zero_count[x264_scan8[idx]];
}
}
- *(uint32_t*)(p_dst_by+0*FDEC_STRIDE) = pels[0];
- *(uint32_t*)(p_dst_by+1*FDEC_STRIDE) = pels[1];
- *(uint32_t*)(p_dst_by+2*FDEC_STRIDE) = pels[2];
- *(uint32_t*)(p_dst_by+3*FDEC_STRIDE) = pels[3];
+ M32( p_dst_by+0*FDEC_STRIDE ) = pels[0];
+ M32( p_dst_by+1*FDEC_STRIDE ) = pels[1];
+ M32( p_dst_by+2*FDEC_STRIDE ) = pels[2];
+ M32( p_dst_by+3*FDEC_STRIDE ) = pels[3];
h->mb.cache.non_zero_count[x264_scan8[idx]] = i_nnz;
h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx];
cbp_luma_new = h->mb.i_cbp_luma;
i_best = i_satd;
- pels_h = *(uint64_t*)(p_dst_by+7*FDEC_STRIDE);
+ pels_h = M64( p_dst_by+7*FDEC_STRIDE );
if( !(idx&1) )
for( j=0; j<7; j++ )
pels_v[j] = p_dst_by[7+j*FDEC_STRIDE];
- i_nnz[0] = *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[4*idx+0]];
- i_nnz[1] = *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[4*idx+2]];
+ i_nnz[0] = M16( &h->mb.cache.non_zero_count[x264_scan8[4*idx+0]] );
+ i_nnz[1] = M16( &h->mb.cache.non_zero_count[x264_scan8[4*idx+2]] );
}
}
a->i_cbp_i8x8_luma = cbp_luma_new;
- *(uint64_t*)(p_dst_by+7*FDEC_STRIDE) = pels_h;
+ M64( p_dst_by+7*FDEC_STRIDE ) = pels_h;
if( !(idx&1) )
for( j=0; j<7; j++ )
p_dst_by[7+j*FDEC_STRIDE] = pels_v[j];
- *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[4*idx+0]] = i_nnz[0];
- *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[4*idx+2]] = i_nnz[1];
+ M16( &h->mb.cache.non_zero_count[x264_scan8[4*idx+0]] ) = i_nnz[0];
+ M16( &h->mb.cache.non_zero_count[x264_scan8[4*idx+2]] ) = i_nnz[1];
x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[idx] );
}
h->mc.memcpy_aligned( &a->l0.me16x16, &m, sizeof(x264_me_t) );
/* save mv for predicting neighbors */
- *(uint32_t*)a->l0.mvc[i_ref][0] =
- *(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
+ CP32( a->l0.mvc[i_ref][0], m.mv );
+ CP32( h->mb.mvr[0][i_ref][h->mb.i_mb_xy], m.mv );
}
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
if( a->i_mbrd )
{
x264_mb_cache_fenc_satd( h );
- if( a->l0.me16x16.i_ref == 0 && *(uint32_t*)a->l0.me16x16.mv == *(uint32_t*)h->mb.cache.pskip_mv )
+ if( a->l0.me16x16.i_ref == 0 && M32( a->l0.me16x16.mv ) == M32( h->mb.cache.pskip_mv ) )
{
h->mb.i_partition = D_16x16;
x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
}
for( i_ref = 0; i_ref <= i_maxref; i_ref++ )
- *(uint32_t*)a->l0.mvc[i_ref][0] = *(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy];
+ CP32( a->l0.mvc[i_ref][0], h->mb.mvr[0][i_ref][h->mb.i_mb_xy] );
for( i = 0; i < 4; i++ )
{
m.cost += i_ref_cost;
i_halfpel_thresh += i_ref_cost;
- *(uint32_t*)a->l0.mvc[i_ref][i+1] = *(uint32_t*)m.mv;
+ CP32( a->l0.mvc[i_ref][i+1], m.mv );
if( m.cost < l0m->cost )
h->mc.memcpy_aligned( l0m, &m, sizeof(x264_me_t) );
h->mb.i_partition = D_8x8;
i_mvc = 1;
- *(uint32_t*)mvc[0] = *(uint32_t*)a->l0.me16x16.mv;
+ CP32( mvc[0], a->l0.me16x16.mv );
for( i = 0; i < 4; i++ )
{
x264_macroblock_cache_mv_ptr( h, 2*x8, 2*y8, 2, 2, 0, m->mv );
- *(uint32_t*)mvc[i_mvc] = *(uint32_t*)m->mv;
+ CP32( mvc[i_mvc], m->mv );
i_mvc++;
/* mb type cost */
m.i_ref_cost = i_ref_cost;
/* if we skipped the 16x16 predictor, we wouldn't have to copy anything... */
- *(uint32_t*)mvc[0] = *(uint32_t*)a->l0.mvc[i_ref][0];
- *(uint32_t*)mvc[1] = *(uint32_t*)a->l0.mvc[i_ref][2*i+1];
- *(uint32_t*)mvc[2] = *(uint32_t*)a->l0.mvc[i_ref][2*i+2];
+ CP32( mvc[0], a->l0.mvc[i_ref][0] );
+ CP32( mvc[1], a->l0.mvc[i_ref][2*i+1] );
+ CP32( mvc[2], a->l0.mvc[i_ref][2*i+2] );
LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 0, 8*i );
LOAD_WPELS( &m, h->mb.pic.p_fref_w[i_ref], 0, i_ref, 0, 8*i );
const int i_ref_cost = REF_COST( 0, i_ref );
m.i_ref_cost = i_ref_cost;
- *(uint32_t*)mvc[0] = *(uint32_t*)a->l0.mvc[i_ref][0];
- *(uint32_t*)mvc[1] = *(uint32_t*)a->l0.mvc[i_ref][i+1];
- *(uint32_t*)mvc[2] = *(uint32_t*)a->l0.mvc[i_ref][i+3];
+ CP32( mvc[0], a->l0.mvc[i_ref][0] );
+ CP32( mvc[1], a->l0.mvc[i_ref][i+1] );
+ CP32( mvc[2], a->l0.mvc[i_ref][i+3] );
LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*i, 0 );
LOAD_WPELS( &m, h->mb.pic.p_fref_w[i_ref], 0, i_ref, 8*i, 0 );
}
/* save mv for predicting neighbors */
- *(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
+ CP32( h->mb.mvr[0][i_ref][h->mb.i_mb_xy], m.mv );
}
a->l0.me16x16.i_ref = a->l0.i_ref;
}
/* save mv for predicting neighbors */
- *(uint32_t*)h->mb.mvr[1][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
+ CP32( h->mb.mvr[1][i_ref][h->mb.i_mb_xy], m.mv );
}
a->l1.me16x16.i_ref = a->l1.i_ref;
LOAD_FENC( m, h->mb.pic.p_fenc, 0, 8*i );
LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 0, 8*i );
- *(uint32_t*)mvc[0] = *(uint32_t*)lX->me8x8[2*i].mv;
- *(uint32_t*)mvc[1] = *(uint32_t*)lX->me8x8[2*i+1].mv;
+ CP32( mvc[0], lX->me8x8[2*i].mv );
+ CP32( mvc[1], lX->me8x8[2*i+1].mv );
x264_mb_predict_mv( h, l, 8*i, 2, m->mvp );
x264_me_search( h, m, mvc, 2 );
LOAD_FENC( m, h->mb.pic.p_fenc, 8*i, 0 );
LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 8*i, 0 );
- *(uint32_t*)mvc[0] = *(uint32_t*)lX->me8x8[i].mv;
- *(uint32_t*)mvc[1] = *(uint32_t*)lX->me8x8[i+2].mv;
+ CP32( mvc[0], lX->me8x8[i].mv );
+ CP32( mvc[1], lX->me8x8[i+2].mv );
x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
x264_me_search( h, m, mvc, 2 );
static const uint8_t check_mv_lists[X264_MBTYPE_MAX] = {[P_L0]=1, [B_L0_L0]=1, [B_L1_L1]=2};
int list = check_mv_lists[h->mb.i_type] - 1;
if( list >= 0 && h->mb.i_partition != D_16x16 &&
- *(uint32_t*)&h->mb.cache.mv[list][x264_scan8[0]] == *(uint32_t*)&h->mb.cache.mv[list][x264_scan8[12]] &&
+ M32( &h->mb.cache.mv[list][x264_scan8[0]] ) == M32( &h->mb.cache.mv[list][x264_scan8[12]] ) &&
h->mb.cache.ref[list][x264_scan8[0]] == h->mb.cache.ref[list][x264_scan8[12]] )
h->mb.i_partition = D_16x16;
}
#define STORE_8x8_NNZ(idx,nz)\
{\
- *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[idx*4+0]] = nz * 0x0101;\
- *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[idx*4+2]] = nz * 0x0101;\
+ M16( &h->mb.cache.non_zero_count[x264_scan8[idx*4+0]] ) = nz * 0x0101;\
+ M16( &h->mb.cache.non_zero_count[x264_scan8[idx*4+2]] ) = nz * 0x0101;\
+}
+
+#define CLEAR_16x16_NNZ \
+{\
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = 0;\
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = 0;\
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = 0;\
+ M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = 0;\
}
void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
if( decimate_score < 6 )
{
h->mb.i_cbp_luma = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = 0;
+ CLEAR_16x16_NNZ
}
h->dctf.dct4x4dc( dct_dc4x4 );
if( h->mb.i_skip_intra )
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = h->mb.pic.i8x8_nnz_buf[0];
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = h->mb.pic.i8x8_nnz_buf[1];
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = h->mb.pic.i8x8_nnz_buf[2];
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = h->mb.pic.i8x8_nnz_buf[3];
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = h->mb.pic.i8x8_nnz_buf[0];
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = h->mb.pic.i8x8_nnz_buf[1];
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = h->mb.pic.i8x8_nnz_buf[2];
+ M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = h->mb.pic.i8x8_nnz_buf[3];
h->mb.i_cbp_luma = h->mb.pic.i8x8_cbp;
/* In RD mode, restore the now-overwritten DCT data. */
if( h->mb.i_skip_intra == 2 )
if( h->mb.i_skip_intra )
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = h->mb.pic.i4x4_nnz_buf[0];
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = h->mb.pic.i4x4_nnz_buf[1];
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = h->mb.pic.i4x4_nnz_buf[2];
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = h->mb.pic.i4x4_nnz_buf[3];
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = h->mb.pic.i4x4_nnz_buf[0];
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = h->mb.pic.i4x4_nnz_buf[1];
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = h->mb.pic.i4x4_nnz_buf[2];
+ M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = h->mb.pic.i4x4_nnz_buf[3];
h->mb.i_cbp_luma = h->mb.pic.i4x4_cbp;
/* In RD mode, restore the now-overwritten DCT data. */
if( h->mb.i_skip_intra == 2 )
if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
/* emulate missing topright samples */
- *(uint32_t*) &p_dst[4-FDEC_STRIDE] = p_dst[3-FDEC_STRIDE] * 0x01010101U;
+ M32( &p_dst[4-FDEC_STRIDE] ) = p_dst[3-FDEC_STRIDE] * 0x01010101U;
if( h->mb.b_lossless )
x264_predict_lossless_4x4( h, p_dst, i, i_mode );
if( i_decimate_mb < 6 && b_decimate )
{
h->mb.i_cbp_luma = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = 0;
+ CLEAR_16x16_NNZ
}
else
{
if( i_decimate_mb < 6 )
{
h->mb.i_cbp_luma = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = 0;
- *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = 0;
+ CLEAR_16x16_NNZ
}
else
{
{
if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
!(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
- *(uint32_t*)h->mb.cache.mv[0][x264_scan8[0]] == *(uint32_t*)h->mb.cache.pskip_mv
+ M32( h->mb.cache.mv[0][x264_scan8[0]] ) == M32( h->mb.cache.pskip_mv )
&& h->mb.cache.ref[0][x264_scan8[0]] == 0 )
{
h->mb.i_type = P_SKIP;
COST_MV_HPEL( bmx, bmy );
for( i = 0; i < i_mvc; i++ )
{
- if( *(uint32_t*)mvc[i] && (bmv - *(uint32_t*)mvc[i]) )
+ if( M32( mvc[i] ) && (bmv - M32( mvc[i] )) )
{
int mx = x264_clip3( mvc[i][0], mv_x_min*4, mv_x_max*4 );
int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
{
/* mvsad_t is not guaranteed to be 8 bytes on all archs, so check before using explicit write-combining */
if( sizeof( mvsad_t ) == sizeof( uint64_t ) )
- *(uint64_t*)&mvsads[i] = *(uint64_t*)&mvsads[j];
+ CP64( &mvsads[i], &mvsads[j] );
else
mvsads[i] = mvsads[j];
i += mvsads[j].sad <= sad_thresh;
nmvsad--;
mvsads[bi] = mvsads[nmvsad];
if( sizeof( mvsad_t ) == sizeof( uint64_t ) )
- *(uint64_t*)&mvsads[bi] = *(uint64_t*)&mvsads[nmvsad];
+ CP64( &mvsads[bi], &mvsads[nmvsad] );
else
mvsads[bi] = mvsads[nmvsad];
}
if( cost < bcost * SATD_THRESH )
{
bcost = X264_MIN( cost, bcost );
- *(uint32_t*)cache0_mv = *(uint32_t*)cache0_mv2 = pack16to32_mask(m0x,m0y);
- *(uint32_t*)cache1_mv = *(uint32_t*)cache1_mv2 = pack16to32_mask(m1x,m1y);
+ M32( cache0_mv ) = pack16to32_mask(m0x,m0y);
+ M32( cache0_mv2 ) = pack16to32_mask(m0x,m0y);
+ M32( cache1_mv ) = pack16to32_mask(m1x,m1y);
+ M32( cache1_mv2 ) = pack16to32_mask(m1x,m1y);
h->mc.avg[i_pixel+3]( pixu, FDEC_STRIDE, pixu_buf[0][i0], 8, pixu_buf[1][i1], 8, i_weight );
h->mc.avg[i_pixel+3]( pixv, FDEC_STRIDE, pixv_buf[0][i0], 8, pixv_buf[1][i1], 8, i_weight );
uint64_t costrd = x264_rd_cost_part( h, i_lambda2, i8*4, m0->i_pixel );
if( satd <= bsatd * SATD_THRESH ) \
{ \
uint64_t cost; \
- *(uint32_t*)cache_mv = *(uint32_t*)cache_mv2 = pack16to32_mask(mx,my); \
+ M32( cache_mv ) = pack16to32_mask(mx,my); \
+ M32( cache_mv2 ) = pack16to32_mask(mx,my); \
cost = x264_rd_cost_part( h, i_lambda2, i4, m->i_pixel ); \
COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \
} \
ALIGNED_4( int16_t mvc[4][2] );
/* Reverse-order MV prediction. */
- *(uint32_t*)mvc[0] = 0;
- *(uint32_t*)mvc[1] = 0;
- *(uint32_t*)mvc[2] = 0;
-#define MVC(mv) { *(uint32_t*)mvc[i_mvc] = *(uint32_t*)mv; i_mvc++; }
+ M32( mvc[0] ) = 0;
+ M32( mvc[1] ) = 0;
+ M32( mvc[2] ) = 0;
+#define MVC(mv) { CP32( mvc[i_mvc], mv ); i_mvc++; }
if( i_mb_x < h->sps->i_mb_width - 1 )
MVC(fenc_mv[1]);
if( i_mb_y < h->sps->i_mb_height - 1 )
x264_me_search( h, &m[l], mvc, i_mvc );
m[l].cost -= 2; // remove mvcost from skip mbs
- if( *(uint32_t*)m[l].mv )
+ if( M32( m[l].mv ) )
m[l].cost += 5;
- *(uint32_t*)fenc_mvs[l] = *(uint32_t*)m[l].mv;
+ CP32( fenc_mvs[l], m[l].mv );
*fenc_costs[l] = m[l].cost;
}
else
{
- *(uint32_t*)m[l].mv = *(uint32_t*)fenc_mvs[l];
+ CP32( m[l].mv, fenc_mvs[l] );
m[l].cost = *fenc_costs[l];
}
COPY2_IF_LT( i_bcost, m[l].cost, list_used, l+1 );
}
- if( b_bidir && ( *(uint32_t*)m[0].mv || *(uint32_t*)m[1].mv ) )
+ if( b_bidir && ( M32( m[0].mv ) || M32( m[1].mv ) ) )
TRY_BIDIR( m[0].mv, m[1].mv, 5 );
/* Store to width-2 bitfield. */