explicit write combining, because gcc fails at optimizing consecutive memory accesses

author Fiona Glaser <fiona@x264.com>

Thu, 15 May 2008 12:01:01 +0000 (06:01 -0600)

committer Loren Merritt <pengvado@akuvian.org>

Sat, 17 May 2008 06:51:34 +0000 (00:51 -0600)
author Fiona Glaser <fiona@x264.com>
Thu, 15 May 2008 12:01:01 +0000 (06:01 -0600)
committer Loren Merritt <pengvado@akuvian.org>
Sat, 17 May 2008 06:51:34 +0000 (00:51 -0600)
diff --git a/common/common.h b/common/common.h

index 628c0009eebbaa67bdf794a15d1c9bf119741cf3..a53509b45f746be72ff91d403b503dc8d19ec04b 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -471,14 +471,14 @@ struct x264_t
  
              /* 0 if not available */
              DECLARE_ALIGNED_16( int16_t mv[2][X264_SCAN8_SIZE][2] );
-            DECLARE_ALIGNED_4( int16_t mvd[2][X264_SCAN8_SIZE][2] );
+            DECLARE_ALIGNED_8( int16_t mvd[2][X264_SCAN8_SIZE][2] );
  
              /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
              DECLARE_ALIGNED_4( int8_t skip[X264_SCAN8_SIZE] );
  
              DECLARE_ALIGNED_16( int16_t direct_mv[2][X264_SCAN8_SIZE][2] );
-            int8_t  direct_ref[2][X264_SCAN8_SIZE];
-            int     pskip_mv[2];
+            DECLARE_ALIGNED_4( int8_t  direct_ref[2][X264_SCAN8_SIZE] );
+            DECLARE_ALIGNED_4( int16_t pskip_mv[2] );
  
              /* number of neighbors (top and left) that used 8x8 dct */
              int     i_neighbour_transform_size;
diff --git a/common/macroblock.c b/common/macroblock.c

index cd1f9cc7b6a81e1dbaf1efb868efcd66885fd3f1..d2fc0cbc3f562f124429a043acad5007bc8e2620 100644 (file)
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -88,7 +88,7 @@ int x264_mb_transform_8x8_allowed( x264_t *h )
      }
  }
  
-void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] )
+void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mvp[2] )
  {
      const int i8 = x264_scan8[idx];
      const int i_ref= h->mb.cache.ref[i_list][i8];
@@ -111,14 +111,12 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2]
      {
          if( idx == 0 && i_refb == i_ref )
          {
-            mvp[0] = mv_b[0];
-            mvp[1] = mv_b[1];
+            *(uint32_t*)mvp = *(uint32_t*)mv_b;
              return;
          }
          else if( idx != 0 && i_refa == i_ref )
          {
-            mvp[0] = mv_a[0];
-            mvp[1] = mv_a[1];
+            *(uint32_t*)mvp = *(uint32_t*)mv_a;
              return;
          }
      }
@@ -126,14 +124,12 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2]
      {
          if( idx == 0 && i_refa == i_ref )
          {
-            mvp[0] = mv_a[0];
-            mvp[1] = mv_a[1];
+            *(uint32_t*)mvp = *(uint32_t*)mv_a;
              return;
          }
          else if( idx != 0 && i_refc == i_ref )
          {
-            mvp[0] = mv_c[0];
-            mvp[1] = mv_c[1];
+            *(uint32_t*)mvp = *(uint32_t*)mv_c;
              return;
          }
      }
@@ -151,26 +147,14 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2]
      else if( i_count == 1 )
      {
          if( i_refa == i_ref )
-        {
-            mvp[0] = mv_a[0];
-            mvp[1] = mv_a[1];
-        }
+            *(uint32_t*)mvp = *(uint32_t*)mv_a;
          else if( i_refb == i_ref )
-        {
-            mvp[0] = mv_b[0];
-            mvp[1] = mv_b[1];
-        }
+            *(uint32_t*)mvp = *(uint32_t*)mv_b;
          else
-        {
-            mvp[0] = mv_c[0];
-            mvp[1] = mv_c[1];
-        }
+            *(uint32_t*)mvp = *(uint32_t*)mv_c;
      }
      else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
-    {
-        mvp[0] = mv_a[0];
-        mvp[1] = mv_a[1];
-    }
+        *(uint32_t*)mvp = *(uint32_t*)mv_a;
      else
      {
          mvp[0] = x264_median( mv_a[0], mv_b[0], mv_c[0] );
@@ -178,7 +162,7 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2]
      }
  }
  
-void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int mvp[2] )
+void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] )
  {
      int     i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
      int16_t *mv_a  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 1];
@@ -208,26 +192,14 @@ void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int mvp[2] )
      else if( i_count == 1 )
      {
          if( i_refa == i_ref )
-        {
-            mvp[0] = mv_a[0];
-            mvp[1] = mv_a[1];
-        }
+            *(uint32_t*)mvp = *(uint32_t*)mv_a;
          else if( i_refb == i_ref )
-        {
-            mvp[0] = mv_b[0];
-            mvp[1] = mv_b[1];
-        }
+            *(uint32_t*)mvp = *(uint32_t*)mv_b;
          else
-        {
-            mvp[0] = mv_c[0];
-            mvp[1] = mv_c[1];
-        }
+            *(uint32_t*)mvp = *(uint32_t*)mv_c;
      }
      else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
-    {
-        mvp[0] = mv_a[0];
-        mvp[1] = mv_a[1];
-    }
+        *(uint32_t*)mvp = *(uint32_t*)mv_a;
      else
      {
          mvp[0] = x264_median( mv_a[0], mv_b[0], mv_c[0] );
@@ -236,7 +208,7 @@ void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int mvp[2] )
  }
  
  
-void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] )
+void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] )
  {
      int     i_refa = h->mb.cache.ref[0][X264_SCAN8_0 - 1];
      int     i_refb = h->mb.cache.ref[0][X264_SCAN8_0 - 8];
@@ -244,10 +216,10 @@ void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] )
      int16_t *mv_b  = h->mb.cache.mv[0][X264_SCAN8_0 - 8];
  
      if( i_refa == -2 || i_refb == -2 ||
-        ( i_refa == 0 && mv_a[0] == 0 && mv_a[1] == 0 ) ||
-        ( i_refb == 0 && mv_b[0] == 0 && mv_b[1] == 0 ) )
+        ( i_refa == 0 && *(uint32_t*)mv_a == 0 ) ||
+        ( i_refb == 0 && *(uint32_t*)mv_b == 0 ) )
      {
-        mv[0] = mv[1] = 0;
+        *(uint32_t*)mv = 0;
      }
      else
      {
@@ -268,8 +240,8 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
      if( IS_INTRA( type_col ) )
      {
          x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
-        x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 0, 0, 0 );
-        x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 1, 0, 0 );
+        x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 0, 0 );
+        x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 1, 0 );
          return 1;
      }
      b8x8 = h->sps->b_direct8x8_inference ||
@@ -291,11 +263,10 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
              if( b8x8 )
              {
                  const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
-                int mv_l0[2];
-                mv_l0[0] = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
-                mv_l0[1] = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
-                x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, mv_l0[0], mv_l0[1] );
-                x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1] );
+                const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
+                const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
+                x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, pack16to32_mask(l0x, l0y) );
+                x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
              }
              else
              {
@@ -304,11 +275,10 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
                      const int x4 = i4%2 + 2*x8;
                      const int y4 = i4/2 + 2*y8;
                      const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + x4 + y4 * h->mb.i_b4_stride ];
-                    int mv_l0[2];
-                    mv_l0[0] = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
-                    mv_l0[1] = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
-                    x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_l0[0], mv_l0[1] );
-                    x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1] );
+                    const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
+                    const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
+                    x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, pack16to32_mask(l0x, l0y) );
+                    x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
                  }
              }
          }
@@ -350,7 +320,7 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
  static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
  {
      int ref[2];
-    int mv[2][2];
+    DECLARE_ALIGNED_4( int16_t mv[2][2] );
      int i_list;
      int i8, i4;
      int b8x8;
@@ -381,10 +351,7 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
      {
          ref[0] = 
          ref[1] = 0;
-        mv[0][0] = 
-        mv[0][1] = 
-        mv[1][0] = 
-        mv[1][1] = 0;
+        *(uint64_t*)mv[0] = 0;
      }
      else
      {
@@ -393,14 +360,14 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
              if( ref[i_list] >= 0 )
                  x264_mb_predict_mv_16x16( h, i_list, ref[i_list], mv[i_list] );
              else
-                mv[i_list][0] = mv[i_list][1] = 0;
+                *(uint32_t*)mv[i_list] = 0;
          }
      }
  
      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] );
      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] );
-    x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 0, mv[0][0], mv[0][1] );
-    x264_macroblock_cache_mv(  h, 0, 0, 4, 4, 1, mv[1][0], mv[1][1] );
+    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, mv[0] );
+    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, mv[1] );
  
      if( IS_INTRA( type_col ) )
          return 1;
@@ -435,9 +402,9 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
                  if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
                  {
                      if( ref[0] == 0 )
-                        x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0, 0 );
+                        x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
                      if( ref[1] == 0 )
-                        x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0, 0 );
+                        x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
                  }
              }
              else
@@ -450,9 +417,9 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
                      if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
                      {
                          if( ref[0] == 0 )
-                            x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0, 0 );
+                            x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0 );
                          if( ref[1] == 0 )
-                            x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 );
+                            x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0 );
                      }
                  }
              }
@@ -527,14 +494,13 @@ void x264_mb_load_mv_direct8x8( x264_t *h, int idx )
  #define FIXED_SCALE 256
  
  /* This just improves encoder performance, it's not part of the spec */
-void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[8][2], int *i_mvc )
+void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[8][2], int *i_mvc )
  {
      int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
      int i = 0;
  
  #define SET_MVP(mvp) { \
-        mvc[i][0] = mvp[0]; \
-        mvc[i][1] = mvp[1]; \
+        *(uint32_t*)mvc[i] = *(uint32_t*)mvp; \
          i++; \
      }
  
@@ -1262,15 +1228,13 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
                  const int ir = i_top_8x8 - 1;
                  const int iv = i_top_4x4 - 1;
                  h->mb.cache.ref[i_list][i8]  = h->mb.ref[i_list][ir];
-                h->mb.cache.mv[i_list][i8][0] = h->mb.mv[i_list][iv][0];
-                h->mb.cache.mv[i_list][i8][1] = h->mb.mv[i_list][iv][1];
+                *(uint32_t*)h->mb.cache.mv[i_list][i8] = *(uint32_t*)h->mb.mv[i_list][iv];
              }
              else
              {
                  const int i8 = x264_scan8[0] - 1 - 1*8;
                  h->mb.cache.ref[i_list][i8] = -2;
-                h->mb.cache.mv[i_list][i8][0] = 0;
-                h->mb.cache.mv[i_list][i8][1] = 0;
+                *(uint32_t*)h->mb.cache.mv[i_list][i8] = 0;
              }
  
              if( h->mb.i_neighbour & MB_TOP )
@@ -1282,22 +1246,15 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
                  h->mb.cache.ref[i_list][i8+1] = h->mb.ref[i_list][ir + 0];
                  h->mb.cache.ref[i_list][i8+2] =
                  h->mb.cache.ref[i_list][i8+3] = h->mb.ref[i_list][ir + 1];
-
-                for( i = 0; i < 4; i++ )
-                {
-                    h->mb.cache.mv[i_list][i8+i][0] = h->mb.mv[i_list][iv + i][0];
-                    h->mb.cache.mv[i_list][i8+i][1] = h->mb.mv[i_list][iv + i][1];
-                }
+                *(uint64_t*)h->mb.cache.mv[i_list][i8+0] = *(uint64_t*)h->mb.mv[i_list][iv+0];
+                *(uint64_t*)h->mb.cache.mv[i_list][i8+2] = *(uint64_t*)h->mb.mv[i_list][iv+2];
              }
              else
              {
                  const int i8 = x264_scan8[0] - 8;
-                for( i = 0; i < 4; i++ )
-                {
-                    h->mb.cache.ref[i_list][i8+i] = -2;
-                    h->mb.cache.mv[i_list][i8+i][0] =
-                    h->mb.cache.mv[i_list][i8+i][1] = 0;
-                }
+                *(uint64_t*)h->mb.cache.mv[i_list][i8+0] = 0;
+                *(uint64_t*)h->mb.cache.mv[i_list][i8+2] = 0;
+                *(uint32_t*)&h->mb.cache.ref[i_list][i8] = (uint8_t)(-2) * 0x01010101U;
              }
  
              if( h->mb.i_neighbour & MB_TOPRIGHT )
@@ -1306,15 +1263,13 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
                  const int ir = i_top_8x8 + 2;
                  const int iv = i_top_4x4 + 4;
                  h->mb.cache.ref[i_list][i8]  = h->mb.ref[i_list][ir];
-                h->mb.cache.mv[i_list][i8][0] = h->mb.mv[i_list][iv][0];
-                h->mb.cache.mv[i_list][i8][1] = h->mb.mv[i_list][iv][1];
+                *(uint32_t*)h->mb.cache.mv[i_list][i8] = *(uint32_t*)h->mb.mv[i_list][iv];
              }
              else
              {
                  const int i8 = x264_scan8[0] + 4 - 1*8;
                  h->mb.cache.ref[i_list][i8] = -2;
-                h->mb.cache.mv[i_list][i8][0] = 0;
-                h->mb.cache.mv[i_list][i8][1] = 0;
+                *(uint32_t*)h->mb.cache.mv[i_list][i8] = 0;
              }
  
              if( h->mb.i_neighbour & MB_LEFT )
@@ -1328,10 +1283,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
                  h->mb.cache.ref[i_list][i8+3*8] = h->mb.ref[i_list][ir + 1*s8x8];
  
                  for( i = 0; i < 4; i++ )
-                {
-                    h->mb.cache.mv[i_list][i8+i*8][0] = h->mb.mv[i_list][iv + i*s4x4][0];
-                    h->mb.cache.mv[i_list][i8+i*8][1] = h->mb.mv[i_list][iv + i*s4x4][1];
-                }
+                    *(uint32_t*)h->mb.cache.mv[i_list][i8+i*8] = *(uint32_t*)h->mb.mv[i_list][iv + i*s4x4];
              }
              else
              {
@@ -1339,8 +1291,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
                  for( i = 0; i < 4; i++ )
                  {
                      h->mb.cache.ref[i_list][i8+i*8] = -2;
-                    h->mb.cache.mv[i_list][i8+i*8][0] =
-                    h->mb.cache.mv[i_list][i8+i*8][1] = 0;
+                    *(uint32_t*)h->mb.cache.mv[i_list][i8+i*8] = 0;
                  }
              }
  
@@ -1350,20 +1301,14 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
                  {
                      const int i8 = x264_scan8[0] - 8;
                      const int iv = i_top_4x4;
-                    for( i = 0; i < 4; i++ )
-                    {
-                        h->mb.cache.mvd[i_list][i8+i][0] = h->mb.mvd[i_list][iv + i][0];
-                        h->mb.cache.mvd[i_list][i8+i][1] = h->mb.mvd[i_list][iv + i][1];
-                    }
+                    *(uint64_t*)h->mb.cache.mvd[i_list][i8+0] = *(uint64_t*)h->mb.mvd[i_list][iv+0];
+                    *(uint64_t*)h->mb.cache.mvd[i_list][i8+2] = *(uint64_t*)h->mb.mvd[i_list][iv+2];
                  }
                  else
                  {
                      const int i8 = x264_scan8[0] - 8;
-                    for( i = 0; i < 4; i++ )
-                    {
-                        h->mb.cache.mvd[i_list][i8+i][0] =
-                        h->mb.cache.mvd[i_list][i8+i][1] = 0;
-                    }
+                    *(uint64_t*)h->mb.cache.mvd[i_list][i8+0] =
+                    *(uint64_t*)h->mb.cache.mvd[i_list][i8+2] = 0;
                  }
  
                  if( i_left_type >= 0 )
@@ -1371,19 +1316,13 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
                      const int i8 = x264_scan8[0] - 1;
                      const int iv = i_mb_4x4 - 1;
                      for( i = 0; i < 4; i++ )
-                    {
-                        h->mb.cache.mvd[i_list][i8+i*8][0] = h->mb.mvd[i_list][iv + i*s4x4][0];
-                        h->mb.cache.mvd[i_list][i8+i*8][1] = h->mb.mvd[i_list][iv + i*s4x4][1];
-                    }
+                        *(uint32_t*)h->mb.cache.mvd[i_list][i8+i*8] = *(uint32_t*)h->mb.mvd[i_list][iv + i*s4x4];
                  }
                  else
                  {
                      const int i8 = x264_scan8[0] - 1;
                      for( i = 0; i < 4; i++ )
-                    {
-                        h->mb.cache.mvd[i_list][i8+i*8][0] =
-                        h->mb.cache.mvd[i_list][i8+i*8][1] = 0;
-                    }
+                        *(uint32_t*)h->mb.cache.mvd[i_list][i8+i*8] = 0;
                  }
              }
          }
@@ -1516,7 +1455,7 @@ void x264_macroblock_cache_save( x264_t *h )
          int i_list;
          for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2  : 1 ); i_list++ )
          {
-            int y,x;
+            int y;
  
              h->mb.ref[i_list][i_mb_8x8+0+0*s8x8] = h->mb.cache.ref[i_list][x264_scan8[0]];
              h->mb.ref[i_list][i_mb_8x8+1+0*s8x8] = h->mb.cache.ref[i_list][x264_scan8[4]];
@@ -1525,11 +1464,8 @@ void x264_macroblock_cache_save( x264_t *h )
  
              for( y = 0; y < 4; y++ )
              {
-                for( x = 0; x < 4; x++ )
-                {
-                    h->mb.mv[i_list][i_mb_4x4+x+y*s4x4][0] = h->mb.cache.mv[i_list][x264_scan8[0]+x+8*y][0];
-                    h->mb.mv[i_list][i_mb_4x4+x+y*s4x4][1] = h->mb.cache.mv[i_list][x264_scan8[0]+x+8*y][1];
-                }
+                *(uint64_t*)h->mb.mv[i_list][i_mb_4x4+y*s4x4+0] = *(uint64_t*)h->mb.cache.mv[i_list][x264_scan8[0]+8*y+0];
+                *(uint64_t*)h->mb.mv[i_list][i_mb_4x4+y*s4x4+2] = *(uint64_t*)h->mb.cache.mv[i_list][x264_scan8[0]+8*y+2];
              }
          }
      }
@@ -1538,20 +1474,15 @@ void x264_macroblock_cache_save( x264_t *h )
          int i_list;
          for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2  : 1 ); i_list++ )
          {
-            int y,x;
+            int y;
  
-            h->mb.ref[i_list][i_mb_8x8+0+0*s8x8] =
-            h->mb.ref[i_list][i_mb_8x8+1+0*s8x8] =
-            h->mb.ref[i_list][i_mb_8x8+0+1*s8x8] =
-            h->mb.ref[i_list][i_mb_8x8+1+1*s8x8] = -1;
+            *(uint16_t*)&h->mb.ref[i_list][i_mb_8x8+0*s8x8] = (uint8_t)(-1) * 0x0101;
+            *(uint16_t*)&h->mb.ref[i_list][i_mb_8x8+1*s8x8] = (uint8_t)(-1) * 0x0101;
  
              for( y = 0; y < 4; y++ )
              {
-                for( x = 0; x < 4; x++ )
-                {
-                    h->mb.mv[i_list][i_mb_4x4+x+y*s4x4][0] = 0;
-                    h->mb.mv[i_list][i_mb_4x4+x+y*s4x4][1] = 0;
-                }
+                *(uint64_t*)h->mb.mv[i_list][i_mb_4x4+y*s4x4+0] = 0;
+                *(uint64_t*)h->mb.mv[i_list][i_mb_4x4+y*s4x4+2] = 0;
              }
          }
      }
@@ -1569,14 +1500,11 @@ void x264_macroblock_cache_save( x264_t *h )
              for( i_list  = 0; i_list < 2; i_list++ )
              {
                  const int s4x4 = 4 * h->mb.i_mb_stride;
-                int y,x;
+                int y;
                  for( y = 0; y < 4; y++ )
                  {
-                    for( x = 0; x < 4; x++ )
-                    {
-                        h->mb.mvd[i_list][i_mb_4x4+x+y*s4x4][0] = h->mb.cache.mvd[i_list][x264_scan8[0]+x+8*y][0];
-                        h->mb.mvd[i_list][i_mb_4x4+x+y*s4x4][1] = h->mb.cache.mvd[i_list][x264_scan8[0]+x+8*y][1];
-                    }
+                    *(uint64_t*)h->mb.mvd[i_list][i_mb_4x4+y*s4x4+0] = *(uint64_t*)h->mb.cache.mvd[i_list][x264_scan8[0]+8*y+0];
+                    *(uint64_t*)h->mb.mvd[i_list][i_mb_4x4+y*s4x4+2] = *(uint64_t*)h->mb.cache.mvd[i_list][x264_scan8[0]+8*y+2];
                  }
              }
          }
@@ -1586,14 +1514,11 @@ void x264_macroblock_cache_save( x264_t *h )
              for( i_list  = 0; i_list < 2; i_list++ )
              {
                  const int s4x4 = 4 * h->mb.i_mb_stride;
-                int y,x;
+                int y;
                  for( y = 0; y < 4; y++ )
                  {
-                    for( x = 0; x < 4; x++ )
-                    {
-                        h->mb.mvd[i_list][i_mb_4x4+x+y*s4x4][0] = 0;
-                        h->mb.mvd[i_list][i_mb_4x4+x+y*s4x4][1] = 0;
-                    }
+                    *(uint64_t*)h->mb.mvd[i_list][i_mb_4x4+y*s4x4+0] = 0;
+                    *(uint64_t*)h->mb.mvd[i_list][i_mb_4x4+y*s4x4+2] = 0;
                  }
              }
          }
diff --git a/common/macroblock.h b/common/macroblock.h

index e127e2b7cf2a8e04ccdc95cbf0f8630df2dca11c..2766ddd0be66bafb001874276ff5c953c588c9d2 100644 (file)
--- a/common/macroblock.h
+++ b/common/macroblock.h
@@ -253,16 +253,16 @@ void x264_prefetch_fenc( x264_t *h, x264_frame_t *fenc, int i_mb_x, int i_mb_y )
  /* x264_mb_predict_mv_16x16:
   *      set mvp with predicted mv for D_16x16 block
   *      h->mb. need only valid values from other blocks */
-void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int mvp[2] );
+void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] );
  /* x264_mb_predict_mv_pskip:
   *      set mvp with predicted mv for P_SKIP
   *      h->mb. need only valid values from other blocks */
-void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] );
+void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] );
  /* x264_mb_predict_mv:
   *      set mvp with predicted mv for all blocks except SKIP and DIRECT
   *      h->mb. need valid ref/partition/sub of current block to be valid
   *      and valid mv/ref from other blocks. */
-void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] );
+void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mvp[2] );
  /* x264_mb_predict_mv_direct16x16:
   *      set h->mb.cache.mv and h->mb.cache.ref for B_SKIP or B_DIRECT
   *      h->mb. need only valid values from other blocks.
@@ -278,7 +278,7 @@ void x264_mb_load_mv_direct8x8( x264_t *h, int idx );
   *      set mvc with D_16x16 prediction.
   *      uses all neighbors, even those that didn't end up using this ref.
   *      h->mb. need only valid values from other blocks */
-void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[8][2], int *i_mvc );
+void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[8][2], int *i_mvc );
  
  
  int  x264_mb_predict_intra4x4_mode( x264_t *h, int idx );
@@ -293,7 +293,14 @@ int  x264_mb_transform_8x8_allowed( x264_t *h );
  void x264_mb_mc( x264_t *h );
  void x264_mb_mc_8x8( x264_t *h, int i8 );
  
-
+static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
+{ /* Packs a,b so that one uint32_t store lays them out in memory as int16_t {a,b}. */
+#ifdef WORDS_BIGENDIAN
+   return (b&0xFFFF) + ((uint32_t)a<<16); /* shift as unsigned: a may be negative */
+#else
+   return (a&0xFFFF) + ((uint32_t)b<<16); /* shift as unsigned: b may be negative */
+#endif
+}
  static ALWAYS_INLINE void x264_macroblock_cache_rect1( void *dst, int width, int height, uint8_t val )
  {
      int dy;
@@ -313,30 +320,32 @@ static ALWAYS_INLINE void x264_macroblock_cache_rect1( void *dst, int width, int
  static ALWAYS_INLINE void x264_macroblock_cache_rect4( void *dst, int width, int height, uint32_t val )
  {
      int dy, dx;
-    for( dy = 0; dy < height; dy++ )
-        for( dx = 0; dx < width; dx++ )
-            ((uint32_t*)dst)[dx+8*dy] = val;
-}
-static ALWAYS_INLINE uint32_t pack16to32_clip( int a, int b )
-{
-#ifdef WORDS_BIGENDIAN
-   return (b&0xFFFF) + (a<<16);
-#else
-   return (a&0xFFFF) + (b<<16);
-#endif
+    if( width == 1 || WORD_SIZE < 8 )
+    {
+        for( dy = 0; dy < height; dy++ )
+            for( dx = 0; dx < width; dx++ )
+                ((uint32_t*)dst)[dx+8*dy] = val;
+    }
+    else
+    {
+        uint64_t val64 = val + ((uint64_t)val<<32);
+        for( dy = 0; dy < height; dy++ )
+            for( dx = 0; dx < width/2; dx++ )
+                ((uint64_t*)dst)[dx+4*dy] = val64;
+    }
  }
-
-static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, uint8_t ref )
+#define x264_macroblock_cache_mv_ptr(a,x,y,w,h,l,mv) x264_macroblock_cache_mv(a,x,y,w,h,l,*(uint32_t*)(mv)) /* mv: pointer to an int16_t[2] pair, read as one 32-bit word */
+static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv )
  {
-    x264_macroblock_cache_rect1( &h->mb.cache.ref[i_list][X264_SCAN8_0+x+8*y], width, height, ref );
+    x264_macroblock_cache_rect4( &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y], width, height, mv );
  }
-static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, int mvx, int mvy )
+static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv )
  {
-    x264_macroblock_cache_rect4( &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y], width, height, pack16to32_clip(mvx,mvy) );
+    x264_macroblock_cache_rect4( &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y], width, height, mv );
  }
-static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, int mdx, int mdy )
+static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, uint8_t ref )
  {
-    x264_macroblock_cache_rect4( &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y], width, height, pack16to32_clip(mdx,mdy) );
+    x264_macroblock_cache_rect1( &h->mb.cache.ref[i_list][X264_SCAN8_0+x+8*y], width, height, ref );
  }
  static ALWAYS_INLINE void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip )
  {
diff --git a/common/osdep.h b/common/osdep.h

index d914e78472a57a0b208f41edbbb5793bab01e0da..c7353e0dac3baab7c5c93e0bb128947a66cbc2d4 100644 (file)
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -127,4 +127,7 @@
  #define x264_pthread_cond_wait(c,m)  usleep(100)
  #endif
  
+/* FIXME: long isn't always the native register size (e.g. win64). */
+#define WORD_SIZE sizeof(long)
+
  #endif /* X264_OSDEP_H */
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 9da9a5c75544c3aa49f71b8dddc99801276b3bce..dad24b1f92b712d9ff002bf64459bbdc8a2bb0d7 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -45,7 +45,7 @@ typedef struct
      /* 8x8 */
      int       i_cost8x8;
      /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
-    DECLARE_ALIGNED_8( int mvc[32][5][2] );
+    DECLARE_ALIGNED_4( int16_t mvc[32][5][2] );
      x264_me_t me8x8[4];
  
      /* Sub 4x4 */
@@ -967,8 +967,8 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
  static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  {
      x264_me_t m;
-    int i_ref;
-    int mvc[7][2], i_mvc;
+    int i_ref, i_mvc;
+    DECLARE_ALIGNED_4( int16_t mvc[7][2] );
      int i_halfpel_thresh = INT_MAX;
      int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;
  
@@ -1013,10 +1013,8 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
              a->l0.me16x16 = m;
  
          /* save mv for predicting neighbors */
-        a->l0.mvc[i_ref][0][0] =
-        h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
-        a->l0.mvc[i_ref][0][1] =
-        h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
+        *(uint32_t*)a->l0.mvc[i_ref][0] = 
+        *(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
      }
  
      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
@@ -1024,11 +1022,10 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  
      h->mb.i_type = P_L0;
      if( a->b_mbrd && a->l0.me16x16.i_ref == 0
-        && a->l0.me16x16.mv[0] == h->mb.cache.pskip_mv[0]
-        && a->l0.me16x16.mv[1] == h->mb.cache.pskip_mv[1] )
+        && *(uint32_t*)a->l0.me16x16.mv == *(uint32_t*)h->mb.cache.pskip_mv )
      {
          h->mb.i_partition = D_16x16;
-        x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
+        x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
          a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
      }
  }
@@ -1060,10 +1057,7 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
      }
  
      for( i_ref = 0; i_ref <= i_maxref; i_ref++ )
-    {
-         a->l0.mvc[i_ref][0][0] = h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0];
-         a->l0.mvc[i_ref][0][1] = h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1];
-    }
+         *(uint32_t*)a->l0.mvc[i_ref][0] = *(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy];
  
      for( i = 0; i < 4; i++ )
      {
@@ -1090,12 +1084,12 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
  
               m.cost += i_ref_cost;
               i_halfpel_thresh += i_ref_cost;
-             *(uint64_t*)a->l0.mvc[i_ref][i+1] = *(uint64_t*)m.mv;
+             *(uint32_t*)a->l0.mvc[i_ref][i+1] = *(uint32_t*)m.mv;
  
               if( m.cost < l0m->cost )
                   *l0m = m;
          }
-        x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, l0m->mv[0], l0m->mv[1] );
+        x264_macroblock_cache_mv_ptr( h, 2*x8, 2*y8, 2, 2, 0, l0m->mv );
          x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, l0m->i_ref );
  
          /* mb type cost */
@@ -1115,14 +1109,14 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
      uint8_t  **p_fref = h->mb.pic.p_fref[0][i_ref];
      uint8_t  **p_fenc = h->mb.pic.p_fenc;
      int i_mvc;
-    int (*mvc)[2] = a->l0.mvc[i_ref];
+    int16_t (*mvc)[2] = a->l0.mvc[i_ref];
      int i;
  
      /* XXX Needed for x264_mb_predict_mv */
      h->mb.i_partition = D_8x8;
  
      i_mvc = 1;
-    *(uint64_t*)mvc[0] = *(uint64_t*)a->l0.me16x16.mv;
+    *(uint32_t*)mvc[0] = *(uint32_t*)a->l0.me16x16.mv;
  
      for( i = 0; i < 4; i++ )
      {
@@ -1140,9 +1134,9 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
          x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
          x264_me_search( h, m, mvc, i_mvc );
  
-        x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
+        x264_macroblock_cache_mv_ptr( h, 2*x8, 2*y8, 2, 2, 0, m->mv );
  
-        *(uint64_t*)mvc[i_mvc] = *(uint64_t*)m->mv;
+        *(uint32_t*)mvc[i_mvc] = *(uint32_t*)m->mv;
          i_mvc++;
  
          /* mb type cost */
@@ -1163,7 +1157,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
  {
      x264_me_t m;
      uint8_t  **p_fenc = h->mb.pic.p_fenc;
-    DECLARE_ALIGNED_8( int mvc[3][2] );
+    DECLARE_ALIGNED_4( int16_t mvc[3][2] );
      int i, j;
  
      /* XXX Needed for x264_mb_predict_mv */
@@ -1188,9 +1182,9 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
               m.i_ref = i_ref;
  
               /* if we skipped the 16x16 predictor, we wouldn't have to copy anything... */
-             *(uint64_t*)mvc[0] = *(uint64_t*)a->l0.mvc[i_ref][0];
-             *(uint64_t*)mvc[1] = *(uint64_t*)a->l0.mvc[i_ref][2*i+1];
-             *(uint64_t*)mvc[2] = *(uint64_t*)a->l0.mvc[i_ref][2*i+2];
+             *(uint32_t*)mvc[0] = *(uint32_t*)a->l0.mvc[i_ref][0];
+             *(uint32_t*)mvc[1] = *(uint32_t*)a->l0.mvc[i_ref][2*i+1];
+             *(uint32_t*)mvc[2] = *(uint32_t*)a->l0.mvc[i_ref][2*i+2];
  
               LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 0, 8*i );
               x264_macroblock_cache_ref( h, 0, 2*i, 4, 2, 0, i_ref );
@@ -1202,7 +1196,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
               if( m.cost < l0m->cost )
                   *l0m = m;
          }
-        x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, l0m->mv[0], l0m->mv[1] );
+        x264_macroblock_cache_mv_ptr( h, 0, 2*i, 4, 2, 0, l0m->mv );
          x264_macroblock_cache_ref( h, 0, 2*i, 4, 2, 0, l0m->i_ref );
      }
  
@@ -1213,7 +1207,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
  {
      x264_me_t m;
      uint8_t  **p_fenc = h->mb.pic.p_fenc;
-    DECLARE_ALIGNED_8( int mvc[3][2] );
+    DECLARE_ALIGNED_4( int16_t mvc[3][2] );
      int i, j;
  
      /* XXX Needed for x264_mb_predict_mv */
@@ -1237,9 +1231,9 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
               m.i_ref_cost = i_ref_cost;
               m.i_ref = i_ref;
  
-             *(uint64_t*)mvc[0] = *(uint64_t*)a->l0.mvc[i_ref][0];
-             *(uint64_t*)mvc[1] = *(uint64_t*)a->l0.mvc[i_ref][i+1];
-             *(uint64_t*)mvc[2] = *(uint64_t*)a->l0.mvc[i_ref][i+3];
+             *(uint32_t*)mvc[0] = *(uint32_t*)a->l0.mvc[i_ref][0];
+             *(uint32_t*)mvc[1] = *(uint32_t*)a->l0.mvc[i_ref][i+1];
+             *(uint32_t*)mvc[2] = *(uint32_t*)a->l0.mvc[i_ref][i+3];
  
               LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*i, 0 );
               x264_macroblock_cache_ref( h, 2*i, 0, 2, 4, 0, i_ref );
@@ -1251,7 +1245,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
               if( m.cost < l0m->cost )
                   *l0m = m;
          }
-        x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, l0m->mv[0], l0m->mv[1] );
+        x264_macroblock_cache_mv_ptr( h, 2*i, 0, 2, 4, 0, l0m->mv );
          x264_macroblock_cache_ref( h, 2*i, 0, 2, 4, 0, l0m->i_ref );
      }
  
@@ -1320,7 +1314,7 @@ static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8
          x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
          x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
  
-        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
+        x264_macroblock_cache_mv_ptr( h, x4, y4, 1, 1, 0, m->mv );
      }
      a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
                              a->l0.me4x4[i8x8][1].cost +
@@ -1360,7 +1354,7 @@ static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8
          x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
          x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
  
-        x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
+        x264_macroblock_cache_mv_ptr( h, x4, y4, 2, 1, 0, m->mv );
      }
      a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost +
                              REF_COST( 0, i_ref ) +
@@ -1397,7 +1391,7 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8
          x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
          x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
  
-        x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
+        x264_macroblock_cache_mv_ptr( h, x4, y4, 1, 2, 0, m->mv );
      }
      a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost +
                              REF_COST( 0, i_ref ) +
@@ -1447,8 +1441,8 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
      int weight;
  
      x264_me_t m;
-    int i_ref;
-    int mvc[8][2], i_mvc;
+    int i_ref, i_mvc;
+    DECLARE_ALIGNED_4( int16_t mvc[8][2] );
      int i_halfpel_thresh = INT_MAX;
      int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;
  
@@ -1477,8 +1471,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
          }
  
          /* save mv for predicting neighbors */
-        h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
-        h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
+        *(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
      }
      /* subtract ref cost, so we don't have to add it for the other MB types */
      a->l0.me16x16.cost -= REF_COST( 0, a->l0.i_ref );
@@ -1505,8 +1498,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
          }
  
          /* save mv for predicting neighbors */
-        h->mb.mvr[1][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
-        h->mb.mvr[1][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
+        *(uint32_t*)h->mb.mvr[1][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
      }
      /* subtract ref cost, so we don't have to add it for the other MB types */
      a->l1.me16x16.cost -= REF_COST( 1, a->l1.i_ref );
@@ -1517,7 +1509,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
  
      /* get cost of BI mode */
      weight = h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref];
-    if ( ((a->l0.me16x16.mv[0] | a->l0.me16x16.mv[1]) & 1) == 0 )
+    if ( (*(uint32_t*)a->l0.me16x16.mv & 0x10001) == 0 )
      {
          /* l0 reference is halfpel, so get_ref on it will make it faster */
          src2 = 
@@ -1570,21 +1562,21 @@ static inline void x264_mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int
      switch( h->mb.i_sub_partition[i] )
      {
          case D_L0_8x8:
-            x264_macroblock_cache_mv( h, x, y, 2, 2, 0, a->l0.me8x8[i].mv[0], a->l0.me8x8[i].mv[1] );
+            x264_macroblock_cache_mv_ptr( h, x, y, 2, 2, 0, a->l0.me8x8[i].mv );
              break;
          case D_L0_8x4:
-            x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, a->l0.me8x4[i][0].mv[0], a->l0.me8x4[i][0].mv[1] );
-            x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, a->l0.me8x4[i][1].mv[0], a->l0.me8x4[i][1].mv[1] );
+            x264_macroblock_cache_mv_ptr( h, x, y+0, 2, 1, 0, a->l0.me8x4[i][0].mv );
+            x264_macroblock_cache_mv_ptr( h, x, y+1, 2, 1, 0, a->l0.me8x4[i][1].mv );
              break;
          case D_L0_4x8:
-            x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, a->l0.me4x8[i][0].mv[0], a->l0.me4x8[i][0].mv[1] );
-            x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, a->l0.me4x8[i][1].mv[0], a->l0.me4x8[i][1].mv[1] );
+            x264_macroblock_cache_mv_ptr( h, x+0, y, 1, 2, 0, a->l0.me4x8[i][0].mv );
+            x264_macroblock_cache_mv_ptr( h, x+1, y, 1, 2, 0, a->l0.me4x8[i][1].mv );
              break;
          case D_L0_4x4:
-            x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, a->l0.me4x4[i][0].mv[0], a->l0.me4x4[i][0].mv[1] );
-            x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, a->l0.me4x4[i][1].mv[0], a->l0.me4x4[i][1].mv[1] );
-            x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, a->l0.me4x4[i][2].mv[0], a->l0.me4x4[i][2].mv[1] );
-            x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, a->l0.me4x4[i][3].mv[0], a->l0.me4x4[i][3].mv[1] );
+            x264_macroblock_cache_mv_ptr( h, x+0, y+0, 1, 1, 0, a->l0.me4x4[i][0].mv );
+            x264_macroblock_cache_mv_ptr( h, x+1, y+0, 1, 1, 0, a->l0.me4x4[i][1].mv );
+            x264_macroblock_cache_mv_ptr( h, x+0, y+1, 1, 1, 0, a->l0.me4x4[i][2].mv );
+            x264_macroblock_cache_mv_ptr( h, x+1, y+1, 1, 1, 0, a->l0.me4x4[i][3].mv );
              break;
          default:
              x264_log( h, X264_LOG_ERROR, "internal error\n" );
@@ -1596,26 +1588,26 @@ static inline void x264_mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int
      if( x264_mb_partition_listX_table[0][part] ) \
      { \
          x264_macroblock_cache_ref( h, x,y,dx,dy, 0, a->l0.i_ref ); \
-        x264_macroblock_cache_mv(  h, x,y,dx,dy, 0, me0.mv[0], me0.mv[1] ); \
+        x264_macroblock_cache_mv_ptr( h, x,y,dx,dy, 0, me0.mv ); \
      } \
      else \
      { \
          x264_macroblock_cache_ref( h, x,y,dx,dy, 0, -1 ); \
-        x264_macroblock_cache_mv(  h, x,y,dx,dy, 0, 0, 0 ); \
+        x264_macroblock_cache_mv(  h, x,y,dx,dy, 0, 0 ); \
          if( b_mvd ) \
-            x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0, 0 ); \
+            x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0 ); \
      } \
      if( x264_mb_partition_listX_table[1][part] ) \
      { \
          x264_macroblock_cache_ref( h, x,y,dx,dy, 1, a->l1.i_ref ); \
-        x264_macroblock_cache_mv(  h, x,y,dx,dy, 1, me1.mv[0], me1.mv[1] ); \
+        x264_macroblock_cache_mv_ptr( h, x,y,dx,dy, 1, me1.mv ); \
      } \
      else \
      { \
          x264_macroblock_cache_ref( h, x,y,dx,dy, 1, -1 ); \
-        x264_macroblock_cache_mv(  h, x,y,dx,dy, 1, 0, 0 ); \
+        x264_macroblock_cache_mv(  h, x,y,dx,dy, 1, 0 ); \
          if( b_mvd ) \
-            x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0, 0 ); \
+            x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0 ); \
      }
  
  static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
@@ -1627,8 +1619,8 @@ static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int
          x264_mb_load_mv_direct8x8( h, i );
          if( b_mvd )
          {
-            x264_macroblock_cache_mvd(  h, x, y, 2, 2, 0, 0, 0 );
-            x264_macroblock_cache_mvd(  h, x, y, 2, 2, 1, 0, 0 );
+            x264_macroblock_cache_mvd(  h, x, y, 2, 2, 0, 0 );
+            x264_macroblock_cache_mvd(  h, x, y, 2, 2, 1, 0 );
              x264_macroblock_cache_skip( h, x, y, 2, 2, 1 );
          }
      }
@@ -1681,7 +1673,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
              x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
              x264_me_search( h, m, &lX->me16x16.mv, 1 );
  
-            x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
+            x264_macroblock_cache_mv_ptr( h, 2*x8, 2*y8, 2, 2, l, m->mv );
  
              /* BI mode */
              h->mc.mc_luma( pix[l], 8, m->p_fref, m->i_stride[0],
@@ -1717,7 +1709,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
          { h->mb.pic.p_fref[0][a->l0.i_ref],
            h->mb.pic.p_fref[1][a->l1.i_ref] };
      DECLARE_ALIGNED_16( uint8_t  pix[2][16*8] );
-    DECLARE_ALIGNED_8( int mvc[2][2] );
+    DECLARE_ALIGNED_4( int16_t mvc[2][2] );
      int i, l;
  
      h->mb.i_partition = D_16x8;
@@ -1740,8 +1732,8 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
              LOAD_FENC( m, h->mb.pic.p_fenc, 0, 8*i );
              LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 0, 8*i );
  
-            *(uint64_t*)mvc[0] = *(uint64_t*)lX->me8x8[2*i].mv;
-            *(uint64_t*)mvc[1] = *(uint64_t*)lX->me8x8[2*i+1].mv;
+            *(uint32_t*)mvc[0] = *(uint32_t*)lX->me8x8[2*i].mv;
+            *(uint32_t*)mvc[1] = *(uint32_t*)lX->me8x8[2*i+1].mv;
  
              x264_mb_predict_mv( h, l, 8*i, 2, m->mvp );
              x264_me_search( h, m, mvc, 2 );
@@ -1786,7 +1778,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
          { h->mb.pic.p_fref[0][a->l0.i_ref],
            h->mb.pic.p_fref[1][a->l1.i_ref] };
      DECLARE_ALIGNED_8( uint8_t pix[2][8*16] );
-    DECLARE_ALIGNED_8( int mvc[2][2] );
+    DECLARE_ALIGNED_4( int16_t mvc[2][2] );
      int i, l;
  
      h->mb.i_partition = D_8x16;
@@ -1808,8 +1800,8 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
              LOAD_FENC( m, h->mb.pic.p_fenc, 8*i, 0 );
              LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 8*i, 0 );
  
-            *(uint64_t*)mvc[0] = *(uint64_t*)lX->me8x8[i].mv;
-            *(uint64_t*)mvc[1] = *(uint64_t*)lX->me8x8[i+2].mv;
+            *(uint32_t*)mvc[0] = *(uint32_t*)lX->me8x8[i].mv;
+            *(uint32_t*)mvc[1] = *(uint32_t*)lX->me8x8[i+2].mv;
  
              x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
              x264_me_search( h, m, mvc, 2 );
@@ -2626,21 +2618,21 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a  )
              {
                  case D_16x16:
                      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
-                    x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
+                    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
                      break;
  
                  case D_16x8:
                      x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].i_ref );
                      x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].i_ref );
-                    x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].mv[0], a->l0.me16x8[0].mv[1] );
-                    x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].mv[0], a->l0.me16x8[1].mv[1] );
+                    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].mv );
+                    x264_macroblock_cache_mv_ptr( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].mv );
                      break;
  
                  case D_8x16:
                      x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].i_ref );
                      x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].i_ref );
-                    x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].mv[0], a->l0.me8x16[0].mv[1] );
-                    x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].mv[0], a->l0.me8x16[1].mv[1] );
+                    x264_macroblock_cache_mv_ptr( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].mv );
+                    x264_macroblock_cache_mv_ptr( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].mv );
                      break;
  
                  default:
@@ -2662,8 +2654,7 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a  )
          {
              h->mb.i_partition = D_16x16;
              x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
-            x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, h->mb.cache.pskip_mv[0],
-                                                         h->mb.cache.pskip_mv[1] );
+            x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, h->mb.cache.pskip_mv );
              break;
          }
  
@@ -2689,26 +2680,26 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a  )
                  {
                  case B_L0_L0:
                      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
-                    x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
+                    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
  
                      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
-                    x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1,  0, 0 );
-                    x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1,  0, 0 );
+                    x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, 0 );
+                    x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1, 0 );
                      break;
                  case B_L1_L1:
                      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
-                    x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0,  0, 0 );
-                    x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0,  0, 0 );
+                    x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, 0 );
+                    x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0, 0 );
  
                      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
-                    x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );
+                    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv );
                      break;
                  case B_BI_BI:
                      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
-                    x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
+                    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
  
                      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
-                    x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );
+                    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv );
                      break;
                  }
                  break;
diff --git a/encoder/cabac.c b/encoder/cabac.c

index 052c0e4134da52fede33d1bf9cc82d11975dbb9a..d482c0666b0b2e18cdf06a0440aac5f2128aa3d2 100644 (file)
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -484,7 +484,7 @@ static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_lis
  
  static inline void x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width, int height )
  {
-    int mvp[2];
+    DECLARE_ALIGNED_4( int16_t mvp[2] );
      int mdx, mdy;
  
      /* Calculate mvd */
@@ -497,7 +497,7 @@ static inline void x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, i
      x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy );
  
      /* save value */
-    x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mdx, mdy );
+    x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, pack16to32_mask(mdx,mdy) );
  }
  
  static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int i )
diff --git a/encoder/cavlc.c b/encoder/cavlc.c

index 54bc567ece6ab7844fde278bdc12fea1c82f8a98..726d024fc532cce99a35d93271fc142f6082554f 100644 (file)
--- a/encoder/cavlc.c
+++ b/encoder/cavlc.c
@@ -232,7 +232,7 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s )
  
  static void cavlc_mb_mvd( x264_t *h, bs_t *s, int i_list, int idx, int width )
  {
-    int mvp[2];
+    DECLARE_ALIGNED_4( int16_t mvp[2] );
      x264_mb_predict_mv( h, i_list, idx, width, mvp );
      bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] );
      bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
@@ -408,7 +408,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
      }
      else if( i_mb_type == P_L0 )
      {
-        int mvp[2];
+        DECLARE_ALIGNED_4( int16_t mvp[2] );
  
          if( h->mb.i_partition == D_16x16 )
          {
@@ -524,7 +524,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
          /* All B mode */
          /* Motion Vector */
          int i_list;
-        int mvp[2];
+        DECLARE_ALIGNED_4( int16_t mvp[2] );
  
          int b_list[2][2];
  
diff --git a/encoder/macroblock.c b/encoder/macroblock.c

index 33547146a582991714e9deae19014733e5903f5c..74c9413055890a52c8bb6e0b147c93270d443fc7 100644 (file)
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -585,17 +585,15 @@ void x264_macroblock_encode( x264_t *h )
      if( !b_force_no_skip )
      {
          if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
-            h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma == 0x00 &&
-            h->mb.cache.mv[0][x264_scan8[0]][0] == h->mb.cache.pskip_mv[0] &&
-            h->mb.cache.mv[0][x264_scan8[0]][1] == h->mb.cache.pskip_mv[1] &&
-            h->mb.cache.ref[0][x264_scan8[0]] == 0 )
+            !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) && 
+            *(uint32_t*)h->mb.cache.mv[0][x264_scan8[0]] == *(uint32_t*)h->mb.cache.pskip_mv
+            && h->mb.cache.ref[0][x264_scan8[0]] == 0 )
          {
              h->mb.i_type = P_SKIP;
          }
  
          /* Check for B_SKIP */
-        if( h->mb.i_type == B_DIRECT &&
-            h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
+        if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
          {
              h->mb.i_type = B_SKIP;
          }
diff --git a/encoder/me.c b/encoder/me.c

index 7198957d1798cd13d7785e91f78c6c583ad92a0d..7598b76f9014ea17a5c209ccc6d60c2d62ccdd7b 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -151,7 +151,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
      }\
  }
  
-void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
+void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
  {
      const int bw = x264_pixel_size[m->i_pixel].w;
      const int bh = x264_pixel_size[m->i_pixel].h;
@@ -982,7 +982,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 )
      m->cost = bcost;
      m->mv[0] = bmx;
      m->mv[1] = bmy;
-    x264_macroblock_cache_mv ( h, 2*(i8&1), i8&2, bw, bh, 0, bmx, bmy );
-    x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, 0, bmx - pmx, bmy - pmy );
+    x264_macroblock_cache_mv ( h, 2*(i8&1), i8&2, bw, bh, 0, pack16to32_mask(bmx, bmy) );
+    x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, 0, pack16to32_mask(bmx - pmx, bmy - pmy) );
  }
  
diff --git a/encoder/me.h b/encoder/me.h

index 295dd14aed736812e2ed5457ebdca23967f3d4be..96135c9e56252ec6b44c78f8c38be96dabce73be 100644 (file)
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -39,16 +39,16 @@ typedef struct
      uint16_t *integral;
      int      i_stride[2];
  
-    int mvp[2];
+    DECLARE_ALIGNED_4( int16_t mvp[2] );
  
      /* output */
      int cost_mv;        /* lambda * nbits for the chosen mv */
      int cost;           /* satd + lambda * nbits */
-    DECLARE_ALIGNED_8( int mv[2] );
+    DECLARE_ALIGNED_4( int16_t mv[2] );
  } x264_me_t;
  
-void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh );
-static inline void x264_me_search( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc )
+void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_fullpel_thresh );
+static inline void x264_me_search( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc )
      { x264_me_search_ref( h, m, mvc, i_mvc, NULL ); }
  
  void x264_me_refine_qpel( x264_t *h, x264_me_t *m );
diff --git a/encoder/slicetype.c b/encoder/slicetype.c

index 8cee4f909d019e03ef7606734668612c78869900..58e666be804404247a43b617a7fed4fcbd7f7a69 100644 (file)
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -153,9 +153,9 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
      i_cost_bak = i_bcost;
      for( l = 0; l < 1 + b_bidir; l++ )
      {
-        int mvc[4][2] = {{0}}, i_mvc;
+        int16_t mvc[4][2] = {{0}};
+        int i_mvc = 0;
          int16_t (*fenc_mv)[2] = &fenc->mv[l][i_mb_xy];
-        i_mvc = 0;
  #define MVC(mv) { mvc[i_mvc][0] = mv[0]; mvc[i_mvc][1] = mv[1]; i_mvc++; }
          if( i_mb_x > 0 )
              MVC(fenc_mv[-1]);
author	Fiona Glaser <fiona@x264.com>
	Thu, 15 May 2008 12:01:01 +0000 (06:01 -0600)
committer	Loren Merritt <pengvado@akuvian.org>
	Sat, 17 May 2008 06:51:34 +0000 (00:51 -0600)
common/common.h		patch \| blob \| history
common/macroblock.c		patch \| blob \| history
common/macroblock.h		patch \| blob \| history
common/osdep.h		patch \| blob \| history
encoder/analyse.c		patch \| blob \| history
encoder/cabac.c		patch \| blob \| history
encoder/cavlc.c		patch \| blob \| history
encoder/macroblock.c		patch \| blob \| history
encoder/me.c		patch \| blob \| history
encoder/me.h		patch \| blob \| history
encoder/slicetype.c		patch \| blob \| history