Faster fullpel predictor checking

author Fiona Glaser <fiona@x264.com>

Tue, 20 Apr 2010 23:50:13 +0000 (16:50 -0700)

committer Fiona Glaser <fiona@x264.com>

Fri, 23 Apr 2010 19:40:25 +0000 (12:40 -0700)
author Fiona Glaser <fiona@x264.com>
Tue, 20 Apr 2010 23:50:13 +0000 (16:50 -0700)
committer Fiona Glaser <fiona@x264.com>
Fri, 23 Apr 2010 19:40:25 +0000 (12:40 -0700)
diff --git a/encoder/me.c b/encoder/me.c

index c263dacd52cec88457e6822797958311be96395c..3b72d5052ebde6220befcc49265590d518a56a1b 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -219,7 +219,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
              COST_MV_HPEL( bmx, bmy );
          for( int i = 0; i < i_mvc; i++ )
          {
-            if( M32( mvc[i] ) && (pmv - M32( mvc[i] )) )
+            if( M32( mvc[i] ) && (pmv != M32( mvc[i] )) )
              {
                  int mx = x264_clip3( mvc[i][0], mv_x_min_qpel, mv_x_max_qpel );
                  int my = x264_clip3( mvc[i][1], mv_y_min_qpel, mv_y_max_qpel );
@@ -243,16 +243,27 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
           * biasing against use of the predicted motion vector. */
          bcost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[bmy*stride+bmx], stride );
          pmv = pack16to32_mask( bmx, bmy );
-        if( i_mvc )
-            x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
-        for( int i = 0; i < i_mvc; i++ )
+        if( i_mvc > 0 )
          {
-            if( M32( mvc[i] ) && (pmv - M32( mvc[i] )) )
+            x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+            bcost <<= 4;
+            for( int i = 1; i <= i_mvc; i++ )
+            {
+                if( M32( mvc[i-1] ) && (pmv != M32( mvc[i-1] )) )
+                {
+                    int mx = mvc[i-1][0];
+                    int my = mvc[i-1][1];
+                    int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
+                    cost = (cost << 4) + i;
+                    COPY1_IF_LT( bcost, cost );
+                }
+            }
+            if( bcost&15 )
              {
-                int mx = mvc[i][0];
-                int my = mvc[i][1];
-                COST_MV( mx, my );
+                bmx = mvc[(bcost&15)-1][0];
+                bmy = mvc[(bcost&15)-1][1];
              }
+            bcost >>= 4;
          }
      }
  
@@ -265,7 +276,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
          {
              /* diamond search, radius 1 */
              bcost <<= 4;
-            int i = 0;
+            int i = i_me_range;
              do
              {
                  COST_MV_X4_DIR( 0,-1, 0,1, -1,0, 1,0, costs );
@@ -278,9 +289,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                  bmx -= (bcost<<28)>>30;
                  bmy -= (bcost<<30)>>30;
                  bcost &= ~15;
-                if( !CHECK_MVRANGE(bmx, bmy) )
-                    break;
-            } while( ++i < i_me_range );
+            } while( --i && CHECK_MVRANGE(bmx, bmy) );
              bcost >>= 4;
              break;
          }
@@ -325,7 +334,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                  bmy += hex2[dir+1][1];
  
                  /* half hexagon, not overlapping the previous iteration */
-                for( int i = 1; i < i_me_range>>1 && CHECK_MVRANGE(bmx, bmy); i++ )
+                for( int i = (i_me_range>>1) - 1; i > 0 && CHECK_MVRANGE(bmx, bmy); i-- )
                  {
                      COST_MV_X3_DIR( hex2[dir+0][0], hex2[dir+0][1],
                                      hex2[dir+1][0], hex2[dir+1][1],
author	Fiona Glaser <fiona@x264.com>
	Tue, 20 Apr 2010 23:50:13 +0000 (16:50 -0700)
committer	Fiona Glaser <fiona@x264.com>
	Fri, 23 Apr 2010 19:40:25 +0000 (12:40 -0700)