Use SAD instead of SATD for halfpel motion search.

author Loren Merritt <pengvado@videolan.org>

Sat, 15 Oct 2005 00:27:17 +0000 (00:27 +0000)

committer Loren Merritt <pengvado@videolan.org>

Sat, 15 Oct 2005 00:27:17 +0000 (00:27 +0000)
author Loren Merritt <pengvado@videolan.org>
Sat, 15 Oct 2005 00:27:17 +0000 (00:27 +0000)
committer Loren Merritt <pengvado@videolan.org>
Sat, 15 Oct 2005 00:27:17 +0000 (00:27 +0000)
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 6e4c3ab0a33910a6c90908113d74bf5851e4ab54..a2348ab618d5c1a90295bab55312a7b7deec0539 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -674,8 +674,8 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
      x264_me_t m;
      int i_ref;
      int mvc[7][2], i_mvc;
-    int i_fullpel_thresh = INT_MAX;
-    int *p_fullpel_thresh = h->i_ref0>1 ? &i_fullpel_thresh : NULL;
+    int i_halfpel_thresh = INT_MAX;
+    int *p_halfpel_thresh = h->i_ref0>1 ? &i_halfpel_thresh : NULL;
  
      /* 16x16 Search on all ref frame */
      m.i_pixel = PIXEL_16x16;
@@ -686,7 +686,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
      for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
      {
          const int i_ref_cost = REF_COST( 0, i_ref );
-        i_fullpel_thresh -= i_ref_cost;
+        i_halfpel_thresh -= i_ref_cost;
          m.i_ref_cost = i_ref_cost;
          m.i_ref = i_ref;
  
@@ -694,10 +694,10 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
          LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, 0 );
          x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
          x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
-        x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
+        x264_me_search_ref( h, &m, mvc, i_mvc, p_halfpel_thresh );
  
          m.cost += i_ref_cost;
-        i_fullpel_thresh += i_ref_cost;
+        i_halfpel_thresh += i_ref_cost;
  
          if( m.cost < a->l0.me16x16.cost )
              a->l0.me16x16 = m;
@@ -726,8 +726,8 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
      x264_me_t m;
      int i_ref;
      uint8_t  **p_fenc = h->mb.pic.p_fenc;
-    int i_fullpel_thresh = INT_MAX;
-    int *p_fullpel_thresh = /*h->i_ref0>1 ? &i_fullpel_thresh : */NULL;
+    int i_halfpel_thresh = INT_MAX;
+    int *p_halfpel_thresh = /*h->i_ref0>1 ? &i_halfpel_thresh : */NULL;
      int i;
      int i_maxref = h->i_ref0-1;
  
@@ -767,17 +767,17 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
          for( i_ref = 0; i_ref <= i_maxref; i_ref++ )
          {
               const int i_ref_cost = REF_COST( 0, i_ref );
-             i_fullpel_thresh -= i_ref_cost;
+             i_halfpel_thresh -= i_ref_cost;
               m.i_ref_cost = i_ref_cost;
               m.i_ref = i_ref;
  
               LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 8*x8, 8*y8 );
               x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref );
               x264_mb_predict_mv( h, 0, 4*i, 2, m.mvp );
-             x264_me_search_ref( h, &m, a->l0.mvc[i_ref], i+1, p_fullpel_thresh );
+             x264_me_search_ref( h, &m, a->l0.mvc[i_ref], i+1, p_halfpel_thresh );
  
               m.cost += i_ref_cost;
-             i_fullpel_thresh += i_ref_cost;
+             i_halfpel_thresh += i_ref_cost;
               *(uint64_t*)a->l0.mvc[i_ref][i+1] = *(uint64_t*)m.mv;
  
               if( m.cost < l0m->cost )
@@ -1166,8 +1166,8 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
      x264_me_t m;
      int i_ref;
      int mvc[8][2], i_mvc;
-    int i_fullpel_thresh = INT_MAX;
-    int *p_fullpel_thresh = h->i_ref0>1 ? &i_fullpel_thresh : NULL;
+    int i_halfpel_thresh = INT_MAX;
+    int *p_halfpel_thresh = h->i_ref0>1 ? &i_halfpel_thresh : NULL;
  
      /* 16x16 Search on all ref frame */
      m.i_pixel = PIXEL_16x16;
@@ -1182,7 +1182,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
          LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, 0 );
          x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
          x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
-        x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
+        x264_me_search_ref( h, &m, mvc, i_mvc, p_halfpel_thresh );
  
          /* add ref cost */
          m.cost += REF_COST( 0, i_ref );
@@ -1201,8 +1201,8 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
      a->l0.me16x16.cost -= REF_COST( 0, a->l0.i_ref );
  
      /* ME for list 1 */
-    i_fullpel_thresh = INT_MAX;
-    p_fullpel_thresh = h->i_ref1>1 ? &i_fullpel_thresh : NULL;
+    i_halfpel_thresh = INT_MAX;
+    p_halfpel_thresh = h->i_ref1>1 ? &i_halfpel_thresh : NULL;
      a->l1.me16x16.cost = INT_MAX;
      for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
      {
@@ -1210,7 +1210,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
          LOAD_HPELS( &m, h->mb.pic.p_fref[1][i_ref], 0, 0 );
          x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
          x264_mb_predict_mv_ref16x16( h, 1, i_ref, mvc, &i_mvc );
-        x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
+        x264_me_search_ref( h, &m, mvc, i_mvc, p_halfpel_thresh );
  
          /* add ref cost */
          m.cost += REF_COST( 1, i_ref );
diff --git a/encoder/me.c b/encoder/me.c

index b2717547950b84d408ae841f09d29b298679ede5..326ee721c6833700eb75669dc4e43e667765ee0d 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -36,13 +36,13 @@
  static const int subpel_iterations[][4] = 
     {{1,0,0,0},
      {1,1,0,0},
-    {1,2,0,0},
+    {0,1,1,0},
      {0,2,1,0},
      {0,2,1,1},
      {0,2,1,2},
      {0,0,2,3}};
  
-static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters );
+static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel );
  
  #define COST_MV( mx, my ) \
  { \
@@ -58,11 +58,10 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
      } \
  }
  
-void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh )
+void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
  {
      const int i_pixel = m->i_pixel;
      const int i_me_range = h->param.analyse.i_me_range;
-    const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
      int bmx, bmy, bcost;
      int omx, omy, pmx, pmy;
      uint8_t *p_fref = m->p_fref[0];
@@ -86,7 +85,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int
      bmx = pmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max );
      bmy = pmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max );
      bcost = COST_MAX;
-    COST_MV( bmx, bmy );
+    COST_MV( pmx, pmy );
      /* I don't know why this helps */
      bcost -= p_cost_mvx[ bmx<<2 ] + p_cost_mvy[ bmy<<2 ];
  
@@ -246,39 +245,16 @@ umh_small_hex:
  
      /* compute the real cost */
      m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
-    m->cost = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], m->i_stride[0],
-                    &p_fref[bmy * m->i_stride[0] + bmx], m->i_stride[0] )
-            + m->cost_mv;
-    if( b_chroma_me )
-    {
-        const int bw = x264_pixel_size[m->i_pixel].w;
-        const int bh = x264_pixel_size[m->i_pixel].h;
-        DECLARE_ALIGNED( uint8_t, pix[8*8*2], 16 );
-        h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix, 8, m->mv[0], m->mv[1], bw/2, bh/2 );
-        h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix+8*8, 8, m->mv[0], m->mv[1], bw/2, bh/2 );
-        m->cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], m->i_stride[1], pix, 8 )
-                 + h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], m->i_stride[1], pix+8*8, 8 );
-    }
-
+    m->cost = bcost;
+    if( bmx == pmx && bmy == pmy )
+        m->cost += m->cost_mv;
+    
      /* subpel refine */
-    if( h->mb.i_subpel_refine >= 3 )
+    if( h->mb.i_subpel_refine >= 2 )
      {
-        int hpel, qpel;
-
-        /* early termination (when examining multiple reference frames)
-         * FIXME: this can update fullpel_thresh even if the match
-         *        ref is rejected after subpel refinement */
-        if( p_fullpel_thresh )
-        {
-            if( (m->cost*7)>>3 > *p_fullpel_thresh )
-                return;
-            else if( m->cost < *p_fullpel_thresh )
-                *p_fullpel_thresh = m->cost;
-        }
-
-        hpel = subpel_iterations[h->mb.i_subpel_refine][2];
-        qpel = subpel_iterations[h->mb.i_subpel_refine][3];
-        refine_subpel( h, m, hpel, qpel );
+        int hpel = subpel_iterations[h->mb.i_subpel_refine][2];
+        int qpel = subpel_iterations[h->mb.i_subpel_refine][3];
+        refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );
      }
  }
  #undef COST_MV
@@ -291,10 +267,24 @@ void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
      if( m->i_pixel <= PIXEL_8x8 && h->sh.i_type == SLICE_TYPE_P )
          m->cost -= m->i_ref_cost;
         
-    refine_subpel( h, m, hpel, qpel );
+    refine_subpel( h, m, hpel, qpel, NULL, 1 );
  }
  
-#define COST_MV( mx, my ) \
+#define COST_MV_SAD( mx, my ) \
+{ \
+    int stride = 16; \
+    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw, bh ); \
+    int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], m->i_stride[0], src, stride ) \
+             + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
+    if( cost < bcost ) \
+    {                  \
+        bcost = cost;  \
+        bmx = mx;      \
+        bmy = my;      \
+    } \
+}
+
+#define COST_MV_SATD( mx, my ) \
  { \
      int stride = 16; \
      uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw, bh ); \
@@ -318,7 +308,7 @@ void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
      } \
  }
  
-static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters )
+static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel )
  {
      const int bw = x264_pixel_size[m->i_pixel].w;
      const int bh = x264_pixel_size[m->i_pixel].h;
@@ -328,12 +318,14 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
      const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
  
      DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
-    int step, i;
+    int omx, omy;
+    int i;
  
      int bmx = m->mv[0];
      int bmy = m->mv[1];
      int bcost = m->cost;
  
+
      /* try the subpel component of the predicted mv if it's close to
       * the result of the fullpel search */
      if( hpel_iters )
@@ -341,22 +333,54 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
          int mx = X264_ABS(bmx - m->mvp[0]) < 4 ? m->mvp[0] : bmx;
          int my = X264_ABS(bmy - m->mvp[1]) < 4 ? m->mvp[1] : bmy;
          if( mx != bmx || my != bmy )
-            COST_MV( mx, my );
+            COST_MV_SAD( mx, my );
      }
-
-    for( step = 2; step >= 1; step-- )
+    
+    /* hpel search */
+    for( i = hpel_iters; i > 0; i-- )
+    {
+        omx = bmx;
+        omy = bmy;
+        COST_MV_SAD( omx, omy - 2 );
+        COST_MV_SAD( omx, omy + 2 );
+        COST_MV_SAD( omx - 2, omy );
+        COST_MV_SAD( omx + 2, omy );
+        if( bmx == omx && bmy == omy )
+            break;
+    }
+    
+    if( !b_refine_qpel )
      {
-       for( i = step>1 ? hpel_iters : qpel_iters; i > 0; i-- )
+        bcost = COST_MAX;
+        COST_MV_SATD( bmx, bmy );
+    }
+    
+    /* early termination when examining multiple reference frames */
+    if( p_halfpel_thresh )
+    {
+        if( (bcost*7)>>3 > *p_halfpel_thresh )
          {
-            int omx = bmx;
-            int omy = bmy;
-            COST_MV( omx, omy - step );
-            COST_MV( omx, omy + step );
-            COST_MV( omx - step, omy );
-            COST_MV( omx + step, omy );
-            if( bmx == omx && bmy == omy )
-                break;
-       }
+            m->cost = bcost;
+            m->mv[0] = bmx;
+            m->mv[1] = bmy;
+            // don't need cost_mv
+            return;
+        }
+        else if( bcost < *p_halfpel_thresh )
+            *p_halfpel_thresh = bcost;
+    }
+
+    /* qpel search */
+    for( i = qpel_iters; i > 0; i-- )
+    {
+        omx = bmx;
+        omy = bmy;
+        COST_MV_SATD( omx, omy - 1 );
+        COST_MV_SATD( omx, omy + 1 );
+        COST_MV_SATD( omx - 1, omy );
+        COST_MV_SATD( omx + 1, omy );
+        if( bmx == omx && bmy == omy )
+            break;
      }
  
      m->cost = bcost;
author	Loren Merritt <pengvado@videolan.org>
	Sat, 15 Oct 2005 00:27:17 +0000 (00:27 +0000)
committer	Loren Merritt <pengvado@videolan.org>
	Sat, 15 Oct 2005 00:27:17 +0000 (00:27 +0000)
encoder/analyse.c		patch | blob | history
encoder/me.c		patch | blob | history