* all: Patches by Loren Merritt:

author Laurent Aimar <fenrir@videolan.org>

Wed, 22 Sep 2004 07:07:48 +0000 (07:07 +0000)

committer Laurent Aimar <fenrir@videolan.org>

Wed, 22 Sep 2004 07:07:48 +0000 (07:07 +0000)
author Laurent Aimar <fenrir@videolan.org>
Wed, 22 Sep 2004 07:07:48 +0000 (07:07 +0000)
committer Laurent Aimar <fenrir@videolan.org>
Wed, 22 Sep 2004 07:07:48 +0000 (07:07 +0000)
diff --git a/core/common.c b/core/common.c

index b8a157ed134dd477bbf1d09d66ab2c8479d62032..403c0764e153d82f974fae509171811fca177289 100644 (file)
--- a/core/common.c
+++ b/core/common.c
@@ -96,6 +96,7 @@ void    x264_param_default( x264_param_t *param )
      /* */
      param->analyse.intra = X264_ANALYSE_I4x4;
      param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16;
+    param->analyse.i_subpel_refine = 1;
      param->analyse.b_psnr = 1;
  }
  
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 27cf9938c5b9e2ffa9140c95e5c4ad1bda4c81e2..41c561d9d66af64fcb47bd97473dd51dca41ae10 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -25,6 +25,7 @@
  #include <stdio.h>
  #include <string.h>
  #include <math.h>
+#include <limits.h>
  
  #include "../core/common.h"
  #include "../core/macroblock.h"
@@ -464,15 +465,8 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  //    m.mvc[0]  = 0;
  //    m.mvc[1]  = 0;
  
-    /* ME for ref 0 */
-    m.p_fref = h->mb.pic.p_fref[0][0][0];
-    x264_mb_predict_mv_16x16( h, 0, 0, m.mvp );
-    x264_me_search( h, &m );
-
-    a->l0.i_ref = 0;
-    a->l0.me16x16 = m;
-
-    for( i_ref = 1; i_ref < h->i_ref0; i_ref++ )
+    a->l0.me16x16.cost = INT_MAX;
+    for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
      {
          /* search with ref */
          m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
@@ -489,6 +483,9 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
          }
      }
  
+    /* subtract ref cost, so we don't have to add it for the other P types */
+    a->l0.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
+
      /* Set global ref, needed for all others modes */
      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
  }
@@ -765,15 +762,9 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
      m.b_mvc   = 0;
      m.i_mv_range = a->i_mv_range;
  
-    /* ME for List 0 ref 0 */
-    m.p_fref = h->mb.pic.p_fref[0][0][0];
-    x264_mb_predict_mv_16x16( h, 0, 0, m.mvp );
-    x264_me_search( h, &m );
-
-    a->l0.i_ref = 0;
-    a->l0.me16x16 = m;
-
-    for( i_ref = 1; i_ref < h->i_ref0; i_ref++ )
+    /* ME for List 0 */
+    a->l0.me16x16.cost = INT_MAX;
+    for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
      {
          /* search with ref */
          m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
@@ -790,15 +781,9 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
          }
      }
  
-    /* ME for list 1 ref 0 */
-    m.p_fref = h->mb.pic.p_fref[1][0][0];
-    x264_mb_predict_mv_16x16( h, 1, 0, m.mvp );
-    x264_me_search( h, &m );
-
-    a->l1.i_ref = 0;
-    a->l1.me16x16 = m;
-
-    for( i_ref = 1; i_ref < h->i_ref1; i_ref++ )
+    /* ME for list 1 */
+    a->l1.me16x16.cost = INT_MAX;
+    for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
      {
          /* search with ref */
          m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
diff --git a/encoder/encoder.c b/encoder/encoder.c

index 5fdf21a9c6c33b415d98b0f72a7f80bb3c7b8002..517d730acb8bb8886b8edec43e7c6129477d8b1f 100644 (file)
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -347,6 +347,11 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
  
      h->param.i_cabac_init_idc = x264_clip3( h->param.i_cabac_init_idc, -1, 2 );
  
+    if( param->analyse.i_subpel_refine < 0 )
+       param->analyse.i_subpel_refine = 0;
+    if( param->analyse.i_subpel_refine > 5 )
+       param->analyse.i_subpel_refine = 5;
+
      /* VUI */
      if( h->param.vui.i_sar_width > 0 && h->param.vui.i_sar_height > 0 )
      {
diff --git a/encoder/me.c b/encoder/me.c

index 86396f3b5d55edb73bb62da92b48cc378c1a63b8..90131515199b781199665a572e6579a4308e2869 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -28,6 +28,20 @@
  #include "../core/common.h"
  #include "me.h"
  
+/* presets selected from good points on the speed-vs-quality curve of several test videos
+ * subpel_iterations[i_subpel_refine] = { refine_hpel, refine_qpel, me_hpel, me_qpel }
+ * where me_* are the number of EPZS iterations run on all candidate block types,
+ * and refine_* are run only on the winner. */
+const static int subpel_iterations[][4] = 
+   {{1,0,0,0},
+    {1,1,0,0},
+    {1,2,0,0},
+    {0,2,1,0},
+    {0,2,1,1},
+    {0,2,1,2}};
+
+static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters );
+
  void x264_me_search( x264_t *h, x264_me_t *m )
  {
      const int i_pixel = m->i_pixel;
@@ -35,6 +49,7 @@ void x264_me_search( x264_t *h, x264_me_t *m )
      int bmx, bmy;
      uint8_t *p_fref = m->p_fref;
      int i_iter;
+    int hpel, qpel;
  
  
      /* init with mvp */
@@ -118,9 +133,22 @@ void x264_me_search( x264_t *h, x264_me_t *m )
      m->cost = h->pixf.satd[i_pixel]( m->p_fenc, m->i_stride, p_fref, m->i_stride ) +
                  m->lm * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
                            bs_size_se( m->mv[1] - m->mvp[1] ) );
+
+    hpel = subpel_iterations[h->param.analyse.i_subpel_refine][2];
+    qpel = subpel_iterations[h->param.analyse.i_subpel_refine][3];
+    if( hpel || qpel )
+       refine_subpel( h, m, hpel, qpel );
  }
  
  void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
+{
+    int hpel = subpel_iterations[h->param.analyse.i_subpel_refine][0];
+    int qpel = subpel_iterations[h->param.analyse.i_subpel_refine][1];
+    if( hpel || qpel )
+       refine_subpel( h, m, hpel, qpel );
+}
+
+static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters )
  {
      const int bw = x264_pixel_size[m->i_pixel].w;
      const int bh = x264_pixel_size[m->i_pixel].h;
@@ -128,66 +156,47 @@ void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
      DECLARE_ALIGNED( uint8_t, pix[4][16*16], 16 );
      int cost[4];
      int best;
+    int step, i;
  
      int bmx = m->mv[0];
      int bmy = m->mv[1];
  
-    h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - 2, bw, bh );
-    h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + 2, bw, bh );
-    h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[2], 16, bmx - 2, bmy + 0, bw, bh );
-    h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[3], 16, bmx + 2, bmy + 0, bw, bh );
-
-    cost[0] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[0], 16 ) +
-              m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy - 2 - m->mvp[1] ) );
-    cost[1] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[1], 16 ) +
-              m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy + 2 - m->mvp[1] ) );
-    cost[2] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[2], 16 ) +
-              m->lm * ( bs_size_se( bmx - 2 - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
-    cost[3] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[3], 16 ) +
-              m->lm * ( bs_size_se( bmx + 2 - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
-
-    best = 0;
-    if( cost[1] < cost[0] )    best = 1;
-    if( cost[2] < cost[best] ) best = 2;
-    if( cost[3] < cost[best] ) best = 3;
-
-    if( cost[best] < m->cost )
+    for( step = 2; step >= 1; step-- )
      {
-        m->cost = cost[best];
-        if( best == 0 )      bmy -= 2;
-        else if( best == 1 ) bmy += 2;
-        else if( best == 2 ) bmx -= 2;
-        else if( best == 3 ) bmx += 2;
-    }
-
-    h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - 1, bw, bh );
-    h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + 1, bw, bh );
-    h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[2], 16, bmx - 1, bmy + 0, bw, bh );
-    h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[3], 16, bmx + 1, bmy + 0, bw, bh );
-
-    cost[0] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[0], 16 ) +
-              m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy - 1 - m->mvp[1] ) );
-    cost[1] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[1], 16 ) +
-              m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy + 1 - m->mvp[1] ) );
-    cost[2] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[2], 16 ) +
-              m->lm * ( bs_size_se( bmx - 1 - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
-    cost[3] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[3], 16 ) +
-              m->lm * ( bs_size_se( bmx + 1 - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
-
-    best = 0;
-    if( cost[1] < cost[0] )    best = 1;
-    if( cost[2] < cost[best] ) best = 2;
-    if( cost[3] < cost[best] ) best = 3;
-
-    if( cost[best] < m->cost )
-    {
-        m->cost = cost[best];
-        if( best == 0 )      bmy--;
-        else if( best == 1 ) bmy++;
-        else if( best == 2 ) bmx--;
-        else if( best == 3 ) bmx++;
+       for( i = step>1 ? hpel_iters : qpel_iters; i > 0; i-- )
+        {
+            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - step, bw, bh );
+            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + step, bw, bh );
+            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[2], 16, bmx - step, bmy + 0, bw, bh );
+            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[3], 16, bmx + step, bmy + 0, bw, bh );
+    
+            cost[0] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[0], 16 ) +
+                      m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy - step - m->mvp[1] ) );
+            cost[1] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[1], 16 ) +
+                      m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy + step - m->mvp[1] ) );
+            cost[2] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[2], 16 ) +
+                      m->lm * ( bs_size_se( bmx - step - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
+            cost[3] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[3], 16 ) +
+                      m->lm * ( bs_size_se( bmx + step - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
+    
+            best = 0;
+            if( cost[1] < cost[0] )    best = 1;
+            if( cost[2] < cost[best] ) best = 2;
+            if( cost[3] < cost[best] ) best = 3;
+    
+            if( cost[best] < m->cost )
+            {
+                m->cost = cost[best];
+                if( best == 0 )      bmy -= step;
+                else if( best == 1 ) bmy += step;
+                else if( best == 2 ) bmx -= step;
+                else if( best == 3 ) bmx += step;
+            }
+            else break;
+       }
      }
  
      m->mv[0] = bmx;
      m->mv[1] = bmy;
  }
+
diff --git a/x264.c b/x264.c

index c7c6df962bee33db734ed0a16b2b2cda0b5ae7f5..4c662d8e3186e895ac490d7c3f399f82d2cfcaac 100644 (file)
--- a/x264.c
+++ b/x264.c
@@ -132,6 +132,7 @@ static void Help( void )
               "                                  - i4x4\n"
               "                                  - psub16x16,psub8x8\n"
               "                                  - none, all\n"
+             "      --subme <integer>       Subpixel motion estimation quality\n"
               "\n"
               "  -s, --sar width:height      Specify Sample Aspect Ratio\n"
               "  -o, --output                Specify output file\n"
@@ -176,6 +177,7 @@ static int  Parse( int argc, char **argv,
  #define OPT_QCOMP 266
  #define OPT_NOPSNR 267
  #define OPT_QUIET 268
+#define OPT_SUBME 269
  
          static struct option long_options[] =
          {
@@ -196,6 +198,7 @@ static int  Parse( int argc, char **argv,
              { "sar",     required_argument, NULL, 's' },
              { "output",  required_argument, NULL, 'o' },
              { "analyse", required_argument, NULL, 'A' },
+            { "subme",   required_argument, NULL, OPT_SUBME },
              { "rcsens",  required_argument, NULL, OPT_RCSENS },
              { "rcbuf",   required_argument, NULL, OPT_RCBUF },
              { "rcinitbuf",required_argument, NULL, OPT_RCIBUF },
@@ -304,6 +307,9 @@ static int  Parse( int argc, char **argv,
                  if( strstr( optarg, "psub16x16" ) ) param->analyse.inter |= X264_ANALYSE_PSUB16x16;
                  if( strstr( optarg, "psub8x8" ) )   param->analyse.inter |= X264_ANALYSE_PSUB8x8;
                  break;
+            case OPT_SUBME:
+                param->analyse.i_subpel_refine = atoi(optarg);
+                break;
              case OPT_RCBUF:
                  param->rc.i_rc_buffer_size = atoi(optarg);
                  break;
diff --git a/x264.h b/x264.h

index 76deec4ba2b40d3abd875c0a5508cbad66dbc831..8da79c829766c811ee476575608711b5ecf1f62f 100644 (file)
--- a/x264.h
+++ b/x264.h
@@ -124,6 +124,8 @@ typedef struct
          unsigned int intra;     /* intra flags */
          unsigned int inter;     /* inter flags */
  
+        int          i_subpel_refine; /* subpixel motion estimation quality */
+
          int          b_psnr;    /* Do we compute PSNR stats (save a few % of cpu) */
      } analyse;
author	Laurent Aimar <fenrir@videolan.org>
	Wed, 22 Sep 2004 07:07:48 +0000 (07:07 +0000)
committer	Laurent Aimar <fenrir@videolan.org>
	Wed, 22 Sep 2004 07:07:48 +0000 (07:07 +0000)
core/common.c		patch \| blob \| history
encoder/analyse.c		patch \| blob \| history
encoder/encoder.c		patch \| blob \| history
encoder/me.c		patch \| blob \| history
x264.c		patch \| blob \| history
x264.h		patch \| blob \| history