Fix a nondeterminism with threads and subme>7

author Fiona Glaser <fiona@x264.com>

Sun, 26 Jul 2009 05:31:06 +0000 (22:31 -0700)

committer Fiona Glaser <fiona@x264.com>

Sun, 26 Jul 2009 08:46:19 +0000 (01:46 -0700)
author Fiona Glaser <fiona@x264.com>
Sun, 26 Jul 2009 05:31:06 +0000 (22:31 -0700)
committer Fiona Glaser <fiona@x264.com>
Sun, 26 Jul 2009 08:46:19 +0000 (01:46 -0700)
diff --git a/encoder/analyse.c b/encoder/analyse.c

index dc75fb167e0476b68f1b270163acb3491fd42b92..bdc005ba1928da6540a86e80daafc03382e23104 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -309,8 +309,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
          int i_fmv_range = 4 * h->param.analyse.i_mv_range;
          // limit motion search to a slightly smaller range than the theoretical limit,
          // since the search may go a few iterations past its given range
-        int i_fpel_border = 5; // umh unconditional radius
-        int i_spel_border = 8; // 1.5 for subpel_satd, 1.5 for subpel_rd, 2 for bime, round up
+        int i_fpel_border = 6; // umh: 1 for diamond, 2 for octagon, 2 for hpel
  
          /* Calculate max allowed MV range */
  #define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range-1 )
@@ -348,7 +347,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
  
              h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
              h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
-            h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], X264_MAX(4*(-512+i_spel_border), -i_fmv_range), i_fmv_range );
+            h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
              h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
              h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
              h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
diff --git a/encoder/me.c b/encoder/me.c

index 2e520441681648ea89a6bc5d6f1450f7bf9b7d63..eb6a3a34c60eda4eb36e23e2d52b3a106ee8007b 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -535,7 +535,7 @@ me_hex2:
                      }
                  }
              } while( ++i <= i_me_range/4 );
-            if( bmy <= mv_y_max )
+            if( bmy <= mv_y_max && bmy >= mv_y_min )
                  goto me_hex2;
              break;
          }
@@ -718,8 +718,6 @@ me_hex2:
          int qpel = subpel_iterations[h->mb.i_subpel_refine][3];
          refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );
      }
-    else if( m->mv[1] > h->mb.mv_max_spel[1] )
-        m->mv[1] = h->mb.mv_max_spel[1];
  }
  #undef COST_MV
  
@@ -790,8 +788,8 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
      /* try the subpel component of the predicted mv */
      if( hpel_iters && h->mb.i_subpel_refine < 3 )
      {
-        int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
-        int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
+        int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0]+2, h->mb.mv_max_spel[0]-2 );
+        int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1]+2, h->mb.mv_max_spel[1]-2 );
          if( (mx-bmx)|(my-bmy) )
              COST_MV_SAD( mx, my );
      }
@@ -818,9 +816,6 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
  
      if( !b_refine_qpel )
      {
-        /* check for mvrange */
-        if( bmy > h->mb.mv_max_spel[1] )
-            bmy = h->mb.mv_max_spel[1];
          bcost = COST_MAX;
          COST_MV_SATD( bmx, bmy, -1 );
      }
@@ -844,6 +839,8 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
      bdir = -1;
      for( i = qpel_iters; i > 0; i-- )
      {
+        if( bmy <= h->mb.mv_min_spel[1] || bmy >= h->mb.mv_max_spel[1] )
+            break;
          odir = bdir;
          omx = bmx;
          omy = bmy;
@@ -855,14 +852,6 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
              break;
      }
  
-    /* check for mvrange */
-    if( bmy > h->mb.mv_max_spel[1] )
-    {
-        bmy = h->mb.mv_max_spel[1];
-        bcost = COST_MAX;
-        COST_MV_SATD( bmx, bmy, -1 );
-    }
-
      m->cost = bcost;
      m->mv[0] = bmx;
      m->mv[1] = bmy;
@@ -970,8 +959,8 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m
      /* each byte of visited represents 8 possible m1y positions, so a 4D array isn't needed */
      DECLARE_ALIGNED_16( uint8_t visited[8][8][8] );
  
-    if( bm0y > h->mb.mv_max_spel[1] - 8 ||
-        bm1y > h->mb.mv_max_spel[1] - 8 )
+    if( bm0y < h->mb.mv_min_spel[1] + 8 || bm1y < h->mb.mv_min_spel[1] + 8 ||
+        bm0y > h->mb.mv_max_spel[1] - 8 || bm1y > h->mb.mv_max_spel[1] - 8 )
          return;
  
      h->mc.memzero_aligned( visited, sizeof(visited) );
@@ -1096,6 +1085,10 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
          }
      }
  
+    if( bmy < h->mb.mv_min_spel[1] + 3 ||
+        bmy > h->mb.mv_max_spel[1] - 3 )
+        return;
+
      /* subpel hex search, same pattern as ME HEX. */
      dir = -2;
      omx = bmx;
@@ -1109,8 +1102,8 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
          for( i = 1; i < 10; i++ )
          {
              const int odir = mod6m1[dir+1];
-            if( bmy > h->mb.mv_max_spel[1] - 2 ||
-                bmy < h->mb.mv_min_spel[1] - 2 )
+            if( bmy < h->mb.mv_min_spel[1] + 3 ||
+                bmy > h->mb.mv_max_spel[1] - 3 )
                  break;
              dir = -2;
              omx = bmx;
@@ -1128,7 +1121,6 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
      for( i=0; i<8; i++ ) COST_MV_SATD( omx + square1[i+1][0], omy + square1[i+1][1], satds[i], 1 );
      for( i=0; i<8; i++ ) COST_MV_RD  ( omx + square1[i+1][0], omy + square1[i+1][1], satds[i], 0,0 );
  
-    bmy = x264_clip3( bmy, h->mb.mv_min_spel[1],  h->mb.mv_max_spel[1] );
      m->cost = bcost;
      m->mv[0] = bmx;
      m->mv[1] = bmy;
author	Fiona Glaser <fiona@x264.com>
	Sun, 26 Jul 2009 05:31:06 +0000 (22:31 -0700)
committer	Fiona Glaser <fiona@x264.com>
	Sun, 26 Jul 2009 08:46:19 +0000 (01:46 -0700)
encoder/analyse.c		patch | blob | history
encoder/me.c		patch | blob | history