From: Fiona Glaser <fiona@x264.com>
Date: Thu, 12 Jun 2008 14:09:22 +0000 (-0600)
Subject: More tweaks to me.c
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5204112861581df847a4a892ea63b8a0d72f2e6c;p=libx264

More tweaks to me.c
Added inline MMX version of UMH's predictor difference test
Various cosmetics throughout me.c
Removed a C99-ism introduced in r878.
---

diff --git a/common/common.h b/common/common.h
index 84d6d7f2..572231ae 100644
--- a/common/common.h
+++ b/common/common.h
@@ -130,6 +130,17 @@ static inline void x264_median_mv( int16_t *dst, int16_t *a, int16_t *b, int16_t
     dst[1] = x264_median( a[1], b[1], c[1] );
 }
 
+static inline int x264_predictor_difference( int16_t (*mvc)[2], int i_mvc ) /* returns the summed absolute difference, over both components, between each pair of consecutive entries mvc[i] and mvc[i+1] */
+{
+    int sum = 0, i; /* sum stays 0 when i_mvc <= 1 because the loop body never runs */
+    for( i = 0; i < i_mvc-1; i++ ) /* i_mvc entries give i_mvc-1 adjacent pairs */
+    {
+        sum += abs( mvc[i][0] - mvc[i+1][0] ) /* difference of component [0] */
+             + abs( mvc[i][1] - mvc[i+1][1] ); /* difference of component [1] */
+    }
+    return sum;
+}
+
 #ifdef HAVE_MMX
 #include "x86/util.h"
 #endif
diff --git a/common/x86/util.h b/common/x86/util.h
index 73f49046..4b149f28 100644
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -39,6 +39,37 @@ static inline void x264_median_mv_mmxext( int16_t *dst, int16_t *a, int16_t *b,
         :"m"(*(uint32_t*)a), "m"(*(uint32_t*)b), "m"(*(uint32_t*)c)
     );
 }
+#define x264_predictor_difference x264_predictor_difference_mmxext /* after this point the generic name expands to the inline-asm version below */
+static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], int i_mvc ) /* sums per-component absolute differences of consecutive mvc entries in MMX registers; NOTE(review): for i_mvc < 2 the loads below address entries before mvc[0], presumably callers guarantee i_mvc >= 2 - confirm */
+{
+    int sum = 0;
+    uint16_t output[4]; /* receives the four 16-bit lanes of the accumulator mm4 */
+    asm( /* operands: %0 = output, %1 = i_mvc (used as counter and index), %2 = mvc (base address) */
+        "pxor    %%mm4, %%mm4 \n" /* mm4 = 0: running per-lane sum */
+        "test    $1, %1       \n" /* is i_mvc odd? */
+        "jnz 3f               \n" /* odd: go straight to the two-at-a-time loop */
+        "movd    -8(%2,%1,4), %%mm0 \n" /* even: mm0 = mvc[i_mvc-2] (each entry is 4 bytes) */
+        "movd    -4(%2,%1,4), %%mm3 \n" /* mm3 = mvc[i_mvc-1] */
+        "psubw   %%mm3, %%mm0 \n" /* mm0 = mvc[i_mvc-2] - mvc[i_mvc-1] per 16-bit lane */
+        "jmp 2f               \n" /* enter the loop tail with this single difference */
+        "3:                   \n"
+        "sub     $1,    %1    \n" /* odd count: drop to i_mvc-1 so the pairs can be taken two at a time */
+        "1:                   \n"
+        "movq    -8(%2,%1,4), %%mm0 \n" /* mm0 = { mvc[n-2], mvc[n-1] }, where n is the current value of %1 */
+        "psubw   -4(%2,%1,4), %%mm0 \n" /* subtract { mvc[n-1], mvc[n] }: two adjacent differences at once */
+        "2:                   \n"
+        "sub     $2,    %1    \n" /* n -= 2; this sets the flags tested by jg below */
+        "pxor    %%mm2, %%mm2 \n" /* mm2 = 0 */
+        "psubw   %%mm0, %%mm2 \n" /* mm2 = -mm0 */
+        "pmaxsw  %%mm2, %%mm0 \n" /* mm0 = max(mm0, -mm0), i.e. per-lane absolute value */
+        "paddusw %%mm0, %%mm4 \n" /* accumulate into mm4 with unsigned saturation */
+        "jg 1b                \n" /* loop while n > 0; the MMX instructions in between leave the flags from sub untouched */
+        "movq    %%mm4, %0    \n" /* store the four lane sums to output */
+        :"=m"(output), "+r"(i_mvc), "+r"(mvc) /* NOTE(review): no input or clobber list - mm0/mm2/mm3/mm4, the flags and the memory read through mvc are not declared to the compiler */
+    );
+    sum += output[0] + output[1] + output[2] + output[3]; /* horizontal add of the four lanes */
+    return sum;
+}
 #endif
 
 #endif
diff --git a/encoder/me.c b/encoder/me.c
index 276f5432..216c909a 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -186,8 +186,8 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
     /* try extra predictors if provided */
     if( h->mb.i_subpel_refine >= 3 )
     {
-        COST_MV_HPEL( bmx, bmy );
         uint32_t bmv = pack16to32_mask(bmx,bmy);
+        COST_MV_HPEL( bmx, bmy );
         do
         {
             if( *(uint32_t*)mvc[i] && (bmv - *(uint32_t*)mvc[i]) )
@@ -235,7 +235,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
         for( i = 0; i < i_me_range; i++ )
         {
             DIA1_ITER( bmx, bmy );
-            if( bmx == omx && bmy == omy )
+            if( (bmx == omx) & (bmy == omy) )
                 break;
             if( !CHECK_MVRANGE(bmx, bmy) )
                 break;
@@ -389,9 +389,7 @@ me_hex2:
                             + abs( m->mvp[1] - mvc[0][1] );
                         denom++;
                     }
-                    for( i = 0; i < i_mvc-1; i++ )
-                        mvd += abs( mvc[i][0] - mvc[i+1][0] )
-                             + abs( mvc[i][1] - mvc[i+1][1] );
+                    mvd += x264_predictor_difference( mvc, i_mvc );
                 }
 
                 sad_ctx = SAD_THRESH(1000) ? 0
@@ -689,13 +687,12 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
     int bcost = m->cost;
     int odir = -1, bdir;
 
-
     /* try the subpel component of the predicted mv */
     if( hpel_iters && h->mb.i_subpel_refine < 3 )
     {
         int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
         int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
-        if( mx != bmx || my != bmy )
+        if( (mx-bmx)|(my-bmy) )
             COST_MV_SAD( mx, my );
     }
 
@@ -715,7 +712,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
         COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx  ] + p_cost_mvy[omy+2], bmy, omy+2 );
         COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-2] + p_cost_mvy[omy  ], bmx, omx-2, bmy, omy );
         COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+2] + p_cost_mvy[omy  ], bmx, omx+2, bmy, omy );
-        if( bmx == omx && bmy == omy )
+        if( (bmx == omx) & (bmy == omy) )
             break;
     }