Cache half-pixel interpolated reference frames, to avoid duplicate motion compensation.

author Loren Merritt <pengvado@videolan.org>
Sun, 13 Feb 2005 09:49:42 +0000 (09:49 +0000)
committer Loren Merritt <pengvado@videolan.org>
Sun, 13 Feb 2005 09:49:42 +0000 (09:49 +0000)

diff --git a/common/common.h b/common/common.h
index c25af813a6ca3ee7d90b60b1e5860611007c7ac5..b9ef408db7df7780791670caa9c1529f905d2ae6 100644
--- a/common/common.h
+++ b/common/common.h
@@ -305,7 +305,7 @@ struct x264_t
              uint8_t *p_fdec[3];
  
              /* pointer over mb of the references */
-            uint8_t *p_fref[2][16][3];
+            uint8_t *p_fref[2][16][4+2]; /* last: lN, lH, lV, lHV, cU, cV */
  
              /* common stride */
              int     i_stride[3];
@@ -393,7 +393,7 @@ struct x264_t
      x264_predict_t      predict_4x4[9+3];
  
      x264_pixel_function_t pixf;
-    x264_mc_function_t    mc[2];
+    x264_mc_functions_t   mc;
      x264_dct_function_t   dctf;
      x264_csp_function_t   csp;
  
diff --git a/common/frame.c b/common/frame.c
index 5f71862cf19b5720a0ca2042865b19faee697818..4c67e481f98535ed889c096693ecd4f8c3e2697d 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -66,6 +66,17 @@ x264_frame_t *x264_frame_new( x264_t *h )
      frame->buffer[3] = NULL;
      frame->plane[3] = NULL;
  
+    frame->filtered[0] = frame->plane[0];
+    for( i = 0; i < 3; i++ )
+    {
+        frame->buffer[4+i] = x264_malloc( frame->i_stride[0] *
+                                        ( frame->i_lines[0] + 64 ) );
+
+        frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
+                          frame->i_stride[0] * 32 + 32;
+    }
+
+
      frame->i_poc = -1;
      frame->i_type = X264_TYPE_AUTO;
      frame->i_qpplus1 = 0;
@@ -95,6 +106,10 @@ void x264_frame_delete( x264_frame_t *frame )
      {
          x264_free( frame->buffer[i] );
      }
+    for( i = 4; i < 7; i++ ) /* filtered planes */
+    {
+        x264_free( frame->buffer[i] );
+    }
      x264_free( frame->mv[0] );
      x264_free( frame->mv[1] );
      x264_free( frame->ref[0] );
@@ -180,6 +195,47 @@ void x264_frame_expand_border( x264_frame_t *frame )
      }
  }
  
+void x264_frame_expand_border_filtered( x264_frame_t *frame )
+{
+
+    /* during filtering, 8 extra pixels were filtered on each edge. 
+       we want to expand border from the last filtered pixel */
+    int w;
+    int i, y;
+    for( i = 1; i < 4; i++ )
+    {
+#define PPIXEL(x, y) ( frame->filtered[i] + (x) +(y)*frame->i_stride[0] )
+        w = 32;
+
+        for( y = 8; y < w; y++ )
+        {
+            /* upper band */
+            memcpy( PPIXEL(-8,-y-1), PPIXEL(-8,-8), frame->i_stride[0] - 2 * w + 16 );
+            /* up left corner */
+            memset( PPIXEL(-w,-y-1), PPIXEL(-8,-8)[0], w - 8 );
+            /* up right corner */
+            memset( PPIXEL(frame->i_stride[0] - 2*w + 8,-y-1), PPIXEL( frame->i_stride[0]-1-2*w+8,-8)[0], w - 8 );
+
+            /* lower band */
+            memcpy( PPIXEL(-8, frame->i_lines[0]+y), PPIXEL(-8,frame->i_lines[0]+7), frame->i_stride[0] - 2 * w + 16 );
+            /* low left corner */
+            memset( PPIXEL(-w, frame->i_lines[0]+y), PPIXEL(-8,frame->i_lines[0]+7)[0], w - 8);
+            /* low right corner */
+            memset( PPIXEL(frame->i_stride[0]-2*w+8, frame->i_lines[0]+y), PPIXEL(frame->i_stride[0]+7-2*w,frame->i_lines[0]+7)[0], w-8);
+
+        }
+        for( y = -8; y < frame->i_lines[0]+8; y++ )
+        {
+            /* left band */
+            memset( PPIXEL( -w, y ), PPIXEL( -8, y )[0], w - 8 );
+            /* right band */
+            memset( PPIXEL( frame->i_stride[0]-2*w + 8, y ), PPIXEL( frame->i_stride[0] + 7 - 2*w, y )[0], w - 8 );
+        }
+#undef PPIXEL
+    }
+}
+
+
  /* FIXME theses tables are duplicated with the ones in macroblock.c */
  static const uint8_t block_idx_xy[4][4] =
  {
diff --git a/common/frame.h b/common/frame.h
index 3e982d2285de17b621081ff7b8217a4ffac17c9a..95cfc5d00ae520a9d23fb29f41c5783728854363 100644
--- a/common/frame.h
+++ b/common/frame.h
@@ -38,10 +38,11 @@ typedef struct
      int     i_stride[4];
      int     i_lines[4];
      uint8_t *plane[4];
+    uint8_t *filtered[4]; /* plane[0], H, V, HV */
  
      /* for unrestricted mv we allocate more data than needed
       * allocated data are stored in buffer */
-    void    *buffer[4];
+    void    *buffer[7];
  
      /* motion data */
      int16_t (*mv[2])[2];
@@ -58,6 +59,10 @@ void          x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_pictur
  
  void          x264_frame_expand_border( x264_frame_t *frame );
  
+void          x264_frame_expand_border_filtered( x264_frame_t *frame );
+
  void          x264_frame_deblocking_filter( x264_t *h, int i_slice_type );
  
+void          x264_frame_filter( int cpu, x264_frame_t *frame );
+
  #endif
diff --git a/common/i386/mc-c.c b/common/i386/mc-c.c
index 424f18eedae77694343bfd204fd346a99676b89e..63fb7a7bfbc23abf04c8cabae78d610d2ae0a0f4 100644
--- a/common/i386/mc-c.c
+++ b/common/i386/mc-c.c
@@ -1021,12 +1021,136 @@ static void motion_compensation_luma_sse2( uint8_t *src, int i_src_stride,
      MOTION_COMPENSATION_LUMA
  }
  
-void x264_mc_mmxext_init( x264_mc_function_t pf[2] )
+
+void mc_luma_mmx( uint8_t *src[4], int i_src_stride,
+              uint8_t *dst,    int i_dst_stride,
+              int mvx,int mvy,
+              int i_width, int i_height )
+{
+    uint8_t *src1, *src2;
+
+    /* todo : fixme... */
+    int correction = ((mvx&3) == 3 && (mvy&3) == 1 || (mvx&3) == 1 && (mvy&3) == 3) ? 1:0;
+
+    int hpel1x = mvx>>1;
+    int hpel1y = (mvy+1-correction)>>1;
+    int filter1 = (hpel1x & 1) + ( (hpel1y & 1) << 1 );
+
+
+    src1 = src[filter1] + (hpel1y >> 1) * i_src_stride + (hpel1x >> 1);
+
+    if ( (mvx|mvy) & 1 ) /* qpel interpolation needed */
+    {
+        int hpel2x = (mvx+1)>>1;
+        int hpel2y = (mvy+correction)>>1;
+        int filter2 = (hpel2x & 1) + ( (hpel2y & 1) <<1 );
+
+        src2 = src[filter2] + (hpel2y >> 1) * i_src_stride + (hpel2x >> 1);
+
+        switch(i_width) {
+        case 4:
+            x264_pixel_avg_w4_mmxext( dst, i_dst_stride, src1, i_src_stride,
+                          src2, i_src_stride, i_height );
+            break;
+        case 8:
+            x264_pixel_avg_w8_mmxext( dst, i_dst_stride, src1, i_src_stride,
+                          src2, i_src_stride, i_height );
+            break;
+        case 16:
+        default:
+            x264_pixel_avg_w16_mmxext(dst, i_dst_stride, src1, i_src_stride,
+                          src2, i_src_stride, i_height );
+        }
+    }
+    else
+    {
+        switch(i_width) {
+        case 4:
+            x264_mc_copy_w4_mmxext( src1, i_src_stride, dst, i_dst_stride, i_height );
+            break;
+        case 8:
+            x264_mc_copy_w8_mmxext( src1, i_src_stride, dst, i_dst_stride, i_height );
+            break;
+        case 16:
+            x264_mc_copy_w16_mmxext( src1, i_src_stride, dst, i_dst_stride, i_height );
+            break;
+        }
+
+    }
+}
+
+uint8_t *get_ref_mmx( uint8_t *src[4], int i_src_stride,
+                      uint8_t *dst,   int *i_dst_stride,
+                      int mvx,int mvy,
+                      int i_width, int i_height )
+{
+    uint8_t *src1, *src2;
+
+    /* todo : fixme... */
+    int correction = ((mvx&3) == 3 && (mvy&3) == 1 || (mvx&3) == 1 && (mvy&3) == 3) ? 1:0;
+
+    int hpel1x = mvx>>1;
+    int hpel1y = (mvy+1-correction)>>1;
+    int filter1 = (hpel1x & 1) + ( (hpel1y & 1) << 1 );
+
+
+    src1 = src[filter1] + (hpel1y >> 1) * i_src_stride + (hpel1x >> 1);
+
+    if ( (mvx|mvy) & 1 ) /* qpel interpolation needed */
+    {
+        int hpel2x = (mvx+1)>>1;
+        int hpel2y = (mvy+correction)>>1;
+        int filter2 = (hpel2x & 1) + ( (hpel2y & 1) <<1 );
+
+        src2 = src[filter2] + (hpel2y >> 1) * i_src_stride + (hpel2x >> 1);
+    
+        switch(i_width) {
+        case 4:
+            x264_pixel_avg_w4_mmxext( dst, *i_dst_stride, src1, i_src_stride,
+                          src2, i_src_stride, i_height );
+            break;
+        case 8:
+            x264_pixel_avg_w8_mmxext( dst, *i_dst_stride, src1, i_src_stride,
+                          src2, i_src_stride, i_height );
+            break;
+        case 16:
+        default:
+            x264_pixel_avg_w16_mmxext(dst, *i_dst_stride, src1, i_src_stride,
+                          src2, i_src_stride, i_height );
+        }
+        return dst;
+
+    }
+    else
+    {
+        *i_dst_stride = i_src_stride;
+        return src1;
+    }
+}
+
+
+void x264_mc_mmxext_init( x264_mc_functions_t *pf )
  {
-    pf[MC_LUMA]   = motion_compensation_luma_mmxext;
+    pf->mc_luma   = mc_luma_mmx;
+    pf->get_ref   = get_ref_mmx;
  }
-void x264_mc_sse2_init( x264_mc_function_t pf[2] )
+void x264_mc_sse2_init( x264_mc_functions_t *pf )
  {
-    pf[MC_LUMA]   = motion_compensation_luma_sse2;
+    /* todo: use sse2 */
+    pf->mc_luma   = mc_luma_mmx;
+    pf->get_ref   = get_ref_mmx;
  }
  
+void get_funcs_mmx(pf_mc_t *int_h, pf_mc_t *int_v, pf_mc_t *int_hv)
+{
+    *int_h = mc_hh_w16;
+    *int_v = mc_hv_w16;
+    *int_hv = mc_hc_w16;
+}
+
+void get_funcs_sse2(pf_mc_t *int_h, pf_mc_t *int_v, pf_mc_t *int_hv)
+{
+    *int_h = mc_hh_w16;
+    *int_v = mc_hv_w16;
+    *int_hv = mc_hc_w16;
+}
diff --git a/common/i386/mc.h b/common/i386/mc.h
index 8cfc0a4ff652134a2a1873bc624a01a2d9ce2fa6..a07c8dc841a0e52e3b067eade0eba02f9aa423ba 100644
--- a/common/i386/mc.h
+++ b/common/i386/mc.h
@@ -24,7 +24,7 @@
  #ifndef _I386_MC_H
  #define _I386_MC_H 1
  
-void x264_mc_mmxext_init( x264_mc_function_t pf[2] );
-void x264_mc_sse2_init( x264_mc_function_t pf[2] );
+void x264_mc_mmxext_init( x264_mc_functions_t *pf );
+void x264_mc_sse2_init( x264_mc_functions_t *pf );
  
  #endif
diff --git a/common/macroblock.c b/common/macroblock.c
index 851c1649c13974a2f13a894b1303d8e76f0b056b..1793ae29a0a7ce80f7cd4500a738594feed58a00 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -577,15 +577,15 @@ static inline void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int hei
      const int mvx   = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
      const int mvy   = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
  
-    h->mc[MC_LUMA]( &h->mb.pic.p_fref[0][i_ref][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
+    h->mc.mc_luma( h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
                      &h->mb.pic.p_fdec[0][4*y * h->mb.pic.i_stride[0]+4*x],           h->mb.pic.i_stride[0],
-                    mvx, mvy, 4*width, 4*height );
+                    mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );
  
-    h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+    h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
                        &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x],           h->mb.pic.i_stride[1],
                        mvx, mvy, 2*width, 2*height );
  
-    h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+    h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
                        &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x],           h->mb.pic.i_stride[2],
                        mvx, mvy, 2*width, 2*height );
  }
@@ -596,15 +596,15 @@ static inline void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int hei
      const int mvx   = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
      const int mvy   = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
  
-    h->mc[MC_LUMA]( &h->mb.pic.p_fref[1][i_ref][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
+    h->mc.mc_luma( h->mb.pic.p_fref[1][i_ref], h->mb.pic.i_stride[0],
                      &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x],            h->mb.pic.i_stride[0],
-                    mvx, mvy, 4*width, 4*height );
+                    mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );
  
-    h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+    h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
                        &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x],           h->mb.pic.i_stride[1],
                        mvx, mvy, 2*width, 2*height );
  
-    h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+    h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
                        &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x],           h->mb.pic.i_stride[2],
                        mvx, mvy, 2*width, 2*height );
  }
@@ -631,26 +631,26 @@ static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int he
      else if( width == 1 && height == 2 ) i_mode = PIXEL_4x8;
      else if( width == 1 && height == 1 ) i_mode = PIXEL_4x4;
  
-    h->mc[MC_LUMA]( &h->mb.pic.p_fref[0][i_ref0][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
+    h->mc.mc_luma( h->mb.pic.p_fref[0][i_ref0], h->mb.pic.i_stride[0],
                      &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x],             h->mb.pic.i_stride[0],
-                    mvx0, mvy0, 4*width, 4*height );
-    h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref0][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+                    mvx0 + 4*4*x, mvy0 + 4*4*y, 4*width, 4*height );
+    h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref0][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
                        &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x],            h->mb.pic.i_stride[1],
                        mvx0, mvy0, 2*width, 2*height );
-    h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref0][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+    h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref0][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
                        &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x],            h->mb.pic.i_stride[2],
                        mvx0, mvy0, 2*width, 2*height );
  
  
-    h->mc[MC_LUMA]( &h->mb.pic.p_fref[1][i_ref1][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
-                    tmp, 16, mvx1, mvy1, 4*width, 4*height );
+    h->mc.mc_luma( h->mb.pic.p_fref[1][i_ref1], h->mb.pic.i_stride[0],
+                    tmp, 16, mvx1 + 4*4*x, mvy1 + 4*4*y, 4*width, 4*height );
      h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16 );
  
-    h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref1][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+    h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
                        tmp, 16, mvx1, mvy1, 2*width, 2*height );
      h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16 );
  
-    h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref1][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+    h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
                        tmp, 16, mvx1, mvy1, 2*width, 2*height );
      h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 );
  }
@@ -946,11 +946,13 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
  
          for( j = 0; j < h->i_ref0; j++ )
          {
-            h->mb.pic.p_fref[0][j][i] = &h->fref0[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+            h->mb.pic.p_fref[0][j][i==0 ? 0:i+3] = &h->fref0[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+            h->mb.pic.p_fref[0][j][i+1] = &h->fref0[j]->filtered[i+1][ 16 * ( i_mb_x + i_mb_y * h->fdec->i_stride[0] )];
          }
          for( j = 0; j < h->i_ref1; j++ )
          {
-            h->mb.pic.p_fref[1][j][i] = &h->fref1[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+            h->mb.pic.p_fref[1][j][i==0 ? 0:i+3] = &h->fref1[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+            h->mb.pic.p_fref[1][j][i+1] = &h->fref1[j]->filtered[i+1][ 16 * ( i_mb_x + i_mb_y * h->fdec->i_stride[0] )];
          }
      }
  
diff --git a/common/mc.c b/common/mc.c
index 870b0fac42ade629c1292e30cc93875af1833b24..6560f75c0a49b2fba82487dabd548ea5c113482c 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -35,6 +35,7 @@
  
  #include "mc.h"
  #include "clip1.h"
+#include "frame.h"
  
  #ifdef _MSC_VER
  #undef HAVE_MMXEXT  /* not finished now */
@@ -270,6 +271,80 @@ static void motion_compensation_luma( uint8_t *src, int i_src_stride,
      pf_mc[mvy&0x03][mvx&0x03]( src, i_src_stride, dst, i_dst_stride, i_width, i_height );
  }
  
+void mc_luma( uint8_t *src[4], int i_src_stride,
+              uint8_t *dst,    int i_dst_stride,
+              int mvx,int mvy,
+              int i_width, int i_height )
+{
+    uint8_t *src1, *src2;
+
+    /* todo : fixme... */
+    int correction = ((mvx&3) == 3 && (mvy&3) == 1 || (mvx&3) == 1 && (mvy&3) == 3) ? 1:0;
+
+    int hpel1x = mvx>>1;
+    int hpel1y = (mvy+1-correction)>>1;
+    int filter1 = (hpel1x & 1) + ( (hpel1y & 1) << 1 );
+
+
+    src1 = src[filter1] + (hpel1y >> 1) * i_src_stride + (hpel1x >> 1);
+
+    if ( (mvx|mvy) & 1 ) /* qpel interpolation needed */
+    {
+        int hpel2x = (mvx+1)>>1;
+        int hpel2y = (mvy+correction)>>1;
+        int filter2 = (hpel2x & 1) + ( (hpel2y & 1) <<1 );
+
+        src2 = src[filter2] + (hpel2y >> 1) * i_src_stride + (hpel2x >> 1);
+    
+        pixel_avg( dst, i_dst_stride, src1, i_src_stride,
+                   src2, i_src_stride, i_width, i_height );
+
+    }
+    else
+    {
+        mc_copy( src1, i_src_stride, dst, i_dst_stride, i_width, i_height );
+
+    }
+}
+
+uint8_t *get_ref( uint8_t *src[4], int i_src_stride,
+                  uint8_t *dst,    int * i_dst_stride,
+                  int mvx,int mvy,
+                  int i_width, int i_height )
+{
+    uint8_t *src1, *src2;
+
+    /* todo : fixme... */
+    int correction = ((mvx&3) == 3 && (mvy&3) == 1 || (mvx&3) == 1 && (mvy&3) == 3) ? 1:0;
+
+    int hpel1x = mvx>>1;
+    int hpel1y = (mvy+1-correction)>>1;
+    int filter1 = (hpel1x & 1) + ( (hpel1y & 1) << 1 );
+
+
+    src1 = src[filter1] + (hpel1y >> 1) * i_src_stride + (hpel1x >> 1);
+
+    if ( (mvx|mvy) & 1 ) /* qpel interpolation needed */
+    {
+        int hpel2x = (mvx+1)>>1;
+        int hpel2y = (mvy+correction)>>1;
+        int filter2 = (hpel2x & 1) + ( (hpel2y & 1) <<1 );
+
+        src2 = src[filter2] + (hpel2y >> 1) * i_src_stride + (hpel2x >> 1);
+    
+        pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
+                   src2, i_src_stride, i_width, i_height );
+
+        return dst;
+
+    }
+    else
+    {
+        *i_dst_stride = i_src_stride;
+        return src1;
+    }
+}
+
  /* full chroma mc (ie until 1/8 pixel)*/
  static void motion_compensation_chroma( uint8_t *src, int i_src_stride,
                                          uint8_t *dst, int i_dst_stride,
@@ -304,10 +379,11 @@ static void motion_compensation_chroma( uint8_t *src, int i_src_stride,
      }
  }
  
-void x264_mc_init( int cpu, x264_mc_function_t pf[2] )
+void x264_mc_init( int cpu, x264_mc_functions_t *pf )
  {
-    pf[MC_LUMA]   = motion_compensation_luma;
-    pf[MC_CHROMA] = motion_compensation_chroma;
+    pf->mc_luma   = mc_luma;
+    pf->get_ref   = get_ref;
+    pf->mc_chroma = motion_compensation_chroma;
  
  #ifdef HAVE_MMXEXT
      if( cpu&X264_CPU_MMXEXT )
@@ -317,10 +393,54 @@ void x264_mc_init( int cpu, x264_mc_function_t pf[2] )
      if( cpu&X264_CPU_SSE2 )
          x264_mc_sse2_init( pf );
  #endif
-
+/*
  #ifdef ARCH_PPC
      if( cpu&X264_CPU_ALTIVEC )
          x264_mc_altivec_init( pf );
  #endif
+*/
  }
  
+void get_funcs_mmx(pf_mc_t*, pf_mc_t*, pf_mc_t*);
+void get_funcs_sse2(pf_mc_t*, pf_mc_t*, pf_mc_t*);
+
+void x264_frame_filter( int cpu, x264_frame_t *frame )
+{
+    const int x_inc = 16, y_inc = 16;
+    const int stride = frame->i_stride[0];
+    int x, y;
+
+    pf_mc_t int_h = mc_hh;
+    pf_mc_t int_v = mc_hv;
+    pf_mc_t int_hv = mc_hc;
+
+#ifdef HAVE_MMXEXT
+    if( cpu&X264_CPU_MMXEXT )
+        get_funcs_mmx(&int_h, &int_v, &int_hv);
+#endif
+
+#ifdef HAVE_SSE2
+    if( cpu&X264_CPU_SSE2 )
+        get_funcs_sse2(&int_h, &int_v, &int_hv);
+#endif
+
+    for( y = -8; y < frame->i_lines[0]+8; y += y_inc ) {
+        
+        uint8_t *p_in = frame->plane[0] + y * stride - 8;
+        uint8_t *p_h  = frame->filtered[1] + y * stride - 8;
+        uint8_t *p_v  = frame->filtered[2] + y * stride - 8;
+        uint8_t *p_hv = frame->filtered[3] + y * stride - 8;
+
+        for( x = -8; x < stride - 64 + 8; x += x_inc )
+        {
+            int_h(  p_in, stride, p_h,  stride, x_inc, y_inc );
+            int_v(  p_in, stride, p_v,  stride, x_inc, y_inc );
+            int_hv( p_in, stride, p_hv, stride, x_inc, y_inc );
+
+            p_h += x_inc;
+            p_v += x_inc;
+            p_hv += x_inc;
+            p_in += x_inc;
+        }
+    }
+}
diff --git a/common/mc.h b/common/mc.h
index 8f91eab179931e00d7c68f02800b57e49984ffe8..6c260ec9ffe51c153916403726eb1ad2404b59cb 100644
--- a/common/mc.h
+++ b/common/mc.h
@@ -31,15 +31,21 @@
   * width == 16-> height == 8 or 16
   * */
  
-typedef void (*x264_mc_function_t)(uint8_t *, int, uint8_t *, int,
-                          int mvx, int mvy,
-                          int i_width, int i_height );
-enum
+typedef struct
  {
-    MC_LUMA   = 0,
-    MC_CHROMA = 1,
-};
+    void (*mc_luma)(uint8_t **, int, uint8_t *, int,
+                    int mvx, int mvy,
+                    int i_width, int i_height );
  
-void x264_mc_init( int cpu, x264_mc_function_t pf[2] );
+    uint8_t* (*get_ref)(uint8_t **, int, uint8_t *, int *,
+                        int mvx, int mvy,
+                        int i_width, int i_height );
+
+    void (*mc_chroma)(uint8_t *, int, uint8_t *, int,
+                      int mvx, int mvy,
+                      int i_width, int i_height );
+} x264_mc_functions_t;
+
+void x264_mc_init( int cpu, x264_mc_functions_t *pf );
  
  #endif
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 7f4cd7c0988713f6ddb62209801b41db38d2e11a..2430e1485b097f5851c167d1949ae66560778cd8 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -486,6 +486,12 @@ static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *res )
      }
  }
  
+#define LOAD_HPELS(dst, src, offset) \
+    dst[0] = &src[0][offset]; \
+    dst[1] = &src[1][offset]; \
+    dst[2] = &src[2][offset]; \
+    dst[3] = &src[3][offset]; \
+
  static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  {
      x264_me_t m;
@@ -507,7 +513,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
          i_fullpel_thresh -= i_ref_cost;
  
          /* search with ref */
-        m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
+        LOAD_HPELS( m.p_fref, h->mb.pic.p_fref[0][i_ref], 0 );
          x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
          x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
          x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
@@ -535,7 +541,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  
  static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
  {
-    uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+    uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
      uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
      int mvc[5][2], i_mvc;
      int i;
@@ -556,9 +562,9 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
          m->i_pixel = PIXEL_8x8;
          m->lm      = a->i_lambda;
  
-        m->p_fenc = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
-        m->p_fref = &p_fref[8*(y8*h->mb.pic.i_stride[0]+x8)];
+        m->p_fenc  = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
          m->i_stride= h->mb.pic.i_stride[0];
+        LOAD_HPELS( m->p_fref, p_fref, 8*(y8*m->i_stride + x8) );
  
          x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
          x264_me_search( h, m, mvc, i_mvc );
@@ -579,7 +585,7 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
  
  static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
  {
-    uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+    uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
      uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
      int mvc[2][2];
      int i;
@@ -594,9 +600,9 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
          m->i_pixel = PIXEL_16x8;
          m->lm      = a->i_lambda;
  
-        m->p_fenc = &p_fenc[8*i*h->mb.pic.i_stride[0]];
-        m->p_fref = &p_fref[8*i*h->mb.pic.i_stride[0]];
+        m->p_fenc  = &p_fenc[8*i*h->mb.pic.i_stride[0]];
          m->i_stride= h->mb.pic.i_stride[0];
+        LOAD_HPELS( m->p_fref, p_fref, 8*i*m->i_stride );
  
          mvc[0][0] = a->l0.me8x8[2*i].mv[0];
          mvc[0][1] = a->l0.me8x8[2*i].mv[1];
@@ -614,7 +620,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
  
  static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
  {
-    uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+    uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
      uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
      int mvc[2][2];
      int i;
@@ -630,8 +636,8 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
          m->lm      = a->i_lambda;
  
          m->p_fenc  = &p_fenc[8*i];
-        m->p_fref  = &p_fref[8*i];
          m->i_stride= h->mb.pic.i_stride[0];
+        LOAD_HPELS( m->p_fref, p_fref, 8*i );
  
          mvc[0][0] = a->l0.me8x8[i].mv[0];
          mvc[0][1] = a->l0.me8x8[i].mv[1];
@@ -649,7 +655,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
  
  static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
  {
-    uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+    uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
      uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
  
      int i4x4;
@@ -670,8 +676,8 @@ static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8
          m->lm      = a->i_lambda;
  
          m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
-        m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
          m->i_stride= h->mb.pic.i_stride[0];
+        LOAD_HPELS( m->p_fref, p_fref, 4*(y4*m->i_stride + x4) );
  
          x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
          x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
@@ -688,7 +694,7 @@ static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8
  
  static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
  {
-    uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+    uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
      uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
  
      int i8x4;
@@ -709,8 +715,8 @@ static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8
          m->lm      = a->i_lambda;
  
          m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
-        m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
          m->i_stride= h->mb.pic.i_stride[0];
+        LOAD_HPELS( m->p_fref, p_fref, 4*(y4*m->i_stride + x4) );
  
          x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
          x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
@@ -724,7 +730,7 @@ static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8
  
  static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
  {
-    uint8_t  *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+    uint8_t  **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
      uint8_t  *p_fenc = h->mb.pic.p_fenc[0];
  
      int i4x8;
@@ -745,8 +751,8 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8
          m->lm      = a->i_lambda;
  
          m->p_fenc  = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
-        m->p_fref  = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
          m->i_stride= h->mb.pic.i_stride[0];
+        LOAD_HPELS( m->p_fref, p_fref, 4*(y4*m->i_stride + x4) );
  
          x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
          x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
@@ -806,7 +812,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
      for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
      {
          /* search with ref */
-        m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
+        LOAD_HPELS( m.p_fref, h->mb.pic.p_fref[0][i_ref], 0 );
          x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
          x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
          x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
@@ -833,7 +839,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
      for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
      {
          /* search with ref */
-        m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
+        LOAD_HPELS( m.p_fref, h->mb.pic.p_fref[1][i_ref], 0 );
          x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
          x264_mb_predict_mv_ref16x16( h, 1, i_ref, mvc, &i_mvc );
          x264_me_search( h, &m, mvc, i_mvc );
@@ -859,11 +865,11 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
  
      /* get cost of BI mode */
-    h->mc[MC_LUMA]( h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
+    h->mc.mc_luma( h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
                      pix1, 16,
                      a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
                      16, 16 );
-    h->mc[MC_LUMA]( h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
+    h->mc.mc_luma( h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
                      pix2, 16,
                      a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
                      16, 16 );
@@ -940,8 +946,18 @@ static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int
  
  static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
  {
-    uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
-                           h->mb.pic.p_fref[1][a->l1.i_ref][0] };
+    uint8_t *p_fref[2][4] =
+        { {
+            h->mb.pic.p_fref[0][a->l0.i_ref][0],
+            h->mb.pic.p_fref[0][a->l0.i_ref][1],
+            h->mb.pic.p_fref[0][a->l0.i_ref][2],
+            h->mb.pic.p_fref[0][a->l0.i_ref][3]
+        }, {
+            h->mb.pic.p_fref[1][a->l1.i_ref][0],
+            h->mb.pic.p_fref[1][a->l1.i_ref][1],
+            h->mb.pic.p_fref[1][a->l1.i_ref][2],
+            h->mb.pic.p_fref[1][a->l1.i_ref][3] 
+        } };
      uint8_t *p_fenc = h->mb.pic.p_fenc[0];
      uint8_t pix[2][8*8];
      int i, l;
@@ -968,8 +984,8 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
              m->lm      = a->i_lambda;
  
              m->p_fenc = p_fenc_i;
-            m->p_fref = &p_fref[l][8*(y8*h->mb.pic.i_stride[0]+x8)];
              m->i_stride = h->mb.pic.i_stride[0];
+            LOAD_HPELS( m->p_fref, p_fref[l], 8*(y8*m->i_stride + x8) );
  
              x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
              x264_me_search( h, m, &lX->me16x16.mv, 1 );
@@ -977,7 +993,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
              x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
  
              /* BI mode */
-            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
+            h->mc.mc_luma( m->p_fref, m->i_stride, pix[l], 8,
                              m->mv[0], m->mv[1], 8, 8 );
              i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
                                                bs_size_se( m->mv[1] - m->mvp[1] ) );
@@ -1019,8 +1035,18 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
  
  static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
  {
-    uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
-                           h->mb.pic.p_fref[1][a->l1.i_ref][0] };
+    uint8_t *p_fref[2][4] =
+        { {
+            h->mb.pic.p_fref[0][a->l0.i_ref][0],
+            h->mb.pic.p_fref[0][a->l0.i_ref][1],
+            h->mb.pic.p_fref[0][a->l0.i_ref][2],
+            h->mb.pic.p_fref[0][a->l0.i_ref][3]
+        }, {
+            h->mb.pic.p_fref[1][a->l1.i_ref][0],
+            h->mb.pic.p_fref[1][a->l1.i_ref][1],
+            h->mb.pic.p_fref[1][a->l1.i_ref][2],
+            h->mb.pic.p_fref[1][a->l1.i_ref][3] 
+        } };
      uint8_t *p_fenc = h->mb.pic.p_fenc[0];
      uint8_t pix[2][8*8];
      int i_ref_stride = h->mb.pic.i_stride[0];
@@ -1047,7 +1073,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
  
              m->p_fenc  = p_fenc_i;
              m->i_stride= i_ref_stride;
-            m->p_fref  = &p_fref[l][8*i*i_ref_stride];
+            LOAD_HPELS( m->p_fref, p_fref[l], 8*i*i_ref_stride );
  
              mvc[0][0] = lX->me8x8[2*i].mv[0];
              mvc[0][1] = lX->me8x8[2*i].mv[1];
@@ -1058,7 +1084,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
              x264_me_search( h, m, mvc, 2 );
  
              /* BI mode */
-            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
+            h->mc.mc_luma( m->p_fref, m->i_stride, pix[l], 8,
                              m->mv[0], m->mv[1], 8, 8 );
              /* FIXME: ref cost */
              i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
@@ -1094,8 +1120,18 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
  }
  static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
  {
-    uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
-                           h->mb.pic.p_fref[1][a->l1.i_ref][0] };
+    uint8_t *p_fref[2][4] =
+        { {
+            h->mb.pic.p_fref[0][a->l0.i_ref][0],
+            h->mb.pic.p_fref[0][a->l0.i_ref][1],
+            h->mb.pic.p_fref[0][a->l0.i_ref][2],
+            h->mb.pic.p_fref[0][a->l0.i_ref][3]
+        }, {
+            h->mb.pic.p_fref[1][a->l1.i_ref][0],
+            h->mb.pic.p_fref[1][a->l1.i_ref][1],
+            h->mb.pic.p_fref[1][a->l1.i_ref][2],
+            h->mb.pic.p_fref[1][a->l1.i_ref][3] 
+        } };
      uint8_t *p_fenc = h->mb.pic.p_fenc[0];
      uint8_t pix[2][8*8];
      int i_ref_stride = h->mb.pic.i_stride[0];
@@ -1120,8 +1156,8 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
              m->lm      = a->i_lambda;
  
              m->p_fenc  = p_fenc_i;
-            m->p_fref  = &p_fref[l][8*i];
              m->i_stride= i_ref_stride;
+            LOAD_HPELS( m->p_fref, p_fref[l], 8*i );
  
              mvc[0][0] = lX->me8x8[i].mv[0];
              mvc[0][1] = lX->me8x8[i].mv[1];
@@ -1132,7 +1168,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
              x264_me_search( h, m, mvc, 2 );
  
              /* BI mode */
-            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
+            h->mc.mc_luma( m->p_fref, m->i_stride, pix[l], 8,
                              m->mv[0], m->mv[1], 8, 8 );
              /* FIXME: ref cost */
              i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
diff --git a/encoder/encoder.c b/encoder/encoder.c

index 799e483b39bbc3b1d9140577001cbffafc94d981..29839a0699796b756f9801f219b8250d60bcdf08 100644 (file)
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -453,7 +453,7 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
  
      x264_pixel_init( h->param.cpu, &h->pixf );
      x264_dct_init( h->param.cpu, &h->dctf );
-    x264_mc_init( h->param.cpu, h->mc );
+    x264_mc_init( h->param.cpu, &h->mc );
      x264_csp_init( h->param.cpu, h->param.i_csp, &h->csp );
  
      /* rate control */
@@ -666,6 +666,12 @@ static inline void x264_reference_update( x264_t *h )
      /* expand border */
      x264_frame_expand_border( h->fdec );
  
+    /* create filtered images */
+    x264_frame_filter( h->param.cpu, h->fdec );
+
+    /* expand border of filtered images */
+    x264_frame_expand_border_filtered( h->fdec );
+
      /* move frame in the buffer */
      h->fdec = h->frames.reference[h->param.i_frame_reference+1];
      for( i = h->param.i_frame_reference+1; i > 0; i-- )
diff --git a/encoder/macroblock.c b/encoder/macroblock.c

index ea371d3723de27a5345a29ff2b1f2d5c2d74f774..ae9d1eea05d35b1371a331147363012aa685a066 100644 (file)
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -518,16 +518,16 @@ void x264_macroblock_encode_pskip( x264_t *h )
                                  h->mb.mv_min[1], h->mb.mv_max[1] );
  
      /* Motion compensation XXX probably unneeded */
-    h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
+    h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
                      h->mb.pic.p_fdec[0],       h->mb.pic.i_stride[0],
                      mvx, mvy, 16, 16 );
  
      /* Chroma MC */
-    h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1], h->mb.pic.i_stride[1],
+    h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
                        h->mb.pic.p_fdec[1],       h->mb.pic.i_stride[1],
                        mvx, mvy, 8, 8 );
  
-    h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][2], h->mb.pic.i_stride[2],
+    h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
                        h->mb.pic.p_fdec[2],       h->mb.pic.i_stride[2],
                        mvx, mvy, 8, 8 );
  
@@ -803,8 +803,8 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
          mvp[1] = x264_clip3( mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
  
          /* Motion compensation */
-        h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
-                        h->mb.pic.p_fdec[0],       h->mb.pic.i_stride[0],
+        h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
+                        h->mb.pic.p_fdec[0],   h->mb.pic.i_stride[0],
                          mvp[0], mvp[1], 16, 16 );
      }
  
@@ -843,7 +843,7 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
  
          if( !b_bidir )
          {
-            h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1+ch], i_stride,
+            h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], i_stride,
                                h->mb.pic.p_fdec[1+ch],       i_stride,
                                mvp[0], mvp[1], 8, 8 );
          }
diff --git a/encoder/me.c b/encoder/me.c

index 660b5de2c1cea54e6d8b40183b1a3682b99e1a67..15522a65f2b6db1f7772367eff30f18d167bf686 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -62,7 +62,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int
      const int i_pixel = m->i_pixel;
      int bmx, bmy, bcost;
      int omx, omy;
-    uint8_t *p_fref = m->p_fref;
+    uint8_t *p_fref = m->p_fref[0];
      int i_iter;
  
      const int mv_x_min = h->mb.mv_min_fpel[0];
@@ -198,10 +198,10 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
      {
         for( i = step>1 ? hpel_iters : qpel_iters; i > 0; i-- )
          {
-            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - step, bw, bh );
-            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + step, bw, bh );
-            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[2], 16, bmx - step, bmy + 0, bw, bh );
-            h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[3], 16, bmx + step, bmy + 0, bw, bh );
+            h->mc.mc_luma( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - step, bw, bh );
+            h->mc.mc_luma( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + step, bw, bh );
+            h->mc.mc_luma( m->p_fref, m->i_stride, pix[2], 16, bmx - step, bmy + 0, bw, bh );
+            h->mc.mc_luma( m->p_fref, m->i_stride, pix[3], 16, bmx + step, bmy + 0, bw, bh );
      
              cost[0] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[0], 16 ) +
                        m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy - step - m->mvp[1] ) );
diff --git a/encoder/me.h b/encoder/me.h

index d360c9c13ccd1daf7af0527e7d6832b3f380acaa..ef1adb0679b4cc53a887e6ba6b68e18c64da9a7d 100644 (file)
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -30,7 +30,7 @@ typedef struct
      int      i_pixel;   /* PIXEL_WxH */
      int      lm;        /* lambda motion */
  
-    uint8_t *p_fref;
+    uint8_t *p_fref[4];
      uint8_t *p_fenc;
      int      i_stride;
  
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c

index c541418e3475b432fb903106d98ab06acf27e249..0ca28c6ff5736d4ea993d055e7690b3a6d97f2e2 100644 (file)
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -43,7 +43,9 @@
  #ifdef SYS_FREEBSD
  #define exp2f(x) powf( 2, (x) )
  #endif
-
+#ifdef _MSC_VER
+#define exp2f(x) pow( 2, (x) )
+#endif
  
  typedef struct
  {
author	Loren Merritt <pengvado@videolan.org>
	Sun, 13 Feb 2005 09:49:42 +0000 (09:49 +0000)
committer	Loren Merritt <pengvado@videolan.org>
	Sun, 13 Feb 2005 09:49:42 +0000 (09:49 +0000)
common/common.h		patch | blob | history
common/frame.c		patch | blob | history
common/frame.h		patch | blob | history
common/i386/mc-c.c		patch | blob | history
common/i386/mc.h		patch | blob | history
common/macroblock.c		patch | blob | history
common/mc.c		patch | blob | history
common/mc.h		patch | blob | history
encoder/analyse.c		patch | blob | history
encoder/encoder.c		patch | blob | history
encoder/macroblock.c		patch | blob | history
encoder/me.c		patch | blob | history
encoder/me.h		patch | blob | history
encoder/ratecontrol.c		patch | blob | history