uint8_t *p_fdec[3];
/* pointer over mb of the references */
- uint8_t *p_fref[2][16][3];
+ uint8_t *p_fref[2][16][4+2]; /* last: lN, lH, lV, lHV, cU, cV */
/* common stride */
int i_stride[3];
x264_predict_t predict_4x4[9+3];
x264_pixel_function_t pixf;
- x264_mc_function_t mc[2];
+ x264_mc_functions_t mc;
x264_dct_function_t dctf;
x264_csp_function_t csp;
frame->buffer[3] = NULL;
frame->plane[3] = NULL;
+ frame->filtered[0] = frame->plane[0];
+ for( i = 0; i < 3; i++ )
+ {
+ frame->buffer[4+i] = x264_malloc( frame->i_stride[0] *
+ ( frame->i_lines[0] + 64 ) );
+
+ frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
+ frame->i_stride[0] * 32 + 32;
+ }
+
+
frame->i_poc = -1;
frame->i_type = X264_TYPE_AUTO;
frame->i_qpplus1 = 0;
{
x264_free( frame->buffer[i] );
}
+ for( i = 4; i < 7; i++ ) /* filtered planes */
+ {
+ x264_free( frame->buffer[i] );
+ }
x264_free( frame->mv[0] );
x264_free( frame->mv[1] );
x264_free( frame->ref[0] );
}
}
+void x264_frame_expand_border_filtered( x264_frame_t *frame )
+{
+    /* Pad the three half-pel planes frame->filtered[1..3] out to their full
+     * border.  During filtering, 8 extra pixels were produced past every
+     * picture edge, so the border is replicated from the outermost filtered
+     * pixel instead of from the picture edge itself. */
+    const int i_border = 32;                    /* border width around each plane */
+    const int i_extra  = 8;                     /* pixels already filtered past each edge */
+    const int i_fill   = i_border - i_extra;    /* pixels left to replicate per side */
+    const int i_stride = frame->i_stride[0];
+    const int i_lines  = frame->i_lines[0];
+    /* first column to the right of the filtered area */
+    const int i_right  = i_stride - 2 * i_border + i_extra;
+    /* length of one filtered row, margins included */
+    const int i_band   = i_stride - 2 * i_border + 2 * i_extra;
+    int i_plane, y;
+
+    for( i_plane = 1; i_plane < 4; i_plane++ )
+    {
+        uint8_t *p_pix = frame->filtered[i_plane];
+        /* first and last rows that hold filtered data */
+        uint8_t *p_top = p_pix - i_extra * i_stride;
+        uint8_t *p_bot = p_pix + ( i_lines + i_extra - 1 ) * i_stride;
+
+        for( y = i_extra; y < i_border; y++ )
+        {
+            uint8_t *p_up   = p_pix - ( y + 1 ) * i_stride;
+            uint8_t *p_down = p_pix + ( i_lines + y ) * i_stride;
+
+            /* rows above the filtered area: band, then left and right corners */
+            memcpy( p_up - i_extra,  p_top - i_extra,     i_band );
+            memset( p_up - i_border, p_top[-i_extra],     i_fill );
+            memset( p_up + i_right,  p_top[i_right - 1],  i_fill );
+
+            /* rows below the filtered area: band, then left and right corners */
+            memcpy( p_down - i_extra,  p_bot - i_extra,    i_band );
+            memset( p_down - i_border, p_bot[-i_extra],    i_fill );
+            memset( p_down + i_right,  p_bot[i_right - 1], i_fill );
+        }
+
+        for( y = -i_extra; y < i_lines + i_extra; y++ )
+        {
+            uint8_t *p_row = p_pix + y * i_stride;
+
+            /* left band, then right band of every filtered row */
+            memset( p_row - i_border, p_row[-i_extra],    i_fill );
+            memset( p_row + i_right,  p_row[i_right - 1], i_fill );
+        }
+    }
+}
+
+
/* FIXME theses tables are duplicated with the ones in macroblock.c */
static const uint8_t block_idx_xy[4][4] =
{
int i_stride[4];
int i_lines[4];
uint8_t *plane[4];
+ uint8_t *filtered[4]; /* plane[0], H, V, HV */
/* for unrestricted mv we allocate more data than needed
* allocated data are stored in buffer */
- void *buffer[4];
+ void *buffer[7];
/* motion data */
int16_t (*mv[2])[2];
void x264_frame_expand_border( x264_frame_t *frame );
+void x264_frame_expand_border_filtered( x264_frame_t *frame );
+
void x264_frame_deblocking_filter( x264_t *h, int i_slice_type );
+void x264_frame_filter( int cpu, x264_frame_t *frame );
+
#endif
MOTION_COMPENSATION_LUMA
}
-void x264_mc_mmxext_init( x264_mc_function_t pf[2] )
+
+void mc_luma_mmx( uint8_t *src[4], int i_src_stride,
+                  uint8_t *dst,    int i_dst_stride,
+                  int mvx,int mvy,
+                  int i_width, int i_height )
+{
+    /* Luma motion compensation from pre-filtered planes (MMXEXT kernels).
+     *
+     * src[]   the four luma planes of one reference, indexed by
+     *         (half-pel x bit) + 2 * (half-pel y bit); all share i_src_stride
+     * dst     receives the i_width x i_height prediction
+     * mvx/mvy motion vector; the low bit selects quarter-pel averaging, the
+     *         next bit the half-pel plane, the remaining bits the position
+     *
+     * A vector with both low bits clear is a plain copy from one plane.
+     * Otherwise the result is the average of two neighbouring samples. */
+    uint8_t *src1, *src2;
+
+    /* todo : fixme... */
+    /* For fractions (3,1) and (1,3) the +1 vertical rounding moves from the
+     * first sample to the second, which selects the other diagonal pair. */
+    int correction = ( ((mvx&3) == 3 && (mvy&3) == 1) ||
+                       ((mvx&3) == 1 && (mvy&3) == 3) ) ? 1:0;
+
+    int hpel1x = mvx>>1;
+    int hpel1y = (mvy+1-correction)>>1;
+    int filter1 = (hpel1x & 1) + ( (hpel1y & 1) << 1 );
+
+    src1 = src[filter1] + (hpel1y >> 1) * i_src_stride + (hpel1x >> 1);
+
+    if ( (mvx|mvy) & 1 ) /* qpel interpolation needed */
+    {
+        int hpel2x = (mvx+1)>>1;
+        int hpel2y = (mvy+correction)>>1;
+        int filter2 = (hpel2x & 1) + ( (hpel2y & 1) <<1 );
+
+        src2 = src[filter2] + (hpel2y >> 1) * i_src_stride + (hpel2x >> 1);
+
+        switch(i_width) {
+        case 4:
+            x264_pixel_avg_w4_mmxext( dst, i_dst_stride, src1, i_src_stride,
+                                      src2, i_src_stride, i_height );
+            break;
+        case 8:
+            x264_pixel_avg_w8_mmxext( dst, i_dst_stride, src1, i_src_stride,
+                                      src2, i_src_stride, i_height );
+            break;
+        case 16:
+        default:
+            x264_pixel_avg_w16_mmxext(dst, i_dst_stride, src1, i_src_stride,
+                                      src2, i_src_stride, i_height );
+            break;
+        }
+    }
+    else
+    {
+        /* Same width dispatch as the averaging path: any width other than
+         * 4 or 8 uses the 16-wide kernel, so dst is always written. */
+        switch(i_width) {
+        case 4:
+            x264_mc_copy_w4_mmxext( src1, i_src_stride, dst, i_dst_stride, i_height );
+            break;
+        case 8:
+            x264_mc_copy_w8_mmxext( src1, i_src_stride, dst, i_dst_stride, i_height );
+            break;
+        case 16:
+        default:
+            x264_mc_copy_w16_mmxext( src1, i_src_stride, dst, i_dst_stride, i_height );
+            break;
+        }
+    }
+}
+
+uint8_t *get_ref_mmx( uint8_t *src[4], int i_src_stride,
+                      uint8_t *dst,    int *i_dst_stride,
+                      int mvx,int mvy,
+                      int i_width, int i_height )
+{
+    /* Return a pointer to the i_width x i_height luma block addressed by
+     * (mvx,mvy).  When no quarter-pel averaging is needed the pointer goes
+     * straight into one of the src[] planes and *i_dst_stride is replaced by
+     * i_src_stride; otherwise the averaged block is written to dst (using the
+     * caller's *i_dst_stride) and dst is returned. */
+    const int i_fx = mvx & 3;
+    const int i_fy = mvy & 3;
+    /* todo : fixme... */
+    const int b_swap = ( i_fx == 3 && i_fy == 1 ) || ( i_fx == 1 && i_fy == 3 );
+    const int i_x1 = mvx >> 1;
+    const int i_y1 = ( mvy + 1 - b_swap ) >> 1;
+    uint8_t *p_first = src[ (i_x1 & 1) + ( (i_y1 & 1) << 1 ) ]
+                     + (i_y1 >> 1) * i_src_stride + (i_x1 >> 1);
+
+    if( !( (mvx|mvy) & 1 ) )
+    {
+        /* full-pel or half-pel vector: the plane already holds the answer */
+        *i_dst_stride = i_src_stride;
+        return p_first;
+    }
+
+    /* qpel interpolation needed */
+    {
+        const int i_x2 = ( mvx + 1 ) >> 1;
+        const int i_y2 = ( mvy + b_swap ) >> 1;
+        uint8_t *p_second = src[ (i_x2 & 1) + ( (i_y2 & 1) << 1 ) ]
+                          + (i_y2 >> 1) * i_src_stride + (i_x2 >> 1);
+
+        if( i_width == 4 )
+            x264_pixel_avg_w4_mmxext( dst, *i_dst_stride, p_first, i_src_stride,
+                                      p_second, i_src_stride, i_height );
+        else if( i_width == 8 )
+            x264_pixel_avg_w8_mmxext( dst, *i_dst_stride, p_first, i_src_stride,
+                                      p_second, i_src_stride, i_height );
+        else
+            x264_pixel_avg_w16_mmxext( dst, *i_dst_stride, p_first, i_src_stride,
+                                       p_second, i_src_stride, i_height );
+    }
+    return dst;
+}
+
+
+void x264_mc_mmxext_init( x264_mc_functions_t *pf ) /* install the MMXEXT luma entry points into *pf; mc_chroma is not touched here */
{
- pf[MC_LUMA] = motion_compensation_luma_mmxext;
+ pf->mc_luma = mc_luma_mmx; /* always writes the prediction into the caller's buffer */
+ pf->get_ref = get_ref_mmx; /* may return a pointer straight into a reference plane instead */
}
-void x264_mc_sse2_init( x264_mc_function_t pf[2] )
+void x264_mc_sse2_init( x264_mc_functions_t *pf ) /* install the luma entry points used when SSE2 is reported; mc_chroma is not touched here */
{
- pf[MC_LUMA] = motion_compensation_luma_sse2;
+ /* todo: use sse2 -- for now this installs exactly the same routines as x264_mc_mmxext_init() */
+ pf->mc_luma = mc_luma_mmx;
+ pf->get_ref = get_ref_mmx;
}
+void get_funcs_mmx(pf_mc_t *int_h, pf_mc_t *int_v, pf_mc_t *int_hv)
+{ /* Hand the three 16-wide filter kernels back through the out-parameters;
+   * x264_frame_filter() calls this to replace its default mc_hh/mc_hv/mc_hc. */
+ *int_h = mc_hh_w16; /* used by x264_frame_filter() to fill filtered[1] */
+ *int_v = mc_hv_w16; /* used by x264_frame_filter() to fill filtered[2] */
+ *int_hv = mc_hc_w16; /* used by x264_frame_filter() to fill filtered[3] */
+}
+
+void get_funcs_sse2(pf_mc_t *int_h, pf_mc_t *int_v, pf_mc_t *int_hv)
+{ /* SSE2 counterpart of get_funcs_mmx(); it currently returns the very same
+   * three kernels, so selecting SSE2 changes nothing in the filter pass. */
+ *int_h = mc_hh_w16; /* used by x264_frame_filter() to fill filtered[1] */
+ *int_v = mc_hv_w16; /* used by x264_frame_filter() to fill filtered[2] */
+ *int_hv = mc_hc_w16; /* used by x264_frame_filter() to fill filtered[3] */
+}
#ifndef _I386_MC_H
#define _I386_MC_H 1
-void x264_mc_mmxext_init( x264_mc_function_t pf[2] );
-void x264_mc_sse2_init( x264_mc_function_t pf[2] );
+void x264_mc_mmxext_init( x264_mc_functions_t *pf );
+void x264_mc_sse2_init( x264_mc_functions_t *pf );
#endif
const int mvx = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
const int mvy = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
- h->mc[MC_LUMA]( &h->mb.pic.p_fref[0][i_ref][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
+ h->mc.mc_luma( h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
&h->mb.pic.p_fdec[0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
- mvx, mvy, 4*width, 4*height );
+ mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
&h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
mvx, mvy, 2*width, 2*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
&h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
mvx, mvy, 2*width, 2*height );
}
const int mvx = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
const int mvy = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
- h->mc[MC_LUMA]( &h->mb.pic.p_fref[1][i_ref][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
+ h->mc.mc_luma( h->mb.pic.p_fref[1][i_ref], h->mb.pic.i_stride[0],
&h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
- mvx, mvy, 4*width, 4*height );
+ mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
&h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
mvx, mvy, 2*width, 2*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
&h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
mvx, mvy, 2*width, 2*height );
}
else if( width == 1 && height == 2 ) i_mode = PIXEL_4x8;
else if( width == 1 && height == 1 ) i_mode = PIXEL_4x4;
- h->mc[MC_LUMA]( &h->mb.pic.p_fref[0][i_ref0][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
+ h->mc.mc_luma( h->mb.pic.p_fref[0][i_ref0], h->mb.pic.i_stride[0],
&h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
- mvx0, mvy0, 4*width, 4*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref0][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+ mvx0 + 4*4*x, mvy0 + 4*4*y, 4*width, 4*height );
+ h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref0][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
&h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
mvx0, mvy0, 2*width, 2*height );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[0][i_ref0][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref0][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
&h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
mvx0, mvy0, 2*width, 2*height );
- h->mc[MC_LUMA]( &h->mb.pic.p_fref[1][i_ref1][0][4*y * h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0],
- tmp, 16, mvx1, mvy1, 4*width, 4*height );
+ h->mc.mc_luma( h->mb.pic.p_fref[1][i_ref1], h->mb.pic.i_stride[0],
+ tmp, 16, mvx1 + 4*4*x, mvy1 + 4*4*y, 4*width, 4*height );
h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16 );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref1][1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16 );
- h->mc[MC_CHROMA]( &h->mb.pic.p_fref[1][i_ref1][2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
+ h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 );
}
for( j = 0; j < h->i_ref0; j++ )
{
- h->mb.pic.p_fref[0][j][i] = &h->fref0[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+ h->mb.pic.p_fref[0][j][i==0 ? 0:i+3] = &h->fref0[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+ h->mb.pic.p_fref[0][j][i+1] = &h->fref0[j]->filtered[i+1][ 16 * ( i_mb_x + i_mb_y * h->fdec->i_stride[0] )];
}
for( j = 0; j < h->i_ref1; j++ )
{
- h->mb.pic.p_fref[1][j][i] = &h->fref1[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+ h->mb.pic.p_fref[1][j][i==0 ? 0:i+3] = &h->fref1[j]->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
+ h->mb.pic.p_fref[1][j][i+1] = &h->fref1[j]->filtered[i+1][ 16 * ( i_mb_x + i_mb_y * h->fdec->i_stride[0] )];
}
}
#include "mc.h"
#include "clip1.h"
+#include "frame.h"
#ifdef _MSC_VER
#undef HAVE_MMXEXT /* not finished now */
pf_mc[mvy&0x03][mvx&0x03]( src, i_src_stride, dst, i_dst_stride, i_width, i_height );
}
+void mc_luma( uint8_t *src[4], int i_src_stride,
+              uint8_t *dst,    int i_dst_stride,
+              int mvx,int mvy,
+              int i_width, int i_height )
+{
+    /* Portable luma motion compensation from pre-filtered planes.
+     * src[] holds the four luma planes of one reference, indexed by
+     * (half-pel x bit) + 2 * (half-pel y bit).  A vector with both low bits
+     * clear is copied from a single plane; any other vector is the average
+     * of two neighbouring samples.  The result always lands in dst. */
+    const int i_fx = mvx & 3;
+    const int i_fy = mvy & 3;
+    /* todo : fixme... */
+    const int b_swap = ( i_fx == 3 && i_fy == 1 ) || ( i_fx == 1 && i_fy == 3 );
+    const int i_x1 = mvx >> 1;
+    const int i_y1 = ( mvy + 1 - b_swap ) >> 1;
+    uint8_t *p_first = src[ (i_x1 & 1) + ( (i_y1 & 1) << 1 ) ]
+                     + (i_y1 >> 1) * i_src_stride + (i_x1 >> 1);
+
+    if( !( (mvx|mvy) & 1 ) )
+    {
+        /* no quarter-pel part: straight copy out of the selected plane */
+        mc_copy( p_first, i_src_stride, dst, i_dst_stride, i_width, i_height );
+        return;
+    }
+
+    /* qpel interpolation needed */
+    {
+        const int i_x2 = ( mvx + 1 ) >> 1;
+        const int i_y2 = ( mvy + b_swap ) >> 1;
+        uint8_t *p_second = src[ (i_x2 & 1) + ( (i_y2 & 1) << 1 ) ]
+                          + (i_y2 >> 1) * i_src_stride + (i_x2 >> 1);
+
+        pixel_avg( dst, i_dst_stride, p_first, i_src_stride,
+                   p_second, i_src_stride, i_width, i_height );
+    }
+}
+
+uint8_t *get_ref( uint8_t *src[4], int i_src_stride,
+                  uint8_t *dst,    int * i_dst_stride,
+                  int mvx,int mvy,
+                  int i_width, int i_height )
+{
+    /* Return a pointer to the i_width x i_height luma block addressed by
+     * (mvx,mvy).  When no quarter-pel averaging is needed the pointer goes
+     * straight into one of the src[] planes and *i_dst_stride is replaced by
+     * i_src_stride; otherwise the averaged block is written to dst (using the
+     * caller's *i_dst_stride) and dst is returned. */
+    const int i_fx = mvx & 3;
+    const int i_fy = mvy & 3;
+    /* todo : fixme... */
+    const int b_swap = ( i_fx == 3 && i_fy == 1 ) || ( i_fx == 1 && i_fy == 3 );
+    const int i_x1 = mvx >> 1;
+    const int i_y1 = ( mvy + 1 - b_swap ) >> 1;
+    uint8_t *p_first = src[ (i_x1 & 1) + ( (i_y1 & 1) << 1 ) ]
+                     + (i_y1 >> 1) * i_src_stride + (i_x1 >> 1);
+
+    if( !( (mvx|mvy) & 1 ) )
+    {
+        /* full-pel or half-pel vector: the plane already holds the answer */
+        *i_dst_stride = i_src_stride;
+        return p_first;
+    }
+
+    /* qpel interpolation needed */
+    {
+        const int i_x2 = ( mvx + 1 ) >> 1;
+        const int i_y2 = ( mvy + b_swap ) >> 1;
+        uint8_t *p_second = src[ (i_x2 & 1) + ( (i_y2 & 1) << 1 ) ]
+                          + (i_y2 >> 1) * i_src_stride + (i_x2 >> 1);
+
+        pixel_avg( dst, *i_dst_stride, p_first, i_src_stride,
+                   p_second, i_src_stride, i_width, i_height );
+    }
+    return dst;
+}
+
/* full chroma mc (ie until 1/8 pixel)*/
static void motion_compensation_chroma( uint8_t *src, int i_src_stride,
uint8_t *dst, int i_dst_stride,
}
}
-void x264_mc_init( int cpu, x264_mc_function_t pf[2] )
+void x264_mc_init( int cpu, x264_mc_functions_t *pf )
{
- pf[MC_LUMA] = motion_compensation_luma;
- pf[MC_CHROMA] = motion_compensation_chroma;
+ pf->mc_luma = mc_luma;
+ pf->get_ref = get_ref;
+ pf->mc_chroma = motion_compensation_chroma;
#ifdef HAVE_MMXEXT
if( cpu&X264_CPU_MMXEXT )
if( cpu&X264_CPU_SSE2 )
x264_mc_sse2_init( pf );
#endif
-
+/*
#ifdef ARCH_PPC
if( cpu&X264_CPU_ALTIVEC )
x264_mc_altivec_init( pf );
#endif
+*/
}
+void get_funcs_mmx(pf_mc_t*, pf_mc_t*, pf_mc_t*);
+void get_funcs_sse2(pf_mc_t*, pf_mc_t*, pf_mc_t*);
+
+void x264_frame_filter( int cpu, x264_frame_t *frame )
+{
+    /* Build the three filtered luma planes frame->filtered[1..3] from
+     * frame->plane[0], one 16x16 tile at a time.  The walk starts 8 pixels
+     * before the top/left edge and continues 8 pixels past the bottom/right
+     * edge, so each filtered plane carries an 8-pixel margin of real data.
+     * cpu selects optimised kernels when the build provides them. */
+    const int i_tile   = 16;
+    const int i_stride = frame->i_stride[0];
+    pf_mc_t pf_h  = mc_hh;
+    pf_mc_t pf_v  = mc_hv;
+    pf_mc_t pf_hv = mc_hc;
+    int x, y;
+
+#ifdef HAVE_MMXEXT
+    if( cpu&X264_CPU_MMXEXT )
+        get_funcs_mmx( &pf_h, &pf_v, &pf_hv );
+#endif
+
+#ifdef HAVE_SSE2
+    if( cpu&X264_CPU_SSE2 )
+        get_funcs_sse2( &pf_h, &pf_v, &pf_hv );
+#endif
+
+    for( y = -8; y < frame->i_lines[0] + 8; y += i_tile )
+    {
+        /* the stride includes 64 columns of border, hence "- 64" */
+        for( x = -8; x < i_stride - 64 + 8; x += i_tile )
+        {
+            /* the same offset addresses the tile in all four planes */
+            const int i_off = y * i_stride + x;
+            uint8_t *p_src = frame->plane[0] + i_off;
+
+            pf_h ( p_src, i_stride, frame->filtered[1] + i_off, i_stride, i_tile, i_tile );
+            pf_v ( p_src, i_stride, frame->filtered[2] + i_off, i_stride, i_tile, i_tile );
+            pf_hv( p_src, i_stride, frame->filtered[3] + i_off, i_stride, i_tile, i_tile );
+        }
+    }
+}
* width == 16-> height == 8 or 16
* */
-typedef void (*x264_mc_function_t)(uint8_t *, int, uint8_t *, int,
- int mvx, int mvy,
- int i_width, int i_height );
-enum
+typedef struct /* motion-compensation entry points, filled in by x264_mc_init() */
{
- MC_LUMA = 0,
- MC_CHROMA = 1,
-};
+ void (*mc_luma)(uint8_t **, int, uint8_t *, int,
+ int mvx, int mvy,
+ int i_width, int i_height ); /* luma MC: first argument is the array of luma planes of one reference, result is written to the second buffer */
-void x264_mc_init( int cpu, x264_mc_function_t pf[2] );
+ uint8_t* (*get_ref)(uint8_t **, int, uint8_t *, int *,
+ int mvx, int mvy,
+ int i_width, int i_height ); /* like mc_luma, but may return a pointer into the reference and overwrite the stride passed by pointer */
+ /* chroma keeps the single-plane interface */
+ void (*mc_chroma)(uint8_t *, int, uint8_t *, int,
+ int mvx, int mvy,
+ int i_width, int i_height );
+} x264_mc_functions_t;
+
+void x264_mc_init( int cpu, x264_mc_functions_t *pf );
#endif
}
}
+#define LOAD_HPELS(dst, src, offset) \
+ dst[0] = &src[0][offset]; \
+ dst[1] = &src[1][offset]; \
+ dst[2] = &src[2][offset]; \
+ dst[3] = &src[3][offset]; \
+
static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
{
x264_me_t m;
i_fullpel_thresh -= i_ref_cost;
/* search with ref */
- m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
+ LOAD_HPELS( m.p_fref, h->mb.pic.p_fref[0][i_ref], 0 );
x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
{
- uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+ uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
int mvc[5][2], i_mvc;
int i;
m->i_pixel = PIXEL_8x8;
m->lm = a->i_lambda;
- m->p_fenc = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
- m->p_fref = &p_fref[8*(y8*h->mb.pic.i_stride[0]+x8)];
+ m->p_fenc = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
m->i_stride= h->mb.pic.i_stride[0];
+ LOAD_HPELS( m->p_fref, p_fref, 8*(y8*m->i_stride + x8) );
x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
x264_me_search( h, m, mvc, i_mvc );
static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
{
- uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+ uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
int mvc[2][2];
int i;
m->i_pixel = PIXEL_16x8;
m->lm = a->i_lambda;
- m->p_fenc = &p_fenc[8*i*h->mb.pic.i_stride[0]];
- m->p_fref = &p_fref[8*i*h->mb.pic.i_stride[0]];
+ m->p_fenc = &p_fenc[8*i*h->mb.pic.i_stride[0]];
m->i_stride= h->mb.pic.i_stride[0];
+ LOAD_HPELS( m->p_fref, p_fref, 8*i*m->i_stride );
mvc[0][0] = a->l0.me8x8[2*i].mv[0];
mvc[0][1] = a->l0.me8x8[2*i].mv[1];
static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
{
- uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+ uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
int mvc[2][2];
int i;
m->lm = a->i_lambda;
m->p_fenc = &p_fenc[8*i];
- m->p_fref = &p_fref[8*i];
m->i_stride= h->mb.pic.i_stride[0];
+ LOAD_HPELS( m->p_fref, p_fref, 8*i );
mvc[0][0] = a->l0.me8x8[i].mv[0];
mvc[0][1] = a->l0.me8x8[i].mv[1];
static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
{
- uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+ uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
int i4x4;
m->lm = a->i_lambda;
m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
- m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
m->i_stride= h->mb.pic.i_stride[0];
+ LOAD_HPELS( m->p_fref, p_fref, 4*(y4*m->i_stride + x4) );
x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
{
- uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+ uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
int i8x4;
m->lm = a->i_lambda;
m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
- m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
m->i_stride= h->mb.pic.i_stride[0];
+ LOAD_HPELS( m->p_fref, p_fref, 4*(y4*m->i_stride + x4) );
x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
{
- uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
+ uint8_t **p_fref = h->mb.pic.p_fref[0][a->l0.i_ref];
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
int i4x8;
m->lm = a->i_lambda;
m->p_fenc = &p_fenc[4*(y4*h->mb.pic.i_stride[0]+x4)];
- m->p_fref = &p_fref[4*(y4*h->mb.pic.i_stride[0]+x4)];
m->i_stride= h->mb.pic.i_stride[0];
+ LOAD_HPELS( m->p_fref, p_fref, 4*(y4*m->i_stride + x4) );
x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
{
/* search with ref */
- m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
+ LOAD_HPELS( m.p_fref, h->mb.pic.p_fref[0][i_ref], 0 );
x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
x264_me_search_ref( h, &m, mvc, i_mvc, p_fullpel_thresh );
for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
{
/* search with ref */
- m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
+ LOAD_HPELS( m.p_fref, h->mb.pic.p_fref[1][i_ref], 0 );
x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
x264_mb_predict_mv_ref16x16( h, 1, i_ref, mvc, &i_mvc );
x264_me_search( h, &m, mvc, i_mvc );
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
/* get cost of BI mode */
- h->mc[MC_LUMA]( h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
+ h->mc.mc_luma( h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
pix1, 16,
a->l0.me16x16.mv[0], a->l0.me16x16.mv[1],
16, 16 );
- h->mc[MC_LUMA]( h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
+ h->mc.mc_luma( h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
pix2, 16,
a->l1.me16x16.mv[0], a->l1.me16x16.mv[1],
16, 16 );
static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
{
- uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
- h->mb.pic.p_fref[1][a->l1.i_ref][0] };
+ uint8_t *p_fref[2][4] =
+ { {
+ h->mb.pic.p_fref[0][a->l0.i_ref][0],
+ h->mb.pic.p_fref[0][a->l0.i_ref][1],
+ h->mb.pic.p_fref[0][a->l0.i_ref][2],
+ h->mb.pic.p_fref[0][a->l0.i_ref][3]
+ }, {
+ h->mb.pic.p_fref[1][a->l1.i_ref][0],
+ h->mb.pic.p_fref[1][a->l1.i_ref][1],
+ h->mb.pic.p_fref[1][a->l1.i_ref][2],
+ h->mb.pic.p_fref[1][a->l1.i_ref][3]
+ } };
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
uint8_t pix[2][8*8];
int i, l;
m->lm = a->i_lambda;
m->p_fenc = p_fenc_i;
- m->p_fref = &p_fref[l][8*(y8*h->mb.pic.i_stride[0]+x8)];
m->i_stride = h->mb.pic.i_stride[0];
+ LOAD_HPELS( m->p_fref, p_fref[l], 8*(y8*m->i_stride + x8) );
x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
x264_me_search( h, m, &lX->me16x16.mv, 1 );
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
/* BI mode */
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
+ h->mc.mc_luma( m->p_fref, m->i_stride, pix[l], 8,
m->mv[0], m->mv[1], 8, 8 );
i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
bs_size_se( m->mv[1] - m->mvp[1] ) );
static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
{
- uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
- h->mb.pic.p_fref[1][a->l1.i_ref][0] };
+ uint8_t *p_fref[2][4] =
+ { {
+ h->mb.pic.p_fref[0][a->l0.i_ref][0],
+ h->mb.pic.p_fref[0][a->l0.i_ref][1],
+ h->mb.pic.p_fref[0][a->l0.i_ref][2],
+ h->mb.pic.p_fref[0][a->l0.i_ref][3]
+ }, {
+ h->mb.pic.p_fref[1][a->l1.i_ref][0],
+ h->mb.pic.p_fref[1][a->l1.i_ref][1],
+ h->mb.pic.p_fref[1][a->l1.i_ref][2],
+ h->mb.pic.p_fref[1][a->l1.i_ref][3]
+ } };
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
uint8_t pix[2][8*8];
int i_ref_stride = h->mb.pic.i_stride[0];
m->p_fenc = p_fenc_i;
m->i_stride= i_ref_stride;
- m->p_fref = &p_fref[l][8*i*i_ref_stride];
+ LOAD_HPELS( m->p_fref, p_fref[l], 8*i*i_ref_stride );
mvc[0][0] = lX->me8x8[2*i].mv[0];
mvc[0][1] = lX->me8x8[2*i].mv[1];
x264_me_search( h, m, mvc, 2 );
/* BI mode */
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
+ h->mc.mc_luma( m->p_fref, m->i_stride, pix[l], 8,
m->mv[0], m->mv[1], 8, 8 );
/* FIXME: ref cost */
i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
}
static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
{
- uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
- h->mb.pic.p_fref[1][a->l1.i_ref][0] };
+ uint8_t *p_fref[2][4] =
+ { {
+ h->mb.pic.p_fref[0][a->l0.i_ref][0],
+ h->mb.pic.p_fref[0][a->l0.i_ref][1],
+ h->mb.pic.p_fref[0][a->l0.i_ref][2],
+ h->mb.pic.p_fref[0][a->l0.i_ref][3]
+ }, {
+ h->mb.pic.p_fref[1][a->l1.i_ref][0],
+ h->mb.pic.p_fref[1][a->l1.i_ref][1],
+ h->mb.pic.p_fref[1][a->l1.i_ref][2],
+ h->mb.pic.p_fref[1][a->l1.i_ref][3]
+ } };
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
uint8_t pix[2][8*8];
int i_ref_stride = h->mb.pic.i_stride[0];
m->lm = a->i_lambda;
m->p_fenc = p_fenc_i;
- m->p_fref = &p_fref[l][8*i];
m->i_stride= i_ref_stride;
+ LOAD_HPELS( m->p_fref, p_fref[l], 8*i );
mvc[0][0] = lX->me8x8[i].mv[0];
mvc[0][1] = lX->me8x8[i].mv[1];
x264_me_search( h, m, mvc, 2 );
/* BI mode */
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[l], 8,
+ h->mc.mc_luma( m->p_fref, m->i_stride, pix[l], 8,
m->mv[0], m->mv[1], 8, 8 );
/* FIXME: ref cost */
i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
x264_pixel_init( h->param.cpu, &h->pixf );
x264_dct_init( h->param.cpu, &h->dctf );
- x264_mc_init( h->param.cpu, h->mc );
+ x264_mc_init( h->param.cpu, &h->mc );
x264_csp_init( h->param.cpu, h->param.i_csp, &h->csp );
/* rate control */
/* expand border */
x264_frame_expand_border( h->fdec );
+ /* create filtered images */
+ x264_frame_filter( h->param.cpu, h->fdec );
+
+ /* expand border of filtered images */
+ x264_frame_expand_border_filtered( h->fdec );
+
/* move frame in the buffer */
h->fdec = h->frames.reference[h->param.i_frame_reference+1];
for( i = h->param.i_frame_reference+1; i > 0; i-- )
h->mb.mv_min[1], h->mb.mv_max[1] );
/* Motion compensation XXX probably unneeded */
- h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
+ h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
mvx, mvy, 16, 16 );
/* Chroma MC */
- h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1], h->mb.pic.i_stride[1],
+ h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1],
mvx, mvy, 8, 8 );
- h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][2], h->mb.pic.i_stride[2],
+ h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2],
mvx, mvy, 8, 8 );
mvp[1] = x264_clip3( mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
/* Motion compensation */
- h->mc[MC_LUMA]( h->mb.pic.p_fref[0][0][0], h->mb.pic.i_stride[0],
- h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
+ h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
+ h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
mvp[0], mvp[1], 16, 16 );
}
if( !b_bidir )
{
- h->mc[MC_CHROMA]( h->mb.pic.p_fref[0][0][1+ch], i_stride,
+ h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], i_stride,
h->mb.pic.p_fdec[1+ch], i_stride,
mvp[0], mvp[1], 8, 8 );
}
const int i_pixel = m->i_pixel;
int bmx, bmy, bcost;
int omx, omy;
- uint8_t *p_fref = m->p_fref;
+ uint8_t *p_fref = m->p_fref[0];
int i_iter;
const int mv_x_min = h->mb.mv_min_fpel[0];
{
for( i = step>1 ? hpel_iters : qpel_iters; i > 0; i-- )
{
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - step, bw, bh );
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + step, bw, bh );
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[2], 16, bmx - step, bmy + 0, bw, bh );
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[3], 16, bmx + step, bmy + 0, bw, bh );
+ h->mc.mc_luma( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - step, bw, bh );
+ h->mc.mc_luma( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + step, bw, bh );
+ h->mc.mc_luma( m->p_fref, m->i_stride, pix[2], 16, bmx - step, bmy + 0, bw, bh );
+ h->mc.mc_luma( m->p_fref, m->i_stride, pix[3], 16, bmx + step, bmy + 0, bw, bh );
cost[0] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[0], 16 ) +
m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy - step - m->mvp[1] ) );
int i_pixel; /* PIXEL_WxH */
int lm; /* lambda motion */
- uint8_t *p_fref;
+ uint8_t *p_fref[4];
uint8_t *p_fenc;
int i_stride;
#ifdef SYS_FREEBSD
#define exp2f(x) powf( 2, (x) )
#endif
-
+#ifdef _MSC_VER
+#define exp2f(x) pow( 2, (x) )
+#endif
typedef struct
{