#include <stdio.h>
#include <string.h>
#include <math.h>
+#include <limits.h>
#include "../core/common.h"
#include "../core/macroblock.h"
// m.mvc[0] = 0;
// m.mvc[1] = 0;
- /* ME for ref 0 */
- m.p_fref = h->mb.pic.p_fref[0][0][0];
- x264_mb_predict_mv_16x16( h, 0, 0, m.mvp );
- x264_me_search( h, &m );
-
- a->l0.i_ref = 0;
- a->l0.me16x16 = m;
-
- for( i_ref = 1; i_ref < h->i_ref0; i_ref++ )
+ a->l0.me16x16.cost = INT_MAX;
+ for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
{
/* search with ref */
m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
}
}
+ /* subtract ref cost, so we don't have to add it for the other P types */
+ a->l0.me16x16.cost -= m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref );
+
/* Set global ref, needed for all other modes */
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
}
m.b_mvc = 0;
m.i_mv_range = a->i_mv_range;
- /* ME for List 0 ref 0 */
- m.p_fref = h->mb.pic.p_fref[0][0][0];
- x264_mb_predict_mv_16x16( h, 0, 0, m.mvp );
- x264_me_search( h, &m );
-
- a->l0.i_ref = 0;
- a->l0.me16x16 = m;
-
- for( i_ref = 1; i_ref < h->i_ref0; i_ref++ )
+ /* ME for List 0 */
+ a->l0.me16x16.cost = INT_MAX;
+ for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
{
/* search with ref */
m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
}
}
- /* ME for list 1 ref 0 */
- m.p_fref = h->mb.pic.p_fref[1][0][0];
- x264_mb_predict_mv_16x16( h, 1, 0, m.mvp );
- x264_me_search( h, &m );
-
- a->l1.i_ref = 0;
- a->l1.me16x16 = m;
-
- for( i_ref = 1; i_ref < h->i_ref1; i_ref++ )
+ /* ME for list 1 */
+ a->l1.me16x16.cost = INT_MAX;
+ for( i_ref = 0; i_ref < h->i_ref1; i_ref++ )
{
/* search with ref */
m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
#include "../core/common.h"
#include "me.h"
+/* Sub-pel refinement presets, selected from good points on the speed-vs-quality
+ * curve of several test videos.
+ * subpel_iterations[i_subpel_refine] = { refine_hpel, refine_qpel, me_hpel, me_qpel }
+ * where me_* are the number of EPZS iterations run on all candidate block types
+ * (columns 2 and 3, read by x264_me_search), and refine_* are run only on the
+ * winner (columns 0 and 1, read by x264_me_refine_qpel).
+ * Each count is an upper bound: refine_subpel ends a pass early as soon as one
+ * iteration fails to lower the cost.
+ * NOTE(review): the table has 6 rows and is indexed directly by
+ * h->param.analyse.i_subpel_refine; no range check is visible in this file, so
+ * the value is presumably clamped to 0..5 elsewhere -- confirm. */
+const static int subpel_iterations[][4] =
+ {{1,0,0,0}, /* i_subpel_refine = 0 */
+ {1,1,0,0}, /* i_subpel_refine = 1 */
+ {1,2,0,0}, /* i_subpel_refine = 2 */
+ {0,2,1,0}, /* i_subpel_refine = 3 */
+ {0,2,1,1}, /* i_subpel_refine = 4 */
+ {0,2,1,2}}; /* i_subpel_refine = 5 */
+
+static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters );
+
void x264_me_search( x264_t *h, x264_me_t *m )
{
const int i_pixel = m->i_pixel;
int bmx, bmy;
uint8_t *p_fref = m->p_fref;
int i_iter;
+ int hpel, qpel; /* iteration limits for the sub-pel pass run at the end of the search */
/* init with mvp */
m->cost = h->pixf.satd[i_pixel]( m->p_fenc, m->i_stride, p_fref, m->i_stride ) +
m->lm * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
bs_size_se( m->mv[1] - m->mvp[1] ) );
+ /* Sub-pel pass performed as part of the search itself: columns 2 and 3 of the
+ * preset row selected by i_subpel_refine give the hpel and qpel iteration
+ * limits. refine_subpel compares its candidates against the m->cost computed
+ * above and updates m->mv / m->cost in place. */
+ hpel = subpel_iterations[h->param.analyse.i_subpel_refine][2];
+ qpel = subpel_iterations[h->param.analyse.i_subpel_refine][3];
+ if( hpel || qpel ) /* both zero: this preset does no sub-pel work during the search */
+ refine_subpel( h, m, hpel, qpel );
}
void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
+{ /* Refine m->mv using the refine_* columns (0 = hpel, 1 = qpel) of the preset
+ * row selected by h->param.analyse.i_subpel_refine. refine_subpel updates
+ * m->mv and m->cost in place; nothing is done when both limits are zero. */
+ int hpel = subpel_iterations[h->param.analyse.i_subpel_refine][0];
+ int qpel = subpel_iterations[h->param.analyse.i_subpel_refine][1];
+ if( hpel || qpel )
+ refine_subpel( h, m, hpel, qpel );
+}
+/* Iterative four-neighbour refinement of m->mv.
+ * Two passes are made: first with step 2, limited to hpel_iters iterations, then
+ * with step 1, limited to qpel_iters iterations. Each iteration motion-compensates
+ * the four candidates (0,-step), (0,+step), (-step,0), (+step,0) around the
+ * current best vector, costs each one as
+ *   satd + m->lm * ( bs_size_se(dx) + bs_size_se(dy) )
+ * with dx/dy taken relative to m->mvp, and moves to the cheapest candidate only
+ * if it is strictly cheaper than m->cost; otherwise the current pass ends.
+ * Candidates are compared against the incoming m->cost, so the caller is expected
+ * to have set it for the incoming m->mv.
+ * On return m->mv holds the best vector found and m->cost its cost. */
+static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters )
{
const int bw = x264_pixel_size[m->i_pixel].w;
const int bh = x264_pixel_size[m->i_pixel].h;
DECLARE_ALIGNED( uint8_t, pix[4][16*16], 16 );
int cost[4];
int best;
+ int step, i; /* step: mv offset of the current pass; i: iterations left in it */
int bmx = m->mv[0];
int bmy = m->mv[1];
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - 2, bw, bh );
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + 2, bw, bh );
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[2], 16, bmx - 2, bmy + 0, bw, bh );
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[3], 16, bmx + 2, bmy + 0, bw, bh );
-
- cost[0] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[0], 16 ) +
- m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy - 2 - m->mvp[1] ) );
- cost[1] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[1], 16 ) +
- m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy + 2 - m->mvp[1] ) );
- cost[2] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[2], 16 ) +
- m->lm * ( bs_size_se( bmx - 2 - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
- cost[3] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[3], 16 ) +
- m->lm * ( bs_size_se( bmx + 2 - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
-
- best = 0;
- if( cost[1] < cost[0] ) best = 1;
- if( cost[2] < cost[best] ) best = 2;
- if( cost[3] < cost[best] ) best = 3;
-
- if( cost[best] < m->cost )
+ for( step = 2; step >= 1; step-- ) /* step 2 uses hpel_iters, step 1 uses qpel_iters */
{
- m->cost = cost[best];
- if( best == 0 ) bmy -= 2;
- else if( best == 1 ) bmy += 2;
- else if( best == 2 ) bmx -= 2;
- else if( best == 3 ) bmx += 2;
- }
-
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - 1, bw, bh );
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + 1, bw, bh );
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[2], 16, bmx - 1, bmy + 0, bw, bh );
- h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[3], 16, bmx + 1, bmy + 0, bw, bh );
-
- cost[0] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[0], 16 ) +
- m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy - 1 - m->mvp[1] ) );
- cost[1] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[1], 16 ) +
- m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy + 1 - m->mvp[1] ) );
- cost[2] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[2], 16 ) +
- m->lm * ( bs_size_se( bmx - 1 - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
- cost[3] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[3], 16 ) +
- m->lm * ( bs_size_se( bmx + 1 - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
-
- best = 0;
- if( cost[1] < cost[0] ) best = 1;
- if( cost[2] < cost[best] ) best = 2;
- if( cost[3] < cost[best] ) best = 3;
-
- if( cost[best] < m->cost )
- {
- m->cost = cost[best];
- if( best == 0 ) bmy--;
- else if( best == 1 ) bmy++;
- else if( best == 2 ) bmx--;
- else if( best == 3 ) bmx++;
+ for( i = step>1 ? hpel_iters : qpel_iters; i > 0; i-- ) /* at most that many moves per pass */
+ {
+ h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - step, bw, bh );
+ h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + step, bw, bh );
+ h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[2], 16, bmx - step, bmy + 0, bw, bh );
+ h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[3], 16, bmx + step, bmy + 0, bw, bh );
+ /* cost[k] belongs to the candidate rendered into pix[k] above */
+ cost[0] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[0], 16 ) +
+ m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy - step - m->mvp[1] ) );
+ cost[1] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[1], 16 ) +
+ m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy + step - m->mvp[1] ) );
+ cost[2] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[2], 16 ) +
+ m->lm * ( bs_size_se( bmx - step - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
+ cost[3] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[3], 16 ) +
+ m->lm * ( bs_size_se( bmx + step - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
+ /* pick the cheapest candidate; strict '<' means ties keep the lower index */
+ best = 0;
+ if( cost[1] < cost[0] ) best = 1;
+ if( cost[2] < cost[best] ) best = 2;
+ if( cost[3] < cost[best] ) best = 3;
+ /* move only on a strict improvement over the current best cost */
+ if( cost[best] < m->cost )
+ {
+ m->cost = cost[best];
+ if( best == 0 ) bmy -= step;
+ else if( best == 1 ) bmy += step;
+ else if( best == 2 ) bmx -= step;
+ else if( best == 3 ) bmx += step;
+ }
+ else break; /* no candidate improved: end this pass, go on to the next step size */
+ }
}
m->mv[0] = bmx;
m->mv[1] = bmy;
}
+
" - i4x4\n"
" - psub16x16,psub8x8\n"
" - none, all\n"
+ " --subme <integer> Subpixel motion estimation quality\n"
"\n"
" -s, --sar width:height Specify Sample Aspect Ratio\n"
" -o, --output Specify output file\n"
#define OPT_QCOMP 266
#define OPT_NOPSNR 267
#define OPT_QUIET 268
+#define OPT_SUBME 269 /* id returned for the long-only "--subme" option; sets param->analyse.i_subpel_refine */
static struct option long_options[] =
{
{ "sar", required_argument, NULL, 's' },
{ "output", required_argument, NULL, 'o' },
{ "analyse", required_argument, NULL, 'A' },
+ { "subme", required_argument, NULL, OPT_SUBME },
{ "rcsens", required_argument, NULL, OPT_RCSENS },
{ "rcbuf", required_argument, NULL, OPT_RCBUF },
{ "rcinitbuf",required_argument, NULL, OPT_RCIBUF },
if( strstr( optarg, "psub16x16" ) ) param->analyse.inter |= X264_ANALYSE_PSUB16x16;
if( strstr( optarg, "psub8x8" ) ) param->analyse.inter |= X264_ANALYSE_PSUB8x8;
break;
+ case OPT_SUBME:
+ param->analyse.i_subpel_refine = atoi(optarg);
+ break;
case OPT_RCBUF:
param->rc.i_rc_buffer_size = atoi(optarg);
break;