From 75b3871f90713a290be183e1436e792cef51f335 Mon Sep 17 00:00:00 2001 From: Fiona Glaser Date: Sat, 5 Dec 2009 02:27:30 -0800 Subject: [PATCH] Actually do r1356 Somehow commit r1356 got lost in the ether. I'm not sure how, but now it's fixed. --- encoder/me.c | 59 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/encoder/me.c b/encoder/me.c index a09db50b..6b18e8a6 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -1025,9 +1025,8 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei { \ if( !avoid_mvp || !(mx == pmx && my == pmy) ) \ { \ - int stride = 16; \ - uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw*4, bh*4, &m->weight[0] ); \ - dst = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \ + h->mc.mc_luma( pix, FDEC_STRIDE, m->p_fref, m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \ + dst = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, pix, FDEC_STRIDE ) \ + p_cost_mvx[mx] + p_cost_mvy[my]; \ COPY1_IF_LT( bsatd, dst ); \ } \ @@ -1042,6 +1041,11 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei uint64_t cost; \ M32( cache_mv ) = pack16to32_mask(mx,my); \ M32( cache_mv2 ) = pack16to32_mask(mx,my); \ + if( m->i_pixel <= PIXEL_8x8 )\ + {\ + h->mc.mc_chroma( pixu, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 );\ + h->mc.mc_chroma( pixv, FDEC_STRIDE, m->p_fref[5], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 );\ + }\ cost = x264_rd_cost_part( h, i_lambda2, i4, m->i_pixel ); \ COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \ } \ @@ -1054,22 +1058,28 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int int16_t *cache_mv = h->mb.cache.mv[i_list][x264_scan8[i4]]; int16_t *cache_mv2 = cache_mv + pixel_mv_offs[m->i_pixel]; const uint16_t *p_cost_mvx, *p_cost_mvy; - const int bw = x264_pixel_size[m->i_pixel].w>>2; - const int bh = x264_pixel_size[m->i_pixel].h>>2; + const int bw = x264_pixel_size[m->i_pixel].w; + const int bh = x264_pixel_size[m->i_pixel].h; const int i_pixel = m->i_pixel; + const int mvy_offset = h->mb.b_interlaced & m->i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; - ALIGNED_ARRAY_16( uint8_t, pix,[16*16] ); uint64_t bcost = COST_MAX64; int bmx = m->mv[0]; int bmy = m->mv[1]; int omx, omy, pmx, pmy, i, j; unsigned bsatd; - int satd = 0; + int satd; int dir = -2; - int satds[8]; + int i8 = i4>>2; + + uint8_t *pix = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i4]]; + uint8_t *pixu = &h->mb.pic.p_fdec[1][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4]; + uint8_t *pixv = &h->mb.pic.p_fdec[2][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4]; + + h->mb.b_skip_mc = 1; if( m->i_pixel != PIXEL_16x16 && i4 != 0 ) - x264_mb_predict_mv( h, i_list, i4, bw, m->mvp ); + x264_mb_predict_mv( h, i_list, i4, bw>>2, m->mvp ); pmx = m->mvp[0]; pmy = m->mvp[1]; p_cost_mvx = m->p_cost_mv - pmx; @@ -1086,7 +1096,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int && pmy >= h->mb.mv_min_spel[1] && pmy <= h->mb.mv_max_spel[1] ) { COST_MV_SATD( pmx, pmy, satd, 0 ); - COST_MV_RD( pmx, pmy, satd, 0,0 ); + COST_MV_RD ( pmx, pmy, satd, 0, 0 ); /* The hex motion search is guaranteed to not repeat the center candidate, * so if pmv is chosen, set the "MV to avoid checking" to bmv instead. */ if( bmx == pmx && bmy == pmy ) @@ -1098,14 +1108,20 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int if( bmy < h->mb.mv_min_spel[1] + 3 || bmy > h->mb.mv_max_spel[1] - 3 ) + { + h->mb.b_skip_mc = 0; return; + } /* subpel hex search, same pattern as ME HEX. */ dir = -2; omx = bmx; omy = bmy; - for( j=0; j<6; j++ ) COST_MV_SATD( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j], 1 ); - for( j=0; j<6; j++ ) COST_MV_RD ( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j], 1,j ); + for( j=0; j<6; j++ ) + { + COST_MV_SATD( omx + hex2[j+1][0], omy + hex2[j+1][1], satd, 1 ); + COST_MV_RD ( omx + hex2[j+1][0], omy + hex2[j+1][1], satd, 1, j ); + } if( dir != -2 ) { @@ -1119,8 +1135,11 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int dir = -2; omx = bmx; omy = bmy; - for( j=0; j<3; j++ ) COST_MV_SATD( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j], 1 ); - for( j=0; j<3; j++ ) COST_MV_RD ( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j], 1, odir-1+j ); + for( j=0; j<3; j++ ) + { + COST_MV_SATD( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satd, 1 ); + COST_MV_RD ( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satd, 1, odir-1+j ); + } if( dir == -2 ) break; } @@ -1129,12 +1148,16 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int /* square refine, same pattern as ME HEX. */ omx = bmx; omy = bmy; - for( i=0; i<8; i++ ) COST_MV_SATD( omx + square1[i+1][0], omy + square1[i+1][1], satds[i], 1 ); - for( i=0; i<8; i++ ) COST_MV_RD ( omx + square1[i+1][0], omy + square1[i+1][1], satds[i], 0,0 ); + for( i=0; i<8; i++ ) + { + COST_MV_SATD( omx + square1[i+1][0], omy + square1[i+1][1], satd, 1 ); + COST_MV_RD ( omx + square1[i+1][0], omy + square1[i+1][1], satd, 0, 0 ); + } m->cost = bcost; m->mv[0] = bmx; m->mv[1] = bmy; - x264_macroblock_cache_mv ( h, block_idx_x[i4], block_idx_y[i4], bw, bh, i_list, pack16to32_mask(bmx, bmy) ); - x264_macroblock_cache_mvd( h, block_idx_x[i4], block_idx_y[i4], bw, bh, i_list, pack16to32_mask(bmx - m->mvp[0], bmy - m->mvp[1]) ); + x264_macroblock_cache_mv ( h, block_idx_x[i4], block_idx_y[i4], bw>>2, bh>>2, i_list, pack16to32_mask(bmx, bmy) ); + x264_macroblock_cache_mvd( h, block_idx_x[i4], block_idx_y[i4], bw>>2, bh>>2, i_list, pack16to32_mask(bmx - m->mvp[0], bmy - m->mvp[1]) ); + h->mb.b_skip_mc = 0; } -- 2.40.0