From 4aa33d658263abb40bf91438b5ec1eb93d86621f Mon Sep 17 00:00:00 2001 From: Fiona Glaser Date: Tue, 1 Dec 2009 16:15:15 -0800 Subject: [PATCH] Significantly faster qpel-RD Cache the results of MC, like in bidir-RD. Slightly changes output due to the necessary reordering of satd/RD calls. 5-10% faster qpel-RD. --- encoder/macroblock.c | 6 +----- encoder/me.c | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/encoder/macroblock.c b/encoder/macroblock.c index 0b244a7c..a709d26c 100644 --- a/encoder/macroblock.c +++ b/encoder/macroblock.c @@ -1181,13 +1181,9 @@ void x264_macroblock_encode_p4x4( x264_t *h, int i4 ) int i_qp = h->mb.i_qp; uint8_t *p_fenc = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[i4]]; uint8_t *p_fdec = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i4]]; - const int i_ref = h->mb.cache.ref[0][x264_scan8[i4]]; - const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[i4]][0], h->mb.mv_min[0], h->mb.mv_max[0] ); - const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[i4]][1], h->mb.mv_min[1], h->mb.mv_max[1] ); int nz; - h->mc.mc_luma( p_fdec, FDEC_STRIDE, h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0], - mvx + 4*4*block_idx_x[i4], mvy + 4*4*block_idx_y[i4], 4, 4, &h->sh.weight[i_ref][0] ); + /* Don't need motion compensation as this function is only used in qpel-RD, which caches pixel data. */ if( h->mb.b_lossless ) { diff --git a/encoder/me.c b/encoder/me.c index 999cd4fc..a09db50b 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -1027,7 +1027,7 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei { \ int stride = 16; \ uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw*4, bh*4, &m->weight[0] ); \ - dst = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \ + dst = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \ + p_cost_mvx[mx] + p_cost_mvy[my]; \ COPY1_IF_LT( bsatd, dst ); \ } \ -- 2.40.0