From: Jingning Han Date: Mon, 20 May 2013 23:04:28 +0000 (-0700) Subject: Make the intra rd search support 8x4/4x8 X-Git-Tag: v1.3.0~1104^2~151^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f153a5d06302eec5e2ada78e4576ca29f804c999;p=libvpx Make the intra rd search support 8x4/4x8 This commit makes the rate-distortion optimization of intra coding capable of supporting 8x4 and 4x8 partition settings. It enables the entropy coding of intra modes in key frames using a unified contextual probability model conditioned on the above/left prediction modes. Coding performance: derf 0.464% Change-Id: Ieed055084e11fcb64d5d5faeb0e706d30268ba18 --- diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index a44fe4ca5..3e3a94e51 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -107,10 +107,10 @@ const vp9_prob vp9_partition_probs[NUM_PARTITION_CONTEXTS] [PARTITION_TYPES - 1] = { // FIXME(jingning,rbultje) put real probabilities here #if CONFIG_AB4X4 - {105, 88, 252}, - {113, 88, 249}, - {113, 106, 251}, - {126, 105, 107}, + {202, 162, 107}, + {16, 2, 169}, + {3, 246, 19}, + {104, 90, 134}, #endif {202, 162, 107}, {16, 2, 169}, diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h index 0a1c4133b..4c46aa712 100644 --- a/vp9/common/vp9_findnearmv.h +++ b/vp9/common/vp9_findnearmv.h @@ -153,7 +153,7 @@ static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { } else if (cur_mb->mbmi.mode == I4X4_PRED) { return ((cur_mb->bmi + 1 + b)->as_mode.first); } else { - return B_DC_PRED; + return DC_PRED; } } assert(b == 1 || b == 3); @@ -171,7 +171,7 @@ static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, } else if (cur_mb->mbmi.mode == I4X4_PRED) { return ((cur_mb->bmi + 2 + b)->as_mode.first); } else { - return B_DC_PRED; + return DC_PRED; } } diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index ac66fb644..9ed26b00a 100644 --- 
a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -103,6 +103,7 @@ static void kfread_modes(VP9D_COMP *pbi, MODE_INFO *m, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; + const int mis = cm->mode_info_stride; m->mbmi.ref_frame = INTRA_FRAME; // Read segmentation map if it is being updated explicitly this frame @@ -119,11 +120,14 @@ static void kfread_modes(VP9D_COMP *pbi, MODE_INFO *m, // luma mode #if CONFIG_AB4X4 - if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) - m->mbmi.mode = read_kf_sb_ymode(r, - cm->sb_kf_ymode_prob[cm->kf_ymode_probs_index]); - else + if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { + const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis); + const MB_PREDICTION_MODE L = xd->left_available ? + left_block_mode(m, 0) : DC_PRED; + m->mbmi.mode = read_kf_bmode(r, cm->kf_bmode_prob[A][L]); + } else { m->mbmi.mode = I4X4_PRED; + } #else m->mbmi.mode = m->mbmi.sb_type > BLOCK_SIZE_SB8X8 ? read_kf_sb_ymode(r, cm->sb_kf_ymode_prob[cm->kf_ymode_probs_index]): @@ -140,15 +144,25 @@ static void kfread_modes(VP9D_COMP *pbi, MODE_INFO *m, int idx, idy; int bw = 1 << b_width_log2(m->mbmi.sb_type); int bh = 1 << b_height_log2(m->mbmi.sb_type); - // FIXME(jingning): fix intra4x4 rate-distortion optimization, then - // use bw and bh as the increment values. -#if !CONFIG_AB4X4 || CONFIG_AB4X4 + +#if !CONFIG_AB4X4 bw = 1, bh = 1; #endif - for (idy = 0; idy < 2; idy += bh) - for (idx = 0; idx < 2; idx += bw) - m->bmi[idy * 2 + idx].as_mode.first = - read_kf_sb_ymode(r, cm->sb_kf_ymode_prob[cm->kf_ymode_probs_index]); + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + int ib = idy * 2 + idx; + int k; + const MB_PREDICTION_MODE A = above_block_mode(m, ib, mis); + const MB_PREDICTION_MODE L = (xd->left_available || idx) ? 
+ left_block_mode(m, ib) : DC_PRED; + m->bmi[ib].as_mode.first = + read_kf_bmode(r, cm->kf_bmode_prob[A][L]); + for (k = 1; k < bh; ++k) + m->bmi[ib + k * 2].as_mode.first = m->bmi[ib].as_mode.first; + for (k = 1; k < bw; ++k) + m->bmi[ib + k].as_mode.first = m->bmi[ib].as_mode.first; + } + } } m->mbmi.uv_mode = read_uv_mode(r, cm->kf_uv_mode_prob[m->mbmi.mode]); @@ -858,16 +872,19 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, if (mbmi->mode == I4X4_PRED) { #endif int idx, idy; - // FIXME(jingning): fix intra4x4 rate-distortion optimization, then - // use bw and bh as the increment values. -#if !CONFIG_AB4X4 || CONFIG_AB4X4 +#if !CONFIG_AB4X4 bw = 1, bh = 1; #endif for (idy = 0; idy < 2; idy += bh) { for (idx = 0; idx < 2; idx += bw) { + int ib = idy * 2 + idx, k; int m = read_sb_ymode(r, cm->fc.sb_ymode_prob); - mi->bmi[idy * 2 + idx].as_mode.first = m; + mi->bmi[ib].as_mode.first = m; cm->fc.sb_ymode_counts[m]++; + for (k = 1; k < bh; ++k) + mi->bmi[ib + k * 2].as_mode.first = m; + for (k = 1; k < bw; ++k) + mi->bmi[ib + k].as_mode.first = m; } } } diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 88943330f..a09180d16 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -740,9 +740,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, int idx, idy; int bw = 1 << b_width_log2(mi->sb_type); int bh = 1 << b_height_log2(mi->sb_type); - // FIXME(jingning): fix intra4x4 rate-distortion optimization, then - // use bw and bh as the increment values. 
-#if !CONFIG_AB4X4 || CONFIG_AB4X4 +#if !CONFIG_AB4X4 bw = 1, bh = 1; #endif for (idy = 0; idy < 2; idy += bh) @@ -892,6 +890,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, const VP9_COMMON *const c = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; const int ym = m->mbmi.mode; + const int mis = c->mode_info_stride; const int segment_id = m->mbmi.segment_id; int skip_coeff; @@ -906,8 +905,12 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, } #if CONFIG_AB4X4 - if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) - sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]); + if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) { + const B_PREDICTION_MODE A = above_block_mode(m, 0, mis); + const B_PREDICTION_MODE L = xd->left_available ? + left_block_mode(m, 0) : DC_PRED; + write_kf_bmode(bc, ym, c->kf_bmode_prob[A][L]); + } #else if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8) sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]); @@ -923,15 +926,19 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, int idx, idy; int bw = 1 << b_width_log2(m->mbmi.sb_type); int bh = 1 << b_height_log2(m->mbmi.sb_type); - // FIXME(jingning): fix intra4x4 rate-distortion optimization, then - // use bw and bh as the increment values. -#if !CONFIG_AB4X4 || CONFIG_AB4X4 +#if !CONFIG_AB4X4 bw = 1, bh = 1; #endif - for (idy = 0; idy < 2; idy += bh) - for (idx = 0; idx < 2; idx += bw) - sb_kfwrite_ymode(bc, m->bmi[idy * 2 + idx].as_mode.first, - c->sb_kf_ymode_prob[c->kf_ymode_probs_index]); + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + int i = idy * 2 + idx; + const B_PREDICTION_MODE A = above_block_mode(m, i, mis); + const B_PREDICTION_MODE L = (xd->left_available || idx) ? 
+ left_block_mode(m, i) : DC_PRED; + write_kf_bmode(bc, m->bmi[i].as_mode.first, + c->kf_bmode_prob[A][L]); + } + } } write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 6425ed658..8dec7141a 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1617,9 +1617,7 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { int idx, idy; int bw = 1 << b_width_log2(xd->mode_info_context->mbmi.sb_type); int bh = 1 << b_height_log2(xd->mode_info_context->mbmi.sb_type); - // FIXME(jingning): fix intra4x4 rate-distortion optimization, then - // use bw and bh as the increment values. -#if !CONFIG_AB4X4 || CONFIG_AB4X4 +#if !CONFIG_AB4X4 bw = 1, bh = 1; #endif for (idy = 0; idy < 2; idy += bh) { diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c index e26daf0c9..f937b19ae 100644 --- a/vp9/encoder/vp9_modecosts.c +++ b/vp9/encoder/vp9_modecosts.c @@ -33,10 +33,18 @@ void vp9_init_mode_costs(VP9_COMP *c) { x->fc.sub_mv_ref_prob[0], vp9_sub_mv_ref_tree); // TODO(rbultje) separate tables for superblock costing? 
+#if CONFIG_AB4X4 + vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.sb_ymode_prob, + vp9_sb_ymode_tree); + vp9_cost_tokens(c->mb.mbmode_cost[0], + x->sb_kf_ymode_prob[c->common.kf_ymode_probs_index], + vp9_sb_ymode_tree); +#else vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp9_ymode_tree); vp9_cost_tokens(c->mb.mbmode_cost[0], x->kf_ymode_prob[c->common.kf_ymode_probs_index], vp9_kf_ymode_tree); +#endif vp9_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree); vp9_cost_tokens(c->mb.intra_uv_mode_cost[0], diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e1c50e417..04e87927d 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -580,78 +580,104 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, - int *bestdistortion) { + int *bestdistortion, + BLOCK_SIZE_TYPE bsize) { +#if CONFIG_AB4X4 + MB_PREDICTION_MODE mode; +#else B_PREDICTION_MODE mode; +#endif MACROBLOCKD *xd = &x->e_mbd; int64_t best_rd = INT64_MAX; int rate = 0; int distortion; VP9_COMMON *const cm = &cpi->common; const int src_stride = x->plane[0].src.stride; - uint8_t* const src = - raster_block_offset_uint8(xd, - BLOCK_SIZE_SB8X8, - 0, ib, - x->plane[0].src.buf, src_stride); - int16_t* const src_diff = - raster_block_offset_int16(xd, - BLOCK_SIZE_SB8X8, - 0, ib, - x->plane[0].src_diff); - int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16); - uint8_t* const dst = - raster_block_offset_uint8(xd, - BLOCK_SIZE_SB8X8, - 0, ib, - xd->plane[0].dst.buf, xd->plane[0].dst.stride); - ENTROPY_CONTEXT ta = *a, tempa = *a; - ENTROPY_CONTEXT tl = *l, templ = *l; + uint8_t *src, *dst; + int16_t *src_diff, *coeff; + + ENTROPY_CONTEXT ta[2], tempa[2]; + ENTROPY_CONTEXT tl[2], templ[2]; TX_TYPE tx_type = DCT_DCT; TX_TYPE best_tx_type = DCT_DCT; - /* - * The predictor buffer is a 2d buffer with a stride of 16. 
Create - * a temp buffer that meets the stride requirements, but we are only - * interested in the left 4x4 block - * */ - DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); + int bw = 1 << b_width_log2(bsize); + int bh = 1 << b_height_log2(bsize); + int idx, idy, block; + DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]); assert(ib < 4); +#if !CONFIG_AB4X4 + bw = 1, bh = 1; +#endif + vpx_memcpy(ta, a, sizeof(ta)); + vpx_memcpy(tl, l, sizeof(tl)); xd->mode_info_context->mbmi.txfm_size = TX_4X4; + +#if CONFIG_AB4X4 + for (mode = DC_PRED; mode <= TM_PRED; ++mode) { +#else for (mode = B_DC_PRED; mode < LEFT4X4; mode++) { +#endif int64_t this_rd; - int ratey; + int ratey = 0; xd->mode_info_context->bmi[ib].as_mode.first = mode; - rate = bmode_costs[mode]; + if (cm->frame_type == KEY_FRAME) + rate = bmode_costs[mode]; + else + rate = x->mbmode_cost[cm->frame_type][mode]; + distortion = 0; - vp9_intra4x4_predict(xd, ib, - BLOCK_SIZE_SB8X8, - mode, dst, xd->plane[0].dst.stride); - vp9_subtract_block(4, 4, src_diff, 8, - src, src_stride, - dst, xd->plane[0].dst.stride); + vpx_memcpy(tempa, ta, sizeof(ta)); + vpx_memcpy(templ, tl, sizeof(tl)); - xd->mode_info_context->bmi[ib].as_mode.first = mode; - tx_type = get_tx_type_4x4(xd, ib); - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(src_diff, coeff, 8, tx_type); - x->quantize_b_4x4(x, ib, tx_type, 16); - } else { - x->fwd_txm4x4(src_diff, coeff, 16); - x->quantize_b_4x4(x, ib, tx_type, 16); - } + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + block = ib + idy * 2 + idx; + xd->mode_info_context->bmi[block].as_mode.first = mode; + src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, + x->plane[0].src.buf, src_stride); + src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block, + x->plane[0].src_diff); + coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16); + dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); + 
vp9_intra4x4_predict(xd, block, + BLOCK_SIZE_SB8X8, + mode, dst, xd->plane[0].dst.stride); + vp9_subtract_block(4, 4, src_diff, 8, + src, src_stride, + dst, xd->plane[0].dst.stride); + + tx_type = get_tx_type_4x4(xd, block); + if (tx_type != DCT_DCT) { + vp9_short_fht4x4(src_diff, coeff, 8, tx_type); + x->quantize_b_4x4(x, block, tx_type, 16); + } else { + x->fwd_txm4x4(src_diff, coeff, 16); + x->quantize_b_4x4(x, block, tx_type, 16); + } - tempa = ta; - templ = tl; + ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC, + tempa + idx, templ + idy, TX_4X4, 16); + distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, + block, 16), 16) >> 2; - ratey = cost_coeffs(cm, x, 0, ib, - PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16); - rate += ratey; - distortion = vp9_block_error(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), - 16) >> 2; + vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode, + dst, xd->plane[0].dst.stride); + + if (best_tx_type != DCT_DCT) + vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), + dst, xd->plane[0].dst.stride, best_tx_type); + else + xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), + dst, xd->plane[0].dst.stride); + } + } + rate += ratey; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { @@ -661,25 +687,37 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, best_rd = this_rd; *best_mode = mode; best_tx_type = tx_type; - *a = tempa; - *l = templ; - vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32); + vpx_memcpy(a, tempa, sizeof(tempa)); + vpx_memcpy(l, templ, sizeof(templ)); + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + block = ib + idy * 2 + idx; + vpx_memcpy(best_dqcoeff[idy * 2 + idx], + BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), + sizeof(best_dqcoeff[0])); + } + } } } - xd->mode_info_context->bmi[ib].as_mode.first = - (B_PREDICTION_MODE)(*best_mode); - - 
vp9_intra4x4_predict(xd, ib, - BLOCK_SIZE_SB8X8, - *best_mode, - dst, xd->plane[0].dst.stride); - - // inverse transform - if (best_tx_type != DCT_DCT) { - vp9_short_iht4x4_add(best_dqcoeff, dst, xd->plane[0].dst.stride, - best_tx_type); - } else { - xd->inv_txm4x4_add(best_dqcoeff, dst, xd->plane[0].dst.stride); + + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + block = ib + idy * 2 + idx; + xd->mode_info_context->bmi[block].as_mode.first = *best_mode; + dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); + + vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode, + dst, xd->plane[0].dst.stride); + // inverse transform + if (best_tx_type != DCT_DCT) + vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst, + xd->plane[0].dst.stride, best_tx_type); + else + xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst, + xd->plane[0].dst.stride); + } } return best_rd; @@ -688,8 +726,12 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rate, int *rate_y, int *Distortion, int64_t best_rd) { - int i; + int i, j; MACROBLOCKD *const xd = &mb->e_mbd; + BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; + int bw = 1 << b_width_log2(bsize); + int bh = 1 << b_height_log2(bsize); + int idx, idy; #if CONFIG_AB4X4 int cost = 0; #else @@ -698,7 +740,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int distortion = 0; int tot_rate_y = 0; int64_t total_rd = 0; - ENTROPY_CONTEXT t_above[2], t_left[2]; + ENTROPY_CONTEXT t_above[4], t_left[4]; int *bmode_costs; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); @@ -707,31 +749,43 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, xd->mode_info_context->mbmi.mode = I4X4_PRED; bmode_costs = mb->inter_bmode_costs; - for (i = 0; i < 4; i++) { - const int x_idx = i & 1, y_idx = 
i >> 1; - MODE_INFO *const mic = xd->mode_info_context; - const int mis = xd->mode_info_stride; - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); - int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); +#if !CONFIG_AB4X4 + bw = 1, bh = 1; +#endif - if (xd->frame_type == KEY_FRAME) { - const B_PREDICTION_MODE A = above_block_mode(mic, i, mis); - const B_PREDICTION_MODE L = left_block_mode(mic, i); + for (idy = 0; idy < 2; idy += bh) { + for (idx = 0; idx < 2; idx += bw) { + MODE_INFO *const mic = xd->mode_info_context; + const int mis = xd->mode_info_stride; + B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); + int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry); + int UNINITIALIZED_IS_SAFE(d); + i = idy * 2 + idx; - bmode_costs = mb->bmode_costs[A][L]; - } + if (xd->frame_type == KEY_FRAME) { + const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis); + const MB_PREDICTION_MODE L = (xd->left_available || idx) ? + left_block_mode(mic, i) : DC_PRED; - total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, - t_above + x_idx, t_left + y_idx, - &r, &ry, &d); - cost += r; - distortion += d; - tot_rate_y += ry; + bmode_costs = mb->bmode_costs[A][L]; + } - mic->bmi[i].as_mode.first = best_mode; + total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, + t_above + idx, t_left + idy, + &r, &ry, &d, bsize); + cost += r; + distortion += d; + tot_rate_y += ry; - if (total_rd >= best_rd) - break; + mic->bmi[i].as_mode.first = best_mode; + for (j = 1; j < bh; ++j) + mic->bmi[i + j * 2].as_mode.first = best_mode; + for (j = 1; j < bw; ++j) + mic->bmi[i + j].as_mode.first = best_mode; + + if (total_rd >= best_rd) + break; + } } if (total_rd >= best_rd) @@ -751,6 +805,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t txfm_cache[NB_TXFM_MODES]) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); + MACROBLOCKD *xd = &x->e_mbd; int this_rate, 
this_rate_tokenonly; int this_distortion, s; int64_t best_rd = INT64_MAX, this_rd; @@ -770,13 +825,20 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, /* Y Search for 32x32 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { int64_t local_txfm_cache[NB_TXFM_MODES]; + MODE_INFO *const mic = xd->mode_info_context; + const int mis = xd->mode_info_stride; + const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis); + const MB_PREDICTION_MODE L = xd->left_available ? + left_block_mode(mic, 0) : DC_PRED; + + int *bmode_costs = x->bmode_costs[A][L]; x->e_mbd.mode_info_context->mbmi.mode = mode; vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize); super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, bsize, local_txfm_cache); - this_rate = this_rate_tokenonly + x->mbmode_cost[x->e_mbd.frame_type][mode]; + this_rate = this_rate_tokenonly + bmode_costs[mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) {