From: Jingning Han Date: Thu, 28 Aug 2014 16:09:37 +0000 (-0700) Subject: Extend block level sse to support multiple txfm blocks X-Git-Tag: v1.4.0~885^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=02e6ecdc4c0bc1216f6748fe95ff6864d0f8304e;p=libvpx Extend block level sse to support multiple txfm blocks This commit extends the sse and forward transform computation flag to support the case of 64x64 blocks, where there are 4 32x32 2D-DCT blocks. Change-Id: I86a3e805dfaa0f3abd812f590520c71aa0e40473 --- diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 46f463a01..aef20f272 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -112,9 +112,9 @@ struct macroblock { int quant_fp; // skip forward transform and quantization - int skip_txfm[MAX_MB_PLANE]; + int skip_txfm[MAX_MB_PLANE << 2]; - int64_t bsse[MAX_MB_PLANE]; + int64_t bsse[MAX_MB_PLANE << 2]; // Used to store sub partition's choices. MV pred_mv[MAX_REF_FRAMES]; diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h index 0cbb24429..fccdaf5de 100644 --- a/vp9/encoder/vp9_context_tree.h +++ b/vp9/encoder/vp9_context_tree.h @@ -33,10 +33,10 @@ typedef struct { int is_coded; int num_4x4_blk; int skip; - int skip_txfm[MAX_MB_PLANE]; // For current partition, only if all Y, U, and V transform blocks' // coefficients are quantized to 0, skippable is set to 0. 
int skippable; + int skip_txfm[MAX_MB_PLANE << 2]; int best_mode_index; int hybrid_pred_diff; int comp_pred_diff; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 8a737e18e..667845072 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -476,20 +476,24 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, } if (!x->skip_recode) { - if (x->skip_txfm[plane] == 0) { - // full forward transform and quantization - if (x->quant_fp) - vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size); - else - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); - } else if (x->skip_txfm[plane] == 2) { - // fast path forward transform and quantization - vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + if (max_txsize_lookup[plane_bsize] == tx_size) { + if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) { + // full forward transform and quantization + if (x->quant_fp) + vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size); + else + vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) { + // fast path forward transform and quantization + vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + } else { + // skip forward transform + p->eobs[block] = 0; + *a = *l = 0; + return; + } } else { - // skip forward transform - p->eobs[block] = 0; - *a = *l = 0; - return; + vp9_xform_quant(x, plane, block, plane_bsize, tx_size); } } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index f73006ef7..d3a1db8bd 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -172,6 +172,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; unsigned int var = 0; + int64_t sum_sse = 0; const int shift = 8; int rate; int64_t dist; @@ -190,31 +191,33 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int lw = 
b_width_log2_lookup[unit_size] + 2; int lh = b_height_log2_lookup[unit_size] + 2; - x->bsse[i] = 0; + sum_sse = 0; for (idy = 0; idy < bh; ++idy) { for (idx = 0; idx < bw; ++idx) { uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw); uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh); + int block_idx = (idy << 1) + idx; + + var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, + dst, pd->dst.stride, &sse); + x->bsse[(i << 2) + block_idx] = sse; + sum_sse += sse; + + if (!x->select_tx_size) { + if (x->bsse[(i << 2) + block_idx] < p->quant_thred[0] >> shift) + x->skip_txfm[(i << 2) + block_idx] = 1; + else if (var < p->quant_thred[1] >> shift) + x->skip_txfm[(i << 2) + block_idx] = 2; + else + x->skip_txfm[(i << 2) + block_idx] = 0; + } - var += cpi->fn_ptr[unit_size].vf(src , p->src.stride, - dst, pd->dst.stride, &sse); - - x->bsse[i] += sse; if (i == 0) x->pred_sse[ref] += sse; } } - if (!x->select_tx_size) { - if (x->bsse[i] < p->quant_thred[0] >> shift) - x->skip_txfm[i] = 1; - else if (var < p->quant_thred[1] >> shift) - x->skip_txfm[i] = 2; - else - x->skip_txfm[i] = 0; - } - // Fast approximate the modelling function. 
if (cpi->oxcf.speed > 4) { int64_t rate; @@ -230,7 +233,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, rate_sum += rate; dist_sum += dist; } else { - vp9_model_rd_from_var_lapndz(x->bsse[i], 1 << num_pels_log2_lookup[bs], + vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs], pd->dequant[1] >> 3, &rate, &dist); rate_sum += rate; dist_sum += dist; @@ -390,17 +393,17 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, if (!is_inter_block(mbmi)) { vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip); dist_block(plane, block, tx_size, args); - } else { - if (x->skip_txfm[plane] == 0) { + } else if (max_txsize_lookup[plane_bsize] == tx_size) { + if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) { // full forward transform and quantization vp9_xform_quant(x, plane, block, plane_bsize, tx_size); dist_block(plane, block, tx_size, args); - } else if (x->skip_txfm[plane] == 2) { + } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) { // compute DC coefficient int16_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); int16_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); - args->sse = x->bsse[plane] << 4; + args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; args->dist = args->sse; if (!x->plane[plane].eobs[block]) args->dist = args->sse - ((coeff[0] * coeff[0] - @@ -408,9 +411,13 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, } else { // skip forward transform x->plane[plane].eobs[block] = 0; - args->sse = x->bsse[plane] << 4; + args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; args->dist = args->sse; } + } else { + // full forward transform and quantization + vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + dist_block(plane, block, tx_size, args); } rate_block(plane, block, plane_bsize, tx_size, args); @@ 
-2166,8 +2173,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int orig_dst_stride[MAX_MB_PLANE]; int rs = 0; INTERP_FILTER best_filter = SWITCHABLE; - int skip_txfm[MAX_MB_PLANE] = {0}; - int64_t bsse[MAX_MB_PLANE] = {0}; + int skip_txfm[MAX_MB_PLANE << 2] = {0}; + int64_t bsse[MAX_MB_PLANE << 2] = {0}; int bsl = mi_width_log2_lookup[bsize]; int pred_filter_search = cpi->sf.cb_pred_filter_search ?