From: Guillaume Martres Date: Mon, 7 Oct 2013 18:20:10 +0000 (+0100) Subject: Implement variance-based adaptive quantization X-Git-Tag: v1.3.0~209 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e55f60240ac62455c61160e824660c0d22f8f7ed;p=libvpx Implement variance-based adaptive quantization This should be similar to what x264 does with --aq-mode 1. It works well with clips like parkjoy and touhou (http://x264.nl/developers/Dark_Shikari/LosslessTouhou.mkv). At low bitrates, the segmentation signaling overhead may negate the benefits of this feature. (PGW) Default changed to feature OFF to allow provisional merge. Change-Id: I938abf9bb487e1d4ad3b0264ea03d9826275c70b --- diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 482c50b55..1b5d09810 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -90,6 +90,7 @@ struct macroblock { int sadperbit4; int rddiv; int rdmult; + unsigned int mb_energy; unsigned int *mb_activity_ptr; int *mb_norm_activity_ptr; signed int act_zbin_adj; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 9a5256088..15a3a70e3 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -38,7 +38,10 @@ #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" +#include "vp9/common/vp9_systemdependent.h" #include "vp9/encoder/vp9_tokenize.h" +#include "vp9/encoder/vp9_vaq.h" + #define DBG_PRNT_SEGMAP 0 @@ -372,6 +375,10 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) xd->mi_8x8[x_idx + y * mis] = mi_addr; + if (cpi->sf.variance_adaptive_quantization) { + vp9_mb_init_quantizer(cpi, x); + } + // FIXME(rbultje) I'm pretty sure this should go to the end of this block // (i.e. 
after the output_enabled) if (bsize < BLOCK_32X32) { @@ -517,10 +524,11 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col, /* segment ID */ if (seg->enabled) { - uint8_t *map = seg->update_map ? cpi->segmentation_map - : cm->last_frame_seg_map; - mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); - + if (!cpi->sf.variance_adaptive_quantization) { + uint8_t *map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } vp9_mb_init_quantizer(cpi, x); if (seg->enabled && cpi->seg0_cnt > 0 @@ -554,6 +562,8 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; + int orig_rdmult = x->rdmult; + double rdmult_ratio = 1.0; // Use the lower precision, but faster, 32x32 fdct for mode selection. x->use_lp32x32fdct = 1; @@ -576,9 +586,24 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, x->source_variance = get_sby_perpixel_variance(cpi, x, bsize); + if (cpi->sf.variance_adaptive_quantization) { + int energy; + if (bsize <= BLOCK_16X16) { + energy = x->mb_energy; + } else { + energy = vp9_block_energy(cpi, x, bsize); + } + + xd->this_mi->mbmi.segment_id = vp9_vaq_segment_id(energy); + rdmult_ratio = vp9_vaq_rdmult_ratio(energy); + vp9_mb_init_quantizer(cpi, x); + } + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp9_activity_masking(cpi, x); + x->rdmult = round(x->rdmult * rdmult_ratio); + // Find best coding mode & reconstruct the MB so it is available // as a predictor for MBs that follow in the SB if (frame_is_intra_only(cm)) { @@ -592,6 +617,10 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, vp9_rd_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, totalrate, totaldist, bsize, ctx, best_rd); } + + x->rdmult = orig_rdmult; + if (*totalrate != INT_MAX) + *totalrate = round(*totalrate * rdmult_ratio); } static 
void update_stats(VP9_COMP *cpi) { @@ -1009,6 +1038,11 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8, } save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + if (bsize == BLOCK_16X16) { + set_offsets(cpi, mi_row, mi_col, bsize); + x->mb_energy = vp9_block_energy(cpi, x, bsize); + } + x->fast_ms = 0; x->subblock_ref = 0; @@ -1469,6 +1503,11 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, } assert(mi_height_log2(bsize) == mi_width_log2(bsize)); + if (bsize == BLOCK_16X16) { + set_offsets(cpi, mi_row, mi_col, bsize); + x->mb_energy = vp9_block_energy(cpi, x, bsize); + } + // Determine partition types in search according to the speed features. // The threshold set here has to be of square block size. if (cpi->sf.auto_min_max_partition_size) { @@ -1917,7 +1956,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_frame_init_quantizer(cpi); - vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y_dc_delta_q); + vp9_initialize_rd_consts(cpi); vp9_initialize_me_consts(cpi, cm->base_qindex); switch_tx_mode(cpi); diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index b2becbb44..aed7a95a5 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -30,6 +30,7 @@ #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/encoder/vp9_encodemv.h" +#include "vp9/encoder/vp9_vaq.h" #include "./vpx_scale_rtcd.h" // TODO(jkoleszar): for setup_dst_planes #include "vp9/common/vp9_reconinter.h" @@ -530,7 +531,7 @@ void vp9_first_pass(VP9_COMP *cpi) { // if ( 0 ) { vp9_init_mv_probs(cm); - vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y_dc_delta_q); + vp9_initialize_rd_consts(cpi); } // for each macroblock row in image @@ -555,6 +556,7 @@ void vp9_first_pass(VP9_COMP *cpi) { int this_error; int gf_motion_error = INT_MAX; int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); + double error_weight = 1.0; xd->plane[0].dst.buf = new_yv12->y_buffer 
+ recon_yoffset; xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; @@ -581,8 +583,13 @@ void vp9_first_pass(VP9_COMP *cpi) { mb_col << 1, 1 << mi_width_log2(xd->this_mi->mbmi.sb_type)); + if (cpi->sf.variance_adaptive_quantization) { + int energy = vp9_block_energy(cpi, x, xd->this_mi->mbmi.sb_type); + error_weight = vp9_vaq_inv_q_ratio(energy); + } + // do intra 16x16 prediction - this_error = vp9_encode_intra(x, use_dc_pred); + this_error = error_weight * vp9_encode_intra(x, use_dc_pred); // intrapenalty below deals with situations where the intra and inter // error scores are very low (eg a plain black frame). @@ -617,6 +624,7 @@ void vp9_first_pass(VP9_COMP *cpi) { first_pass_motion_search(cpi, x, &best_ref_mv, &mv.as_mv, lst_yv12, &motion_error, recon_yoffset); + motion_error *= error_weight; // If the current best reference mv is not centered on 0,0 then do a 0,0 // based search as well. @@ -624,6 +632,7 @@ void vp9_first_pass(VP9_COMP *cpi) { tmp_err = INT_MAX; first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv.as_mv, lst_yv12, &tmp_err, recon_yoffset); + tmp_err *= error_weight; if (tmp_err < motion_error) { motion_error = tmp_err; @@ -640,6 +649,7 @@ void vp9_first_pass(VP9_COMP *cpi) { first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv.as_mv, gld_yv12, &gf_motion_error, recon_yoffset); + gf_motion_error *= error_weight; if ((gf_motion_error < motion_error) && (gf_motion_error < this_error)) { diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 2afbf4b08..43e379d32 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -33,6 +33,7 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_temporal_filter.h" +#include "vp9/encoder/vp9_vaq.h" #include "vpx_ports/vpx_timer.h" @@ -315,7 +316,7 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { // Computes a q delta (in "q index" terms) to get from a starting q value // to a target value // target q 
value -static int compute_qdelta(VP9_COMP *cpi, double qstart, double qtarget) { +int vp9_compute_qdelta(VP9_COMP *cpi, double qstart, double qtarget) { int i; int start_index = cpi->worst_quality; int target_index = cpi->worst_quality; @@ -379,7 +380,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_map = 1; seg->update_data = 1; - qi_delta = compute_qdelta(cpi, cpi->avg_q, (cpi->avg_q * 0.875)); + qi_delta = vp9_compute_qdelta(cpi, cpi->avg_q, (cpi->avg_q * 0.875)); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2)); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); @@ -400,8 +401,8 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_data = 1; seg->abs_delta = SEGMENT_DELTADATA; - qi_delta = compute_qdelta(cpi, cpi->avg_q, - (cpi->avg_q * 1.125)); + qi_delta = vp9_compute_qdelta(cpi, cpi->avg_q, + (cpi->avg_q * 1.125)); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2)); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); @@ -756,6 +757,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->static_segmentation = 0; #endif + sf->variance_adaptive_quantization = 0; + switch (mode) { case 0: // This is the best quality mode. break; @@ -2631,8 +2634,8 @@ static int pick_q_and_adjust_q_bounds(VP9_COMP *cpi, int qindex = cpi->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - delta_qindex = compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, + (last_boosted_q * 0.75)); cpi->active_best_quality = MAX(qindex + delta_qindex, cpi->best_quality); @@ -2660,14 +2663,14 @@ static int pick_q_and_adjust_q_bounds(VP9_COMP *cpi, // on active_best_quality. q_val = vp9_convert_qindex_to_q(cpi->active_best_quality); cpi->active_best_quality += - compute_qdelta(cpi, q_val, (q_val * q_adj_factor)); + vp9_compute_qdelta(cpi, q_val, (q_val * q_adj_factor)); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. 
current_q = vp9_convert_qindex_to_q(cpi->active_worst_quality); cpi->active_best_quality = cpi->active_worst_quality - + compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); #endif } else if (!cpi->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -2945,7 +2948,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set quantizer steps at 10% increments. new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); - q = cpi->active_worst_quality + compute_qdelta(cpi, current_q, new_q); + q = cpi->active_worst_quality + vp9_compute_qdelta(cpi, current_q, new_q); bottom_index = q; top_index = q; @@ -3020,6 +3023,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } } + if (cpi->sf.variance_adaptive_quantization) { + vp9_vaq_frame_setup(cpi); + } + // transform / motion compensation build reconstruction frame vp9_encode_frame(cpi); @@ -3802,6 +3809,10 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm); + if (cpi->sf.variance_adaptive_quantization) { + vp9_vaq_init(); + } + if (cpi->pass == 1) { Pass1Encode(cpi, size, dest, frame_flags); } else if (cpi->pass == 2) { diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index f88ae8ad0..2e5c7bc7e 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -253,6 +253,7 @@ typedef struct { int auto_mv_step_size; int optimize_coefficients; int static_segmentation; + int variance_adaptive_quantization; int comp_inter_joint_search_thresh; int adaptive_rd_thresh; int skip_encode_sb; @@ -379,9 +380,9 @@ typedef struct VP9_COMP { int ref_frame_mask; int set_ref_frame_mask; - int rd_threshes[BLOCK_SIZES][MAX_MODES]; + int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; - int rd_thresh_sub8x8[BLOCK_SIZES][MAX_REFS]; + int 
rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS]; int rd_thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS]; int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES]; @@ -710,6 +711,8 @@ int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); void vp9_alloc_compressor_data(VP9_COMP *cpi); +int vp9_compute_qdelta(VP9_COMP *cpi, double qstart, double qtarget); + static int get_token_alloc(int mb_rows, int mb_cols) { return mb_rows * mb_cols * (48 * 16 + 4); } diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 05e893ee9..048a6e1eb 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -12,6 +12,7 @@ #include "vpx_mem/vpx_mem.h" #include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/common/vp9_quant_common.h" @@ -271,12 +272,15 @@ void vp9_init_quantizer(VP9_COMP *cpi) { void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { int i; + VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; int zbin_extra; int segment_id = xd->this_mi->mbmi.segment_id; const int qindex = vp9_get_qindex(&cpi->common.seg, segment_id, cpi->common.base_qindex); + int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); + // Y zbin_extra = (cpi->common.y_dequant[qindex][1] * (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7; @@ -315,6 +319,12 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { /* save this macroblock QIndex for vp9_update_zbin_extra() */ x->e_mbd.q_index = qindex; + + /* R/D setup */ + cpi->mb.errorperbit = rdmult >> 6; + cpi->mb.errorperbit += (cpi->mb.errorperbit == 0); + + vp9_initialize_me_consts(cpi, xd->q_index); } void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index c8f440cf9..56a080377 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -161,10 +161,17 @@ void vp9_init_me_luts() { } } -static int compute_rd_mult(int qindex) { 
+int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) { const int q = vp9_dc_quant(qindex, 0); // TODO(debargha): Adjust the function below - return (88 * q * q / 25); + int rdmult = 88 * q * q / 25; + if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { + if (cpi->twopass.next_iiratio > 31) + rdmult += (rdmult * rd_iifactor[31]) >> 4; + else + rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; + } + return rdmult; } static int compute_rd_thresh_factor(int qindex) { @@ -181,41 +188,47 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { cpi->mb.sadperbit4 = sad_per_bit4lut[qindex]; } -static void set_block_thresholds(VP9_COMP *cpi, int qindex) { - int q, i, bsize; - q = compute_rd_thresh_factor(qindex); +static void set_block_thresholds(VP9_COMP *cpi) { + int i, bsize, segment_id; + VP9_COMMON *cm = &cpi->common; + + for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { + int q; + int segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); + segment_qindex = clamp(segment_qindex + cm->y_dc_delta_q, 0, MAXQ); + q = compute_rd_thresh_factor(segment_qindex); - for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { - for (i = 0; i < MAX_MODES; ++i) { + for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { // Threshold here seem unecessarily harsh but fine given actual // range of values used for cpi->sf.thresh_mult[] int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); - if (cpi->sf.thresh_mult[i] < thresh_max) { - cpi->rd_threshes[bsize][i] = - cpi->sf.thresh_mult[i] * q * - rd_thresh_block_size_factor[bsize] / 4; - } else { - cpi->rd_threshes[bsize][i] = INT_MAX; + for (i = 0; i < MAX_MODES; ++i) { + if (cpi->sf.thresh_mult[i] < thresh_max) { + cpi->rd_threshes[segment_id][bsize][i] = + cpi->sf.thresh_mult[i] * q * + rd_thresh_block_size_factor[bsize] / 4; + } else { + cpi->rd_threshes[segment_id][bsize][i] = INT_MAX; + } } - } - - for (i = 0; i < MAX_REFS; ++i) { - int thresh_max = INT_MAX / (q * 
rd_thresh_block_size_factor[bsize]); - if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) { - cpi->rd_thresh_sub8x8[bsize][i] = - cpi->sf.thresh_mult_sub8x8[i] * q * - rd_thresh_block_size_factor[bsize] / 4; - } else { - cpi->rd_thresh_sub8x8[bsize][i] = INT_MAX; + for (i = 0; i < MAX_REFS; ++i) { + if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) { + cpi->rd_thresh_sub8x8[segment_id][bsize][i] = + cpi->sf.thresh_mult_sub8x8[i] * q * + rd_thresh_block_size_factor[bsize] / 4; + } else { + cpi->rd_thresh_sub8x8[segment_id][bsize][i] = INT_MAX; + } } } } } -void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { - int i; +void vp9_initialize_rd_consts(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + int qindex, i; vp9_clear_system_state(); // __asm emms; @@ -223,23 +236,17 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { // for key frames, golden frames and arf frames. // if (cpi->common.refresh_golden_frame || // cpi->common.refresh_alt_ref_frame) - qindex = clamp(qindex, 0, MAXQ); + qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ); cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128) - cpi->RDMULT = compute_rd_mult(qindex); - if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { - if (cpi->twopass.next_iiratio > 31) - cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4; - else - cpi->RDMULT += - (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; - } + cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex); + cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO; cpi->mb.errorperbit += (cpi->mb.errorperbit == 0); vp9_set_speed_features(cpi); - set_block_thresholds(cpi, qindex); + set_block_thresholds(cpi); fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs); @@ -3264,9 +3271,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; // Test best rd so far against threshold for trying this mode. 
- if ((best_rd < ((int64_t)cpi->rd_threshes[bsize][mode_index] * + if ((best_rd < ((int64_t)cpi->rd_threshes[segment_id][bsize][mode_index] * cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) || - cpi->rd_threshes[bsize][mode_index] == INT_MAX) + cpi->rd_threshes[segment_id][bsize][mode_index] == INT_MAX) continue; // Do not allow compound prediction if the segment level reference @@ -3934,9 +3941,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, continue; // Test best rd so far against threshold for trying this mode. - if ((best_rd < ((int64_t)cpi->rd_thresh_sub8x8[bsize][mode_index] * - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) || - cpi->rd_thresh_sub8x8[bsize][mode_index] == INT_MAX) + if ((best_rd < + ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] * + cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) || + cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX) continue; // Do not allow compound prediction if the segment level reference @@ -4081,10 +4089,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int uv_skippable; this_rd_thresh = (ref_frame == LAST_FRAME) ? - cpi->rd_thresh_sub8x8[bsize][THR_LAST] : - cpi->rd_thresh_sub8x8[bsize][THR_ALTR]; + cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] : + cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR]; this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? 
- cpi->rd_thresh_sub8x8[bsize][THR_GOLD] : this_rd_thresh; + cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh; xd->this_mi->mbmi.tx_size = TX_4X4; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX; diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index aa4068d76..0b0bb18d7 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -18,7 +18,9 @@ (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM)) #define QIDX_SKIP_THRESH 115 -void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex); +int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex); + +void vp9_initialize_rd_consts(VP9_COMP *cpi); void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); diff --git a/vp9/encoder/vp9_vaq.c b/vp9/encoder/vp9_vaq.c new file mode 100644 index 000000000..3d3b4b0f1 --- /dev/null +++ b/vp9/encoder/vp9_vaq.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <math.h> + +#include "vp9/encoder/vp9_vaq.h" + +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_rdopt.h" +#include "vp9/encoder/vp9_segmentation.h" +#include "vp9/common/vp9_systemdependent.h" + +#define ENERGY_MIN (-3) +#define ENERGY_MAX (3) +#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1) +#define ENERGY_IN_BOUNDS(energy)\ + assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX) + +static double q_ratio[MAX_SEGMENTS] = { 1, 1, 1, 1, 1, 1, 1, 1 }; +static double rdmult_ratio[MAX_SEGMENTS] = { 1, 1, 1, 1, 1, 1, 1, 1 }; +static int segment_id[MAX_SEGMENTS] = { 5, 3, 1, 0, 2, 4, 6, 7 }; + +#define Q_RATIO(i) q_ratio[(i) - ENERGY_MIN] +#define RDMULT_RATIO(i) rdmult_ratio[(i) - ENERGY_MIN] +#define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN] + +DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = {0}; + +unsigned int vp9_vaq_segment_id(int energy) { + ENERGY_IN_BOUNDS(energy); + return SEGMENT_ID(energy); +} + +double vp9_vaq_rdmult_ratio(int energy) { + ENERGY_IN_BOUNDS(energy); + return RDMULT_RATIO(energy); +} + +double vp9_vaq_inv_q_ratio(int energy) { + ENERGY_IN_BOUNDS(energy); + return Q_RATIO(-energy); +} + +void vp9_vaq_init() { + int i; + double base_ratio = 1.8; + + assert(ENERGY_SPAN <= MAX_SEGMENTS); + + for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) { + Q_RATIO(i) = pow(base_ratio, i/3.0); + } +} + +void vp9_vaq_frame_setup(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + struct segmentation *seg = &cm->seg; + int base_q = vp9_convert_qindex_to_q(cm->base_qindex); + int base_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + + cm->y_dc_delta_q); + int i; + + vp9_enable_segmentation((VP9_PTR)cpi); + vp9_clearall_segfeatures(seg); + + seg->abs_delta = SEGMENT_DELTADATA; + + for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) { + int qindex_delta, segment_rdmult; + + if (Q_RATIO(i) == 1) { + // No need to enable SEG_LVL_ALT_Q for this segment + RDMULT_RATIO(i) = 1; + continue; + } + + 
qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i)); + vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta); + vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q); + + segment_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + qindex_delta + + cm->y_dc_delta_q); + RDMULT_RATIO(i) = (double) segment_rdmult / base_rdmult; + } +} + + +static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bs) { + MACROBLOCKD *xd = &x->e_mbd; + unsigned int var, sse; + int right_overflow = (xd->mb_to_right_edge < 0) ? + ((-xd->mb_to_right_edge) >> 3) : 0; + int bottom_overflow = (xd->mb_to_bottom_edge < 0) ? + ((-xd->mb_to_bottom_edge) >> 3) : 0; + + if (right_overflow || bottom_overflow) { + int bw = (1 << (mi_width_log2(bs) + 3)) - right_overflow; + int bh = (1 << (mi_height_log2(bs) + 3)) - bottom_overflow; + int avg; + variance(x->plane[0].src.buf, x->plane[0].src.stride, + vp9_64_zeros, 0, bw, bh, &sse, &avg); + var = sse - (((int64_t)avg * avg) / (bw * bh)); + return (256 * var) / (bw * bh); + } else { + var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, + x->plane[0].src.stride, + vp9_64_zeros, 0, &sse); + return (256 * var) >> num_pels_log2_lookup[bs]; + } +} + +int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { + // if (var <= 1000) + // return 0; + unsigned int var = block_variance(cpi, x, bs); + double energy = 0.9*(logf(var + 1) - 10.0); + return clamp(round(energy), ENERGY_MIN, ENERGY_MAX); +} diff --git a/vp9/encoder/vp9_vaq.h b/vp9/encoder/vp9_vaq.h new file mode 100644 index 000000000..dc18b22f2 --- /dev/null +++ b/vp9/encoder/vp9_vaq.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_ENCODER_VP9_CONFIG_VAQ_H_ +#define VP9_ENCODER_VP9_CONFIG_VAQ_H_ + +#include "vp9/encoder/vp9_onyx_int.h" + +unsigned int vp9_vaq_segment_id(int energy); +double vp9_vaq_rdmult_ratio(int energy); +double vp9_vaq_inv_q_ratio(int energy); + +void vp9_vaq_init(); +void vp9_vaq_frame_setup(VP9_COMP *cpi); + +int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); + +#endif // VP9_ENCODER_VP9_CONFIG_VAQ_H_ diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 61031e064..2ded97c55 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -14,6 +14,15 @@ #include "vpx/vpx_integer.h" // #include "./vpx_config.h" +void variance(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + int w, + int h, + unsigned int *sse, + int *sum); + typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index 991ef4d29..01d1269af 100644 --- a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -18,14 +18,14 @@ #include "vp9/common/vp9_filter.h" #include "vp9/encoder/vp9_variance.h" -static void variance(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - int w, - int h, - unsigned int *sse, - int *sum) { +void variance(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + int w, + int h, + unsigned int *sse, + int *sum) { int i, j; int diff; diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 9fbf100f5..b454eee02 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -64,6 +64,8 @@ VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c VP9_CX_SRCS-yes += encoder/vp9_tokenize.c VP9_CX_SRCS-yes += encoder/vp9_treewriter.c VP9_CX_SRCS-yes += encoder/vp9_variance_c.c 
+VP9_CX_SRCS-yes += encoder/vp9_vaq.c +VP9_CX_SRCS-yes += encoder/vp9_vaq.h ifeq ($(CONFIG_VP9_POSTPROC),yes) VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c