From: Deb Mukherjee Date: Wed, 10 Jul 2013 23:51:07 +0000 (-0700) Subject: Some minor cleanups for efficiency X-Git-Tag: v1.3.0~879 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=94c481f9f17828db0d4b8b4f2549bc5ff1075cf8;p=libvpx Some minor cleanups for efficiency Implements some of the helper functions more efficiently with lookups rathers than branches. Modeling function is consolidated to reduce some computations. Also merged the two enums BLOCK_SIZE_TYPES and BlockSize into one because there is no need to keep them separate (even though the semantics are a little different). No bitstream or output change. About 0.5% speedup Change-Id: I7d71a66e8031ddb340744dc493f22976052b8f9f --- diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index e67250b0f..9ec6c187b 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -91,22 +91,6 @@ static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) { return mode >= NEARESTMV && mode <= NEWMV; } -// Segment level features. -typedef enum { - TX_4X4 = 0, // 4x4 dct transform - TX_8X8 = 1, // 8x8 dct transform - TX_16X16 = 2, // 16x16 dct transform - TX_32X32 = 3, // 32x32 dct transform - TX_SIZE_MAX_SB, // Number of transforms available to SBs -} TX_SIZE; - -typedef enum { - DCT_DCT = 0, // DCT in both horizontal and vertical - ADST_DCT = 1, // ADST in vertical, DCT in horizontal - DCT_ADST = 2, // DCT in vertical, ADST in horizontal - ADST_ADST = 3 // ADST in both directions -} TX_TYPE; - #define VP9_INTRA_MODES (TM_PRED + 1) #define VP9_INTER_MODES (1 + NEWMV - NEARESTMV) @@ -293,7 +277,7 @@ typedef struct macroblockd { } MACROBLOCKD; -static int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) { +static INLINE int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) { switch (subsize) { case BLOCK_SIZE_SB64X64: case BLOCK_SIZE_SB64X32: @@ -361,49 +345,8 @@ static INLINE int partition_plane_context(MACROBLOCKD *xd, static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize, PARTITION_TYPE partition) { - BLOCK_SIZE_TYPE subsize = bsize; - switch (partition) { - case PARTITION_NONE: - break; - case PARTITION_HORZ: - if (bsize == BLOCK_SIZE_SB64X64) - subsize = BLOCK_SIZE_SB64X32; - else if (bsize == BLOCK_SIZE_SB32X32) - subsize = BLOCK_SIZE_SB32X16; - else if (bsize == BLOCK_SIZE_MB16X16) - subsize = BLOCK_SIZE_SB16X8; - else if (bsize == BLOCK_SIZE_SB8X8) - subsize = BLOCK_SIZE_SB8X4; - else - assert(0); - break; - case PARTITION_VERT: - if (bsize == BLOCK_SIZE_SB64X64) - subsize = BLOCK_SIZE_SB32X64; - else if (bsize == BLOCK_SIZE_SB32X32) - subsize = BLOCK_SIZE_SB16X32; - else if (bsize == BLOCK_SIZE_MB16X16) - subsize = BLOCK_SIZE_SB8X16; - else if (bsize == BLOCK_SIZE_SB8X8) - subsize = BLOCK_SIZE_SB4X8; - else - assert(0); - break; - case PARTITION_SPLIT: - if (bsize == BLOCK_SIZE_SB64X64) - subsize = BLOCK_SIZE_SB32X32; - else if (bsize == BLOCK_SIZE_SB32X32) - subsize = BLOCK_SIZE_MB16X16; - else if (bsize == BLOCK_SIZE_MB16X16) - subsize = BLOCK_SIZE_SB8X8; - else if (bsize == BLOCK_SIZE_SB8X8) - subsize = BLOCK_SIZE_AB4X4; - else - assert(0); - break; - default: - assert(0); - } + BLOCK_SIZE_TYPE subsize = subsize_lookup[partition][bsize]; + assert(subsize != BLOCK_SIZE_TYPES); return subsize; } @@ -444,31 +387,10 @@ static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) { } -static TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) { +static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) { const TX_SIZE size = mbmi->txfm_size; - - switch (mbmi->sb_type) { - case BLOCK_SIZE_SB64X64: - return size; - case BLOCK_SIZE_SB64X32: - case BLOCK_SIZE_SB32X64: - case BLOCK_SIZE_SB32X32: - if (size == TX_32X32) - return TX_16X16; - else - return size; - case BLOCK_SIZE_SB32X16: - case BLOCK_SIZE_SB16X32: - case BLOCK_SIZE_MB16X16: - if (size == TX_16X16) - return TX_8X8; - else - return size; - default: - return TX_4X4; - } - - return size; + const TX_SIZE max_size = max_uv_txsize_lookup[mbmi->sb_type]; + return (size > max_size ? max_size : size); } struct plane_block_idx { @@ -507,6 +429,16 @@ static INLINE int plane_block_height(BLOCK_SIZE_TYPE bsize, return 4 << (b_height_log2(bsize) - plane->subsampling_y); } +static INLINE int plane_block_width_log2by4( + BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) { + return (b_width_log2(bsize) - plane->subsampling_x); +} + +static INLINE int plane_block_height_log2by4( + BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) { + return (b_height_log2(bsize) - plane->subsampling_y); +} + typedef void (*foreach_transformed_block_visitor)(int plane, int block, BLOCK_SIZE_TYPE bsize, int ss_txfrm_size, diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c index 57e3d2f58..d5b51e89d 100644 --- a/vp9/common/vp9_common_data.c +++ b/vp9/common/vp9_common_data.c @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ + +#include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common_data.h" // Log 2 conversion lookup tables for block width and height @@ -20,3 +22,57 @@ const int mi_width_log2_lookup[BLOCK_SIZE_TYPES] = {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}; const int mi_height_log2_lookup[BLOCK_SIZE_TYPES] = {0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3}; + +const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES] = { + { // PARTITION_NONE + BLOCK_SIZE_AB4X4, BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB8X4, + BLOCK_SIZE_SB8X8, BLOCK_SIZE_SB8X16, BLOCK_SIZE_SB16X8, + BLOCK_SIZE_MB16X16, BLOCK_SIZE_SB16X32, BLOCK_SIZE_SB32X16, + BLOCK_SIZE_SB32X32, BLOCK_SIZE_SB32X64, BLOCK_SIZE_SB64X32, + BLOCK_SIZE_SB64X64, + }, { // PARTITION_HORZ + BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_SB8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_SB16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_SB32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_SB64X32, + }, { // PARTITION_VERT + BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_SB4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_SB8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_SB16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_SB32X64, + }, { // PARTITION_SPLIT + BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_AB4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_SB8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_MB16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, + BLOCK_SIZE_SB32X32, + } +}; + +const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES] = { + TX_4X4, TX_4X4, TX_4X4, + TX_8X8, TX_8X8, TX_8X8, + TX_16X16, TX_16X16, TX_16X16, + TX_32X32, TX_32X32, TX_32X32, TX_32X32 +}; +const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES] = { + TX_4X4, TX_4X4, TX_4X4, + TX_4X4, TX_4X4, TX_4X4, + TX_8X8, TX_8X8, TX_8X8, + TX_16X16, TX_16X16, TX_16X16, TX_32X32 +}; + +const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5] = { + {BLOCK_SIZE_AB4X4, BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB4X8, + BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB4X8}, + {BLOCK_SIZE_SB8X4, BLOCK_SIZE_SB8X8, BLOCK_SIZE_SB8X16, + BLOCK_SIZE_SB8X16, BLOCK_SIZE_SB8X16}, + {BLOCK_SIZE_SB16X8, BLOCK_SIZE_SB16X8, BLOCK_SIZE_MB16X16, + BLOCK_SIZE_SB16X32, BLOCK_SIZE_SB16X32}, + {BLOCK_SIZE_SB32X16, BLOCK_SIZE_SB32X16, BLOCK_SIZE_SB32X16, + BLOCK_SIZE_SB32X32, BLOCK_SIZE_SB32X64}, + {BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X32, + BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X64} +}; diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h index 7f9f90433..52c314897 100644 --- a/vp9/common/vp9_common_data.h +++ b/vp9/common/vp9_common_data.h @@ -17,5 +17,9 @@ extern const int b_width_log2_lookup[BLOCK_SIZE_TYPES]; extern const int b_height_log2_lookup[BLOCK_SIZE_TYPES]; extern const int mi_width_log2_lookup[BLOCK_SIZE_TYPES]; extern const int mi_height_log2_lookup[BLOCK_SIZE_TYPES]; +extern const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES]; +extern const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES]; +extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES]; +extern const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5]; #endif // VP9_COMMON_VP9_COMMON_DATA_H diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h index dfd6a4a3f..bc739ffbd 100644 --- a/vp9/common/vp9_enums.h +++ b/vp9/common/vp9_enums.h @@ -22,20 +22,20 @@ #define MI_MASK (MI_BLOCK_SIZE - 1) typedef enum BLOCK_SIZE_TYPE { - BLOCK_SIZE_AB4X4, - BLOCK_SIZE_SB4X8, - BLOCK_SIZE_SB8X4, - BLOCK_SIZE_SB8X8, - BLOCK_SIZE_SB8X16, - BLOCK_SIZE_SB16X8, - BLOCK_SIZE_MB16X16, - BLOCK_SIZE_SB16X32, - BLOCK_SIZE_SB32X16, - BLOCK_SIZE_SB32X32, - BLOCK_SIZE_SB32X64, - BLOCK_SIZE_SB64X32, - BLOCK_SIZE_SB64X64, - BLOCK_SIZE_TYPES + BLOCK_SIZE_AB4X4, BLOCK_4X4 = BLOCK_SIZE_AB4X4, + BLOCK_SIZE_SB4X8, BLOCK_4X8 = BLOCK_SIZE_SB4X8, + BLOCK_SIZE_SB8X4, BLOCK_8X4 = BLOCK_SIZE_SB8X4, + BLOCK_SIZE_SB8X8, BLOCK_8X8 = BLOCK_SIZE_SB8X8, + BLOCK_SIZE_SB8X16, BLOCK_8X16 = BLOCK_SIZE_SB8X16, + BLOCK_SIZE_SB16X8, BLOCK_16X8 = BLOCK_SIZE_SB16X8, + BLOCK_SIZE_MB16X16, BLOCK_16X16 = BLOCK_SIZE_MB16X16, + BLOCK_SIZE_SB16X32, BLOCK_16X32 = BLOCK_SIZE_SB16X32, + BLOCK_SIZE_SB32X16, BLOCK_32X16 = BLOCK_SIZE_SB32X16, + BLOCK_SIZE_SB32X32, BLOCK_32X32 = BLOCK_SIZE_SB32X32, + BLOCK_SIZE_SB32X64, BLOCK_32X64 = BLOCK_SIZE_SB32X64, + BLOCK_SIZE_SB64X32, BLOCK_64X32 = BLOCK_SIZE_SB64X32, + BLOCK_SIZE_SB64X64, BLOCK_64X64 = BLOCK_SIZE_SB64X64, + BLOCK_SIZE_TYPES, BLOCK_MAX_SB_SEGMENTS = BLOCK_SIZE_TYPES, } BLOCK_SIZE_TYPE; typedef enum PARTITION_TYPE { @@ -49,4 +49,19 @@ typedef enum PARTITION_TYPE { #define PARTITION_PLOFFSET 4 // number of probability models per block size #define NUM_PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) +typedef enum { + TX_4X4 = 0, // 4x4 dct transform + TX_8X8 = 1, // 8x8 dct transform + TX_16X16 = 2, // 16x16 dct transform + TX_32X32 = 3, // 32x32 dct transform + TX_SIZE_MAX_SB, // Number of transforms available to SBs +} TX_SIZE; + +typedef enum { + DCT_DCT = 0, // DCT in both horizontal and vertical + ADST_DCT = 1, // ADST in vertical, DCT in horizontal + DCT_ADST = 2, // DCT in vertical, ADST in horizontal + ADST_ADST = 3 // ADST in both directions +} TX_TYPE; + #endif // VP9_COMMON_VP9_ENUMS_H_ diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 86ab30c05..55d2a7062 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -370,19 +370,6 @@ static void zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *r } } -static enum BlockSize get_bs(BLOCK_SIZE_TYPE b) { - switch (b) { - case BLOCK_SIZE_SB8X8: - return BLOCK_8X8; - case BLOCK_SIZE_SB16X8: - return BLOCK_16X8; - case BLOCK_SIZE_SB8X16: - return BLOCK_8X16; - default: - return BLOCK_16X16; - } -} - static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, @@ -398,7 +385,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; int n; vp9_variance_fn_ptr_t v_fn_ptr = - cpi->fn_ptr[get_bs(xd->mode_info_context->mbmi.sb_type)]; + cpi->fn_ptr[xd->mode_info_context->mbmi.sb_type]; int new_mv_mode_penalty = 256; int sr = 0; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 08f6bb62f..6e9fd0f51 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -278,23 +278,6 @@ typedef struct { int use_rd_breakout; } SPEED_FEATURES; -enum BlockSize { - BLOCK_4X4, - BLOCK_4X8, - BLOCK_8X4, - BLOCK_8X8, - BLOCK_8X16, - BLOCK_16X8, - BLOCK_16X16, - BLOCK_32X32, - BLOCK_32X16, - BLOCK_16X32, - BLOCK_64X32, - BLOCK_32X64, - BLOCK_64X64, - BLOCK_MAX_SB_SEGMENTS, -}; - typedef struct VP9_COMP { DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); @@ -540,7 +523,7 @@ typedef struct VP9_COMP { vp9_full_search_fn_t full_search_sad; vp9_refining_search_fn_t refining_search_sad; vp9_diamond_search_fn_t diamond_search_sad; - vp9_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SB_SEGMENTS]; + vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZE_TYPES]; uint64_t time_receive_data; uint64_t time_compress_data; uint64_t time_pick_lpf; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 03b35eb0f..05978d3ea 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -289,77 +289,43 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { } } -static enum BlockSize get_block_size(int bw, int bh) { - if (bw == 4 && bh == 4) - return BLOCK_4X4; - - if (bw == 4 && bh == 8) - return BLOCK_4X8; - - if (bw == 8 && bh == 4) - return BLOCK_8X4; - - if (bw == 8 && bh == 8) - return BLOCK_8X8; - - if (bw == 8 && bh == 16) - return BLOCK_8X16; - - if (bw == 16 && bh == 8) - return BLOCK_16X8; - - if (bw == 16 && bh == 16) - return BLOCK_16X16; - - if (bw == 32 && bh == 32) - return BLOCK_32X32; - - if (bw == 32 && bh == 16) - return BLOCK_32X16; - - if (bw == 16 && bh == 32) - return BLOCK_16X32; - - if (bw == 64 && bh == 32) - return BLOCK_64X32; - - if (bw == 32 && bh == 64) - return BLOCK_32X64; - - if (bw == 64 && bh == 64) - return BLOCK_64X64; - - assert(0); - return -1; +static INLINE BLOCK_SIZE_TYPE get_block_size(int bwl, int bhl) { + return bsize_from_dim_lookup[bwl][bhl]; } -static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize, - struct macroblockd_plane *pd) { - return get_block_size(plane_block_width(bsize, pd), - plane_block_height(bsize, pd)); +static BLOCK_SIZE_TYPE get_plane_block_size(BLOCK_SIZE_TYPE bsize, + struct macroblockd_plane *pd) { + return get_block_size(plane_block_width_log2by4(bsize, pd), + plane_block_height_log2by4(bsize, pd)); } -static double linear_interpolate(double x, int ntab, int inv_step, - const double *tab) { +static inline void linear_interpolate2(double x, int ntab, int inv_step, + const double *tab1, const double *tab2, + double *v1, double *v2) { double y = x * inv_step; int d = (int) y; if (d >= ntab - 1) { - return tab[ntab - 1]; + *v1 = tab1[ntab - 1]; + *v2 = tab2[ntab - 1]; } else { double a = y - d; - return tab[d] * (1 - a) + tab[d + 1] * a; + *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a; + *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a; } } -static double model_rate_norm(double x) { +static void model_rd_norm(double x, double *R, double *D) { + static const int inv_tab_step = 8; + static const int tab_size = 120; + // NOTE: The tables below must be of the same size + // // Normalized rate - // This function models the rate for a Laplacian source + // This table models the rate for a Laplacian source // source with given variance when quantized with a uniform quantizer // with given stepsize. The closed form expression is: // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)], // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance), // and H(x) is the binary entropy function. - static const int inv_rate_tab_step = 8; static const double rate_tab[] = { 64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194, 2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206, @@ -377,20 +343,13 @@ static double model_rate_norm(double x) { 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000, }; - const int rate_tab_num = sizeof(rate_tab)/sizeof(rate_tab[0]); - assert(x >= 0.0); - return linear_interpolate(x, rate_tab_num, inv_rate_tab_step, rate_tab); -} - -static double model_dist_norm(double x) { // Normalized distortion - // This function models the normalized distortion for a Laplacian source + // This table models the normalized distortion for a Laplacian source // source with given variance when quantized with a uniform quantizer // with given stepsize. The closed form expression is: // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)) // where x = qpstep / sqrt(variance) // Note the actual distortion is Dn * variance. - static const int inv_dist_tab_step = 8; static const double dist_tab[] = { 0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061, 0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242, @@ -408,9 +367,14 @@ static double model_dist_norm(double x) { 0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000, }; - const int dist_tab_num = sizeof(dist_tab)/sizeof(dist_tab[0]); + /* + assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]); + assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]); + assert(sizeof(rate_tab) == sizeof(dist_tab)); + */ assert(x >= 0.0); - return linear_interpolate(x, dist_tab_num, inv_dist_tab_step, dist_tab); + linear_interpolate2(x, tab_size, inv_tab_step, + rate_tab, dist_tab, R, D); } static void model_rd_from_var_lapndz(int var, int n, int qstep, @@ -429,14 +393,9 @@ static void model_rd_from_var_lapndz(int var, int n, int qstep, double D, R; double s2 = (double) var / n; double x = qstep / sqrt(s2); - D = model_dist_norm(x); - R = model_rate_norm(x); - if (R < 0) { - R = 0; - D = var; - } - *rate = (n * R * 256 + 0.5); - *dist = (n * D * s2 + 0.5); + model_rd_norm(x, &R, &D); + *rate = ((n << 8) * R + 0.5); + *dist = (var * D + 0.5); } vp9_clear_system_state(); } @@ -454,16 +413,17 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, struct macroblockd_plane *const pd = &xd->plane[i]; // TODO(dkovalev) the same code in get_plane_block_size - const int bw = plane_block_width(bsize, pd); - const int bh = plane_block_height(bsize, pd); - const enum BlockSize bs = get_block_size(bw, bh); + const int bwl = plane_block_width_log2by4(bsize, pd); + const int bhl = plane_block_height_log2by4(bsize, pd); + const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl); unsigned int sse; int rate; int64_t dist; (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse); // sse works better than var, since there is no dc prediction used - model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist); + model_rd_from_var_lapndz(sse, 16 << (bwl + bhl), + pd->dequant[1] >> 3, &rate, &dist); rate_sum += rate; dist_sum += dist; @@ -483,16 +443,17 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, struct macroblockd_plane *const pd = &xd->plane[0]; // TODO(dkovalev) the same code in get_plane_block_size - const int bw = plane_block_width(bsize, pd); - const int bh = plane_block_height(bsize, pd); - const enum BlockSize bs = get_block_size(bw, bh); + const int bwl = plane_block_width_log2by4(bsize, pd); + const int bhl = plane_block_height_log2by4(bsize, pd); + const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl); unsigned int sse; int rate; int64_t dist; (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse); // sse works better than var, since there is no dc prediction used - model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist); + model_rd_from_var_lapndz(sse, 16 << (bwl + bhl), + pd->dequant[1] >> 3, &rate, &dist); *out_rate_sum = rate; *out_dist_sum = dist << 4; @@ -504,11 +465,13 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, int *out_rate_sum, int64_t *out_dist_sum, int *out_skip) { int t = 4, j, k; - enum BlockSize bs = BLOCK_4X4; + BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; - const int bw = plane_block_width(bsize, pd); - const int bh = plane_block_height(bsize, pd); + const int bwl = plane_block_width_log2by4(bsize, pd); + const int bhl = plane_block_height_log2by4(bsize, pd); + const int bw = 4 << bwl; + const int bh = 4 << bhl; int rate_sum = 0; int64_t dist_sum = 0; @@ -527,7 +490,7 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, } else { assert(0); } - assert(bs <= get_block_size(bw, bh)); + assert(bs <= get_block_size(bwl, bhl)); *out_skip = 1; for (j = 0; j < bh; j+=t) { for (k = 0; k < bw; k+=t) { @@ -772,10 +735,10 @@ static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x, static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift, int64_t *sse) { struct macroblockd_plane *p = &x->e_mbd.plane[0]; - const int bw = plane_block_width(bsize, p); - const int bh = plane_block_height(bsize, p); + const int bwl = plane_block_width_log2by4(bsize, p); + const int bhl = plane_block_height_log2by4(bsize, p); int64_t e = vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff, - bw * bh, sse) >> shift; + 16 << (bwl + bhl), sse) >> shift; *sse >>= shift; return e; } @@ -788,10 +751,10 @@ static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, *sse = 0; for (plane = 1; plane < MAX_MB_PLANE; plane++) { struct macroblockd_plane *p = &x->e_mbd.plane[plane]; - const int bw = plane_block_width(bsize, p); - const int bh = plane_block_height(bsize, p); + const int bwl = plane_block_width_log2by4(bsize, p); + const int bhl = plane_block_height_log2by4(bsize, p); sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff, - bw * bh, &this_sse); + 16 << (bwl + bhl), &this_sse); *sse += this_sse; } *sse >>= shift; @@ -850,6 +813,7 @@ static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *sse, + int64_t ref_best_rd, BLOCK_SIZE_TYPE bs) { const TX_SIZE max_txfm_size = TX_32X32 - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16); @@ -871,7 +835,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, mbmi->txfm_size = TX_4X4; } super_block_yrd_for_txfm(cm, x, rate, distortion, skip, - &sse[mbmi->txfm_size], INT64_MAX, bs, + &sse[mbmi->txfm_size], ref_best_rd, bs, mbmi->txfm_size); cpi->txfm_stepdown_count[0]++; } @@ -984,6 +948,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, int (*r)[2], int *rate, int64_t *d, int64_t *distortion, int *s, int *skip, int64_t *sse, + int64_t ref_best_rd, BLOCK_SIZE_TYPE bs, int *model_used) { const TX_SIZE max_txfm_size = TX_32X32 @@ -1058,7 +1023,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, // Actually encode using the chosen mode if a model was used, but do not // update the r, d costs super_block_yrd_for_txfm(cm, x, rate, distortion, skip, - &sse[mbmi->txfm_size], INT64_MAX, + &sse[mbmi->txfm_size], ref_best_rd, bs, mbmi->txfm_size); } else { *distortion = d[mbmi->txfm_size]; @@ -1101,7 +1066,8 @@ static void super_block_yrd(VP9_COMP *cpi, (cpi->sf.tx_size_search_method != USE_FULL_RD && mbmi->ref_frame[0] == INTRA_FRAME)) { vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t)); - choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, bs); + choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, + ref_best_rd, bs); if (psse) *psse = sse[mbmi->txfm_size]; return; @@ -1145,7 +1111,7 @@ static void super_block_yrd(VP9_COMP *cpi, &sse[TX_4X4], INT64_MAX, bs, TX_4X4); } choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s, - skip, sse, bs, model_used); + skip, sse, ref_best_rd, bs, model_used); } else { if (bs >= BLOCK_SIZE_SB32X32) super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], @@ -1666,8 +1632,10 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; - const int bw = plane_block_width(bsize, &xd->plane[0]); - const int bh = plane_block_height(bsize, &xd->plane[0]); + const int bwl = plane_block_width_log2by4(bsize, &xd->plane[0]); + const int bhl = plane_block_height_log2by4(bsize, &xd->plane[0]); + const int bw = 4 << bwl; + const int bh = 4 << bhl; int idx, idy; const int src_stride = x->plane[0].src.stride; uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, @@ -1706,22 +1674,6 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, &xd->subpix, MV_PRECISION_Q3); } - // Turning this section off for now since it hurts quality and does not - // improve speed much - /* - if (cpi->sf.use_rd_breakout && - best_yrd < INT64_MAX) { - int64_t thisrd; - model_rd_for_sb_y(cpi, bsize, x, xd, &thisrate, &thisdistortion); - thisrd = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion); - if (thisrd / 2 > best_yrd) { - *distortion = thisdistortion; - *labelyrate = thisrate; - return thisrd; - } - } - */ - vp9_subtract_block(bh, bw, src_diff, 8, src, src_stride, dst, xd->plane[0].dst.stride); @@ -1827,7 +1779,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left)); - v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)]; + v_fn_ptr = &cpi->fn_ptr[get_block_size(bwl, bhl)]; // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on @@ -2109,7 +2061,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, - int ref_frame, enum BlockSize block_size ) { + int ref_frame, BLOCK_SIZE_TYPE block_size ) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int_mv this_mv; @@ -2274,7 +2226,7 @@ static void setup_pred_block(const MACROBLOCKD *xd, static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int idx, MV_REFERENCE_FRAME frame_type, - enum BlockSize block_size, + BLOCK_SIZE_TYPE block_size, int mi_row, int mi_col, int_mv frame_nearest_mv[MAX_REF_FRAMES], int_mv frame_near_mv[MAX_REF_FRAMES], @@ -2352,7 +2304,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv mvp_full; int ref = mbmi->ref_frame[0]; int_mv ref_mv = mbmi->ref_mvs[ref][0]; - const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); + const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]); int tmp_col_min = x->mv_col_min; int tmp_col_max = x->mv_col_max; @@ -2434,7 +2386,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int refs[2] = { mbmi->ref_frame[0], (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; int_mv ref_mv[2]; - const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); + const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]); int ite; // Prediction buffer from second frame. uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); @@ -2841,8 +2793,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->active_map_enabled && x->active_ptr[0] == 0) x->skip = 1; else if (x->encode_breakout) { - const enum BlockSize y_size = get_plane_block_size(bsize, &xd->plane[0]); - const enum BlockSize uv_size = get_plane_block_size(bsize, &xd->plane[1]); + const BLOCK_SIZE_TYPE y_size = get_plane_block_size(bsize, &xd->plane[0]); + const BLOCK_SIZE_TYPE uv_size = get_plane_block_size(bsize, &xd->plane[1]); unsigned int var, sse; int threshold = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1] >> 4); @@ -3001,7 +2953,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); + const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame; unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;