From 78bcc48756c62123c4f08d7cfcdbdca49d554d5a Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Mon, 9 Mar 2015 11:41:59 -0700 Subject: [PATCH] Make filterintra experiment work with highbitdepth All stats look fine. derflr: +0.912 with respect to 10-bit internal baseline (Was +0.747% w.r.t. 8 bit) +5.545 with respect to 8-bit baseline Change-Id: I3c14fd17718a640ea2f6bd39534e0b5cbe04fb66 --- vp9/common/vp9_reconintra.c | 368 ++++++++++++++++++++++++++++-------- vp9/decoder/vp9_decodemv.c | 12 +- vp9/encoder/vp9_encodemb.c | 319 +++++++++++++++---------------- vp9/encoder/vp9_pickmode.c | 2 +- vp9/encoder/vp9_rdopt.c | 26 +++ 5 files changed, 474 insertions(+), 253 deletions(-) diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index a4fa9d8ad..ded2701da 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -657,19 +657,73 @@ void vp9_init_intra_predictors() { #undef intra_pred_allsizes } +#if CONFIG_FILTERINTRA +static const int taps4_4[10][4] = { + {735, 881, -537, -54}, + {1005, 519, -488, -11}, + {383, 990, -343, -6}, + {442, 805, -542, 319}, + {658, 616, -133, -116}, + {875, 442, -141, -151}, + {386, 741, -23, -80}, + {390, 1027, -446, 51}, + {679, 606, -523, 262}, + {903, 922, -778, -23} +}; + +static const int taps4_8[10][4] = { + {648, 803, -444, 16}, + {972, 620, -576, 7}, + {561, 967, -499, -5}, + {585, 762, -468, 144}, + {596, 619, -182, -9}, + {895, 459, -176, -153}, + {557, 722, -126, -129}, + {601, 839, -523, 105}, + {562, 709, -499, 251}, + {803, 872, -695, 43} +}; + +static const int taps4_16[10][4] = { + {423, 728, -347, 111}, + {963, 685, -665, 23}, + {281, 1024, -480, 216}, + {640, 596, -437, 78}, + {429, 669, -259, 99}, + {740, 646, -415, 23}, + {568, 771, -346, 40}, + {404, 833, -486, 209}, + {398, 712, -423, 307}, + {939, 935, -887, 17} +}; + +static const int taps4_32[10][4] = { + {477, 737, -393, 150}, + {881, 630, -546, 67}, + {506, 984, -443, -20}, + {114, 459, -270, 528}, + {433, 528, 14, 3}, + {837, 470, -301, -30}, + {181, 777, 89, -107}, + {-29, 716, -232, 259}, + {589, 646, -495, 255}, + {740, 884, -728, 77} +}; +#endif // CONFIG_FILTERINTRA + #if CONFIG_VP9_HIGHBITDEPTH -static void build_intra_predictors_high(const MACROBLOCKD *xd, - const uint8_t *ref8, - int ref_stride, - uint8_t *dst8, - int dst_stride, - PREDICTION_MODE mode, - TX_SIZE tx_size, - int up_available, - int left_available, - int right_available, - int x, int y, - int plane, int bd) { +static void build_intra_predictors_highbd(const MACROBLOCKD *xd, + const uint8_t *ref8, + int ref_stride, + uint8_t *dst8, + int dst_stride, + PREDICTION_MODE mode, + TX_SIZE tx_size, + int up_available, + int left_available, + int right_available, + int x, int y, + int plane, int bd) { int i; uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); @@ -926,13 +980,195 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, } #if CONFIG_FILTERINTRA +#if CONFIG_VP9_HIGHBITDEPTH +static void filter_intra_predictors_4tap_highbd(uint16_t *ypred_ptr, + int y_stride, + int bs, + const uint16_t *yabove_row, + const uint16_t *yleft_col, + int mode, int bd) { + const int prec_bits = 10; + const int round_val = (1 << (prec_bits - 1)) - 1; + + int k, r, c; +#if CONFIG_TX64X64 + int pred[65][129]; +#else + int pred[33][65]; +#endif + int mean, ipred; + + const int c1 = (bs >= 32) ? taps4_32[mode][0] : ((bs >= 16) ? + taps4_16[mode][0] : ((bs >= 8) ? taps4_8[mode][0] : taps4_4[mode][0])); + const int c2 = (bs >= 32) ? taps4_32[mode][1] : ((bs >= 16) ? + taps4_16[mode][1] : ((bs >= 8) ? taps4_8[mode][1] : taps4_4[mode][1])); + const int c3 = (bs >= 32) ? taps4_32[mode][2] : ((bs >= 16) ? + taps4_16[mode][2] : ((bs >= 8) ? taps4_8[mode][2] : taps4_4[mode][2])); + const int c4 = (bs >= 32) ? taps4_32[mode][3] : ((bs >= 16) ? + taps4_16[mode][3] : ((bs >= 8) ? taps4_8[mode][3] : taps4_4[mode][3])); + + k = 0; + mean = 0; + while (k < bs) { + mean = mean + (int)yleft_col[k]; + mean = mean + (int)yabove_row[k]; + k++; + } + mean = (mean + bs) / (2 * bs); + + for (r = 0; r < bs; r++) + pred[r + 1][0] = (int)yleft_col[r] - mean; + + for (c = 0; c < 2 * bs + 1; c++) + pred[0][c] = (int)yabove_row[c - 1] - mean; + + for (r = 1; r < bs + 1; r++) + for (c = 1; c < 2 * bs + 1 - r; c++) { + ipred = c1 * pred[r - 1][c] + c2 * pred[r][c - 1] + + c3 * pred[r - 1][c - 1] + c4 * pred[r - 1][c + 1]; + pred[r][c] = ipred < 0 ? -((-ipred + round_val) >> prec_bits) : + ((ipred + round_val) >> prec_bits); + } + + for (r = 0; r < bs; r++) { + for (c = 0; c < bs; c++) { + ipred = pred[r + 1][c + 1] + mean; + ypred_ptr[c] = clip_pixel_highbd(ipred, bd); + } + ypred_ptr += y_stride; + } +} + +static void build_filter_intra_predictors_highbd( + const MACROBLOCKD *xd, + const uint8_t *ref8, int ref_stride, + uint8_t *dst8, int dst_stride, + PREDICTION_MODE mode, TX_SIZE tx_size, + int up_available, int left_available, + int right_available, int x, int y, + int plane, int bd) { + int i; + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); + DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 64); +#if CONFIG_TX64X64 + DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 256 + 16); +#else + DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 128 + 16); +#endif + uint16_t *above_row = above_data + 16; + const uint16_t *const_above_row = above_row; + const int bs = 4 << tx_size; + int frame_width, frame_height; + int x0, y0; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + int base = 128 << (bd - 8); + + // Get current frame pointer, width and height. + if (plane == 0) { + frame_width = xd->cur_buf->y_width; + frame_height = xd->cur_buf->y_height; + } else { + frame_width = xd->cur_buf->uv_width; + frame_height = xd->cur_buf->uv_height; + } + + // Get block position in current frame. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // left + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } + } else { + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } + } else { + vpx_memset16(left_col, base + 1, bs); + } + + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + vpx_memcpy(above_row, above_ref, 2 * bs * sizeof(uint16_t)); + } else { + vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t)); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t)); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } else { + vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t)); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t)); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } else { + vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t)); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } + } + // TODO(Peter) this value should probably change for high bitdepth + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t)); + if (bs == 4 && right_available) + vpx_memcpy(above_row + bs, above_ref + bs, bs * sizeof(uint16_t)); + else + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + // TODO(Peter): this value should probably change for high bitdepth + above_row[-1] = left_available ? above_ref[-1] : (base+1); + } + } + } else { + vpx_memset16(above_row, base - 1, bs * 2); + // TODO(Peter): this value should probably change for high bitdepth + above_row[-1] = base - 1; + } + + // predict + filter_intra_predictors_4tap_highbd(dst, dst_stride, bs, + const_above_row, left_col, + mode, bd); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + static void filter_intra_predictors_4tap(uint8_t *ypred_ptr, int y_stride, int bs, const uint8_t *yabove_row, const uint8_t *yleft_col, int mode) { - static const int prec_bits = 10; - static const int round_val = 511; + const int prec_bits = 10; + const int round_val = (1 << (prec_bits - 1)) - 1; int k, r, c; #if CONFIG_TX64X64 @@ -942,55 +1178,6 @@ static void filter_intra_predictors_4tap(uint8_t *ypred_ptr, int y_stride, #endif int mean, ipred; - int taps4_4[10][4] = { - {735, 881, -537, -54}, - {1005, 519, -488, -11}, - {383, 990, -343, -6}, - {442, 805, -542, 319}, - {658, 616, -133, -116}, - {875, 442, -141, -151}, - {386, 741, -23, -80}, - {390, 1027, -446, 51}, - {679, 606, -523, 262}, - {903, 922, -778, -23} - }; - int taps4_8[10][4] = { - {648, 803, -444, 16}, - {972, 620, -576, 7}, - {561, 967, -499, -5}, - {585, 762, -468, 144}, - {596, 619, -182, -9}, - {895, 459, -176, -153}, - {557, 722, -126, -129}, - {601, 839, -523, 105}, - {562, 709, -499, 251}, - {803, 872, -695, 43} - }; - int taps4_16[10][4] = { - {423, 728, -347, 111}, - {963, 685, -665, 23}, - {281, 1024, -480, 216}, - {640, 596, -437, 78}, - {429, 669, -259, 99}, - {740, 646, -415, 23}, - {568, 771, -346, 40}, - {404, 833, -486, 209}, - {398, 712, -423, 307}, - {939, 935, -887, 17} - }; - int taps4_32[10][4] = { - {477, 737, -393, 150}, - {881, 630, -546, 67}, - {506, 984, -443, -20}, - {114, 459, -270, 528}, - {433, 528, 14, 3}, - {837, 470, -301, -30}, - {181, 777, 89, -107}, - {-29, 716, -232, 259}, - {589, 646, -495, 255}, - {740, 884, -728, 77} - }; - const int c1 = (bs >= 32) ? taps4_32[mode][0] : ((bs >= 16) ? taps4_16[mode][0] : ((bs >= 8) ? taps4_8[mode][0] : taps4_4[mode][0])); const int c2 = (bs >= 32) ? taps4_32[mode][1] : ((bs >= 16) ? @@ -1066,8 +1253,6 @@ static void build_filter_intra_predictors(const MACROBLOCKD *xd, x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - vpx_memset(left_col, 129, 64); - // left if (left_available) { if (xd->mb_to_bottom_edge < 0) { @@ -1087,6 +1272,8 @@ static void build_filter_intra_predictors(const MACROBLOCKD *xd, for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; } + } else { + vpx_memset(left_col, 129, 64); } // TODO(hkuang) do not extend 2*bs pixels for all modes. @@ -1147,7 +1334,7 @@ static void build_filter_intra_predictors(const MACROBLOCKD *xd, filter_intra_predictors_4tap(dst, dst_stride, bs, const_above_row, left_col, mode); } -#endif +#endif // CONFIG_FILTERINTRA void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, @@ -1173,36 +1360,49 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, assert(bwl >= 0); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, - tx_size, have_top, have_left, have_right, - x, y, plane, xd->bd); +#if CONFIG_FILTERINTRA + if (!filterflag) { +#endif + build_intra_predictors_highbd(xd, ref, ref_stride, dst, dst_stride, + mode, tx_size, have_top, + have_left, have_right, x, y, + plane, xd->bd); +#if CONFIG_FILTERINTRA + } else { + build_filter_intra_predictors_highbd(xd, ref, ref_stride, dst, dst_stride, + mode, tx_size, have_top, + have_left, have_right, x, y, + plane, xd->bd); + } +#endif return; } -#endif +#endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_FILTERINTRA if (!filterflag) { #endif // CONFIG_FILTERINTRA #if CONFIG_PALETTE - if (xd->mi[0].src_mi->mbmi.palette_enabled && !plane) { - uint8_t *palette = xd->mi[0].src_mi->mbmi.palette_colors; - int bs = 4 * (1 << tx_size); - uint8_t *map = xd->plane[0].color_index_map; - int r, c, stride = 4 * (1 << bwl_in); - - for (r = 0; r < bs; r++) { - for (c = 0; c < bs; c++) { - dst[r * dst_stride + c] = palette[map[(r + y) * stride + c + x]]; + if (xd->mi[0].src_mi->mbmi.palette_enabled && !plane) { + uint8_t *palette = xd->mi[0].src_mi->mbmi.palette_colors; + int bs = 4 * (1 << tx_size); + uint8_t *map = xd->plane[0].color_index_map; + int r, c, stride = 4 * (1 << bwl_in); + + for (r = 0; r < bs; r++) { + for (c = 0; c < bs; c++) { + dst[r * dst_stride + c] = palette[map[(r + y) * stride + c + x]]; + } } + return; } - return; - } #endif // CONFIG_PALETTE - build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, - have_top, have_left, have_right, x, y, plane); + build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, + have_top, have_left, have_right, x, y, plane); #if CONFIG_FILTERINTRA } else { build_filter_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, - tx_size, have_top, have_left, have_right, x, y, plane); + tx_size, have_top, have_left, have_right, + x, y, plane); } #endif } diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 1d3aaf3e7..1d93c1886 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -391,10 +391,10 @@ static void read_intra_frame_mode_info(VP9_COMMON *const cm, switch (bsize) { case BLOCK_4X4: -#if !CONFIG_FILTERINTRA - for (i = 0; i < 4; ++i) -#else +#if CONFIG_FILTERINTRA for (i = 0; i < 4; ++i) { +#else + for (i = 0; i < 4; ++i) #endif mi->bmi[i].as_mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i)); @@ -619,10 +619,10 @@ static void read_intra_block_mode_info(VP9_COMMON *const cm, MODE_INFO *mi, switch (bsize) { case BLOCK_4X4: -#if !CONFIG_FILTERINTRA - for (i = 0; i < 4; ++i) -#else +#if CONFIG_FILTERINTRA for (i = 0; i < 4; ++i) { +#else + for (i = 0; i < 4; ++i) #endif mi->bmi[i].as_mode = read_intra_mode_y(cm, r, 0); #if CONFIG_FILTERINTRA diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 341dedda4..4f14490aa 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -1572,181 +1572,181 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, src = &p->src.buf[4 * (j * src_stride + i)]; src_diff = &p->src_diff[4 * (j * diff_stride + i)]; -#if CONFIG_TX_SKIP - if (mbmi->tx_skip[plane != 0]) { - int shift = mbmi->tx_skip_shift; #if CONFIG_FILTERINTRA - if (mbmi->sb_type < BLOCK_8X8 && plane == 0) - fbit = xd->mi[0].b_filter_info[block]; - else - fbit = plane == 0 ? mbmi->filterbit : mbmi->uv_filterbit; -#endif // CONFIG_FILTERINTRA - switch (tx_size) { + if (mbmi->sb_type < BLOCK_8X8 && plane == 0) + fbit = xd->mi[0].b_filter_info[block]; + else + fbit = plane == 0 ? mbmi->filterbit : mbmi->uv_filterbit; +#endif +#if CONFIG_TX_SKIP + if (mbmi->tx_skip[plane != 0]) { + int shift = mbmi->tx_skip_shift; + switch (tx_size) { #if CONFIG_TX64X64 - case TX_64X64: - assert(plane == 0); - scan_order = &vp9_default_scan_orders[TX_64X64]; - mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; - vp9_predict_intra_block(xd, block >> 8, bwl, TX_64X64, mode, + case TX_64X64: + assert(plane == 0); + scan_order = &vp9_default_scan_orders[TX_64X64]; + mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; + vp9_predict_intra_block(xd, block >> 8, bwl, TX_64X64, mode, #if CONFIG_FILTERINTRA - fbit, + fbit, #endif - x->skip_encode ? src : dst, - x->skip_encode ? src_stride : dst_stride, - dst, dst_stride, i, j, plane); - if (!x->skip_recode) { - vp9_subtract_block(64, 64, src_diff, diff_stride, - src, src_stride, dst, dst_stride); - vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift); - vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, - p->round, p->quant, p->quant_shift, qcoeff, - dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); - } - if (!x->skip_encode && *eob) - vp9_tx_identity_add(dqcoeff, dst, dst_stride, 64, shift); - break; + x->skip_encode ? src : dst, + x->skip_encode ? src_stride : dst_stride, + dst, dst_stride, i, j, plane); + if (!x->skip_recode) { + vp9_subtract_block(64, 64, src_diff, diff_stride, + src, src_stride, dst, dst_stride); + vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift); + vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, + p->round, p->quant, p->quant_shift, qcoeff, + dqcoeff, pd->dequant, eob, + scan_order->scan, scan_order->iscan); + } + if (!x->skip_encode && *eob) + vp9_tx_identity_add(dqcoeff, dst, dst_stride, 64, shift); + break; #endif // CONFIG_TX64X64 - case TX_32X32: - scan_order = &vp9_default_scan_orders[TX_32X32]; - mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; - vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode, + case TX_32X32: + scan_order = &vp9_default_scan_orders[TX_32X32]; + mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; + vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode, #if CONFIG_FILTERINTRA - fbit, + fbit, #endif - x->skip_encode ? src : dst, - x->skip_encode ? src_stride : dst_stride, - dst, dst_stride, i, j, plane); - - if (!x->skip_recode) { - if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) { - *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride, - src_diff, diff_stride, - coeff, qcoeff, dqcoeff, p, pd, - scan_order, mode, 32, shift, 0); - break; - } - - vp9_subtract_block(32, 32, src_diff, diff_stride, - src, src_stride, dst, dst_stride); - vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift); - - vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, - p->round, p->quant, p->quant_shift, qcoeff, - dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); - } - if (!x->skip_encode && *eob) { - vp9_tx_identity_add(dqcoeff, dst, dst_stride, 32, shift); - } + x->skip_encode ? src : dst, + x->skip_encode ? src_stride : dst_stride, + dst, dst_stride, i, j, plane); + + if (!x->skip_recode) { + if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) { + *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride, + src_diff, diff_stride, + coeff, qcoeff, dqcoeff, p, pd, + scan_order, mode, 32, shift, 0); break; - case TX_16X16: - tx_type = get_tx_type(pd->plane_type, xd); - scan_order = &vp9_scan_orders[TX_16X16][tx_type]; - mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; - vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode, + } + + vp9_subtract_block(32, 32, src_diff, diff_stride, + src, src_stride, dst, dst_stride); + vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift); + + vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, + p->round, p->quant, p->quant_shift, qcoeff, + dqcoeff, pd->dequant, eob, + scan_order->scan, scan_order->iscan); + } + if (!x->skip_encode && *eob) { + vp9_tx_identity_add(dqcoeff, dst, dst_stride, 32, shift); + } + break; + case TX_16X16: + tx_type = get_tx_type(pd->plane_type, xd); + scan_order = &vp9_scan_orders[TX_16X16][tx_type]; + mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; + vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode, #if CONFIG_FILTERINTRA - fbit, + fbit, #endif - x->skip_encode ? src : dst, - x->skip_encode ? src_stride : dst_stride, - dst, dst_stride, i, j, plane); - if (!x->skip_recode) { - if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) { - *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride, - src_diff, diff_stride, - coeff, qcoeff, dqcoeff, p, pd, - scan_order, mode, 16, shift, -1); - break; - } - - vp9_subtract_block(16, 16, src_diff, diff_stride, - src, src_stride, dst, dst_stride); - vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift); - vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, - scan_order->iscan); - } - if (!x->skip_encode && *eob) { - vp9_tx_identity_add(dqcoeff, dst, dst_stride, 16, shift); - } + x->skip_encode ? src : dst, + x->skip_encode ? src_stride : dst_stride, + dst, dst_stride, i, j, plane); + if (!x->skip_recode) { + if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) { + *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride, + src_diff, diff_stride, + coeff, qcoeff, dqcoeff, p, pd, + scan_order, mode, 16, shift, -1); break; - case TX_8X8: - tx_type = get_tx_type(pd->plane_type, xd); - scan_order = &vp9_scan_orders[TX_8X8][tx_type]; - mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; - vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode, + } + + vp9_subtract_block(16, 16, src_diff, diff_stride, + src, src_stride, dst, dst_stride); + vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift); + vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, scan_order->scan, + scan_order->iscan); + } + if (!x->skip_encode && *eob) { + vp9_tx_identity_add(dqcoeff, dst, dst_stride, 16, shift); + } + break; + case TX_8X8: + tx_type = get_tx_type(pd->plane_type, xd); + scan_order = &vp9_scan_orders[TX_8X8][tx_type]; + mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; + vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode, #if CONFIG_FILTERINTRA - fbit, + fbit, #endif - x->skip_encode ? src : dst, - x->skip_encode ? src_stride : dst_stride, - dst, dst_stride, i, j, plane); - if (!x->skip_recode) { - if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) { - *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride, - src_diff, diff_stride, - coeff, qcoeff, dqcoeff, p, pd, - scan_order, mode, 8, shift, -1); - break; - } - - vp9_subtract_block(8, 8, src_diff, diff_stride, - src, src_stride, dst, dst_stride); - vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift); - vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, - scan_order->iscan); - } - if (!x->skip_encode && *eob) { - vp9_tx_identity_add(dqcoeff, dst, dst_stride, 8, shift); - } + x->skip_encode ? src : dst, + x->skip_encode ? src_stride : dst_stride, + dst, dst_stride, i, j, plane); + if (!x->skip_recode) { + if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) { + *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride, + src_diff, diff_stride, + coeff, qcoeff, dqcoeff, p, pd, + scan_order, mode, 8, shift, -1); break; - case TX_4X4: - tx_type = get_tx_type_4x4(pd->plane_type, xd, block); - scan_order = &vp9_scan_orders[TX_4X4][tx_type]; - mode = plane == 0 ? - get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode; - vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, + } + + vp9_subtract_block(8, 8, src_diff, diff_stride, + src, src_stride, dst, dst_stride); + vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift); + vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, scan_order->scan, + scan_order->iscan); + } + if (!x->skip_encode && *eob) { + vp9_tx_identity_add(dqcoeff, dst, dst_stride, 8, shift); + } + break; + case TX_4X4: + tx_type = get_tx_type_4x4(pd->plane_type, xd, block); + scan_order = &vp9_scan_orders[TX_4X4][tx_type]; + mode = plane == 0 ? + get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode; + vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, #if CONFIG_FILTERINTRA - fbit, + fbit, #endif - x->skip_encode ? src : dst, - x->skip_encode ? src_stride : dst_stride, - dst, dst_stride, i, j, plane); - - if (!x->skip_recode) { - if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) { - *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride, - src_diff, diff_stride, - coeff, qcoeff, dqcoeff, p, pd, - scan_order, mode, 4, shift, -1); - break; - } - - vp9_subtract_block(4, 4, src_diff, diff_stride, - src, src_stride, dst, dst_stride); - vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift); - vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, - scan_order->iscan); - } + x->skip_encode ? src : dst, + x->skip_encode ? src_stride : dst_stride, + dst, dst_stride, i, j, plane); - if (!x->skip_encode && *eob) { - vp9_tx_identity_add(dqcoeff, dst, dst_stride, 4, shift); - } - break; - default: - assert(0); + if (!x->skip_recode) { + if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) { + *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride, + src_diff, diff_stride, + coeff, qcoeff, dqcoeff, p, pd, + scan_order, mode, 4, shift, -1); break; + } + + vp9_subtract_block(4, 4, src_diff, diff_stride, + src, src_stride, dst, dst_stride); + vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift); + vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff, dqcoeff, + pd->dequant, eob, scan_order->scan, + scan_order->iscan); } - if (*eob) - *(args->skip) = 0; - return; - } + + if (!x->skip_encode && *eob) { + vp9_tx_identity_add(dqcoeff, dst, dst_stride, 4, shift); + } + break; + default: + assert(0); + break; + } + if (*eob) + *(args->skip) = 0; + return; + } #endif // CONFIG_TX_SKIP #if CONFIG_VP9_HIGHBITDEPTH @@ -1895,12 +1895,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, return; } #endif // CONFIG_VP9_HIGHBITDEPTH -#if CONFIG_FILTERINTRA - if (mbmi->sb_type < BLOCK_8X8 && plane == 0) - fbit = xd->mi[0].b_filter_info[block]; - else - fbit = plane == 0 ? mbmi->filterbit : mbmi->uv_filterbit; -#endif + switch (tx_size) { #if CONFIG_TX64X64 case TX_64X64: diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index d207b0e38..a80819cdf 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -457,7 +457,7 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, b_width_log2_lookup[plane_bsize], tx_size, args->mode, #if CONFIG_FILTERINTRA - 0, + 0, #endif p->src.buf, src_stride, pd->dst.buf, dst_stride, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index c96c9f5db..cd0183ba0 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -958,6 +958,22 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { +#if CONFIG_FILTERINTRA + for (mode_ext = 2 * DC_PRED; mode_ext <= 2 * TM_PRED + 1; ++mode_ext) { + int64_t this_rd; + int ratey = 0; + int64_t distortion = 0; + int rate; + + fbit = mode_ext & 1; + mode = mode_ext >> 1; + if (fbit && !is_filter_allowed(mode)) + continue; + + rate = bmode_costs[mode]; + if (is_filter_allowed(mode)) + rate += vp9_cost_bit(cpi->common.fc.filterintra_prob[0][mode], fbit); +#else for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; int ratey = 0; @@ -973,6 +989,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, if (conditional_skipintra(mode, *best_mode)) continue; } +#endif // CONFIG_FILTERINTRA vpx_memcpy(tempa, ta, sizeof(ta)); vpx_memcpy(templ, tl, sizeof(tl)); @@ -986,8 +1003,14 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, p->src_diff); tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); xd->mi[0].src_mi->bmi[block].as_mode = mode; +#if CONFIG_FILTERINTRA + xd->mi[0].b_filter_info[block] = fbit; +#endif vp9_predict_intra_block(xd, block, 1, TX_4X4, mode, +#if CONFIG_FILTERINTRA + fbit, +#endif x->skip_encode ? src : dst, x->skip_encode ? src_stride : dst_stride, dst, dst_stride, idx, idy, 0); @@ -1034,6 +1057,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; +#if CONFIG_FILTERINTRA + *best_fbit = fbit; +#endif vpx_memcpy(a, tempa, sizeof(tempa)); vpx_memcpy(l, templ, sizeof(templ)); for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) { -- 2.40.0