From eb939f45b8ffde47e160d545114d68ddd3606b90 Mon Sep 17 00:00:00 2001
From: John Koleszar
Date: Sun, 24 Feb 2013 20:55:14 -0800
Subject: [PATCH] Spatial resampling of ZEROMV predictors

This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either reference
may be scaled.

To test, I use resize_test and enable WRITE_RECON_BUFFER in
vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:

--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {

   virtual void FillFrame() {
     // Read a frame from input_file.
+    if (frame_ != 3)
     if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
       limit_ = frame_;
     }

This forces the frame on which the resolution changes to be coded with
no motion, only scaling, and improves the quality of the result.

Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
---
 test/resize_test.cc               |   3 +-
 vp9/common/vp9_blockd.h           |   3 +
 vp9/common/vp9_convolve.c         |  30 ++-
 vp9/common/vp9_convolve.h         |   3 -
 vp9/common/vp9_filter.c           |  96 +++++++++-
 vp9/common/vp9_filter.h           |  10 +-
 vp9/common/vp9_mbpitch.c          |  11 --
 vp9/common/vp9_onyxc_int.h        |   1 +
 vp9/common/vp9_reconinter.c       | 291 +++++++++++++++++++++++-------
 vp9/common/vp9_reconinter.h       |  83 ++++++++-
 vp9/decoder/vp9_decodemv.c        |  36 +++-
 vp9/decoder/vp9_decodframe.c      |  96 +++++++---
 vp9/decoder/vp9_onyxd_int.h       |   2 +
 vp9/encoder/vp9_encodeframe.c     |  36 ++--
 vp9/encoder/vp9_encodemb.c        |   8 +-
 vp9/encoder/vp9_encodemb.h        |   4 +-
 vp9/encoder/vp9_firstpass.c       |   2 +-
 vp9/encoder/vp9_mbgraph.c         |  26 ++-
 vp9/encoder/vp9_onyx_if.c         |  90 +++++++--
 vp9/encoder/vp9_rdopt.c           | 119 +++++++++---
 vp9/encoder/vp9_rdopt.h           |  14 --
 vp9/encoder/vp9_temporal_filter.c |   7 +
 22 files changed, 740 insertions(+), 231 deletions(-)

diff --git a/test/resize_test.cc b/test/resize_test.cc
index 5e9234c38..2fe8884bc 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -114,7 +114,8 @@ class ResizeInternalTest : public ResizeTest {
 TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
   ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 5);
+                                       30, 1, 0, 6);
+  cfg_.rc_target_bitrate = 5000;
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 5d876c15b..2f60e38fa 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -292,9 +292,12 @@ struct scale_factors {
   int x_num;
   int x_den;
   int x_offset_q4;
+  int x_step_q4;
   int y_num;
   int y_den;
   int y_offset_q4;
+  int y_step_q4;
+  convolve_fn_t predict[2][2][2];  // horiz, vert, avg
 };

 typedef struct macroblockd {
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index ac5d5cb3e..f1b5915bd 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -206,16 +206,25 @@ static void convolve_c(const uint8_t *src, int src_stride,
                        const int16_t *filter_x, int x_step_q4,
                        const int16_t *filter_y, int y_step_q4,
                        int w, int h, int taps) {
-  /* Fixed size intermediate buffer places limits on parameters. */
-  uint8_t temp[16 * 23];
+  /* Fixed size intermediate buffer places limits on parameters.
+   * Maximum intermediate_height is 39, for y_step_q4 == 32,
+   * h == 16, taps == 8.
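+   * (Worked through from the expression below: intermediate_height
+   *  = ((h * y_step_q4) >> 4) + taps - 1
+   *  = ((16 * 32) >> 4) + 8 - 1 = 32 + 7 = 39.)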
+ */ + uint8_t temp[16 * 39]; + int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1; + assert(w <= 16); assert(h <= 16); assert(taps <= 8); + assert(y_step_q4 <= 32); + + if (intermediate_height < h) + intermediate_height = h; convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 16, filter_x, x_step_q4, filter_y, y_step_q4, - w, h + taps - 1, taps); + w, intermediate_height, taps); convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, taps); @@ -226,16 +235,25 @@ static void convolve_avg_c(const uint8_t *src, int src_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int taps) { - /* Fixed size intermediate buffer places limits on parameters. */ - uint8_t temp[16 * 23]; + /* Fixed size intermediate buffer places limits on parameters. + * Maximum intermediate_height is 39, for y_step_q4 == 32, + * h == 16, taps == 8. + */ + uint8_t temp[16 * 39]; + int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1; + assert(w <= 16); assert(h <= 16); assert(taps <= 8); + assert(y_step_q4 <= 32); + + if (intermediate_height < h) + intermediate_height = h; convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 16, filter_x, x_step_q4, filter_y, y_step_q4, - w, h + taps - 1, taps); + w, intermediate_height, taps); convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, taps); diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h index 46c935ab7..8c4856187 100644 --- a/vp9/common/vp9_convolve.h +++ b/vp9/common/vp9_convolve.h @@ -33,11 +33,8 @@ void vp9_convolve_avg(const uint8_t *src, int src_stride, int w, int h); struct subpix_fn_table { - convolve_fn_t predict[2][2][2]; // horiz, vert, avg const int16_t (*filter_x)[8]; const int16_t (*filter_y)[8]; - int x_step_q4; - int y_step_q4; }; #endif // VP9_COMMON_CONVOLVE_H_ diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 5e425895f..9249c5786 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -15,7 +15,26 @@ #include "vp9_rtcd.h" #include "vp9/common/vp9_common.h" -DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = { +/* TODO(jkoleszar): We can avoid duplicating these tables 2X by forcing 256 + * byte alignment of the table's base address. 
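+ * (The 2X duplication appears to keep filter reads in bounds: the convolve
+ * code can step up to 15 phase rows beyond whichever row pointer it is
+ * handed, so each of the 16 phases needs a full set of rows after it.)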
+ */ +DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS*2][8]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, + { 0, 0, 0, 120, 8, 0, 0, 0 }, + { 0, 0, 0, 112, 16, 0, 0, 0 }, + { 0, 0, 0, 104, 24, 0, 0, 0 }, + { 0, 0, 0, 96, 32, 0, 0, 0 }, + { 0, 0, 0, 88, 40, 0, 0, 0 }, + { 0, 0, 0, 80, 48, 0, 0, 0 }, + { 0, 0, 0, 72, 56, 0, 0, 0 }, + { 0, 0, 0, 64, 64, 0, 0, 0 }, + { 0, 0, 0, 56, 72, 0, 0, 0 }, + { 0, 0, 0, 48, 80, 0, 0, 0 }, + { 0, 0, 0, 40, 88, 0, 0, 0 }, + { 0, 0, 0, 32, 96, 0, 0, 0 }, + { 0, 0, 0, 24, 104, 0, 0, 0 }, + { 0, 0, 0, 16, 112, 0, 0, 0 }, + { 0, 0, 0, 8, 120, 0, 0, 0 }, { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 }, { 0, 0, 0, 112, 16, 0, 0, 0 }, @@ -36,7 +55,8 @@ DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = { #define FILTER_ALPHA 0 #define FILTER_ALPHA_SHARP 1 -DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = { +DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS*2][8]) + = { #if FILTER_ALPHA == 0 /* Lagrangian interpolation filter */ { 0, 0, 0, 128, 0, 0, 0, 0}, @@ -54,6 +74,22 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = { { -1, 4, -11, 37, 112, -16, 4, -1}, { -1, 3, -9, 27, 118, -13, 4, -1}, { 0, 2, -6, 18, 122, -10, 3, -1}, + { 0, 1, -3, 8, 126, -5, 1, 0}, + { 0, 0, 0, 128, 0, 0, 0, 0}, + { 0, 1, -5, 126, 8, -3, 1, 0}, + { -1, 3, -10, 122, 18, -6, 2, 0}, + { -1, 4, -13, 118, 27, -9, 3, -1}, + { -1, 4, -16, 112, 37, -11, 4, -1}, + { -1, 5, -18, 105, 48, -14, 4, -1}, + { -1, 5, -19, 97, 58, -16, 5, -1}, + { -1, 6, -19, 88, 68, -18, 5, -1}, + { -1, 6, -19, 78, 78, -19, 6, -1}, + { -1, 5, -18, 68, 88, -19, 6, -1}, + { -1, 5, -16, 58, 97, -19, 5, -1}, + { -1, 4, -14, 48, 105, -18, 5, -1}, + { -1, 4, -11, 37, 112, -16, 4, -1}, + { -1, 3, -9, 27, 118, -13, 4, -1}, + { 0, 2, -6, 18, 122, -10, 3, -1}, { 0, 1, -3, 8, 126, -5, 1, 0} #elif FILTER_ALPHA == 50 /* Generated using MATLAB: @@ -82,7 +118,8 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = { #endif /* FILTER_ALPHA */ }; -DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { +DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS*2][8]) + = { #if FILTER_ALPHA_SHARP == 1 /* dct based filter */ {0, 0, 0, 128, 0, 0, 0, 0}, @@ -100,6 +137,22 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { {-2, 6, -13, 37, 115, -20, 9, -4}, {-2, 5, -10, 27, 121, -17, 7, -3}, {-1, 3, -6, 17, 125, -13, 5, -2}, + {0, 1, -3, 8, 127, -7, 3, -1}, + {0, 0, 0, 128, 0, 0, 0, 0}, + {-1, 3, -7, 127, 8, -3, 1, 0}, + {-2, 5, -13, 125, 17, -6, 3, -1}, + {-3, 7, -17, 121, 27, -10, 5, -2}, + {-4, 9, -20, 115, 37, -13, 6, -2}, + {-4, 10, -23, 108, 48, -16, 8, -3}, + {-4, 10, -24, 100, 59, -19, 9, -3}, + {-4, 11, -24, 90, 70, -21, 10, -4}, + {-4, 11, -23, 80, 80, -23, 11, -4}, + {-4, 10, -21, 70, 90, -24, 11, -4}, + {-3, 9, -19, 59, 100, -24, 10, -4}, + {-3, 8, -16, 48, 108, -23, 10, -4}, + {-2, 6, -13, 37, 115, -20, 9, -4}, + {-2, 5, -10, 27, 121, -17, 7, -3}, + {-1, 3, -6, 17, 125, -13, 5, -2}, {0, 1, -3, 8, 127, -7, 3, -1} #elif FILTER_ALPHA_SHARP == 75 /* alpha = 0.75 */ @@ -123,7 +176,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { }; DECLARE_ALIGNED(16, const int16_t, - vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = { + vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS*2][8]) = { /* 8-tap lowpass filter */ /* Hamming window */ {-1, -7, 32, 80, 32, -7, -1, 0}, @@ -141,10 +194,43 @@ 
DECLARE_ALIGNED(16, const int16_t, { 1, -3, -4, 50, 76, 16, -8, 0}, { 1, -3, -5, 45, 78, 20, -8, 0}, { 1, -2, -7, 41, 79, 24, -8, 0}, + { 1, -2, -7, 37, 80, 28, -8, -1}, + {-1, -7, 32, 80, 32, -7, -1, 0}, + {-1, -8, 28, 80, 37, -7, -2, 1}, + { 0, -8, 24, 79, 41, -7, -2, 1}, + { 0, -8, 20, 78, 45, -5, -3, 1}, + { 0, -8, 16, 76, 50, -4, -3, 1}, + { 0, -7, 13, 74, 54, -3, -4, 1}, + { 1, -7, 9, 71, 58, -1, -4, 1}, + { 1, -6, 6, 68, 62, 1, -5, 1}, + { 1, -6, 4, 65, 65, 4, -6, 1}, + { 1, -5, 1, 62, 68, 6, -6, 1}, + { 1, -4, -1, 58, 71, 9, -7, 1}, + { 1, -4, -3, 54, 74, 13, -7, 0}, + { 1, -3, -4, 50, 76, 16, -8, 0}, + { 1, -3, -5, 45, 78, 20, -8, 0}, + { 1, -2, -7, 41, 79, 24, -8, 0}, { 1, -2, -7, 37, 80, 28, -8, -1} }; -DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8]) = { +DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS*2][8]) + = { + {0, 0, 0, 128, 0, 0, 0, 0}, + {0, 1, -5, 125, 8, -2, 1, 0}, + {0, 1, -8, 122, 17, -5, 1, 0}, + {0, 2, -11, 116, 27, -8, 2, 0}, + {0, 3, -14, 110, 37, -10, 2, 0}, + {0, 3, -15, 103, 47, -12, 2, 0}, + {0, 3, -16, 95, 57, -14, 3, 0}, + {0, 3, -16, 86, 67, -15, 3, 0}, + {0, 3, -16, 77, 77, -16, 3, 0}, + {0, 3, -15, 67, 86, -16, 3, 0}, + {0, 3, -14, 57, 95, -16, 3, 0}, + {0, 2, -12, 47, 103, -15, 3, 0}, + {0, 2, -10, 37, 110, -14, 3, 0}, + {0, 2, -8, 27, 116, -11, 2, 0}, + {0, 1, -5, 17, 122, -8, 1, 0}, + {0, 1, -2, 8, 125, -5, 1, 0}, {0, 0, 0, 128, 0, 0, 0, 0}, {0, 1, -5, 125, 8, -2, 1, 0}, {0, 1, -8, 122, 17, -5, 1, 0}, diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h index 1ccfdaac2..765379d35 100644 --- a/vp9/common/vp9_filter.h +++ b/vp9/common/vp9_filter.h @@ -21,11 +21,11 @@ #define SUBPEL_SHIFTS 16 -extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS][8]; -extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8]; -extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]; -extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]; -extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]; +extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS*2][8]; +extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS*2][8]; +extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS*2][8]; +extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS*2][8]; +extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS*2][8]; // The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear // filter kernel as a 2 tap filter. diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c index b3303eb59..ed96292a4 100644 --- a/vp9/common/vp9_mbpitch.c +++ b/vp9/common/vp9_mbpitch.c @@ -71,17 +71,6 @@ static void setup_macroblock(MACROBLOCKD *xd, BLOCKSET bs) { setup_block(&blockd[block + 4], stride, v, v2, stride, ((block - 16) >> 1) * 4 * stride + (block & 1) * 4, bs); } - - // TODO(jkoleszar): this will move once we're actually scaling. 
- xd->scale_factor[0].x_num = 1; - xd->scale_factor[0].x_den = 1; - xd->scale_factor[0].y_num = 1; - xd->scale_factor[0].y_den = 1; - xd->scale_factor[0].x_offset_q4 = 0; - xd->scale_factor[0].y_offset_q4 = 0; - xd->scale_factor[1]= xd->scale_factor[0]; - xd->scale_factor_uv[0] = xd->scale_factor[0]; - xd->scale_factor_uv[1] = xd->scale_factor[1]; } void vp9_setup_block_dptrs(MACROBLOCKD *xd) { diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index e952fe933..0709e37ed 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -145,6 +145,7 @@ typedef struct VP9Common { */ int active_ref_idx[3]; /* each frame can reference 3 buffers */ int new_fb_idx; + struct scale_factors active_ref_scale[3]; YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG temp_scale_frame; diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 3b4b34216..90ecade85 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -17,26 +17,97 @@ #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" -void vp9_setup_interp_filters(MACROBLOCKD *xd, - INTERPOLATIONFILTERTYPE mcomp_filter_type, - VP9_COMMON *cm) { +void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, + YV12_BUFFER_CONFIG *other, + int this_w, int this_h) { + int other_w, other_h; + + other_h = other->y_height; + other_w = other->y_width; + scale->x_num = other_w; + scale->x_den = this_w; + scale->x_offset_q4 = 0; // calculated per-mb + scale->x_step_q4 = 16 * other_w / this_w; + scale->y_num = other_h; + scale->y_den = this_h; + scale->y_offset_q4 = 0; // calculated per-mb + scale->y_step_q4 = 16 * other_h / this_h; + // TODO(agrange): Investigate the best choice of functions to use here // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what // to do at full-pel offsets. The current selection, where the filter is // applied in one direction only, and not at all for 0,0, seems to give the // best quality, but it may be worth trying an additional mode that does // do the filtering on full-pel. - xd->subpix.predict[0][0][0] = vp9_convolve_copy; - xd->subpix.predict[0][0][1] = vp9_convolve_avg; - xd->subpix.predict[0][1][0] = vp9_convolve8_vert; - xd->subpix.predict[0][1][1] = vp9_convolve8_avg_vert; - xd->subpix.predict[1][0][0] = vp9_convolve8_horiz; - xd->subpix.predict[1][0][1] = vp9_convolve8_avg_horiz; - xd->subpix.predict[1][1][0] = vp9_convolve8; - xd->subpix.predict[1][1][1] = vp9_convolve8_avg; - - xd->subpix.x_step_q4 = 16; - xd->subpix.y_step_q4 = 16; + if (scale->x_step_q4 == 16) { + if (scale->y_step_q4 == 16) { + // No scaling in either direction. + scale->predict[0][0][0] = vp9_convolve_copy; + scale->predict[0][0][1] = vp9_convolve_avg; + scale->predict[0][1][0] = vp9_convolve8_vert; + scale->predict[0][1][1] = vp9_convolve8_avg_vert; + scale->predict[1][0][0] = vp9_convolve8_horiz; + scale->predict[1][0][1] = vp9_convolve8_avg_horiz; + } else { + // No scaling in x direction. Must always scale in the y direction. + scale->predict[0][0][0] = vp9_convolve8_vert; + scale->predict[0][0][1] = vp9_convolve8_avg_vert; + scale->predict[0][1][0] = vp9_convolve8_vert; + scale->predict[0][1][1] = vp9_convolve8_avg_vert; + scale->predict[1][0][0] = vp9_convolve8; + scale->predict[1][0][1] = vp9_convolve8_avg; + } + } else { + if (scale->y_step_q4 == 16) { + // No scaling in the y direction. Must always scale in the x direction. 
+ scale->predict[0][0][0] = vp9_convolve8_horiz; + scale->predict[0][0][1] = vp9_convolve8_avg_horiz; + scale->predict[0][1][0] = vp9_convolve8; + scale->predict[0][1][1] = vp9_convolve8_avg; + scale->predict[1][0][0] = vp9_convolve8_horiz; + scale->predict[1][0][1] = vp9_convolve8_avg_horiz; + } else { + // Must always scale in both directions. + scale->predict[0][0][0] = vp9_convolve8; + scale->predict[0][0][1] = vp9_convolve8_avg; + scale->predict[0][1][0] = vp9_convolve8; + scale->predict[0][1][1] = vp9_convolve8_avg; + scale->predict[1][0][0] = vp9_convolve8; + scale->predict[1][0][1] = vp9_convolve8_avg; + } + } + // 2D subpel motion always gets filtered in both directions + scale->predict[1][1][0] = vp9_convolve8; + scale->predict[1][1][1] = vp9_convolve8_avg; +} + +void vp9_setup_interp_filters(MACROBLOCKD *xd, + INTERPOLATIONFILTERTYPE mcomp_filter_type, + VP9_COMMON *cm) { + int i; + + /* Calculate scaling factors for each of the 3 available references */ + for (i = 0; i < 3; ++i) { + if (cm->active_ref_idx[i] >= NUM_YV12_BUFFERS) { + memset(&cm->active_ref_scale[i], 0, sizeof(cm->active_ref_scale[i])); + continue; + } + + vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i], + &cm->yv12_fb[cm->active_ref_idx[i]], + cm->mb_cols * 16, cm->mb_rows * 16); + } + + if (xd->mode_info_context) { + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + + set_scale_factors(xd, + mbmi->ref_frame - 1, + mbmi->second_ref_frame - 1, + cm->active_ref_scale); + } + + switch (mcomp_filter_type) { case EIGHTTAP: case SWITCHABLE: @@ -146,30 +217,50 @@ void vp9_copy_mem8x4_c(const uint8_t *src, } } -static int32_t scale_motion_vector_component(int mv, - int num, - int den, - int offset_q4) { +static void set_scaled_offsets(struct scale_factors *scale, + int row, int col) { + const int x_q4 = 16 * col; + const int y_q4 = 16 * row; + + scale->x_offset_q4 = (x_q4 * scale->x_num / scale->x_den) & 0xf; + scale->y_offset_q4 = (y_q4 * scale->y_num / scale->y_den) & 0xf; +} + +static int32_t scale_motion_vector_component_q3(int mv_q3, + int num, + int den, + int offset_q4) { + // returns the scaled and offset value of the mv component. + const int32_t mv_q4 = mv_q3 << 1; + + /* TODO(jkoleszar): make fixed point, or as a second multiply? */ + return mv_q4 * num / den + offset_q4; +} + +static int32_t scale_motion_vector_component_q4(int mv_q4, + int num, + int den, + int offset_q4) { // returns the scaled and offset value of the mv component. - // input and output mv have the same units -- this would work with either q3 - // or q4 motion vectors. Offset is given as a q4 fractional number. - const int32_t mv_q4 = mv * 16; /* TODO(jkoleszar): make fixed point, or as a second multiply? 
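+   * (Illustrative numbers, not from this patch: mv_q4 = 40 (2.5 px),
+   *  num/den = 320/640, offset_q4 = 3 gives 40 * 320 / 640 + 3 = 23,
+   *  i.e. 1.4375 px in the scaled reference.)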
*/ - return (mv_q4 * num / den + offset_q4 + 8) >> 4; + return mv_q4 * num / den + offset_q4; } -static int_mv32 scale_motion_vector(const int_mv *src_mv, - const struct scale_factors *scale) { +static int_mv32 scale_motion_vector_q3_to_q4( + const int_mv *src_mv, + const struct scale_factors *scale) { // returns mv * scale + offset int_mv32 result; - result.as_mv.row = scale_motion_vector_component(src_mv->as_mv.row, - scale->y_num, scale->y_den, - scale->y_offset_q4); - result.as_mv.col = scale_motion_vector_component(src_mv->as_mv.col, - scale->x_num, scale->x_den, - scale->x_offset_q4); + result.as_mv.row = scale_motion_vector_component_q3(src_mv->as_mv.row, + scale->y_num, + scale->y_den, + scale->y_offset_q4); + result.as_mv.col = scale_motion_vector_component_q3(src_mv->as_mv.col, + scale->x_num, + scale->x_den, + scale->x_offset_q4); return result; } @@ -181,12 +272,13 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, const struct subpix_fn_table *subpix) { int_mv32 mv; - mv = scale_motion_vector(mv_q3, scale); - src = src + (mv.as_mv.row >> 3) * src_stride + (mv.as_mv.col >> 3); - subpix->predict[!!(mv.as_mv.col & 7)][!!(mv.as_mv.row & 7)][do_avg]( + mv = scale_motion_vector_q3_to_q4(mv_q3, scale); + src = src + (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4); + + scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][do_avg]( src, src_stride, dst, dst_stride, - subpix->filter_x[(mv.as_mv.col & 7) << 1], subpix->x_step_q4, - subpix->filter_y[(mv.as_mv.row & 7) << 1], subpix->y_step_q4, + subpix->filter_x[mv.as_mv.col & 15], scale->x_step_q4, + subpix->filter_y[mv.as_mv.row & 15], scale->y_step_q4, w, h); } @@ -205,19 +297,19 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, const int mv_col_q4 = ((fullpel_mv_q3->as_mv.col >> 3) << 4) + (frac_mv_q4->as_mv.col & 0xf); const int scaled_mv_row_q4 = - scale_motion_vector_component(mv_row_q4, scale->y_num, scale->y_den, - scale->y_offset_q4); + scale_motion_vector_component_q4(mv_row_q4, scale->y_num, scale->y_den, + scale->y_offset_q4); const int scaled_mv_col_q4 = - scale_motion_vector_component(mv_col_q4, scale->x_num, scale->x_den, - scale->x_offset_q4); + scale_motion_vector_component_q4(mv_col_q4, scale->x_num, scale->x_den, + scale->x_offset_q4); const int subpel_x = scaled_mv_col_q4 & 15; const int subpel_y = scaled_mv_row_q4 & 15; src = src + (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4); - subpix->predict[!!subpel_x][!!subpel_y][do_avg]( + scale->predict[!!subpel_x][!!subpel_y][do_avg]( src, src_stride, dst, dst_stride, - subpix->filter_x[subpel_x], subpix->x_step_q4, - subpix->filter_y[subpel_y], subpix->y_step_q4, + subpix->filter_x[subpel_x], scale->x_step_q4, + subpix->filter_y[subpel_y], scale->y_step_q4, w, h); } @@ -261,7 +353,9 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1, } /*encoder only*/ -void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) { +void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, + int mb_row, + int mb_col) { int i, j; BLOCKD *blockd = xd->block; @@ -339,11 +433,17 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) { for (i = 16; i < 24; i += 2) { const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; + const int x = 4 * (i & 1); + const int y = ((i - 16) >> 1) * 4; + int which_mv; BLOCKD *d0 = &blockd[i]; BLOCKD *d1 = &blockd[i + 1]; for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + set_scaled_offsets(&xd->scale_factor_uv[which_mv], + mb_row 
* 8 + y, mb_col * 8 + x); + build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, &xd->subpix); } @@ -389,7 +489,9 @@ static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { /*encoder only*/ void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, uint8_t *dst_y, - int dst_ystride) { + int dst_ystride, + int mb_row, + int mb_col) { const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; int which_mv; @@ -399,14 +501,19 @@ void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, : xd->mode_info_context->mbmi.need_to_clamp_mvs; uint8_t *base_pre; int_mv ymv; + int pre_stride; ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer; + pre_stride = which_mv ? xd->second_pre.y_stride + : xd->pre.y_stride; if (clamp_mvs) clamp_mv_to_umv_border(&ymv.as_mv, xd); - vp9_build_inter_predictor(base_pre, xd->block[0].pre_stride, + set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16); + + vp9_build_inter_predictor(base_pre, pre_stride, dst_y, dst_ystride, &ymv, &xd->scale_factor[which_mv], 16, 16, which_mv, &xd->subpix); @@ -416,7 +523,9 @@ void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, uint8_t *dst_u, uint8_t *dst_v, - int dst_uvstride) { + int dst_uvstride, + int mb_row, + int mb_col) { const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; int which_mv; @@ -425,7 +534,8 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv : xd->mode_info_context->mbmi.need_to_clamp_mvs; uint8_t *uptr, *vptr; - int pre_stride = xd->block[0].pre_stride; + int pre_stride = which_mv ? xd->second_pre.y_stride + : xd->pre.y_stride; int_mv _o16x16mv; int_mv _16x16mv; @@ -456,6 +566,9 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer); vptr = (which_mv ? 
xd->second_pre.v_buffer : xd->pre.v_buffer); + set_scaled_offsets(&xd->scale_factor_uv[which_mv], + mb_row * 16, mb_col * 16); + vp9_build_inter_predictor_q4(uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv, @@ -475,7 +588,9 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, uint8_t *dst_u, uint8_t *dst_v, int dst_ystride, - int dst_uvstride) { + int dst_uvstride, + int mb_row, + int mb_col) { uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; @@ -488,27 +603,43 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; + int scaled_uv_offset; x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3); x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3); - x->pre.y_buffer = y1 + y_idx * 16 * x->pre.y_stride + x_idx * 16; - x->pre.u_buffer = u1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; - x->pre.v_buffer = v1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; + x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->pre.y_stride, + &x->scale_factor[0]); + scaled_uv_offset = scaled_buffer_offset(x_idx * 8, + y_idx * 8, + x->pre.uv_stride, + &x->scale_factor_uv[0]); + x->pre.u_buffer = u1 + scaled_uv_offset; + x->pre.v_buffer = v1 + scaled_uv_offset; if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + y_idx * 16 * x->pre.y_stride + x_idx * 16; - x->second_pre.u_buffer = u2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; - x->second_pre.v_buffer = v2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8; + x->second_pre.y_buffer = y2 + + scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->second_pre.y_stride, + &x->scale_factor[1]); + scaled_uv_offset = scaled_buffer_offset(x_idx * 8, + y_idx * 8, + x->second_pre.uv_stride, + &x->scale_factor_uv[1]); + x->second_pre.u_buffer = u2 + scaled_uv_offset; + x->second_pre.v_buffer = v2 + scaled_uv_offset; } vp9_build_inter16x16_predictors_mb(x, dst_y + y_idx * 16 * dst_ystride + x_idx * 16, dst_u + y_idx * 8 * dst_uvstride + x_idx * 8, dst_v + y_idx * 8 * dst_uvstride + x_idx * 8, - dst_ystride, dst_uvstride); + dst_ystride, dst_uvstride, mb_row + y_idx, mb_col + x_idx); } x->mb_to_top_edge = edge[0]; @@ -539,7 +670,9 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, uint8_t *dst_u, uint8_t *dst_v, int dst_ystride, - int dst_uvstride) { + int dst_uvstride, + int mb_row, + int mb_col) { uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer; @@ -552,27 +685,43 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; + int scaled_uv_offset; x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3); x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3); x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3); x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3); - x->pre.y_buffer = y1 + y_idx * 32 * x->pre.y_stride + x_idx * 32; - x->pre.u_buffer = u1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; - x->pre.v_buffer = v1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 32, + y_idx * 32, + x->pre.y_stride, + &x->scale_factor[0]); + scaled_uv_offset = 
scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->pre.uv_stride, + &x->scale_factor_uv[0]); + x->pre.u_buffer = u1 + scaled_uv_offset; + x->pre.v_buffer = v1 + scaled_uv_offset; if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + y_idx * 32 * x->pre.y_stride + x_idx * 32; - x->second_pre.u_buffer = u2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; - x->second_pre.v_buffer = v2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + x->second_pre.y_buffer = y2 + + scaled_buffer_offset(x_idx * 32, + y_idx * 32, + x->second_pre.y_stride, + &x->scale_factor[1]); + scaled_uv_offset = scaled_buffer_offset(x_idx * 16, + y_idx * 16, + x->second_pre.uv_stride, + &x->scale_factor_uv[1]); + x->second_pre.u_buffer = u2 + scaled_uv_offset; + x->second_pre.v_buffer = v2 + scaled_uv_offset; } vp9_build_inter32x32_predictors_sb(x, dst_y + y_idx * 32 * dst_ystride + x_idx * 32, dst_u + y_idx * 16 * dst_uvstride + x_idx * 16, dst_v + y_idx * 16 * dst_uvstride + x_idx * 16, - dst_ystride, dst_uvstride); + dst_ystride, dst_uvstride, mb_row + y_idx * 2, mb_col + x_idx * 2); } x->mb_to_top_edge = edge[0]; @@ -769,17 +918,23 @@ void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd, uint8_t *dst_u, uint8_t *dst_v, int dst_ystride, - int dst_uvstride) { - vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride); - vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride); + int dst_uvstride, + int mb_row, + int mb_col) { + vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride, mb_row, mb_col); + vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride, + mb_row, mb_col); } -void vp9_build_inter_predictors_mb(MACROBLOCKD *xd) { +void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, + int mb_row, + int mb_col) { if (xd->mode_info_context->mbmi.mode != SPLITMV) { vp9_build_inter16x16_predictors_mb(xd, xd->predictor, &xd->predictor[256], - &xd->predictor[320], 16, 8); + &xd->predictor[320], 16, 8, + mb_row, mb_col); #if CONFIG_COMP_INTERINTRA_PRED if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 320bac276..831ce2a73 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -18,42 +18,60 @@ struct subpix_fn_table; void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, uint8_t *dst_y, - int dst_ystride); + int dst_ystride, + int mb_row, + int mb_col); void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, uint8_t *dst_u, uint8_t *dst_v, - int dst_uvstride); + int dst_uvstride, + int mb_row, + int mb_col); void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd, uint8_t *dst_y, uint8_t *dst_u, uint8_t *dst_v, int dst_ystride, - int dst_uvstride); + int dst_uvstride, + int mb_row, + int mb_col); void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, uint8_t *dst_y, uint8_t *dst_u, uint8_t *dst_v, int dst_ystride, - int dst_uvstride); + int dst_uvstride, + int mb_row, + int mb_col); void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, uint8_t *dst_y, uint8_t *dst_u, uint8_t *dst_v, int dst_ystride, - int dst_uvstride); + int dst_uvstride, + int mb_row, + int mb_col); -void vp9_build_inter_predictors_mb(MACROBLOCKD *xd); +void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, + int mb_row, + int mb_col); -void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd); +void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, + int mb_row, + int mb_col); void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERPOLATIONFILTERTYPE filter, VP9_COMMON *cm); 
+void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, + YV12_BUFFER_CONFIG *other, + int this_w, int this_h); + void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int_mv *mv_q3, @@ -69,4 +87,55 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, int w, int h, int do_avg, const struct subpix_fn_table *subpix); +static int scale_value_x(int val, const struct scale_factors *scale) { + return val * scale->x_num / scale->x_den; +} + +static int scale_value_y(int val, const struct scale_factors *scale) { + return val * scale->y_num / scale->y_den; +} + +static int scaled_buffer_offset(int x_offset, + int y_offset, + int stride, + const struct scale_factors *scale) { + return scale_value_y(y_offset, scale) * stride + + scale_value_x(x_offset, scale); +} + +static void setup_pred_block(YV12_BUFFER_CONFIG *dst, + const YV12_BUFFER_CONFIG *src, + int mb_row, int mb_col, + const struct scale_factors *scale, + const struct scale_factors *scale_uv) { + const int recon_y_stride = src->y_stride; + const int recon_uv_stride = src->uv_stride; + int recon_yoffset; + int recon_uvoffset; + + if (scale) { + recon_yoffset = scaled_buffer_offset(16 * mb_col, 16 * mb_row, + recon_y_stride, scale); + recon_uvoffset = scaled_buffer_offset(8 * mb_col, 8 * mb_row, + recon_uv_stride, scale_uv); + } else { + recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col; + recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col; + } + *dst = *src; + dst->y_buffer += recon_yoffset; + dst->u_buffer += recon_uvoffset; + dst->v_buffer += recon_uvoffset; +} + +static void set_scale_factors(MACROBLOCKD *xd, + int ref0, int ref1, + struct scale_factors scale_factor[MAX_REF_FRAMES]) { + + xd->scale_factor[0] = scale_factor[ref0 >= 0 ? ref0 : 0]; + xd->scale_factor[1] = scale_factor[ref1 >= 0 ? 
ref1 : 0]; + xd->scale_factor_uv[0] = xd->scale_factor[0]; + xd->scale_factor_uv[1] = xd->scale_factor[1]; +} + #endif // VP9_COMMON_VP9_RECONINTER_H_ diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index a1225f1dc..121d6e339 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -12,6 +12,7 @@ #include "vp9/decoder/vp9_treereader.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_reconinter.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/common/vp9_findnearmv.h" #include "vp9/common/vp9_common.h" @@ -749,21 +750,25 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int_mv nearest_second, nearby_second, best_mv_second; vp9_prob mv_ref_p [VP9_MVREFS - 1]; - int recon_y_stride, recon_yoffset; - int recon_uv_stride, recon_uvoffset; MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; { int ref_fb_idx; + int recon_y_stride, recon_yoffset; + int recon_uv_stride, recon_uvoffset; /* Select the appropriate reference frame for this MB */ ref_fb_idx = cm->active_ref_idx[ref_frame - 1]; - recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride ; + recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); + recon_yoffset = scaled_buffer_offset(mb_col * 16, mb_row * 16, + recon_y_stride, + &xd->scale_factor[0]); + recon_uvoffset = scaled_buffer_offset(mb_col * 8, mb_row * 8, + recon_uv_stride, + &xd->scale_factor_uv[0]); xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; @@ -853,9 +858,21 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->second_ref_frame = 1; if (mbmi->second_ref_frame > 0) { int second_ref_fb_idx; + int recon_y_stride, recon_yoffset; + int recon_uv_stride, recon_uvoffset; + /* Select the appropriate reference frame for this MB */ second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1]; + recon_y_stride = cm->yv12_fb[second_ref_fb_idx].y_stride; + recon_uv_stride = cm->yv12_fb[second_ref_fb_idx].uv_stride; + + recon_yoffset = scaled_buffer_offset(mb_col * 16, mb_row * 16, + recon_y_stride, + &xd->scale_factor[1]); + recon_uvoffset = scaled_buffer_offset(mb_col * 8, mb_row * 8, + recon_uv_stride, + &xd->scale_factor_uv[1]); xd->second_pre.y_buffer = cm->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset; xd->second_pre.u_buffer = @@ -1089,7 +1106,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, break; case NEWMV: - read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc); read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc, xd->allow_high_precision_mv); @@ -1230,8 +1246,12 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, MODE_INFO *mi = xd->mode_info_context; MODE_INFO *prev_mi = xd->prev_mode_info_context; - if (pbi->common.frame_type == KEY_FRAME) + if (pbi->common.frame_type == KEY_FRAME) { kfread_modes(pbi, mi, mb_row, mb_col, bc); - else + } else { read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, bc); + set_scale_factors(xd, + mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1, + pbi->common.active_ref_scale); + } } diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 830b6fdfe..96fcd4b2c 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -147,7 
+147,8 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { /* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it * to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy. */ -static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { +static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mb_row, int mb_col) { BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { @@ -168,21 +169,24 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); } else if (sb_type == BLOCK_SIZE_SB32X32) { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); } else { vp9_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); #if CONFIG_COMP_INTERINTRA_PRED if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { vp9_build_interintra_16x16_predictors_mb(xd, @@ -599,7 +603,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. */ - skip_recon_mb(pbi, xd); + skip_recon_mb(pbi, xd, mb_row, mb_col); return; } @@ -610,7 +614,8 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, } else { vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); + xd->dst.y_stride, xd->dst.uv_stride, + mb_row, mb_col); } /* dequantization and idct */ @@ -720,7 +725,7 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. */ - skip_recon_mb(pbi, xd); + skip_recon_mb(pbi, xd, mb_row, mb_col); return; } @@ -731,7 +736,8 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, } else { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); + xd->dst.y_stride, xd->dst.uv_stride, + mb_row, mb_col); } /* dequantization and idct */ @@ -832,7 +838,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, /* Special case: Force the loopfilter to skip when eobtotal and mb_skip_coeff are zero. 
*/ xd->mode_info_context->mbmi.mb_skip_coeff = 1; - skip_recon_mb(pbi, xd); + skip_recon_mb(pbi, xd, mb_row, mb_col); return; } #ifdef DEC_DEBUG @@ -859,7 +865,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->mode_info_context->mbmi.mode, tx_size, xd->mode_info_context->mbmi.interp_filter); #endif - vp9_build_inter_predictors_mb(xd); + vp9_build_inter_predictors_mb(xd, mb_row, mb_col); } if (tx_size == TX_16X16) { @@ -966,18 +972,14 @@ static void set_refs(VP9D_COMP *pbi, int block_size, MB_MODE_INFO *const mbmi = &mi->mbmi; if (mbmi->ref_frame > INTRA_FRAME) { - int ref_fb_idx, ref_yoffset, ref_uvoffset, ref_y_stride, ref_uv_stride; + int ref_fb_idx; /* Select the appropriate reference frame for this MB */ ref_fb_idx = cm->active_ref_idx[mbmi->ref_frame - 1]; - - ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - ref_yoffset = mb_row * 16 * ref_y_stride + 16 * mb_col; - xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + ref_yoffset; - ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - ref_uvoffset = mb_row * 8 * ref_uv_stride + 8 * mb_col; - xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + ref_uvoffset; - xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + ref_uvoffset; + xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; + xd->scale_factor_uv[0] = cm->active_ref_scale[mbmi->ref_frame - 1]; + setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx], mb_row, mb_col, + &xd->scale_factor[0], &xd->scale_factor_uv[0]); /* propagate errors from reference frames */ xd->corrupted |= cm->yv12_fb[ref_fb_idx].corrupted; @@ -988,12 +990,9 @@ static void set_refs(VP9D_COMP *pbi, int block_size, /* Select the appropriate reference frame for this MB */ second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1]; - xd->second_pre.y_buffer = - cm->yv12_fb[second_ref_fb_idx].y_buffer + ref_yoffset; - xd->second_pre.u_buffer = - cm->yv12_fb[second_ref_fb_idx].u_buffer + ref_uvoffset; - xd->second_pre.v_buffer = - cm->yv12_fb[second_ref_fb_idx].v_buffer + ref_uvoffset; + setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx], + mb_row, mb_col, + &xd->scale_factor[1], &xd->scale_factor_uv[1]); /* propagate errors from reference frames */ xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted; @@ -1204,6 +1203,26 @@ static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { } } +static void update_frame_size(VP9D_COMP *pbi) { + VP9_COMMON *cm = &pbi->common; + + /* our internal buffers are always multiples of 16 */ + int width = (cm->Width + 15) & ~15; + int height = (cm->Height + 15) & ~15; + + cm->mb_rows = height >> 4; + cm->mb_cols = width >> 4; + cm->MBs = cm->mb_rows * cm->mb_cols; + cm->mode_info_stride = cm->mb_cols + 1; + memset(cm->mip, 0, + (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO)); + vp9_update_mode_info_border(cm, cm->mip); + + cm->mi = cm->mip + cm->mode_info_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; + vp9_update_mode_info_in_image(cm, cm->mi); +} + int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { BOOL_DECODER header_bc, residual_bc; VP9_COMMON *const pc = &pbi->common; @@ -1281,9 +1300,25 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { "Invalid frame height"); } - if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffers"); + if (!pbi->initial_width || !pbi->initial_height) { + if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height)) + 
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffers"); + pbi->initial_width = pc->Width; + pbi->initial_height = pc->Height; + } + + if (pc->Width > pbi->initial_width) { + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Frame width too large"); + } + + if (pc->Height > pbi->initial_height) { + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Frame height too large"); + } + + update_frame_size(pbi); } } } @@ -1295,6 +1330,11 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { init_frame(pbi); + /* Reset the frame pointers to the current frame size */ + vp8_yv12_realloc_frame_buffer(&pc->yv12_fb[pc->new_fb_idx], + pc->mb_cols * 16, pc->mb_rows * 16, + VP9BORDERINPIXELS); + if (vp9_start_decode(&header_bc, data, (unsigned int)first_partition_length_in_bytes)) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index 8c1f76e73..0e6d059af 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -38,6 +38,8 @@ typedef struct VP9Decompressor { int decoded_key_frame; + int initial_width; + int initial_height; } VP9D_COMP; int vp9_decode_frame(VP9D_COMP *cpi, const unsigned char **p_data_end); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index fec5a7c61..eaed1a964 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -654,7 +654,7 @@ static void set_offsets(VP9_COMP *cpi, // Set up destination pointers setup_pred_block(&xd->dst, &cm->yv12_fb[dst_fb_idx], - mb_row, mb_col); + mb_row, mb_col, NULL, NULL); /* Set up limit values for MV components to prevent them from * extending beyond the UMV borders assuming 16x16 block size */ @@ -679,7 +679,7 @@ static void set_offsets(VP9_COMP *cpi, xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end); /* set up source buffers */ - setup_pred_block(&x->src, cpi->Source, mb_row, mb_col); + setup_pred_block(&x->src, cpi->Source, mb_row, mb_col, NULL, NULL); /* R/D setup */ x->rddiv = cpi->RDDIV; @@ -1272,9 +1272,6 @@ static void encode_frame_internal(VP9_COMP *cpi) { totalrate = 0; - // Functions setup for all frame types so we can use MC in AltRef - vp9_setup_interp_filters(xd, cm->mcomp_filter_type, cm); - // Reset frame count of inter 0,0 motion vector usage. 
cpi->inter_zz_count = 0; @@ -2100,7 +2097,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, setup_pred_block(&xd->pre, &cpi->common.yv12_fb[ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[0], &xd->scale_factor_uv[0]); if (mbmi->second_ref_frame > 0) { int second_ref_fb_idx; @@ -2114,11 +2112,12 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, setup_pred_block(&xd->second_pre, &cpi->common.yv12_fb[second_ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[1], &xd->scale_factor_uv[1]); } if (!x->skip) { - vp9_encode_inter16x16(x); + vp9_encode_inter16x16(x, mb_row, mb_col); // Clear mb_skip_coeff if mb_no_coeff_skip is not set if (!cpi->common.mb_no_coeff_skip) @@ -2130,7 +2129,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, - xd->dst.uv_stride); + xd->dst.uv_stride, + mb_row, mb_col); #if CONFIG_COMP_INTERINTRA_PRED if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { vp9_build_interintra_16x16_predictors_mb(xd, @@ -2327,7 +2327,8 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, setup_pred_block(&xd->pre, &cpi->common.yv12_fb[ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[0], &xd->scale_factor_uv[0]); if (xd->mode_info_context->mbmi.second_ref_frame > 0) { int second_ref_fb_idx; @@ -2341,12 +2342,14 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, setup_pred_block(&xd->second_pre, &cpi->common.yv12_fb[second_ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[1], &xd->scale_factor_uv[1]); } vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); + xd->dst.y_stride, xd->dst.uv_stride, + mb_row, mb_col); } if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { @@ -2553,7 +2556,8 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, setup_pred_block(&xd->pre, &cpi->common.yv12_fb[ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[0], &xd->scale_factor_uv[0]); if (xd->mode_info_context->mbmi.second_ref_frame > 0) { int second_ref_fb_idx; @@ -2567,12 +2571,14 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, setup_pred_block(&xd->second_pre, &cpi->common.yv12_fb[second_ref_fb_idx], - mb_row, mb_col); + mb_row, mb_col, + &xd->scale_factor[1], &xd->scale_factor_uv[1]); } vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); + xd->dst.y_stride, xd->dst.uv_stride, + mb_row, mb_col); } if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index b38d3a54c..ee08d263c 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -684,21 +684,21 @@ void vp9_fidct_mb(MACROBLOCK *x) { } } -void vp9_encode_inter16x16(MACROBLOCK *x) { +void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col) { MACROBLOCKD *const xd = &x->e_mbd; - vp9_build_inter_predictors_mb(xd); + vp9_build_inter_predictors_mb(xd, mb_row, mb_col); subtract_mb(x); vp9_fidct_mb(x); vp9_recon_mb(xd); } /* this function is used by first pass only */ -void vp9_encode_inter16x16y(MACROBLOCK *x) { +void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) { MACROBLOCKD *xd = &x->e_mbd; BLOCK *b = &x->block[0]; - vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16); + vp9_build_inter16x16_predictors_mby(xd, 
xd->predictor, 16, mb_row, mb_col); vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index f3c679227..6356df215 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -23,14 +23,14 @@ typedef struct { #include "vp9/encoder/vp9_onyx_int.h" struct VP9_ENCODER_RTCD; -void vp9_encode_inter16x16(MACROBLOCK *x); +void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col); void vp9_transform_mbuv_4x4(MACROBLOCK *x); void vp9_transform_mby_4x4(MACROBLOCK *x); void vp9_optimize_mby_4x4(MACROBLOCK *x); void vp9_optimize_mbuv_4x4(MACROBLOCK *x); -void vp9_encode_inter16x16y(MACROBLOCK *x); +void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col); void vp9_transform_mb_8x8(MACROBLOCK *mb); void vp9_transform_mby_8x8(MACROBLOCK *x); diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 6aee9ef0d..05a0f6f04 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -613,7 +613,7 @@ void vp9_first_pass(VP9_COMP *cpi) { this_error = motion_error; vp9_set_mbmode_and_mvs(x, NEWMV, &mv); xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_encode_inter16x16y(x); + vp9_encode_inter16x16y(x, mb_row, mb_col); sum_mvr += mv.as_mv.row; sum_mvr_abs += abs(mv.as_mv.row); sum_mvc += mv.as_mv.col; diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 40823f60c..bc06c9458 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -20,7 +20,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, int_mv *ref_mv, - int_mv *dst_mv) { + int_mv *dst_mv, + int mb_row, + int mb_col) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; BLOCK *b = &x->block[0]; @@ -72,7 +74,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, } vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv); - vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16); + vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col); best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride, xd->predictor, 16, INT_MAX); @@ -93,8 +95,9 @@ static int do_16x16_motion_search YV12_BUFFER_CONFIG *buf, int buf_mb_y_offset, YV12_BUFFER_CONFIG *ref, - int mb_y_offset -) { + int mb_y_offset, + int mb_row, + int mb_col) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err, tmp_err; @@ -124,7 +127,7 @@ static int do_16x16_motion_search // Test last reference frame using the previous best mv as the // starting point (best reference) for the search - tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv); + tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col); if (tmp_err < err) { err = tmp_err; dst_mv->as_int = tmp_mv.as_int; @@ -136,7 +139,8 @@ static int do_16x16_motion_search int_mv zero_ref_mv, tmp_mv; zero_ref_mv.as_int = 0; - tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv); + tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv, + mb_row, mb_col); if (tmp_err < err) { dst_mv->as_int = tmp_mv.as_int; err = tmp_err; @@ -229,7 +233,9 @@ static void update_mbgraph_mb_stats int gld_y_offset, YV12_BUFFER_CONFIG *alt_ref, int_mv *prev_alt_ref_mv, - int arf_y_offset + int arf_y_offset, + int mb_row, + int mb_col ) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -249,7 +255,8 @@ static void update_mbgraph_mb_stats int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv, &stats->ref[GOLDEN_FRAME].m.mv, buf, 
mb_y_offset, - golden_ref, gld_y_offset); + golden_ref, gld_y_offset, + mb_row, mb_col); stats->ref[GOLDEN_FRAME].err = g_motion_error; } else { stats->ref[GOLDEN_FRAME].err = INT_MAX; @@ -326,7 +333,8 @@ static void update_mbgraph_frame_stats update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset, golden_ref, &gld_left_mv, gld_y_in_offset, - alt_ref, &arf_left_mv, arf_y_in_offset); + alt_ref, &arf_left_mv, arf_y_in_offset, + mb_row, mb_col); arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int; gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int; if (mb_col == 0) { diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 8d48e4029..42f327674 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -10,6 +10,7 @@ #include "vpx_config.h" +#include "vp9/common/vp9_filter.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_onyx_int.h" @@ -2198,6 +2199,69 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { } #endif +static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, + YV12_BUFFER_CONFIG *dst_fb) { + const int in_w = src_fb->y_width; + const int in_h = src_fb->y_height; + const int out_w = dst_fb->y_width; + const int out_h = dst_fb->y_height; + int x, y; + + for (y = 0; y < out_h; y += 16) { + for (x = 0; x < out_w; x += 16) { + int x_q4 = x * 16 * in_w / out_w; + int y_q4 = y * 16 * in_h / out_h; + uint8_t *src, *dst; + int src_stride, dst_stride; + + + src = src_fb->y_buffer + + y * in_h / out_h * src_fb->y_stride + + x * in_w / out_w; + dst = dst_fb->y_buffer + + y * dst_fb->y_stride + + x; + src_stride = src_fb->y_stride; + dst_stride = dst_fb->y_stride; + + vp9_convolve8(src, src_stride, dst, dst_stride, + vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, + vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, + 16, 16); + + x_q4 >>= 1; + y_q4 >>= 1; + src_stride = src_fb->uv_stride; + dst_stride = dst_fb->uv_stride; + + src = src_fb->u_buffer + + y / 2 * in_h / out_h * src_fb->uv_stride + + x / 2 * in_w / out_w; + dst = dst_fb->u_buffer + + y / 2 * dst_fb->uv_stride + + x / 2; + vp9_convolve8(src, src_stride, dst, dst_stride, + vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, + vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, + 8, 8); + + src = src_fb->v_buffer + + y / 2 * in_h / out_h * src_fb->uv_stride + + x / 2 * in_w / out_w; + dst = dst_fb->v_buffer + + y / 2 * dst_fb->uv_stride + + x / 2; + vp9_convolve8(src, src_stride, dst, dst_stride, + vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, + vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, + 8, 8); + } + } + + vp8_yv12_extend_frame_borders(dst_fb); +} + + static void update_alt_ref_frame_stats(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; @@ -2583,6 +2647,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, int mcomp_filter_index = 0; int64_t mcomp_filter_cost[4]; + /* Scale the source buffer, if required */ + if (cm->Width != cpi->un_scaled_source->y_width || + cm->Height != cpi->un_scaled_source->y_height) { + scale_and_extend_frame(cpi->un_scaled_source, &cpi->scaled_source); + cpi->Source = &cpi->scaled_source; + } else { + cpi->Source = cpi->un_scaled_source; + } + // Clear down mmx registers to allow floating point in what follows vp9_clear_system_state(); @@ -3761,23 +3834,6 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cm->mb_cols * 16, cm->mb_rows * 16, VP9BORDERINPIXELS); - /* Disable any references that have different size */ - if 
((cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_width != - cm->yv12_fb[cm->new_fb_idx].y_width) || - (cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_height != - cm->yv12_fb[cm->new_fb_idx].y_height)) - cpi->ref_frame_flags &= ~VP9_LAST_FLAG; - if ((cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]].y_width != - cm->yv12_fb[cm->new_fb_idx].y_width) || - (cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]].y_height != - cm->yv12_fb[cm->new_fb_idx].y_height)) - cpi->ref_frame_flags &= ~VP9_GOLD_FLAG; - if ((cm->yv12_fb[cm->active_ref_idx[cpi->alt_fb_idx]].y_width != - cm->yv12_fb[cm->new_fb_idx].y_width) || - (cm->yv12_fb[cm->active_ref_idx[cpi->alt_fb_idx]].y_height != - cm->yv12_fb[cm->new_fb_idx].y_height)) - cpi->ref_frame_flags &= ~VP9_ALT_FLAG; - vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm); if (cpi->pass == 1) { Pass1Encode(cpi, size, dest, frame_flags); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e2845a618..9d51e1cba 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1671,8 +1671,9 @@ static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, } static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int *skip, int fullpixel) { - vp9_build_inter4x4_predictors_mbuv(&x->e_mbd); + int *distortion, int *skip, int fullpixel, + int mb_row, int mb_col) { + vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col); vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1); @@ -3108,12 +3109,24 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int_mv frame_nearest_mv[MAX_REF_FRAMES], int_mv frame_near_mv[MAX_REF_FRAMES], int frame_mdcounts[4][4], - YV12_BUFFER_CONFIG yv12_mb[4]) { - YV12_BUFFER_CONFIG *yv12 = &cpi->common.yv12_fb[idx]; + YV12_BUFFER_CONFIG yv12_mb[4], + struct scale_factors scale[MAX_REF_FRAMES]) { + VP9_COMMON *cm = &cpi->common; + YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.active_ref_idx[idx]]; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - setup_pred_block(&yv12_mb[frame_type], yv12, mb_row, mb_col); + // set up scaling factors + scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1]; + scale[frame_type].x_offset_q4 = + (mb_col * 16 * scale[frame_type].x_num / scale[frame_type].x_den) & 0xf; + scale[frame_type].y_offset_q4 = + (mb_row * 16 * scale[frame_type].y_num / scale[frame_type].y_den) & 0xf; + + // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this + // use the UV scaling factors. 
@@ -3198,7 +3211,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                  int mode_index,
                                  INTERPOLATIONFILTERTYPE *best_filter,
                                  int_mv frame_mv[MB_MODE_COUNT]
-                                               [MAX_REF_FRAMES]) {
+                                               [MAX_REF_FRAMES],
+                                 int mb_row, int mb_col) {
   VP9_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
@@ -3376,7 +3390,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                          xd->dst.u_buffer, xd->dst.v_buffer,
                                          xd->dst.y_stride,
-                                         xd->dst.uv_stride);
+                                         xd->dst.uv_stride,
+                                         mb_row, mb_col);
       var = vp9_variance64x64(*(b->base_src), b->src_stride,
                               xd->dst.y_buffer, xd->dst.y_stride, &sse);
       // Note our transform coeffs are 8 times an orthogonal transform.
@@ -3460,7 +3475,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                          xd->dst.u_buffer, xd->dst.v_buffer,
                                          xd->dst.y_stride,
-                                         xd->dst.uv_stride);
+                                         xd->dst.uv_stride,
+                                         mb_row, mb_col);
       var = vp9_variance32x32(*(b->base_src), b->src_stride,
                               xd->dst.y_buffer, xd->dst.y_stride, &sse);
       // Note our transform coeffs are 8 times an orthogonal transform.
@@ -3542,7 +3558,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       int tmp_dist_y, tmp_dist_u, tmp_dist_v;
       // TODO(jkoleszar): these 2 y/uv should be replaced with one call to
       // vp9_build_interintra_16x16_predictors_mb().
-      vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
+      vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16,
+                                          mb_row, mb_col);
 #if CONFIG_COMP_INTERINTRA_PRED
       if (is_comp_interintra_pred) {
@@ -3551,7 +3568,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 #endif
       vp9_build_inter16x16_predictors_mbuv(xd, xd->predictor + 256,
-                                           xd->predictor + 320, 8);
+                                           xd->predictor + 320, 8,
+                                           mb_row, mb_col);
 #if CONFIG_COMP_INTERINTRA_PRED
       if (is_comp_interintra_pred) {
@@ -3647,25 +3665,29 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                        xd->dst.u_buffer, xd->dst.v_buffer,
                                        xd->dst.y_stride,
-                                       xd->dst.uv_stride);
+                                       xd->dst.uv_stride,
+                                       mb_row, mb_col);
   } else if (block_size == BLOCK_32X32) {
     vp9_build_inter32x32_predictors_sb(xd,
                                        xd->dst.y_buffer,
                                        xd->dst.u_buffer, xd->dst.v_buffer,
                                        xd->dst.y_stride,
-                                       xd->dst.uv_stride);
+                                       xd->dst.uv_stride,
+                                       mb_row, mb_col);
   } else {
     // TODO(jkoleszar): These y/uv fns can be replaced with their mb
     // equivalent
-    vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
+    vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16,
+                                        mb_row, mb_col);
 #if CONFIG_COMP_INTERINTRA_PRED
     if (is_comp_interintra_pred) {
       vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
     }
 #endif
     vp9_build_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
-                                         &xd->predictor[320], 8);
+                                         &xd->predictor[320], 8,
+                                         mb_row, mb_col);
 #if CONFIG_COMP_INTERINTRA_PRED
     if (is_comp_interintra_pred) {
       vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
@@ -3860,6 +3882,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
                                              cpi->common.y1dc_delta_q);
 
+  struct scale_factors scale_factor[4];
+
   vpx_memset(mode8x8, 0, sizeof(mode8x8));
   vpx_memset(&frame_mv, 0, sizeof(frame_mv));
   vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -3883,24 +3907,24 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
-    setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->lst_fb_idx],
+    setup_buffer_inter(cpi, x, cpi->lst_fb_idx,
                        LAST_FRAME, BLOCK_16X16, mb_row, mb_col,
                        frame_mv[NEARESTMV], frame_mv[NEARMV],
-                       frame_mdcounts, yv12_mb);
+                       frame_mdcounts, yv12_mb, scale_factor);
   }
 
   if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
-    setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->gld_fb_idx],
+    setup_buffer_inter(cpi, x, cpi->gld_fb_idx,
                        GOLDEN_FRAME, BLOCK_16X16, mb_row, mb_col,
                        frame_mv[NEARESTMV], frame_mv[NEARMV],
-                       frame_mdcounts, yv12_mb);
+                       frame_mdcounts, yv12_mb, scale_factor);
   }
 
   if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
-    setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->alt_fb_idx],
+    setup_buffer_inter(cpi, x, cpi->alt_fb_idx,
                        ALTREF_FRAME, BLOCK_16X16, mb_row, mb_col,
                        frame_mv[NEARESTMV], frame_mv[NEARMV],
-                       frame_mdcounts, yv12_mb);
+                       frame_mdcounts, yv12_mb, scale_factor);
   }
 
   *returnintra = INT64_MAX;
@@ -3955,6 +3979,10 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
     mbmi->interp_filter = cm->mcomp_filter_type;
+
+    set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+                      scale_factor);
+
     vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
 
     // Test best rd so far against threshold for trying this mode.
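set_scale_factors() itself is declared with the other prediction helpers in vp9_reconinter.h (see the diffstat) and its body is not quoted in this patch. A plausible sketch of its shape, assuming it only installs the chosen references' factors into the macroblockd (the scale_factor[]/scale_factor_uv[] fields are visible in the temporal filter hunk near the end of this patch):

    /* Hypothetical sketch: slot 0 is the first reference, slot 1 the
     * second; intra (ref <= 0) leaves the slots untouched. */
    static void set_scale_factors_sketch(
        MACROBLOCKD *xd, int ref_frame, int second_ref_frame,
        struct scale_factors scale[MAX_REF_FRAMES]) {
      if (ref_frame > 0) {
        xd->scale_factor[0] = scale[ref_frame];
        xd->scale_factor_uv[0] = scale[ref_frame];
      }
      if (second_ref_frame > 0) {
        xd->scale_factor[1] = scale[second_ref_frame];
        xd->scale_factor_uv[1] = scale[second_ref_frame];
      }
    }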
@@ -3970,6 +3998,18 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         !(cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))
       continue;
 
+    // only scale on zeromv.
+    if (mbmi->ref_frame > 0 &&
+        (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
+         yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
+        this_mode != ZEROMV)
+      continue;
+    if (mbmi->second_ref_frame > 0 &&
+        (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
+         yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
+        this_mode != ZEROMV)
+      continue;
+
     // current coding mode under rate-distortion optimization test loop
 #if CONFIG_COMP_INTERINTRA_PRED
     mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
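These two continue statements are the heart of the mode gating: when a reference buffer's dimensions differ from the coded frame, every mode except ZEROMV is skipped, since only the ZEROMV predictor can be formed by pure spatial scaling; a non-zero motion vector would additionally need its units rescaled. Restated as a predicate (helper name is hypothetical):

    /* A mode may use a given reference unless the reference is scaled
     * and the mode is anything other than ZEROMV. */
    static int mode_usable_with_ref(MB_PREDICTION_MODE this_mode,
                                    int ref_frame,
                                    const YV12_BUFFER_CONFIG *ref,
                                    int frame_w, int frame_h) {
      const int scaled = ref->y_width != frame_w ||
                         ref->y_height != frame_h;
      return ref_frame <= 0 ||      /* intra or unused: no restriction */
             !scaled ||             /* same-size reference: all modes */
             this_mode == ZEROMV;   /* scaled reference: ZEROMV only */
    }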
@@ -4288,7 +4328,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         int uv_skippable;
 
         rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
-                       cpi->common.full_pixel);
+                       cpi->common.full_pixel, mb_row, mb_col);
         rate2 += rate_uv;
         distortion2 += distortion_uv;
         skippable = skippable && uv_skippable;
@@ -4330,7 +4370,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                   &rate_y, &distortion,
                                   &rate_uv, &distortion_uv,
                                   &mode_excluded, &disable_skip,
-                                  mode_index, &tmp_best_filter, frame_mv);
+                                  mode_index, &tmp_best_filter, frame_mv,
+                                  mb_row, mb_col);
       if (this_rd == INT64_MAX)
         continue;
     }
@@ -4581,6 +4622,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
     mbmi->partitioning = 0;
 
+    set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+                      scale_factor);
 
     vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
     vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
@@ -4633,6 +4676,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
 end:
+  set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+                    scale_factor);
   store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index],
                        best_mode_index, &best_partition,
                        &mbmi->ref_mvs[mbmi->ref_frame][0],
@@ -4846,9 +4891,9 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                     VP9_ALT_FLAG };
   int idx_list[4] = {0,
-                     cpi->common.active_ref_idx[cpi->lst_fb_idx],
-                     cpi->common.active_ref_idx[cpi->gld_fb_idx],
-                     cpi->common.active_ref_idx[cpi->alt_fb_idx]};
+                     cpi->lst_fb_idx,
+                     cpi->gld_fb_idx,
+                     cpi->alt_fb_idx};
   int mdcounts[4];
   int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
   int saddone = 0;
@@ -4875,6 +4920,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0;
   int dist_uv_16x16 = 0, uv_skip_16x16 = 0;
   MB_PREDICTION_MODE mode_uv_16x16 = NEARESTMV;
+  struct scale_factors scale_factor[4];
 
   xd->mode_info_context->mbmi.segment_id = segment_id;
   estimate_ref_frame_costs(cpi, segment_id, ref_costs);
@@ -4890,7 +4936,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame,
                          block_size, mb_row, mb_col,
                          frame_mv[NEARESTMV], frame_mv[NEARMV], frame_mdcounts,
-                         yv12_mb);
+                         yv12_mb, scale_factor);
     }
     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
     frame_mv[ZEROMV][ref_frame].as_int = 0;
@@ -4969,6 +5015,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     }
     mbmi->ref_frame = ref_frame;
     mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
+    set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+                      scale_factor);
     comp_pred = mbmi->second_ref_frame > INTRA_FRAME;
     mbmi->mode = this_mode;
     mbmi->uv_mode = DC_PRED;
@@ -4976,6 +5024,18 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
     mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
 #endif
+
+    if (mbmi->ref_frame > 0 &&
+        (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
+         yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
+        this_mode != ZEROMV)
+      continue;
+    if (mbmi->second_ref_frame > 0 &&
+        (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
+         yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
+        this_mode != ZEROMV)
+      continue;
+
     // Evaluate all sub-pel filters irrespective of whether we can use
     // them for this frame.
     mbmi->interp_filter = cm->mcomp_filter_type;
@@ -5000,6 +5060,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       if (!(cpi->ref_frame_flags & flag_list[second_ref]))
         continue;
       mbmi->second_ref_frame = second_ref;
+      set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+                        scale_factor);
 
       xd->second_pre = yv12_mb[second_ref];
       mode_excluded =
@@ -5098,7 +5160,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                   &rate_y, &distortion_y,
                                   &rate_uv, &distortion_uv,
                                   &mode_excluded, &disable_skip,
-                                  mode_index, &tmp_best_filter, frame_mv);
+                                  mode_index, &tmp_best_filter, frame_mv,
+                                  mb_row, mb_col);
       if (this_rd == INT64_MAX)
         continue;
     }
@@ -5351,6 +5414,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
 end:
+  set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+                    scale_factor);
   {
     PICK_MODE_CONTEXT *p = (block_size == BLOCK_32X32) ?
                            &x->sb32_context[xd->sb_index] :
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 710ae58fe..01b156044 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -45,18 +45,4 @@ extern void vp9_init_me_luts();
 extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb,
                                    int_mv *mv);
 
-static void setup_pred_block(YV12_BUFFER_CONFIG *dst,
-                             const YV12_BUFFER_CONFIG *src,
-                             int mb_row, int mb_col) {
-  const int recon_y_stride = src->y_stride;
-  const int recon_uv_stride = src->uv_stride;
-  const int recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col;
-  const int recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col;
-
-  *dst = *src;
-  dst->y_buffer += recon_yoffset;
-  dst->u_buffer += recon_uvoffset;
-  dst->v_buffer += recon_uvoffset;
-}
-
 #endif  // VP9_ENCODER_VP9_RDOPT_H_
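The unscaled setup_pred_block() removed above simply offsets the plane pointers by 16 (luma) or 8 (chroma) pixels per macroblock. Its scaled replacement now lives in vp9_reconinter.h and receives the luma and UV scale factors separately (per the call in setup_buffer_inter earlier); its body is not quoted in this patch, but by analogy a hypothetical sketch might map the block origin through the factors:

    /* Hypothetical sketch of the scaled variant; the real definition is
     * in vp9_reconinter.h per this patch's diffstat. */
    static void setup_pred_block_sketch(YV12_BUFFER_CONFIG *dst,
                                        const YV12_BUFFER_CONFIG *src,
                                        int mb_row, int mb_col,
                                        const struct scale_factors *scale,
                                        const struct scale_factors *scale_uv) {
      const int x = 16 * mb_col * scale->x_num / scale->x_den;
      const int y = 16 * mb_row * scale->y_num / scale->y_den;
      const int uv_x = 8 * mb_col * scale_uv->x_num / scale_uv->x_den;
      const int uv_y = 8 * mb_row * scale_uv->y_num / scale_uv->y_den;

      *dst = *src;
      dst->y_buffer += y * src->y_stride + x;
      dst->u_buffer += uv_y * src->uv_stride + uv_x;
      dst->v_buffer += uv_y * src->uv_stride + uv_x;
    }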
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 39c02e6ad..a6cd1c0c3 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -456,6 +456,13 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
             , start_frame);
 #endif
 
+  // Setup scaling factors. Scaling on each of the arnr frames is not supported
+  vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0],
+                                    &cpi->common.yv12_fb[cpi->common.new_fb_idx],
+                                    16 * cpi->common.mb_cols,
+                                    16 * cpi->common.mb_rows);
+  cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0];
+
   // Setup frame pointers, NULL indicates frame not included in filter
   vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *));
   for (frame = 0; frame < frames_to_blur; frame++) {
-- 
2.40.0
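One helper used above but not reproduced in this patch: vp9_setup_scale_factors_for_frame(). A plausible sketch, inferred from how the factors are consumed (num/den map coded-frame coordinates into reference coordinates, and the Q4 steps match the 16 * in / out increments fed to vp9_convolve8() in scale_and_extend_frame()); treat every line as an assumption, not the shipped definition:

    /* Hypothetical sketch; the real definition lives in vp9_reconinter.c. */
    static void setup_scale_factors_sketch(struct scale_factors *scale,
                                           const YV12_BUFFER_CONFIG *other,
                                           int this_w, int this_h) {
      scale->x_num = other->y_width;
      scale->x_den = this_w;
      scale->x_offset_q4 = 0;  /* refined per macroblock, e.g. in
                                  setup_buffer_inter() */
      scale->x_step_q4 = 16 * other->y_width / this_w;

      scale->y_num = other->y_height;
      scale->y_den = this_h;
      scale->y_offset_q4 = 0;
      scale->y_step_q4 = 16 * other->y_height / this_h;
    }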