From 56e6c66b498e4d71eae35af54b00bbf7d00ab4de Mon Sep 17 00:00:00 2001 From: Tero Rintaluoma Date: Wed, 13 Feb 2013 15:18:41 +0200 Subject: [PATCH] Avoid division in intra prediction - Using multiplication and shifting instead of division in intra prediction. - Maximum absolute difference is 1 for division statements in d45, d27, d63 prediction modes. However, errors can accumulate for large block sizes when using already predicted values. - Maximum numbers of non-matching result values in loops using division are: 4x4 0/16 8x8 0/64 16x16 10/256 32x32 13/1024 64x64 122/4096 Overall PSNR derf: 0.005 yt: -0.022 std-hd: 0.021 hd: -0.006 Change-Id: I3979a02eb6351636442c1af1e23d6c4e6ec1d01d --- vp9/common/vp9_reconintra.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 64456a766..eb8de2126 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -18,6 +18,23 @@ * and vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd). */ +/* Using multiplication and shifting instead of division in diagonal prediction. + * iscale table is calculated from ((1<<16) + (i+2)/2) / (i+2) and used as + * ((A + B) * iscale[i] + (1<<15)) >> 16; + * where A and B are weighted pixel values. 
+ */ +static const unsigned int iscale[64] = { + 32768, 21845, 16384, 13107, 10923, 9362, 8192, 7282, + 6554, 5958, 5461, 5041, 4681, 4369, 4096, 3855, + 3641, 3449, 3277, 3121, 2979, 2849, 2731, 2621, + 2521, 2427, 2341, 2260, 2185, 2114, 2048, 1986, + 1928, 1872, 1820, 1771, 1725, 1680, 1638, 1598, + 1560, 1524, 1489, 1456, 1425, 1394, 1365, 1337, + 1311, 1285, 1260, 1237, 1214, 1192, 1170, 1150, + 1130, 1111, 1092, 1074, 1057, 1040, 1024, 1008, +}; + + static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n, uint8_t *yabove_row, uint8_t *yleft_col) { int r, c, h, w, v; @@ -29,7 +46,7 @@ static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n, else a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1; b = yabove_row[c + 2]; - ypred_ptr[c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3); + ypred_ptr[c] = ((2 * a + (c + 1) * b) * iscale[1+c] + (1<<15)) >> 16; } for (r = 1; r < n / 2 - 1; r++) { for (c = 0; c < n - 2 - 2 * r; c++) { @@ -38,7 +55,8 @@ static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n, else a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1; b = ypred_ptr[(r - 1) * y_stride + c + 2]; - ypred_ptr[r * y_stride + c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3); + ypred_ptr[r * y_stride + c] = + ((2 * a + (c + 1) * b) * iscale[1+c] + (1<<15)) >> 16; } } for (; r < n - 1; ++r) { @@ -77,7 +95,8 @@ static void d63_predictor(uint8_t *ypred_ptr, int y_stride, int n, else a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1; b = yleft_col[r + 2]; - ypred_ptr[r * y_stride] = (2 * a + (r + 1) * b + (r + 3) / 2) / (r + 3); + ypred_ptr[r * y_stride] = ((2 * a + (r + 1) * b) * iscale[1+r] + + (1<<15)) >> 16; } for (c = 1; c < n / 2 - 1; c++) { for (r = 0; r < n - 2 - 2 * c; r++) { @@ -86,7 +105,8 @@ static void d63_predictor(uint8_t *ypred_ptr, int y_stride, int n, else a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1; b = ypred_ptr[(r + 2) * y_stride + c - 1]; - ypred_ptr[r * y_stride + c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3); + 
ypred_ptr[r * y_stride + c] = ((2 * a + (c + 1) * b) * iscale[1+c] + + (1<<15)) >> 16; } } for (; c < n - 1; ++c) { @@ -119,8 +139,8 @@ static void d45_predictor(uint8_t *ypred_ptr, int y_stride, int n, for (r = 0; r < n - 1; ++r) { for (c = 0; c <= r; ++c) { ypred_ptr[(r - c) * y_stride + c] = - (yabove_row[r + 1] * (c + 1) + - yleft_col[r + 1] * (r - c + 1) + r / 2 + 1) / (r + 2); + ((yabove_row[r + 1] * (c + 1) + + yleft_col[r + 1] * (r - c + 1)) * iscale[r] + (1<<15)) >> 16; } } for (c = 0; c <= r; ++c) { @@ -129,8 +149,8 @@ static void d45_predictor(uint8_t *ypred_ptr, int y_stride, int n, int yleft_ext = yleft_col[r]; // clip_pixel(2 * yleft_col[r] - // yleft_col[r-1]); ypred_ptr[(r - c) * y_stride + c] = - (yabove_ext * (c + 1) + - yleft_ext * (r - c + 1) + r / 2 + 1) / (r + 2); + ((yabove_ext * (c + 1) + + yleft_ext * (r - c + 1)) * iscale[r] + (1<<15)) >> 16; } for (r = 1; r < n; ++r) { for (c = n - r; c < n; ++c) { -- 2.40.0