From b58c99eb71aeca19f27757e10c1392662781b65d Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Tue, 7 Jul 2015 18:29:14 -0700 Subject: [PATCH] Remove clamp operations. The clamp calls with INT32_MIN and INT32_MAX have no effect on the int values passed in; this commit therefore removes those no-op clamps and also adds more const intermediate results to make the code more readable. Change-Id: I66d8811f58bb74ec31cbec9a6c441983a662352e --- vp9/encoder/vp9_quantize.c | 84 ++++++++----------- .../x86/vp9_highbd_quantize_intrin_sse2.c | 30 ++++--- 2 files changed, 49 insertions(+), 65 deletions(-) diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index e6e17c073..32c1f7697 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -57,17 +57,14 @@ void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { - const int rc = 0; - const int coeff = coeff_ptr[rc]; + const int coeff = coeff_ptr[0]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - - const int64_t tmp = - (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) * - quant) >> 16; - qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr; - if (tmp) + const int64_t tmp = abs_coeff + round_ptr[0]; + const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16); + qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr; + if (abs_qcoeff) eob = 0; } *eob_ptr = eob + 1; @@ -89,7 +86,6 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { - tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 15; @@ -117,17 +113,14 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, 
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { - const int rc = 0; - const int coeff = coeff_ptr[rc]; + const int coeff = coeff_ptr[0]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - - const int64_t tmp = - (clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), - INT32_MIN, INT32_MAX) * quant) >> 15; - qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2; - if (tmp) + const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1); + const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15); + qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2; + if (abs_qcoeff) eob = 0; } *eob_ptr = eob + 1; @@ -207,15 +200,11 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - - const int64_t tmp = - (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) * - quant_ptr[rc != 0]) >> 16; - - qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); + const int64_t tmp = abs_coeff + round_ptr[rc != 0]; + const uint32_t abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 16); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; - - if (tmp) + if (abs_qcoeff) eob = i; } } @@ -287,21 +276,21 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, if (!skip_block) { for (i = 0; i < n_coeffs; i++) { + uint32_t abs_qcoeff = 0; const int rc = scan[i]; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); - int64_t tmp = 0; const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) { - tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), - INT32_MIN, 
INT32_MAX); - tmp = (tmp * quant_ptr[rc != 0]) >> 15; - qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); + const int64_t tmp = abs_coeff + + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); + abs_qcoeff = (uint32_t) ((tmp * quant_ptr[rc != 0]) >> 15); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; } - if (tmp) + if (abs_qcoeff) eob = i; } } @@ -398,14 +387,13 @@ void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; if (abs_coeff >= zbins[rc != 0]) { - int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], - INT32_MIN, INT32_MAX); - tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * - quant_shift_ptr[rc != 0]) >> 16; // quantization - qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); + const int64_t tmp1 = abs_coeff + round_ptr[rc != 0]; + const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; + const uint32_t abs_qcoeff = + (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 16); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; - - if (tmp) + if (abs_qcoeff) eob = i; } } @@ -513,16 +501,14 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int64_t tmp = clamp(abs_coeff + - ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), - INT32_MIN, INT32_MAX); - tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * - quant_shift_ptr[rc != 0]) >> 15; - - qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); + const int64_t tmp1 = abs_coeff + + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); + const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; + const uint32_t abs_qcoeff = + (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15); + qcoeff_ptr[rc] = 
(tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; - - if (tmp) + if (abs_qcoeff) eob = idx_arr[i]; } } diff --git a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c index cbdd1c93e..0174cfeca 100644 --- a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c +++ b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c @@ -85,13 +85,13 @@ void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, for (j = 0; j < 4; j++) { if (test & (1 << (4 * j))) { int k = 4 * i + j; - int64_t tmp = clamp(abs_coeff[j] + round_ptr[k != 0], - INT32_MIN, INT32_MAX); - tmp = ((((tmp * quant_ptr[k != 0]) >> 16) + tmp) * - quant_shift_ptr[k != 0]) >> 16; // quantization - qcoeff_ptr[k] = (tmp ^ coeff_sign[j]) - coeff_sign[j]; + const int64_t tmp1 = abs_coeff[j] + round_ptr[k != 0]; + const int64_t tmp2 = ((tmp1 * quant_ptr[k != 0]) >> 16) + tmp1; + const uint32_t abs_qcoeff = + (uint32_t)((tmp2 * quant_shift_ptr[k != 0]) >> 16); + qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j]; dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0]; - if (tmp) + if (abs_qcoeff) eob_i = iscan[k] > eob_i ? 
iscan[k] : eob_i; } } @@ -162,17 +162,15 @@ void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, const int rc = idx_arr[i]; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); - int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int64_t tmp = clamp(abs_coeff + - ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), - INT32_MIN, INT32_MAX); - tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * - quant_shift_ptr[rc != 0]) >> 15; - - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + const int64_t tmp1 = abs_coeff + + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); + const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; + const uint32_t abs_qcoeff = + (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15); + qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; - - if (tmp) + if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob; } } -- 2.49.0