From: Linfeng Zhang Date: Tue, 2 May 2017 17:44:12 +0000 (-0700) Subject: Clean CONVERT_TO_BYTEPTR/SHORTPTR in idct X-Git-Tag: v1.7.0~491^2~2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=081b39f2b765f83406ac41a9ad8c8d6830e1707a;p=libvpx Clean CONVERT_TO_BYTEPTR/SHORTPTR in idct BUG=webm:1388 Change-Id: Ida62c941f2b836d6c9e27b427a7d5008ab6dc112 --- diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc index f9745ed81..4de36c2c6 100644 --- a/test/dct16x16_test.cc +++ b/test/dct16x16_test.cc @@ -353,7 +353,7 @@ class Trans16x16TestBase { #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( - RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); + RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } @@ -475,10 +475,10 @@ class Trans16x16TestBase { ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_)); #if CONFIG_VP9_HIGHBITDEPTH } else { - inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_, + inv_txfm_ref(output_ref_block, CAST_TO_BYTEPTR(ref16), pitch_, tx_type_); ASM_REGISTER_STATE_CHECK( - RunInvTxfm(output_ref_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); + RunInvTxfm(output_ref_block, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } if (bit_depth_ == VPX_BITS_8) { @@ -530,8 +530,7 @@ class Trans16x16TestBase { ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16)); #if CONFIG_VP9_HIGHBITDEPTH } else { - ASM_REGISTER_STATE_CHECK( - RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), 16)); + ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), 16)); #endif // CONFIG_VP9_HIGHBITDEPTH } @@ -585,9 +584,9 @@ class Trans16x16TestBase { ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_)); } else { #if CONFIG_VP9_HIGHBITDEPTH - ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_); + ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_); ASM_REGISTER_STATE_CHECK( - RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_)); + RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_)); #endif // 
CONFIG_VP9_HIGHBITDEPTH } diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc index a168e690e..b0d5c4d06 100644 --- a/test/dct32x32_test.cc +++ b/test/dct32x32_test.cc @@ -137,7 +137,7 @@ TEST_P(Trans32x32Test, AccuracyCheck) { #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( - inv_txfm_(test_temp_block, CONVERT_TO_BYTEPTR(dst16), 32)); + inv_txfm_(test_temp_block, CAST_TO_BYTEPTR(dst16), 32)); #endif } @@ -275,7 +275,7 @@ TEST_P(Trans32x32Test, InverseAccuracy) { ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32)); #if CONFIG_VP9_HIGHBITDEPTH } else { - ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32)); + ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CAST_TO_BYTEPTR(dst16), 32)); #endif } for (int j = 0; j < kNumCoeffs; ++j) { diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index 444b0209d..4836a0453 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -135,7 +135,7 @@ class Trans4x4TestBase { #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( - RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); + RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } @@ -249,7 +249,7 @@ class Trans4x4TestBase { #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( - RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_)); + RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index 7c2df67f6..9727da93d 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -257,7 +257,7 @@ class FwdTrans8x8TestBase { #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( - RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); + RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } @@ -340,7 +340,7 @@ class FwdTrans8x8TestBase { #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( - RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_)); + 
RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } @@ -413,7 +413,7 @@ class FwdTrans8x8TestBase { #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( - RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_)); + RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } @@ -497,9 +497,9 @@ class FwdTrans8x8TestBase { ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_)); #if CONFIG_VP9_HIGHBITDEPTH } else { - ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_); + ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_); ASM_REGISTER_STATE_CHECK( - RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_)); + RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc index 0759cd41c..893165fa3 100644 --- a/test/partial_idct_test.cc +++ b/test/partial_idct_test.cc @@ -45,7 +45,7 @@ void wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) { #if CONFIG_VP9_HIGHBITDEPTH template <InvTxfmWithBdFunc fn> void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) { - fn(in, CONVERT_TO_BYTEPTR(out), stride, bd); + fn(in, out, stride, bd); } #endif diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index 55957414c..afdae5996 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -213,7 +213,7 @@ void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 }; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); int i, j; tran_low_t out[4 * 4]; @@ -252,7 +252,7 @@ void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t *outptr = out; tran_low_t temp_in[8], temp_out[8]; const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); // Inverse transform row vectors. 
for (i = 0; i < 8; ++i) { @@ -286,7 +286,7 @@ void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); // Rows for (i = 0; i < 16; ++i) { diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index a9e5eebba..8023ebd57 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -189,21 +189,22 @@ static void inverse_transform_block_inter(MACROBLOCKD *xd, int plane, assert(eob > 0); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)); if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); + vp9_highbd_iwht4x4_add(dqcoeff, dst16, stride, eob, xd->bd); } else { switch (tx_size) { case TX_4X4: - vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd); + vp9_highbd_idct4x4_add(dqcoeff, dst16, stride, eob, xd->bd); break; case TX_8X8: - vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd); + vp9_highbd_idct8x8_add(dqcoeff, dst16, stride, eob, xd->bd); break; case TX_16X16: - vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd); + vp9_highbd_idct16x16_add(dqcoeff, dst16, stride, eob, xd->bd); break; case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + vp9_highbd_idct32x32_add(dqcoeff, dst16, stride, eob, xd->bd); break; default: assert(0 && "Invalid transform size"); } @@ -256,21 +257,22 @@ static void inverse_transform_block_intra(MACROBLOCKD *xd, int plane, assert(eob > 0); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)); if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); + vp9_highbd_iwht4x4_add(dqcoeff, dst16, stride, 
eob, xd->bd); } else { switch (tx_size) { case TX_4X4: - vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd); break; case TX_8X8: - vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd); break; case TX_16X16: - vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd); break; case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + vp9_highbd_idct32x32_add(dqcoeff, dst16, stride, eob, xd->bd); break; default: assert(0 && "Invalid transform size"); } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 16eea8fa2..9d3152e1e 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -637,24 +637,25 @@ static void encode_block(int plane, int block, int row, int col, if (x->skip_encode || p->eobs[block] == 0) return; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)); switch (tx_size) { case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], + vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; case TX_16X16: - vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], + vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; case TX_8X8: - vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], + vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; case TX_4X4: // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. 
- x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], + x->highbd_itxm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; default: assert(0 && "Invalid transform size"); @@ -699,7 +700,8 @@ static void encode_block_pass1(int plane, int block, int row, int col, if (p->eobs[block] > 0) { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd); + x->highbd_itxm_add(dqcoeff, CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)), + pd->dst.stride, p->eobs[block], xd->bd); return; } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -799,6 +801,7 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)); switch (tx_size) { case TX_32X32: if (!x->skip_recode) { @@ -814,7 +817,7 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) { - vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd); + vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd); } break; case TX_16X16: @@ -834,7 +837,7 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) { - vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob, + vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob, xd->bd); } break; @@ -855,7 +858,7 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) { - vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob, + vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob, xd->bd); } break; @@ -880,9 +883,10 @@ void 
vp9_encode_block_intra(int plane, int block, int row, int col, // this is like vp9_short_idct4x4 but has a special case around // eob<=1 which is significant (not just an optimization) for the // lossless case. - x->highbd_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd); + x->highbd_itxm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd); } else { - vp9_highbd_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd); + vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type, + xd->bd); } } break; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index d4aa46e83..be4ff234d 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -601,7 +601,7 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16, 32, NULL, 0, NULL, 0, bs, bs, xd->bd); - recon = CONVERT_TO_BYTEPTR(recon16); + recon = CAST_TO_BYTEPTR(recon16); if (xd->lossless) { vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd); } else { @@ -621,6 +621,7 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, default: assert(0 && "Invalid transform size"); } } + recon = CONVERT_TO_BYTEPTR(recon16); } else { #endif // CONFIG_VP9_HIGHBITDEPTH vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0, bs, bs); @@ -1004,6 +1005,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row, const int block = (row + idy) * 2 + (col + idx); const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride]; uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride]; + uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)); int16_t *const src_diff = vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); @@ -1025,7 +1027,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row, tempa[idx] = templ[idy] = 
(x->plane[0].eobs[block] > 0 ? 1 : 0); if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next_highbd; - vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, + vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst16, dst_stride, p->eobs[block], xd->bd); } else { int64_t unused; @@ -1048,7 +1050,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row, if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next_highbd; vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), - dst, dst_stride, p->eobs[block], xd->bd); + dst16, dst_stride, p->eobs[block], xd->bd); } } } diff --git a/vpx_dsp/arm/highbd_idct16x16_add_neon.c b/vpx_dsp/arm/highbd_idct16x16_add_neon.c index 1259bb380..486a33148 100644 --- a/vpx_dsp/arm/highbd_idct16x16_add_neon.c +++ b/vpx_dsp/arm/highbd_idct16x16_add_neon.c @@ -1270,7 +1270,7 @@ void vpx_highbd_idct16x16_10_add_half1d_pass2(const int32_t *input, void vpx_highbd_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); if (bd == 8) { int16_t row_idct_output[16 * 16]; @@ -1315,7 +1315,7 @@ void vpx_highbd_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest8, void vpx_highbd_idct16x16_38_add_neon(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); if (bd == 8) { int16_t row_idct_output[16 * 16]; @@ -1351,7 +1351,7 @@ void vpx_highbd_idct16x16_38_add_neon(const tran_low_t *input, uint8_t *dest8, void vpx_highbd_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); if (bd == 8) { int16_t row_idct_output[4 * 16]; @@ -1422,7 +1422,7 @@ void vpx_highbd_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest8, 
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6); const int16x8_t dc = vdupq_n_s16(a1); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); int i; if (a1 >= 0) { diff --git a/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c b/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c index 858342830..9646f744f 100644 --- a/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c +++ b/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c @@ -394,7 +394,7 @@ static INLINE void vpx_highbd_idct32_32_neon(const tran_low_t *input, int32_t pass2[32 * 32]; int32_t *out; int32x4x2_t q[16]; - uint16_t *dst = CONVERT_TO_SHORTPTR(dest); + uint16_t *dst = CAST_TO_SHORTPTR(dest); for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2; idct32_pass_loop++, input = pass1, out = pass2) { diff --git a/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c b/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c index 52f3d43e5..b2c776b57 100644 --- a/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c +++ b/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c @@ -729,7 +729,7 @@ static void vpx_highbd_idct32_16_neon(const int32_t *const input, void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); if (bd == 8) { int16_t temp[32 * 16]; diff --git a/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c b/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c index 195dcc92d..41622a249 100644 --- a/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c +++ b/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c @@ -597,7 +597,7 @@ static void vpx_highbd_idct32_8_neon(const int32_t *input, uint16_t *output, void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); if (bd == 8) { int16_t temp[32 * 8]; diff --git 
a/vpx_dsp/arm/highbd_idct32x32_add_neon.c b/vpx_dsp/arm/highbd_idct32x32_add_neon.c index d74331f80..e7bfb3e7f 100644 --- a/vpx_dsp/arm/highbd_idct32x32_add_neon.c +++ b/vpx_dsp/arm/highbd_idct32x32_add_neon.c @@ -67,7 +67,7 @@ void vpx_highbd_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest8, HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6); const int16x8_t dc = vdupq_n_s16(a1); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); int i; if (a1 >= 0) { diff --git a/vpx_dsp/arm/highbd_idct4x4_add_neon.c b/vpx_dsp/arm/highbd_idct4x4_add_neon.c index 128f72b9c..dd8a3d55e 100644 --- a/vpx_dsp/arm/highbd_idct4x4_add_neon.c +++ b/vpx_dsp/arm/highbd_idct4x4_add_neon.c @@ -60,7 +60,7 @@ void vpx_highbd_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest8, HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 4); const int16x8_t dc = vdupq_n_s16(a1); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max); highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max); @@ -140,7 +140,7 @@ void vpx_highbd_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest8, int32x4_t c1 = vld1q_s32(input + 4); int32x4_t c2 = vld1q_s32(input + 8); int32x4_t c3 = vld1q_s32(input + 12); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); int16x8_t a0, a1; if (bd == 8) { diff --git a/vpx_dsp/arm/highbd_idct8x8_add_neon.c b/vpx_dsp/arm/highbd_idct8x8_add_neon.c index f53f4c7fc..50b82779a 100644 --- a/vpx_dsp/arm/highbd_idct8x8_add_neon.c +++ b/vpx_dsp/arm/highbd_idct8x8_add_neon.c @@ -44,7 +44,7 @@ void vpx_highbd_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest8, HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5); const int16x8_t dc = 
vdupq_n_s16(a1); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); if (a1 >= 0) { const int16x8_t max = vdupq_n_s16((1 << bd) - 1); @@ -294,7 +294,7 @@ static INLINE void highbd_add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2, void vpx_highbd_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); int32x4_t a0 = vld1q_s32(input); int32x4_t a1 = vld1q_s32(input + 8); int32x4_t a2 = vld1q_s32(input + 16); @@ -555,7 +555,7 @@ static INLINE void idct8x8_64_half1d_bd12( void vpx_highbd_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); int32x4_t a0 = vld1q_s32(input); int32x4_t a1 = vld1q_s32(input + 4); int32x4_t a2 = vld1q_s32(input + 8); diff --git a/vpx_dsp/arm/idct32x32_add_neon.c b/vpx_dsp/arm/idct32x32_add_neon.c index 34b5baf72..91418c9e6 100644 --- a/vpx_dsp/arm/idct32x32_add_neon.c +++ b/vpx_dsp/arm/idct32x32_add_neon.c @@ -517,7 +517,7 @@ void vpx_idct32_32_neon(const tran_low_t *input, uint8_t *dest, const int16_t *input_pass2 = pass1; // input of pass2 is the result of pass1 int16_t *out; int16x8_t q[16]; - uint16_t *dst = CONVERT_TO_SHORTPTR(dest); + uint16_t *dst = CAST_TO_SHORTPTR(dest); for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2; idct32_pass_loop++, out = pass2) { diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c index 210a9bed9..14aa8ba22 100644 --- a/vpx_dsp/inv_txfm.c +++ b/vpx_dsp/inv_txfm.c @@ -1299,7 +1299,7 @@ void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, tran_high_t a1, b1, c1, d1, e1; const tran_low_t *ip = input; tran_low_t *op = output; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); for (i = 0; i < 4; i++) { a1 = ip[0] >> UNIT_QUANT_SHIFT; @@ -1355,7 +1355,7 @@ void 
vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, tran_low_t tmp[4]; const tran_low_t *ip = in; tran_low_t *op = tmp; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); (void)bd; a1 = ip[0] >> UNIT_QUANT_SHIFT; @@ -1458,7 +1458,7 @@ void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t out[4 * 4]; tran_low_t *outptr = out; tran_low_t temp_in[4], temp_out[4]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); // Rows for (i = 0; i < 4; ++i) { @@ -1484,7 +1484,7 @@ void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, tran_high_t a1; tran_low_t out = HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 4); @@ -1642,7 +1642,7 @@ void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t out[8 * 8]; tran_low_t *outptr = out; tran_low_t temp_in[8], temp_out[8]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); // First transform rows for (i = 0; i < 8; ++i) { @@ -1668,7 +1668,7 @@ void vpx_highbd_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t out[8 * 8] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[8], temp_out[8]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); // First transform rows // Only first 4 row has non-zero coefs @@ -1695,7 +1695,7 @@ void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, tran_high_t a1; tran_low_t out = HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); a1 = 
ROUND_POWER_OF_TWO(out, 5); @@ -2062,7 +2062,7 @@ void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t out[16 * 16]; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); // First transform rows for (i = 0; i < 16; ++i) { @@ -2088,7 +2088,7 @@ void vpx_highbd_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; - uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *const dest = CAST_TO_SHORTPTR(dest8); // First transform rows. Since all non-zero dct coefficients are in // upper-left 8x8 area, we only need to calculate first 8 rows here. @@ -2117,7 +2117,7 @@ void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); // First transform rows. Since all non-zero dct coefficients are in // upper-left 4x4 area, we only need to calculate first 4 rows here. 
@@ -2144,7 +2144,7 @@ void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, tran_high_t a1; tran_low_t out = HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 6); @@ -2537,7 +2537,7 @@ void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t out[32 * 32]; tran_low_t *outptr = out; tran_low_t temp_in[32], temp_out[32]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); // Rows for (i = 0; i < 32; ++i) { @@ -2575,7 +2575,7 @@ void vpx_highbd_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t out[32 * 32] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[32], temp_out[32]; - uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *const dest = CAST_TO_SHORTPTR(dest8); // Rows // Only upper-left 16x16 has non-zero coeff @@ -2604,7 +2604,7 @@ void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, tran_low_t out[32 * 32] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[32], temp_out[32]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); // Rows // Only upper-left 8x8 has non-zero coeff @@ -2629,7 +2629,7 @@ void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i, j; int a1; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); tran_low_t out = HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); diff --git a/vpx_dsp/x86/inv_txfm_sse2.c b/vpx_dsp/x86/inv_txfm_sse2.c index 8c33caedb..d469e5219 100644 --- a/vpx_dsp/x86/inv_txfm_sse2.c +++ b/vpx_dsp/x86/inv_txfm_sse2.c @@ -3373,7 +3373,7 @@ void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, __m128i sign_bits[2]; 
__m128i temp_mm, min_input, max_input; int test; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); int optimised_cols = 0; const __m128i zero = _mm_set1_epi16(0); const __m128i eight = _mm_set1_epi16(8); @@ -3486,7 +3486,7 @@ void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, int i, j, test; __m128i inptr[8]; __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); const __m128i zero = _mm_set1_epi16(0); const __m128i sixteen = _mm_set1_epi16(16); const __m128i max = _mm_set1_epi16(6201); @@ -3586,7 +3586,7 @@ void vpx_highbd_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest8, int i, j, test; __m128i inptr[8]; __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); const __m128i zero = _mm_set1_epi16(0); const __m128i sixteen = _mm_set1_epi16(16); const __m128i max = _mm_set1_epi16(6201); @@ -3689,7 +3689,7 @@ void vpx_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8, int i, j, test; __m128i inptr[32]; __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); const __m128i zero = _mm_set1_epi16(0); const __m128i rounding = _mm_set1_epi16(32); const __m128i max = _mm_set1_epi16(3155); @@ -3802,7 +3802,7 @@ void vpx_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8, int i, j, test; __m128i inptr[32]; __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); const __m128i zero = _mm_set1_epi16(0); const __m128i rounding = _mm_set1_epi16(32); const __m128i max = _mm_set1_epi16(3155); @@ -3920,7 +3920,7 @@ void vpx_highbd_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest8, const __m128i one = 
_mm_set1_epi16(1); const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one); int a, i, j; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + uint16_t *dest = CAST_TO_SHORTPTR(dest8); tran_low_t out; out = HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);