#if CONFIG_VP9_HIGHBITDEPTH
} else {
ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+ RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
}
ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
} else {
- inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
+ inv_txfm_ref(output_ref_block, CAST_TO_BYTEPTR(ref16), pitch_,
tx_type_);
ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(output_ref_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+ RunInvTxfm(output_ref_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
}
if (bit_depth_ == VPX_BITS_8) {
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
#if CONFIG_VP9_HIGHBITDEPTH
} else {
- ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), 16));
+ ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), 16));
#endif // CONFIG_VP9_HIGHBITDEPTH
}
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
} else {
#if CONFIG_VP9_HIGHBITDEPTH
- ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
+ ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
+ RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif // CONFIG_VP9_HIGHBITDEPTH
}
#if CONFIG_VP9_HIGHBITDEPTH
} else {
ASM_REGISTER_STATE_CHECK(
- inv_txfm_(test_temp_block, CONVERT_TO_BYTEPTR(dst16), 32));
+ inv_txfm_(test_temp_block, CAST_TO_BYTEPTR(dst16), 32));
#endif
}
ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
#if CONFIG_VP9_HIGHBITDEPTH
} else {
- ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
+ ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CAST_TO_BYTEPTR(dst16), 32));
#endif
}
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
} else {
ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+ RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
}
#if CONFIG_VP9_HIGHBITDEPTH
} else {
ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
+ RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
}
#if CONFIG_VP9_HIGHBITDEPTH
} else {
ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+ RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
}
#if CONFIG_VP9_HIGHBITDEPTH
} else {
ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+ RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
}
#if CONFIG_VP9_HIGHBITDEPTH
} else {
ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
+ RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
}
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
} else {
- ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
+ ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
+ RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
}
#if CONFIG_VP9_HIGHBITDEPTH
template <InvTxfmWithBdFunc fn>
void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
- fn(in, CONVERT_TO_BYTEPTR(out), stride, bd);
+ fn(in, out, stride, bd);
}
#endif
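Every hunk in this change applies the same substitution, so it is worth spelling out what the two macro families do. CONVERT_TO_BYTEPTR/CONVERT_TO_SHORTPTR encode a uint16_t address by shifting it, so the resulting uint8_t pointer is not dereferenceable; CAST_TO_BYTEPTR/CAST_TO_SHORTPTR are plain type casts that leave the address untouched. A minimal self-contained sketch, mirroring the definitions in vpx_dsp/vpx_dsp_common.h (the main() demo is illustrative only, not part of the change):

    #include <assert.h>
    #include <stdint.h>

    /* Legacy encoding: the "byte pointer" is the real uint16_t address
     * shifted right by one, so it must never be dereferenced directly. */
    #define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
    #define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
    /* New plain casts: only the pointer type changes, never the address. */
    #define CAST_TO_SHORTPTR(x) ((uint16_t *)(x))
    #define CAST_TO_BYTEPTR(x) ((uint8_t *)(x))

    int main(void) {
      uint16_t pixels[4] = { 0 };
      /* The cast macros round-trip with no pointer arithmetic, which is
       * why highbd_wrapper above can now pass `out` straight through. */
      assert(CAST_TO_SHORTPTR(CAST_TO_BYTEPTR(pixels)) == pixels);
      return 0;
    }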
{ vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
{ vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
};
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
int i, j;
tran_low_t out[4 * 4];
tran_low_t *outptr = out;
tran_low_t temp_in[8], temp_out[8];
const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
// Inverse transform row vectors.
for (i = 0; i < 8; ++i) {
tran_low_t *outptr = out;
tran_low_t temp_in[16], temp_out[16];
const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
// Rows
for (i = 0; i < 16; ++i) {
assert(eob > 0);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst));
if (xd->lossless) {
- vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
+ vp9_highbd_iwht4x4_add(dqcoeff, dst16, stride, eob, xd->bd);
} else {
switch (tx_size) {
case TX_4X4:
- vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd);
+ vp9_highbd_idct4x4_add(dqcoeff, dst16, stride, eob, xd->bd);
break;
case TX_8X8:
- vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd);
+ vp9_highbd_idct8x8_add(dqcoeff, dst16, stride, eob, xd->bd);
break;
case TX_16X16:
- vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd);
+ vp9_highbd_idct16x16_add(dqcoeff, dst16, stride, eob, xd->bd);
break;
case TX_32X32:
- vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
+ vp9_highbd_idct32x32_add(dqcoeff, dst16, stride, eob, xd->bd);
break;
default: assert(0 && "Invalid transform size");
}
assert(eob > 0);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst));
if (xd->lossless) {
- vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
+ vp9_highbd_iwht4x4_add(dqcoeff, dst16, stride, eob, xd->bd);
} else {
switch (tx_size) {
case TX_4X4:
- vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
+ vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd);
break;
case TX_8X8:
- vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
+ vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd);
break;
case TX_16X16:
- vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
+ vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd);
break;
case TX_32X32:
- vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
+ vp9_highbd_idct32x32_add(dqcoeff, dst16, stride, eob, xd->bd);
break;
default: assert(0 && "Invalid transform size");
}
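The decoder hunks above show the bridging idiom used wherever a legacy shift-encoded `dst` meets the new-style kernels: `CONVERT_TO_SHORTPTR` first undoes the encoding to recover the real `uint16_t *`, and `CAST_TO_BYTEPTR` then re-types that address without touching it. A small sketch, reusing the includes and macro definitions from the sketch above (`bridge_demo`, `real_pixels`, and `legacy_dst` are hypothetical names):

    static void bridge_demo(void) {
      uint16_t real_pixels[16] = { 0 };
      /* A legacy caller hands around the shift-encoded form. Because
       * uint16_t keeps the address 2-byte aligned, the low bit dropped
       * by the >> 1 is always zero, so the round trip is lossless. */
      uint8_t *legacy_dst = CONVERT_TO_BYTEPTR(real_pixels);
      /* The bridge decodes it, then casts with no arithmetic... */
      uint8_t *dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(legacy_dst));
      /* ...yielding a plain alias of the 16-bit buffer. */
      assert((void *)dst16 == (void *)real_pixels);
    }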
if (x->skip_encode || p->eobs[block] == 0) return;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst));
switch (tx_size) {
case TX_32X32:
- vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
+ vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
xd->bd);
break;
case TX_16X16:
- vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
+ vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
xd->bd);
break;
case TX_8X8:
- vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
+ vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
xd->bd);
break;
case TX_4X4:
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
- x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
+ x->highbd_itxm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
xd->bd);
break;
default: assert(0 && "Invalid transform size");
if (p->eobs[block] > 0) {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
+ x->highbd_itxm_add(dqcoeff, CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)),
+ pd->dst.stride, p->eobs[block], xd->bd);
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst));
switch (tx_size) {
case TX_32X32:
if (!x->skip_recode) {
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
}
if (!x->skip_encode && *eob) {
- vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
+ vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
}
break;
case TX_16X16:
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
}
if (!x->skip_encode && *eob) {
- vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob,
+ vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
xd->bd);
}
break;
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
}
if (!x->skip_encode && *eob) {
- vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
+ vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
xd->bd);
}
break;
// this is like vp9_short_idct4x4 but has a special case around
// eob<=1 which is significant (not just an optimization) for the
// lossless case.
- x->highbd_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
+ x->highbd_itxm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
} else {
- vp9_highbd_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd);
+ vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
+ xd->bd);
}
}
break;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
32, NULL, 0, NULL, 0, bs, bs, xd->bd);
- recon = CONVERT_TO_BYTEPTR(recon16);
+ recon = CAST_TO_BYTEPTR(recon16);
if (xd->lossless) {
vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
} else {
default: assert(0 && "Invalid transform size");
}
}
+ recon = CONVERT_TO_BYTEPTR(recon16);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0, bs, bs);
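The hunk above is the one place the pointer changes representation twice: `recon16` is a local uint16_t scratch buffer handed to the new-style highbd inverse transforms as a plain cast, while the code after this block evidently still consumes `recon` in the legacy encoded form, hence the added `recon = CONVERT_TO_BYTEPTR(recon16);` on the way out. Schematically (a sketch of the hunk's shape under the macros above, not the full function):

    uint16_t recon16[32 * 32];
    uint8_t *recon = CAST_TO_BYTEPTR(recon16); /* plain alias for the idct */
    /* ... inverse transform writes pixels via CAST_TO_SHORTPTR(recon) ... */
    recon = CONVERT_TO_BYTEPTR(recon16); /* re-encode for legacy consumers */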
const int block = (row + idy) * 2 + (col + idx);
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
+ uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst));
int16_t *const src_diff =
vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next_highbd;
- vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst,
+ vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst16,
dst_stride, p->eobs[block], xd->bd);
} else {
int64_t unused;
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next_highbd;
vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
- dst, dst_stride, p->eobs[block], xd->bd);
+ dst16, dst_stride, p->eobs[block], xd->bd);
}
}
}
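Everything from here down is kernel-internal: each NEON, SSE2, and C hunk makes the same one-line change, re-typing `dest8` with CAST_TO_SHORTPTR instead of decoding it with CONVERT_TO_SHORTPTR. In other words, the kernels now assume the incoming byte pointer is a plain alias of the 16-bit frame buffer. The shared shape, as a sketch (the function name is hypothetical; the body is elided):

    #include <stdint.h>
    typedef int32_t tran_low_t; /* as in vpx_dsp with CONFIG_VP9_HIGHBITDEPTH */
    #define CAST_TO_SHORTPTR(x) ((uint16_t *)(x))

    void vpx_highbd_example_add(const tran_low_t *input, uint8_t *dest8,
                                int stride, int bd) {
      /* dest8 now aliases the uint16_t buffer directly; the cast does no
       * pointer arithmetic, unlike the old CONVERT_TO_SHORTPTR. */
      uint16_t *dest = CAST_TO_SHORTPTR(dest8);
      (void)input; (void)stride; (void)bd; (void)dest;
    }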
void vpx_highbd_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
if (bd == 8) {
int16_t row_idct_output[16 * 16];
void vpx_highbd_idct16x16_38_add_neon(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
if (bd == 8) {
int16_t row_idct_output[16 * 16];
void vpx_highbd_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
if (bd == 8) {
int16_t row_idct_output[4 * 16];
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
const int16x8_t dc = vdupq_n_s16(a1);
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
int i;
if (a1 >= 0) {
int32_t pass2[32 * 32];
int32_t *out;
int32x4x2_t q[16];
- uint16_t *dst = CONVERT_TO_SHORTPTR(dest);
+ uint16_t *dst = CAST_TO_SHORTPTR(dest);
for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2;
idct32_pass_loop++, input = pass1, out = pass2) {
void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
int i;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
if (bd == 8) {
int16_t temp[32 * 16];
void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
int i;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
if (bd == 8) {
int16_t temp[32 * 8];
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
const int16x8_t dc = vdupq_n_s16(a1);
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
int i;
if (a1 >= 0) {
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 4);
const int16x8_t dc = vdupq_n_s16(a1);
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max);
highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max);
int32x4_t c1 = vld1q_s32(input + 4);
int32x4_t c2 = vld1q_s32(input + 8);
int32x4_t c3 = vld1q_s32(input + 12);
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
int16x8_t a0, a1;
if (bd == 8) {
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5);
const int16x8_t dc = vdupq_n_s16(a1);
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
if (a1 >= 0) {
const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
void vpx_highbd_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
int32x4_t a0 = vld1q_s32(input);
int32x4_t a1 = vld1q_s32(input + 8);
int32x4_t a2 = vld1q_s32(input + 16);
void vpx_highbd_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
int32x4_t a0 = vld1q_s32(input);
int32x4_t a1 = vld1q_s32(input + 4);
int32x4_t a2 = vld1q_s32(input + 8);
const int16_t *input_pass2 = pass1; // input of pass2 is the result of pass1
int16_t *out;
int16x8_t q[16];
- uint16_t *dst = CONVERT_TO_SHORTPTR(dest);
+ uint16_t *dst = CAST_TO_SHORTPTR(dest);
for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2;
idct32_pass_loop++, out = pass2) {
tran_high_t a1, b1, c1, d1, e1;
const tran_low_t *ip = input;
tran_low_t *op = output;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
for (i = 0; i < 4; i++) {
a1 = ip[0] >> UNIT_QUANT_SHIFT;
tran_low_t tmp[4];
const tran_low_t *ip = in;
tran_low_t *op = tmp;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
(void)bd;
a1 = ip[0] >> UNIT_QUANT_SHIFT;
tran_low_t out[4 * 4];
tran_low_t *outptr = out;
tran_low_t temp_in[4], temp_out[4];
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
// Rows
for (i = 0; i < 4; ++i) {
tran_high_t a1;
tran_low_t out =
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 4);
tran_low_t out[8 * 8];
tran_low_t *outptr = out;
tran_low_t temp_in[8], temp_out[8];
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
// First transform rows
for (i = 0; i < 8; ++i) {
tran_low_t out[8 * 8] = { 0 };
tran_low_t *outptr = out;
tran_low_t temp_in[8], temp_out[8];
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
// First transform rows
// Only first 4 row has non-zero coefs
tran_high_t a1;
tran_low_t out =
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 5);
tran_low_t out[16 * 16];
tran_low_t *outptr = out;
tran_low_t temp_in[16], temp_out[16];
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
// First transform rows
for (i = 0; i < 16; ++i) {
tran_low_t out[16 * 16] = { 0 };
tran_low_t *outptr = out;
tran_low_t temp_in[16], temp_out[16];
- uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *const dest = CAST_TO_SHORTPTR(dest8);
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 8x8 area, we only need to calculate first 8 rows here.
tran_low_t out[16 * 16] = { 0 };
tran_low_t *outptr = out;
tran_low_t temp_in[16], temp_out[16];
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here.
tran_high_t a1;
tran_low_t out =
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 6);
tran_low_t out[32 * 32];
tran_low_t *outptr = out;
tran_low_t temp_in[32], temp_out[32];
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
// Rows
for (i = 0; i < 32; ++i) {
tran_low_t out[32 * 32] = { 0 };
tran_low_t *outptr = out;
tran_low_t temp_in[32], temp_out[32];
- uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *const dest = CAST_TO_SHORTPTR(dest8);
// Rows
// Only upper-left 16x16 has non-zero coeff
tran_low_t out[32 * 32] = { 0 };
tran_low_t *outptr = out;
tran_low_t temp_in[32], temp_out[32];
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
// Rows
// Only upper-left 8x8 has non-zero coeff
int stride, int bd) {
int i, j;
int a1;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
tran_low_t out =
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
__m128i sign_bits[2];
__m128i temp_mm, min_input, max_input;
int test;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
int optimised_cols = 0;
const __m128i zero = _mm_set1_epi16(0);
const __m128i eight = _mm_set1_epi16(8);
int i, j, test;
__m128i inptr[8];
__m128i min_input, max_input, temp1, temp2, sign_bits;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
const __m128i zero = _mm_set1_epi16(0);
const __m128i sixteen = _mm_set1_epi16(16);
const __m128i max = _mm_set1_epi16(6201);
int i, j, test;
__m128i inptr[8];
__m128i min_input, max_input, temp1, temp2, sign_bits;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
const __m128i zero = _mm_set1_epi16(0);
const __m128i sixteen = _mm_set1_epi16(16);
const __m128i max = _mm_set1_epi16(6201);
int i, j, test;
__m128i inptr[32];
__m128i min_input, max_input, temp1, temp2, sign_bits;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
const __m128i zero = _mm_set1_epi16(0);
const __m128i rounding = _mm_set1_epi16(32);
const __m128i max = _mm_set1_epi16(3155);
int i, j, test;
__m128i inptr[32];
__m128i min_input, max_input, temp1, temp2, sign_bits;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
const __m128i zero = _mm_set1_epi16(0);
const __m128i rounding = _mm_set1_epi16(32);
const __m128i max = _mm_set1_epi16(3155);
const __m128i one = _mm_set1_epi16(1);
const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one);
int a, i, j;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ uint16_t *dest = CAST_TO_SHORTPTR(dest8);
tran_low_t out;
out = HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);