make_tuple(&QuantWrapper<vpx_highbd_quantize_b_sse2>,
&QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_12, 16,
false),
- make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
- &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
- VPX_BITS_8, 32, false),
- make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
- &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
- VPX_BITS_10, 32, false),
- make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
- &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
- VPX_BITS_12, 32, false)));
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_sse2>,
+ &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_8, 32,
+ false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_sse2>,
+ &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_10,
+ 32, false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_sse2>,
+ &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_12,
+ 32, false)));
#else
INSTANTIATE_TEST_SUITE_P(
make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx2>,
&Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
false),
- make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
- &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
- VPX_BITS_8, 32, false),
- make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
- &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
- VPX_BITS_10, 32, false),
- make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
- &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
- VPX_BITS_12, 32, false)));
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_avx2>,
+ &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_8, 32,
+ false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_avx2>,
+ &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_10,
+ 32, false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_avx2>,
+ &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_12,
+ 32, false)));
#else
INSTANTIATE_TEST_SUITE_P(
AVX2, VP9QuantizeTest,
make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_neon>,
&Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
false),
- make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
- &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
- VPX_BITS_8, 32, false),
- make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
- &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
- VPX_BITS_10, 32, false),
- make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
- &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
- VPX_BITS_12, 32, false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_neon>,
+ &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_8, 32,
+ false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_neon>,
+ &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_10,
+ 32, false),
+ make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_neon>,
+ &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_12,
+ 32, false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
switch (tx_size) {
case TX_32X32:
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
- vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
+ vpx_highbd_quantize_b_32x32(
+ coeff, 1024, p->zbin, p->round, p->quant, p->quant_shift, qcoeff,
+ dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan);
break;
case TX_16X16:
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
src_stride, dst, dst_stride, xd->bd);
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
- vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant,
- eob, scan_order->scan, scan_order->iscan);
+ vpx_highbd_quantize_b_32x32(
+ coeff, 1024, p->zbin, p->round, p->quant, p->quant_shift, qcoeff,
+ dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan);
}
if (args->enable_coeff_opt && !x->skip_recode) {
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/mem_neon.h"
-#include "vp9/encoder/vp9_block.h"
static VPX_FORCE_INLINE void highbd_calculate_dqcoeff_and_store(
const int32x4_t dqcoeff_0, const int32x4_t dqcoeff_1,
}
void vpx_highbd_quantize_b_32x32_neon(
- const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) {
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
const int16x8_t neg_one = vdupq_n_s16(-1);
uint16x8_t eob_max;
int i;
// High half has identical elements, but we can reconstruct it from the low
// half by duplicating the 2nd element. So we only need to pass a 4x32-bit
// vector
- int32x4_t zbin = vrshrq_n_s32(vmovl_s16(vld1_s16(mb_plane->zbin)), 1);
- int32x4_t round = vrshrq_n_s32(vmovl_s16(vld1_s16(mb_plane->round)), 1);
+ int32x4_t zbin = vrshrq_n_s32(vmovl_s16(vld1_s16(zbin_ptr)), 1);
+ int32x4_t round = vrshrq_n_s32(vmovl_s16(vld1_s16(round_ptr)), 1);
// Extend the quant, quant_shift vectors to ones of 32-bit elements
// scale to high-half, so we can use vqdmulhq_s32
- int32x4_t quant = vshlq_n_s32(vmovl_s16(vld1_s16(mb_plane->quant)), 15);
- int32x4_t quant_shift =
- vshlq_n_s32(vmovl_s16(vld1_s16(mb_plane->quant_shift)), 16);
+ int32x4_t quant = vshlq_n_s32(vmovl_s16(vld1_s16(quant_ptr)), 15);
+ int32x4_t quant_shift = vshlq_n_s32(vmovl_s16(vld1_s16(quant_shift_ptr)), 16);
int32x4_t dequant = vmovl_s16(vld1_s16(dequant_ptr));
// Process first 8 values which include a dc component.
vst1_lane_u16(eob_ptr, eob_max_2, 0);
}
#endif // __aarch64__
- // Need this here, else the compiler complains about mixing declarations and
+ // Need these here, else the compiler complains about mixing declarations and
// code in C90
+ (void)n_coeffs;
(void)scan;
}
#if CONFIG_VP9_HIGHBITDEPTH
void vpx_highbd_quantize_b_32x32_c(
- const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) {
- const intptr_t n_coeffs = 32 * 32;
- const int zbins[2] = { ROUND_POWER_OF_TWO(mb_plane->zbin[0], 1),
- ROUND_POWER_OF_TWO(mb_plane->zbin[1], 1) };
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
+ ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
- const int16_t *round_ptr = mb_plane->round;
- const int16_t *quant_ptr = mb_plane->quant;
- const int16_t *quant_shift_ptr = mb_plane->quant_shift;
int idx = 0;
int idx_arr[1024];
add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b neon sse2 avx2/;
- add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b_32x32 neon sse2 avx2/;
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9_ENCODER
#include <immintrin.h>
#include "./vpx_dsp_rtcd.h"
-#include "vp9/encoder/vp9_block.h"
static VPX_FORCE_INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
const __m128i sign = _mm_srai_epi16(*p, 15);
}
void vpx_highbd_quantize_b_32x32_avx2(
- const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) {
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
const unsigned int step = 8;
- intptr_t n_coeffs = 32 * 32;
__m256i eob = _mm256_setzero_si256();
__m256i qp[5];
(void)scan;
- init_qp(mb_plane->zbin, mb_plane->round, mb_plane->quant, dequant_ptr,
- mb_plane->quant_shift, qp, 1);
+ init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp, 1);
quantize_b_32x32(qp, coeff_ptr, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
-#include "vp9/encoder/vp9_block.h"
#if CONFIG_VP9_HIGHBITDEPTH
void vpx_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
}
void vpx_highbd_quantize_b_32x32_sse2(
- const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) {
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
__m128i zbins[2];
__m128i nzbins[2];
int idx = 0;
int idx_arr[1024];
int i, eob = 0;
- const intptr_t n_coeffs = 32 * 32;
- const int zbin0_tmp = ROUND_POWER_OF_TWO(mb_plane->zbin[0], 1);
- const int zbin1_tmp = ROUND_POWER_OF_TWO(mb_plane->zbin[1], 1);
+ const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1);
+ const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1);
(void)scan;
zbins[0] = _mm_set_epi32(zbin1_tmp, zbin1_tmp, zbin1_tmp, zbin0_tmp);
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(mb_plane->round[rc != 0], 1);
- const int64_t tmp2 = ((tmp1 * mb_plane->quant[rc != 0]) >> 16) + tmp1;
+ const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * mb_plane->quant_shift[rc != 0]) >> 15);
+ (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
qcoeff_ptr[rc] = (int)(abs_qcoeff ^ (uint32_t)coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;