Revert "quantize: simplifly highbd 32x32_b args"

author James Zern <jzern@google.com>
Wed, 1 Mar 2023 23:53:14 +0000 (15:53 -0800)
committer James Zern <jzern@google.com>
Wed, 1 Mar 2023 23:54:48 +0000 (15:54 -0800)
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc

index e533b2509c161c88bf61a72c958596f7472a1896..ecb6116f0ca31b189a24386768c49b09d2f9ac23 100644 (file)
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -549,15 +549,15 @@ INSTANTIATE_TEST_SUITE_P(
          make_tuple(&QuantWrapper<vpx_highbd_quantize_b_sse2>,
                     &QuantWrapper<vpx_highbd_quantize_b_c>, VPX_BITS_12, 16,
                     false),
-        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
-                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
-                   VPX_BITS_8, 32, false),
-        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
-                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
-                   VPX_BITS_10, 32, false),
-        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
-                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
-                   VPX_BITS_12, 32, false)));
+        make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_sse2>,
+                   &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_8, 32,
+                   false),
+        make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_sse2>,
+                   &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_10,
+                   32, false),
+        make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_sse2>,
+                   &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_12,
+                   32, false)));
  
  #else
  INSTANTIATE_TEST_SUITE_P(
@@ -626,15 +626,15 @@ INSTANTIATE_TEST_SUITE_P(
          make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx2>,
                     &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                     false),
-        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
-                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
-                   VPX_BITS_8, 32, false),
-        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
-                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
-                   VPX_BITS_10, 32, false),
-        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
-                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
-                   VPX_BITS_12, 32, false)));
+        make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_avx2>,
+                   &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_8, 32,
+                   false),
+        make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_avx2>,
+                   &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_10,
+                   32, false),
+        make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_avx2>,
+                   &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_12,
+                   32, false)));
  #else
  INSTANTIATE_TEST_SUITE_P(
      AVX2, VP9QuantizeTest,
@@ -672,15 +672,15 @@ INSTANTIATE_TEST_SUITE_P(
          make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_neon>,
                     &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                     false),
-        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
-                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
-                   VPX_BITS_8, 32, false),
-        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
-                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
-                   VPX_BITS_10, 32, false),
-        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
-                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
-                   VPX_BITS_12, 32, false),
+        make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_neon>,
+                   &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_8, 32,
+                   false),
+        make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_neon>,
+                   &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_10,
+                   32, false),
+        make_tuple(&QuantWrapper<vpx_highbd_quantize_b_32x32_neon>,
+                   &QuantWrapper<vpx_highbd_quantize_b_32x32_c>, VPX_BITS_12,
+                   32, false),
          make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
                     &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
          make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c

index 6a5f628808fc17c4406e6fb7bb7b81623e42200f..4910dc20f5a0947107c2bb5006058c6e712a0286 100644 (file)
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -511,8 +511,9 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
      switch (tx_size) {
        case TX_32X32:
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
-        vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant, eob,
-                                    scan_order->scan, scan_order->iscan);
+        vpx_highbd_quantize_b_32x32(
+            coeff, 1024, p->zbin, p->round, p->quant, p->quant_shift, qcoeff,
+            dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan);
          break;
        case TX_16X16:
          vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
@@ -855,8 +856,9 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
            vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
                                      src_stride, dst, dst_stride, xd->bd);
            highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
-          vpx_highbd_quantize_b_32x32(coeff, p, qcoeff, dqcoeff, pd->dequant,
-                                      eob, scan_order->scan, scan_order->iscan);
+          vpx_highbd_quantize_b_32x32(
+              coeff, 1024, p->zbin, p->round, p->quant, p->quant_shift, qcoeff,
+              dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan);
          }
          if (args->enable_coeff_opt && !x->skip_recode) {
            *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
diff --git a/vpx_dsp/arm/highbd_quantize_neon.c b/vpx_dsp/arm/highbd_quantize_neon.c

index 3b1fec3321afdac2e019e010e2405e0a7414f3bd..b9f72a94c5df3a226ae100e5733992dea7315bf4 100644 (file)
--- a/vpx_dsp/arm/highbd_quantize_neon.c
+++ b/vpx_dsp/arm/highbd_quantize_neon.c
@@ -13,7 +13,6 @@
  #include "./vpx_config.h"
  #include "./vpx_dsp_rtcd.h"
  #include "vpx_dsp/arm/mem_neon.h"
-#include "vp9/encoder/vp9_block.h"
  
  static VPX_FORCE_INLINE void highbd_calculate_dqcoeff_and_store(
      const int32x4_t dqcoeff_0, const int32x4_t dqcoeff_1,
@@ -225,9 +224,11 @@ static VPX_FORCE_INLINE int16x8_t highbd_quantize_b_32x32_neon(
  }
  
  void vpx_highbd_quantize_b_32x32_neon(
-    const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane,
-    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
-    uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) {
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan) {
    const int16x8_t neg_one = vdupq_n_s16(-1);
    uint16x8_t eob_max;
    int i;
@@ -236,13 +237,12 @@ void vpx_highbd_quantize_b_32x32_neon(
    // High half has identical elements, but we can reconstruct it from the low
    // half by duplicating the 2nd element. So we only need to pass a 4x32-bit
    // vector
-  int32x4_t zbin = vrshrq_n_s32(vmovl_s16(vld1_s16(mb_plane->zbin)), 1);
-  int32x4_t round = vrshrq_n_s32(vmovl_s16(vld1_s16(mb_plane->round)), 1);
+  int32x4_t zbin = vrshrq_n_s32(vmovl_s16(vld1_s16(zbin_ptr)), 1);
+  int32x4_t round = vrshrq_n_s32(vmovl_s16(vld1_s16(round_ptr)), 1);
    // Extend the quant, quant_shift vectors to ones of 32-bit elements
    // scale to high-half, so we can use vqdmulhq_s32
-  int32x4_t quant = vshlq_n_s32(vmovl_s16(vld1_s16(mb_plane->quant)), 15);
-  int32x4_t quant_shift =
-      vshlq_n_s32(vmovl_s16(vld1_s16(mb_plane->quant_shift)), 16);
+  int32x4_t quant = vshlq_n_s32(vmovl_s16(vld1_s16(quant_ptr)), 15);
+  int32x4_t quant_shift = vshlq_n_s32(vmovl_s16(vld1_s16(quant_shift_ptr)), 16);
    int32x4_t dequant = vmovl_s16(vld1_s16(dequant_ptr));
  
    // Process first 8 values which include a dc component.
@@ -300,7 +300,8 @@ void vpx_highbd_quantize_b_32x32_neon(
      vst1_lane_u16(eob_ptr, eob_max_2, 0);
    }
  #endif  // __aarch64__
-  // Need this here, else the compiler complains about mixing declarations and
+  // Need these here, else the compiler complains about mixing declarations and
    // code in C90
+  (void)n_coeffs;
    (void)scan;
  }
diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c

index c4642812ad1828b159dd69f06cbf9b05c38bd0bb..212db45c88fc1c513d568b00c297f4bd1671fc8a 100644 (file)
--- a/vpx_dsp/quantize.c
+++ b/vpx_dsp/quantize.c
@@ -272,16 +272,14 @@ void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
  
  #if CONFIG_VP9_HIGHBITDEPTH
  void vpx_highbd_quantize_b_32x32_c(
-    const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane,
-    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
-    uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) {
-  const intptr_t n_coeffs = 32 * 32;
-  const int zbins[2] = { ROUND_POWER_OF_TWO(mb_plane->zbin[0], 1),
-                         ROUND_POWER_OF_TWO(mb_plane->zbin[1], 1) };
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan) {
+  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
+                         ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
    const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-  const int16_t *round_ptr = mb_plane->round;
-  const int16_t *quant_ptr = mb_plane->quant;
-  const int16_t *quant_shift_ptr = mb_plane->quant_shift;
  
    int idx = 0;
    int idx_arr[1024];
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl

index c899c467bbe1c095ed482c5d5bba5a8bca6670e0..2752eea5d9ef4e69de4e628b5f74fff7f06b295c 100644 (file)
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -731,7 +731,7 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
      add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
      specialize qw/vpx_highbd_quantize_b neon sse2 avx2/;
  
-    add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+    add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
      specialize qw/vpx_highbd_quantize_b_32x32 neon sse2 avx2/;
    }  # CONFIG_VP9_HIGHBITDEPTH
  }  # CONFIG_VP9_ENCODER
diff --git a/vpx_dsp/x86/highbd_quantize_intrin_avx2.c b/vpx_dsp/x86/highbd_quantize_intrin_avx2.c

index 6041d7289ae70c99d125de6cd659d27ac5606a5c..8edddd637f0f10632cfb7ef64da02d735e6d7c92 100644 (file)
--- a/vpx_dsp/x86/highbd_quantize_intrin_avx2.c
+++ b/vpx_dsp/x86/highbd_quantize_intrin_avx2.c
@@ -11,7 +11,6 @@
  #include <immintrin.h>
  
  #include "./vpx_dsp_rtcd.h"
-#include "vp9/encoder/vp9_block.h"
  
  static VPX_FORCE_INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
    const __m128i sign = _mm_srai_epi16(*p, 15);
@@ -223,17 +222,17 @@ static VPX_FORCE_INLINE void quantize_b_32x32(
  }
  
  void vpx_highbd_quantize_b_32x32_avx2(
-    const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane,
-    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
-    uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) {
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan) {
    const unsigned int step = 8;
-  intptr_t n_coeffs = 32 * 32;
    __m256i eob = _mm256_setzero_si256();
    __m256i qp[5];
    (void)scan;
  
-  init_qp(mb_plane->zbin, mb_plane->round, mb_plane->quant, dequant_ptr,
-          mb_plane->quant_shift, qp, 1);
+  init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp, 1);
  
    quantize_b_32x32(qp, coeff_ptr, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
  
diff --git a/vpx_dsp/x86/highbd_quantize_intrin_sse2.c b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c

index 6a8f42b8a40ccd01d2879037bc5341cb51a02896..ae1981a8345e94e5660490a29bbc0310de4e6907 100644 (file)
--- a/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
+++ b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
@@ -15,7 +15,6 @@
  #include "vpx_dsp/vpx_dsp_common.h"
  #include "vpx_mem/vpx_mem.h"
  #include "vpx_ports/mem.h"
-#include "vp9/encoder/vp9_block.h"
  
  #if CONFIG_VP9_HIGHBITDEPTH
  void vpx_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
@@ -94,17 +93,18 @@ void vpx_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
  }
  
  void vpx_highbd_quantize_b_32x32_sse2(
-    const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane,
-    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
-    uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) {
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan) {
    __m128i zbins[2];
    __m128i nzbins[2];
    int idx = 0;
    int idx_arr[1024];
    int i, eob = 0;
-  const intptr_t n_coeffs = 32 * 32;
-  const int zbin0_tmp = ROUND_POWER_OF_TWO(mb_plane->zbin[0], 1);
-  const int zbin1_tmp = ROUND_POWER_OF_TWO(mb_plane->zbin[1], 1);
+  const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1);
+  const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1);
    (void)scan;
  
    zbins[0] = _mm_set_epi32(zbin1_tmp, zbin1_tmp, zbin1_tmp, zbin0_tmp);
@@ -140,11 +140,10 @@ void vpx_highbd_quantize_b_32x32_sse2(
      const int coeff = coeff_ptr[rc];
      const int coeff_sign = (coeff >> 31);
      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-    const int64_t tmp1 =
-        abs_coeff + ROUND_POWER_OF_TWO(mb_plane->round[rc != 0], 1);
-    const int64_t tmp2 = ((tmp1 * mb_plane->quant[rc != 0]) >> 16) + tmp1;
+    const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+    const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
      const uint32_t abs_qcoeff =
-        (uint32_t)((tmp2 * mb_plane->quant_shift[rc != 0]) >> 15);
+        (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
      qcoeff_ptr[rc] = (int)(abs_qcoeff ^ (uint32_t)coeff_sign) - coeff_sign;
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
      if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
author	James Zern <jzern@google.com>
	Wed, 1 Mar 2023 23:53:14 +0000 (15:53 -0800)
committer	James Zern <jzern@google.com>
	Wed, 1 Mar 2023 23:54:48 +0000 (15:54 -0800)
test/vp9_quantize_test.cc		patch | blob | history
vp9/encoder/vp9_encodemb.c		patch | blob | history
vpx_dsp/arm/highbd_quantize_neon.c		patch | blob | history
vpx_dsp/quantize.c		patch | blob | history
vpx_dsp/vpx_dsp_rtcd_defs.pl		patch | blob | history
vpx_dsp/x86/highbd_quantize_intrin_avx2.c		patch | blob | history
vpx_dsp/x86/highbd_quantize_intrin_sse2.c		patch | blob | history