Re-design quantization process for 32x32 transform block

author Jingning Han <jingning@google.com>

Mon, 7 Jul 2014 19:08:40 +0000 (12:08 -0700)

committer Jingning Han <jingning@google.com>

Tue, 8 Jul 2014 23:55:28 +0000 (16:55 -0700)
author Jingning Han <jingning@google.com>
Mon, 7 Jul 2014 19:08:40 +0000 (12:08 -0700)
committer Jingning Han <jingning@google.com>
Tue, 8 Jul 2014 23:55:28 +0000 (16:55 -0700)
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl

index f52dccbf4e1df126e8bd365e82bc32f71fc1d270..b182f3fe3854abeb409c1c5aa59231a186ed5b41 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -717,6 +717,9 @@ specialize qw/vp9_subtract_block/, "$sse2_x86inc";
  add_proto qw/void vp9_quantize_fp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/vp9_quantize_fp/, "$ssse3_x86_64";
  
+add_proto qw/void vp9_quantize_fp_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64";
+
  add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
  
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c

index eb9624dde0f3e71c791d68da0f2621886bccf311..cd0191e0a2aaafa6a084de0b7265436927a3ed6a 100644 (file)
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -320,10 +320,10 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
    switch (tx_size) {
      case TX_32X32:
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
-      vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
-                           p->quant, p->quant_shift, qcoeff, dqcoeff,
-                           pd->dequant, p->zbin_extra, eob, scan_order->scan,
-                           scan_order->iscan);
+      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
+                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+                            pd->dequant, p->zbin_extra, eob, scan_order->scan,
+                            scan_order->iscan);
        break;
      case TX_16X16:
        vp9_fdct16x16(src_diff, coeff, diff_stride);
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c

index 4964e0fd048ee8b947ea1e892a0feda13c497c68..370e1ce77569d01e7c281ba25c45afac54542947 100644 (file)
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -104,6 +104,49 @@ void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
    *eob_ptr = eob + 1;
  }
  
+// TODO(jingning) Refactor this file and combine functions with similar
+// operations.
+void vp9_quantize_fp_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,  // in: coefficients; count of entries processed
+                             int skip_block,  // non-zero: outputs stay all-zero and *eob_ptr is set to 0
+                             const int16_t *zbin_ptr, const int16_t *round_ptr,  // zbin_ptr is unused; round_ptr[0|1] is read
+                             const int16_t *quant_ptr,  // quant_ptr[0|1] is the multiplier applied below
+                             const int16_t *quant_shift_ptr,  // unused here
+                             int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,  // out: quantized / dequantized coefficients
+                             const int16_t *dequant_ptr,  // dequant_ptr[0|1]: skip threshold and reconstruction step
+                             int zbin_oq_value, uint16_t *eob_ptr,  // zbin_oq_value unused; out: last non-zero scan position + 1
+                             const int16_t *scan, const int16_t *iscan) {  // scan[i] -> coefficient index; iscan unused
+  int i, eob = -1;  // eob == -1 means no non-zero quantized coefficient has been seen
+  (void)zbin_ptr;  // these (void) casts mark parameters this function never reads
+  (void)quant_shift_ptr;
+  (void)zbin_oq_value;
+  (void)iscan;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));  // both outputs start at zero, so skipped entries need no store
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+
+  if (!skip_block) {
+    for (i = 0; i < n_coeffs; i++) {  // walk coefficients in scan order
+      const int rc = scan[i];  // (rc != 0) below picks table entry 0 for index 0, entry 1 otherwise
+      const int coeff = coeff_ptr[rc];
+      const int coeff_sign = (coeff >> 31);  // 0 or -1; NOTE(review): assumes 32-bit int and arithmetic right shift
+      int tmp = 0;  // quantized magnitude; remains 0 when the coefficient is below the threshold
+      int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // |coeff| given coeff_sign is 0 or -1
+
+      if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {  // magnitudes below dequant / 4 are left as zero
+        abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);  // presumably adds the rounding offset halved, rounded up; confirm macro
+        abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);  // keep the biased magnitude within int16_t range
+        tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15;  // scale by the quantizer multiplier, dropping 15 fraction bits
+        qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;  // reapply the sign of the input coefficient
+        dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;  // reconstruction is halved in this 32x32 variant
+      }
+
+      if (tmp)  // record the scan position of the latest non-zero quantized value
+        eob = i;
+    }
+  }
+  *eob_ptr = eob + 1;  // 0 when nothing was quantized to non-zero (or skip_block was set)
+}
+
  void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
                        int skip_block,
                        const int16_t *zbin_ptr, const int16_t *round_ptr,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c

index 1eac02f99411407e670aced75f0f20c5e935110e..9fe7b34af0fae791ec5e8c8bd2755060039735f0 100644 (file)
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -250,6 +250,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
    }
  
    if (speed >= 5) {
+    sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
      sf->auto_min_max_partition_size = (cm->frame_type == KEY_FRAME) ?
          RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
      sf->max_partition_size = BLOCK_32X32;
@@ -282,7 +283,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
      sf->elevate_newmv_thresh = 2000;
    }
    if (speed >= 7) {
-    sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
      sf->mv.fullpel_search_step_param = 10;
      sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
      sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ?
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm

index 2d9f2b056475005084606eea3091294b8580c680..508e1d4f55afda8f707a119ee0929e053d940dbc 100644 (file)
--- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -234,21 +234,18 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
    movifnidn                   quantq, quantmp
    mova                            m1, [roundq]             ; m1 = round
    mova                            m2, [quantq]             ; m2 = quant
-%ifidn %1, b_32x32
-; TODO(jingning) to be continued with 32x32 quantization process
+%ifidn %1, fp_32x32
    pcmpeqw                         m5, m5
    psrlw                           m5, 15
-  paddw                           m0, m5
    paddw                           m1, m5
-  psrlw                           m0, 1                    ; m0 = (m0 + 1) / 2
    psrlw                           m1, 1                    ; m1 = (m1 + 1) / 2
  %endif
    mova                            m3, [r2q]                ; m3 = dequant
    mov                             r3, qcoeffmp
    mov                             r4, dqcoeffmp
    mov                             r5, iscanmp
-%ifidn %1, b_32x32
-  psllw                           m4, 1
+%ifidn %1, fp_32x32
+  psllw                           m2, 1
  %endif
    pxor                            m5, m5                   ; m5 = dedicated zero
    DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
@@ -275,18 +272,19 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
    psignw                         m13, m10                  ; m13 = reinsert sign
    mova        [qcoeffq+ncoeffq*2+ 0], m8
    mova        [qcoeffq+ncoeffq*2+16], m13
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
    pabsw                           m8, m8
    pabsw                          m13, m13
  %endif
    pmullw                          m8, m3                   ; dqc[i] = qc[i] * q
    punpckhqdq                      m3, m3
    pmullw                         m13, m3                   ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
    psrlw                           m8, 1
    psrlw                          m13, 1
    psignw                          m8, m9
    psignw                         m13, m10
+  psrlw                           m0, m3, 2
  %endif
    mova       [dqcoeffq+ncoeffq*2+ 0], m8
    mova       [dqcoeffq+ncoeffq*2+16], m13
@@ -307,13 +305,17 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
    mova                           m10, [  coeffq+ncoeffq*2+16] ; m10 = c[i]
    pabsw                           m6, m9                   ; m6 = abs(m9)
    pabsw                          m11, m10                  ; m11 = abs(m10)
-  pcmpeqw                         m7, m7
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
+  pcmpgtw                         m7, m6,  m0
+  pcmpgtw                        m12, m11, m0
    pmovmskb                        r6, m7
-  pmovmskb                        r2, m7
+  pmovmskb                        r2, m12
+
    or                              r6, r2
    jz .skip_iter
  %endif
+  pcmpeqw                         m7, m7
+
    paddsw                          m6, m1                   ; m6 += round
    paddsw                         m11, m1                   ; m11 += round
    pmulhw                         m14, m6, m2               ; m14 = m6*q>>16
@@ -322,13 +324,13 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
    psignw                         m13, m10                  ; m13 = reinsert sign
    mova        [qcoeffq+ncoeffq*2+ 0], m14
    mova        [qcoeffq+ncoeffq*2+16], m13
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
    pabsw                          m14, m14
    pabsw                          m13, m13
  %endif
    pmullw                         m14, m3                   ; dqc[i] = qc[i] * q
    pmullw                         m13, m3                   ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
    psrlw                          m14, 1
    psrlw                          m13, 1
    psignw                         m14, m9
@@ -349,7 +351,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
    add                        ncoeffq, mmsize
    jl .ac_only_loop
  
-%ifidn %1, b_32x32
+%ifidn %1, fp_32x32
    jmp .accumulate_eob
  .skip_iter:
    mova        [qcoeffq+ncoeffq*2+ 0], m5
@@ -397,3 +399,4 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
  
  INIT_XMM ssse3
  QUANTIZE_FP fp, 7
+QUANTIZE_FP fp_32x32, 7
author	Jingning Han <jingning@google.com>
	Mon, 7 Jul 2014 19:08:40 +0000 (12:08 -0700)
committer	Jingning Han <jingning@google.com>
	Tue, 8 Jul 2014 23:55:28 +0000 (16:55 -0700)
vp9/common/vp9_rtcd_defs.pl		patch \| blob \| history
vp9/encoder/vp9_encodemb.c		patch \| blob \| history
vp9/encoder/vp9_quantize.c		patch \| blob \| history
vp9/encoder/vp9_speed_features.c		patch \| blob \| history
vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm		patch \| blob \| history