Use the fast quantizer for inter mode selection

author Scott LaVarnway <slavarnway@google.com>

Tue, 28 Dec 2010 19:51:46 +0000 (14:51 -0500)

committer Scott LaVarnway <slavarnway@google.com>

Tue, 28 Dec 2010 19:51:46 +0000 (14:51 -0500)
author Scott LaVarnway <slavarnway@google.com>
Tue, 28 Dec 2010 19:51:46 +0000 (14:51 -0500)
committer Scott LaVarnway <slavarnway@google.com>
Tue, 28 Dec 2010 19:51:46 +0000 (14:51 -0500)
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c

index 65c616614000eb29d0970c21d9a4376ffc8c5b2c..225feaca635b6e01b5f92f74a71c886153318e32 100644 (file)
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -29,7 +29,7 @@ extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr, short *zbin_ptr, shor
  
  void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
  {
-    d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant);
+    d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant_fast);
  }
  
  /*
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h

index 90b42c35cac19160d9cfee3fb90e3e1e9909fdae..bf94e508b5136b557ed770cf7b9777a2f9726a98 100644 (file)
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -33,6 +33,7 @@ typedef struct
  
      // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
      short *quant;
+    short *quant_fast;
      short *quant_shift;
      short *zbin;
      short *zrun_zbin_boost;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c

index 1528389468fac8b1164d3739b3d24820182c5621..cb7cc65d7c9109ede0dc66c6db1e5dcf05d39c29 100644 (file)
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -179,6 +179,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
      {
          // dc values
          quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
+        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
          vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
                             cpi->Y1quant_shift[Q] + 0, quant_val);
          cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
@@ -187,6 +188,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
          cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
  
          quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
+        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
          vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
                             cpi->Y2quant_shift[Q] + 0, quant_val);
          cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
@@ -195,6 +197,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
          cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
  
          quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
+        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
          vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
                             cpi->UVquant_shift[Q] + 0, quant_val);
          cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
@@ -208,6 +211,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
              int rc = vp8_default_zig_zag1d[i];
  
              quant_val = vp8_ac_yquant(Q);
+            cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
              vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
                                 cpi->Y1quant_shift[Q] + rc, quant_val);
              cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
@@ -216,6 +220,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
              cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
  
              quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
+            cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
              vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
                                 cpi->Y2quant_shift[Q] + rc, quant_val);
              cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
@@ -224,6 +229,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
              cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
  
              quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+            cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
              vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
                                 cpi->UVquant_shift[Q] + rc, quant_val);
              cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
@@ -325,6 +331,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
      for (i = 0; i < 16; i++)
      {
          x->block[i].quant = cpi->Y1quant[QIndex];
+        x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
          x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
          x->block[i].zbin = cpi->Y1zbin[QIndex];
          x->block[i].round = cpi->Y1round[QIndex];
@@ -339,6 +346,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
      for (i = 16; i < 24; i++)
      {
          x->block[i].quant = cpi->UVquant[QIndex];
+        x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
          x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
          x->block[i].zbin = cpi->UVzbin[QIndex];
          x->block[i].round = cpi->UVround[QIndex];
@@ -349,6 +357,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
  
      // Y2
      zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
+    x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
      x->block[24].quant = cpi->Y2quant[QIndex];
      x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
      x->block[24].zbin = cpi->Y2zbin[QIndex];
@@ -1270,7 +1279,18 @@ int vp8cx_encode_inter_macroblock
  
      if (cpi->sf.RD)
      {
+        /* Are we using the fast quantizer for the mode selection? */
+        if(cpi->sf.use_fastquant_for_pick)
+            cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
+
          inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
+
+        /* switch back to the regular quantizer for the encode */
+        if (cpi->sf.improved_quant)
+        {
+            cpi->mb.quantize_b    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
+        }
+
      }
      else
  #endif
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c

index 1f890790c3f2772b531a8a5db83322d82480036f..05a1338dca084666ac5be65c8cce0f7f5f43ad59 100644 (file)
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -591,6 +591,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
      sf->max_fs_radius = 32;
      sf->iterative_sub_pixel = 1;
      sf->optimize_coefficients = 1;
+    sf->use_fastquant_for_pick = 0;
  
      sf->first_step = 0;
      sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
@@ -758,7 +759,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
  
              cpi->mode_check_freq[THR_SPLITG] = 4;
              cpi->mode_check_freq[THR_SPLITA] = 4;
-            cpi->mode_check_freq[THR_SPLITMV] = 0;
+            cpi->mode_check_freq[THR_SPLITMV] = 2;
  
              sf->thresh_mult[THR_TM       ] = 1500;
              sf->thresh_mult[THR_V_PRED   ] = 1500;
@@ -789,8 +790,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
                  sf->thresh_mult[THR_SPLITA   ] = 20000;
              }
  
-            sf->improved_quant = 0;
-            sf->improved_dct = 0;
+            sf->use_fastquant_for_pick = 1;
  
              sf->first_step = 1;
              sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
@@ -798,6 +798,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
  
          if (Speed > 1)
          {
+            sf->use_fastquant_for_pick = 0;
+
              cpi->mode_check_freq[THR_SPLITG] = 15;
              cpi->mode_check_freq[THR_SPLITA] = 15;
              cpi->mode_check_freq[THR_SPLITMV] = 7;
@@ -831,6 +833,11 @@ void vp8_set_speed_features(VP8_COMP *cpi)
                  sf->thresh_mult[THR_SPLITA   ] = 50000;
              }
  
+            sf->first_step = 1;
+
+            sf->improved_quant = 0;
+            sf->improved_dct = 0;
+
              // Only do recode loop on key frames, golden frames and
              // alt ref frames
              sf->recode_loop = 2;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h

index 990ae1d9e427276854edb1f5f51156c44449975d..ab270ca5f4cbffc9b5806482a95ff44b44289330 100644 (file)
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -182,6 +182,8 @@ typedef struct
      int first_step;
      int optimize_coefficients;
  
+    int use_fastquant_for_pick;
+
  } SPEED_FEATURES;
  
  typedef struct
@@ -269,6 +271,9 @@ typedef struct
      DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
      DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
      DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y1quant_fast[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y2quant_fast[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, UVquant_fast[QINDEX_RANGE][16]);
  
  
      MACROBLOCK mb;
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c

index a1be6614b4f10c84989b2dcbdae821d220bca658..a67299487649f76b4100d599cde24ad818748377 100644 (file)
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -27,7 +27,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
      short *coeff_ptr       = b->coeff;
      short *zbin_ptr        = b->zbin;
      short *round_ptr       = b->round;
-    short *quant_ptr       = b->quant;
+    short *quant_ptr       = b->quant_fast;
      short *quant_shift_ptr = b->quant_shift;
      short *qcoeff_ptr      = d->qcoeff;
      short *dqcoeff_ptr     = d->dqcoeff;
@@ -74,7 +74,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
      int x, y, z, sz;
      short *coeff_ptr   = b->coeff;
      short *round_ptr   = b->round;
-    short *quant_ptr   = b->quant;
+    short *quant_ptr   = b->quant_fast;
      short *qcoeff_ptr  = d->qcoeff;
      short *dqcoeff_ptr = d->dqcoeff;
      short *dequant_ptr = d->dequant;
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c

index d2199a49901547c2dd3ad52cda71358044804550..6e317e2a2fe77faea2716d25d5dd5f4ce60d6ea8 100644 (file)
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -32,7 +32,7 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
      short *coeff_ptr   = b->coeff;
      short *zbin_ptr    = b->zbin;
      short *round_ptr   = b->round;
-    short *quant_ptr   = b->quant;
+    short *quant_ptr   = b->quant_fast;
      short *qcoeff_ptr  = d->qcoeff;
      short *dqcoeff_ptr = d->dqcoeff;
      short *dequant_ptr = d->dequant;
@@ -90,7 +90,7 @@ void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
      short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
      short *coeff_ptr   = b->coeff;
      short *round_ptr   = b->round;
-    short *quant_ptr   = b->quant;
+    short *quant_ptr   = b->quant_fast;
      short *qcoeff_ptr  = d->qcoeff;
      short *dqcoeff_ptr = d->dqcoeff;
      short *dequant_ptr = d->dequant;
@@ -183,7 +183,7 @@ void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
                      d->qcoeff,
                      d->dequant,
                      b->round,
-                    b->quant,
+                    b->quant_fast,
                      d->dqcoeff
                 );
  }
author	Scott LaVarnway <slavarnway@google.com>
	Tue, 28 Dec 2010 19:51:46 +0000 (14:51 -0500)
committer	Scott LaVarnway <slavarnway@google.com>
	Tue, 28 Dec 2010 19:51:46 +0000 (14:51 -0500)
vp8/encoder/arm/quantize_arm.c		patch | blob | history
vp8/encoder/block.h		patch | blob | history
vp8/encoder/encodeframe.c		patch | blob | history
vp8/encoder/onyx_if.c		patch | blob | history
vp8/encoder/onyx_int.h		patch | blob | history
vp8/encoder/quantize.c		patch | blob | history
vp8/encoder/x86/x86_csystemdependent.c		patch | blob | history