Move new quant experiment from nextgen

author Sarah Parker <sarahparker@google.com>

Tue, 10 May 2016 22:32:42 +0000 (15:32 -0700)

committer Sarah Parker <sarahparker@google.com>

Fri, 10 Jun 2016 15:06:22 +0000 (08:06 -0700)
author Sarah Parker <sarahparker@google.com>
Tue, 10 May 2016 22:32:42 +0000 (15:32 -0700)
committer Sarah Parker <sarahparker@google.com>
Fri, 10 Jun 2016 15:06:22 +0000 (08:06 -0700)
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h

index 87e5d1c278f39f0cc71abeac870cf7ec34ab41aa..5391d125d75666497f3d226f9feedea9f8536e76 100644 (file)
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -19,6 +19,7 @@
  #include "vpx_scale/yv12config.h"
  
  #include "vp10/common/common_data.h"
+#include "vp10/common/quant_common.h"
  #include "vp10/common/entropy.h"
  #include "vp10/common/entropymode.h"
  #include "vp10/common/mv.h"
@@ -215,6 +216,10 @@ typedef struct {
  #if CONFIG_EXT_PARTITION_TYPES
    PARTITION_TYPE partition;
  #endif
+#if CONFIG_NEW_QUANT
+  int dq_off_index;
+  int send_dq_bit;
+#endif  // CONFIG_NEW_QUANT
  } MB_MODE_INFO;
  
  typedef struct MODE_INFO {
@@ -261,6 +266,9 @@ typedef struct macroblockd_plane {
    ENTROPY_CONTEXT *above_context;
    ENTROPY_CONTEXT *left_context;
    int16_t seg_dequant[MAX_SEGMENTS][2];
+#if CONFIG_NEW_QUANT
+  dequant_val_type_nuq seg_dequant_nuq[MAX_SEGMENTS][COEF_BANDS];
+#endif
    uint8_t *color_index_map;
  
    // number of 4x4s in current block
@@ -270,6 +278,9 @@ typedef struct macroblockd_plane {
  
    // encoder
    const int16_t *dequant;
+#if CONFIG_NEW_QUANT
+  const dequant_val_type_nuq* dequant_val_nuq;
+#endif  // CONFIG_NEW_QUANT
  } MACROBLOCKD_PLANE;
  
  #define BLOCK_OFFSET(x, i) ((x) + (i) * 16)
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h

index cbfa8b6278abba9f2e79f42001f913b943cade82..3ea4f3aec67a03b73c1748322a7abb254b057d23 100644 (file)
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -218,6 +218,10 @@ typedef struct VP10Common {
    int uv_ac_delta_q;
    int16_t y_dequant[MAX_SEGMENTS][2];
    int16_t uv_dequant[MAX_SEGMENTS][2];
+#if CONFIG_NEW_QUANT
+  dequant_val_type_nuq y_dequant_nuq[MAX_SEGMENTS][COEF_BANDS];
+  dequant_val_type_nuq uv_dequant_nuq[MAX_SEGMENTS][COEF_BANDS];
+#endif
  
    /* We allocate a MODE_INFO struct for each macroblock, together with
       an extra row on top and column on the left to simplify prediction. */
@@ -429,14 +433,21 @@ static INLINE int frame_is_intra_only(const VP10_COMMON *const cm) {
  static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
                                          tran_low_t *dqcoeff) {
    int i;
-
    for (i = 0; i < MAX_MB_PLANE; ++i) {
      xd->plane[i].dqcoeff = dqcoeff;
      xd->above_context[i] = cm->above_context[i];
      if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
        memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
+#if CONFIG_NEW_QUANT
+      memcpy(xd->plane[i].seg_dequant_nuq, cm->y_dequant_nuq,
+             sizeof(cm->y_dequant_nuq));
+#endif
      } else {
        memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant));
+#if CONFIG_NEW_QUANT
+      memcpy(xd->plane[i].seg_dequant_nuq, cm->uv_dequant_nuq,
+             sizeof(cm->uv_dequant_nuq));
+#endif
      }
      xd->fc = cm->fc;
    }
diff --git a/vp10/common/quant_common.c b/vp10/common/quant_common.c

index b1fb34dc29c703fa6d912846ecc17a37a3f5e274..f5886be847d03b0e03df790e837b275e2650609e 100644 (file)
--- a/vp10/common/quant_common.c
+++ b/vp10/common/quant_common.c
@@ -34,18 +34,18 @@ static const uint8_t nuq_knots_lossless[COEF_BANDS][NUQ_KNOTS] = {
  
  // TODO(sarahparker) add multiple quantization profiles
  static const uint8_t nuq_knots[COEF_BANDS][NUQ_KNOTS] = {
-    {86, 122, 134},  // dc, band 0
+    {91, 133, 139},  // dc, band 0
      {78, 122, 134},  // band 1
-    {78, 122, 134},  // band 2
-    {84, 122, 133},  // band 3
-    {88, 122, 134},  // band 4
-    {88, 122, 134},  // band 5
+    {83, 127, 139},  // band 2
+    {84, 117, 128},  // band 3
+    {88, 117, 129},  // band 4
+    {93, 122, 134},  // band 5
  };
  
  // dequantization offsets
  static const uint8_t nuq_doff_lossless[COEF_BANDS] = {0, 0, 0, 0, 0, 0};
  
-static const uint8_t nuq_doff[COEF_BANDS] = {8, 15, 16, 22, 23, 24};
+static const uint8_t nuq_doff[COEF_BANDS] = {11, 12, 22, 18, 20, 21};
  
  static const uint8_t *get_nuq_knots(int lossless, int band) {
    if (lossless)
diff --git a/vp10/common/quant_common.h b/vp10/common/quant_common.h

index 5be07931d482072c7788619b33c1388e5e3af27f..ebb82e8db7c26accde2b3933e8e316e4e6cb308e 100644 (file)
--- a/vp10/common/quant_common.h
+++ b/vp10/common/quant_common.h
@@ -31,6 +31,8 @@ int vp10_get_qindex(const struct segmentation *seg, int segment_id,
  
  #if CONFIG_NEW_QUANT
  #define NUQ_KNOTS 3
+typedef tran_low_t dequant_val_type_nuq[NUQ_KNOTS + 1];
+typedef tran_low_t cuml_bins_type_nuq[NUQ_KNOTS];
  void get_dequant_val_nuq(int q, int lossless, int band,
                           tran_low_t *dq, tran_low_t *cumbins);
  tran_low_t dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq);
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl

index 1e93f7d2115a9315b877c970eac45e28c3d821d1..b30953d4612fdf0cf8f441043bb1eeefa198eea4 100644 (file)
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -7,6 +7,7 @@ print <<EOF
  #include "vpx/vpx_integer.h"
  #include "vp10/common/common.h"
  #include "vp10/common/enums.h"
+#include "vp10/common/quant_common.h"
  #include "vp10/common/vp10_txfm.h"
  
  struct macroblockd;
@@ -286,6 +287,20 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    }
  }
  
+if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+  add_proto qw/void quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/quantize_nuq/;
+
+  add_proto qw/void quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/quantize_fp_nuq/;
+
+  add_proto qw/void quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/quantize_32x32_nuq/;
+
+  add_proto qw/void quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/quantize_32x32_fp_nuq/;
+}
+
  # High bitdepth functions
  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    #
@@ -658,6 +673,19 @@ specialize qw/vp10_temporal_filter_apply sse2 msa/;
  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  
    # ENCODEMB INVOKE
+  if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+    add_proto qw/void highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/highbd_quantize_nuq/;
+
+    add_proto qw/void highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/highbd_quantize_fp_nuq/;
+
+    add_proto qw/void highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/highbd_quantize_32x32_nuq/;
+
+    add_proto qw/void highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/highbd_quantize_32x32_fp_nuq/;
+  }
  
    add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
    specialize qw/vp10_highbd_block_error sse2/;
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c

index 83d08b164611345c090d3c3ed91fdd27d34118fb..199891a54f744393678a60784754e28204c293c9 100644 (file)
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -2046,6 +2046,9 @@ static void setup_quantization(VP10_COMMON *const cm,
  
  static void setup_segmentation_dequant(VP10_COMMON *const cm) {
    // Build y/uv dequant values based on segmentation.
+#if CONFIG_NEW_QUANT
+  int b;
+#endif
    if (cm->seg.enabled) {
      int i;
      for (i = 0; i < MAX_SEGMENTS; ++i) {
@@ -2057,6 +2060,16 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
                                            cm->bit_depth);
        cm->uv_dequant[i][1] = vp10_ac_quant(qindex, cm->uv_ac_delta_q,
                                            cm->bit_depth);
+#if CONFIG_NEW_QUANT
+      for (b = 0; b < COEF_BANDS; ++b) {
+        get_dequant_val_nuq(
+            cm->y_dequant[i][b != 0], qindex == 0, b,
+            cm->y_dequant_nuq[i][b], NULL);
+        get_dequant_val_nuq(
+            cm->uv_dequant[i][b != 0], qindex == 0, b,
+            cm->uv_dequant_nuq[i][b], NULL);
+      }
+#endif
      }
    } else {
      const int qindex = cm->base_qindex;
@@ -2068,6 +2081,16 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
                                          cm->bit_depth);
      cm->uv_dequant[0][1] = vp10_ac_quant(qindex, cm->uv_ac_delta_q,
                                          cm->bit_depth);
+#if CONFIG_NEW_QUANT
+    for (b = 0; b < COEF_BANDS; ++b) {
+      get_dequant_val_nuq(
+          cm->y_dequant[0][b != 0], qindex == 0, b,
+          cm->y_dequant_nuq[0][b], NULL);
+      get_dequant_val_nuq(
+          cm->uv_dequant[0][b != 0], qindex == 0, b,
+          cm->uv_dequant_nuq[0][b], NULL);
+    }
+#endif
    }
  }
  
@@ -3057,7 +3080,6 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
    RefCntBuffer *const frame_bufs = pool->frame_bufs;
    int i, mask, ref_index = 0;
    size_t sz;
-
  #if CONFIG_EXT_REFS
    cm->last3_frame_type = cm->last2_frame_type;
    cm->last2_frame_type = cm->last_frame_type;
diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c

index 953af567c98e42285f7e7a5e80339061ecb3b311..000b30b36eaaa731e20bf7bf7d27c411acc2ac32 100644 (file)
--- a/vp10/decoder/detokenize.c
+++ b/vp10/decoder/detokenize.c
@@ -49,6 +49,9 @@ static int decode_coefs(const MACROBLOCKD *xd,
                          PLANE_TYPE type,
                          tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type,
                          const int16_t *dq,
+#if CONFIG_NEW_QUANT
+                        dequant_val_type_nuq *dq_val,
+#endif  // CONFIG_NEW_QUANT
                          int ctx, const int16_t *scan, const int16_t *nb,
                          vp10_reader *r) {
    FRAME_COUNTS *counts = xd->counts;
@@ -66,6 +69,9 @@ static int decode_coefs(const MACROBLOCKD *xd,
    int dq_shift;
    int v, token;
    int16_t dqv = dq[0];
+#if CONFIG_NEW_QUANT
+  const tran_low_t *dqv_val = &dq_val[0][0];
+#endif  // CONFIG_NEW_QUANT
    const uint8_t *cat1_prob;
    const uint8_t *cat2_prob;
    const uint8_t *cat3_prob;
@@ -125,6 +131,10 @@ static int decode_coefs(const MACROBLOCKD *xd,
        break;
      }
  
+#if CONFIG_NEW_QUANT
+    dqv_val = &dq_val[band][0];
+#endif  // CONFIG_NEW_QUANT
+
      while (!vp10_read(r, prob[ZERO_CONTEXT_NODE])) {
        INCREMENT_COUNT(ZERO_TOKEN);
        dqv = dq[1];
@@ -135,6 +145,9 @@ static int decode_coefs(const MACROBLOCKD *xd,
        ctx = get_coef_context(nb, token_cache, c);
        band = *band_translate++;
        prob = coef_probs[band][ctx];
+#if CONFIG_NEW_QUANT
+      dqv_val = &dq_val[band][0];
+#endif  // CONFIG_NEW_QUANT
      }
  
      if (!vp10_read(r, prob[ONE_CONTEXT_NODE])) {
@@ -191,7 +204,13 @@ static int decode_coefs(const MACROBLOCKD *xd,
          }
        }
      }
+#if CONFIG_NEW_QUANT
+    v = dequant_abscoeff_nuq(val, dqv, dqv_val);
+    v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
+#else
      v = (val * dqv) >> dq_shift;
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_COEFFICIENT_RANGE_CHECKING
  #if CONFIG_VP9_HIGHBITDEPTH
      dqcoeff[scan[c]] = highbd_check_range((vp10_read_bit(r) ? -v : v),
@@ -224,6 +243,9 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd,
                              tran_low_t *dqcoeff, TX_SIZE tx_size,
                              TX_TYPE tx_type,
                              const int16_t *dq,
+#if CONFIG_NEW_QUANT
+                            dequant_val_type_nuq *dq_val,
+#endif  // CONFIG_NEW_QUANT
                              int ctx, const int16_t *scan, const int16_t *nb,
                              struct AnsDecoder *const ans) {
    FRAME_COUNTS *counts = xd->counts;
@@ -245,6 +267,9 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd,
    int dq_shift;
    int v, token;
    int16_t dqv = dq[0];
+#if CONFIG_NEW_QUANT
+  const tran_low_t *dqv_val = &dq_val[0][0];
+#endif  // CONFIG_NEW_QUANT
    const uint8_t *cat1_prob;
    const uint8_t *cat2_prob;
    const uint8_t *cat3_prob;
@@ -306,6 +331,10 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd,
        }
      }
  
+#if CONFIG_NEW_QUANT
+    dqv_val = &dq_val[band][0];
+#endif  // CONFIG_NEW_QUANT
+
      cdf = &coef_cdfs[band][ctx];
      token = ZERO_TOKEN + rans_read(ans, *cdf);
      if (token == ZERO_TOKEN) {
@@ -359,7 +388,13 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd,
  #endif
          } break;
        }
-      v = (val * dqv) >> dq_shift;
+#if CONFIG_NEW_QUANT
+    v = dequant_abscoeff_nuq(val, dqv, dqv_val);
+    v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
+#else
+    v = (val * dqv) >> dq_shift;
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_COEFFICIENT_RANGE_CHECKING
  #if CONFIG_VP9_HIGHBITDEPTH
        dqcoeff[scan[c]] =
@@ -474,11 +509,19 @@ int vp10_decode_block_tokens(MACROBLOCKD *const xd,
  #if !CONFIG_ANS
    const int eob = decode_coefs(xd, pd->plane_type,
                                 pd->dqcoeff, tx_size, tx_type,
-                               dequant, ctx, sc->scan, sc->neighbors, r);
+                               dequant,
+#if CONFIG_NEW_QUANT
+                               pd->seg_dequant_nuq[0],
+#endif  // CONFIG_NEW_QUANT
+                               ctx, sc->scan, sc->neighbors, r);
  #else
    const int eob = decode_coefs_ans(xd, pd->plane_type,
                                     pd->dqcoeff, tx_size, tx_type,
-                                   dequant, ctx, sc->scan, sc->neighbors, r);
+                                   dequant,
+#if CONFIG_NEW_QUANT
+                                   pd->seg_dequant_nuq[0],
+#endif  // CONFIG_NEW_QUANT
+                                   ctx, sc->scan, sc->neighbors, r);
  #endif  // !CONFIG_ANS
    dec_set_contexts(xd, pd, tx_size, eob > 0, x, y);
    return eob;
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h

index d4adf0de566039e90079f881fed17238153e0171..6606e59cbf50cfddf071bdd2ca01d7827d3241aa 100644 (file)
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -41,6 +41,9 @@ typedef struct macroblock_plane {
    int16_t *quant_shift;
    int16_t *zbin;
    int16_t *round;
+#if CONFIG_NEW_QUANT
+  cuml_bins_type_nuq *cuml_bins_nuq;
+#endif  // CONFIG_NEW_QUANT
  
    int64_t quant_thred[2];
  } MACROBLOCK_PLANE;
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c

index b9412cc5a38081cda32930893c1923d808f26336..dfb72ea94e9822913f413403f6e203ab5f17ea59 100644 (file)
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -107,6 +107,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
    const int default_eob = 16 << (tx_size << 1);
    int mul;
    const int16_t *dequant_ptr = pd->dequant;
+#if CONFIG_NEW_QUANT
+  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq;
+#endif  // CONFIG_NEW_QUANT
    const uint8_t *const band_translate = get_band_translate(tx_size);
    TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
    const scan_order *const so =
@@ -121,6 +124,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
    int16_t t0, t1;
    EXTRABIT e0;
    int best, band, pt, i, final_eob;
+  int shift = get_tx_scale(xd, tx_type, tx_size);
  #if CONFIG_VP9_HIGHBITDEPTH
    const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
  #else
@@ -129,7 +133,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
  
    assert((!type && !plane) || (type && plane));
    assert(eob <= default_eob);
-  mul = 1 << get_tx_scale(xd, tx_type, tx_size);
+  mul = 1 << shift;
  
    /* Now set up a Viterbi trellis to evaluate alternative roundings. */
    /* Initialize the sentinel node of the trellis. */
@@ -188,12 +192,23 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
        rate0 = tokens[next][0].rate;
        rate1 = tokens[next][1].rate;
  
+#if CONFIG_NEW_QUANT
+      shortcut = (
+          (dequant_abscoeff_nuq(
+              abs(x), dequant_ptr[rc != 0],
+              dequant_val[band_translate[i]]) > abs(coeff[rc]) * mul) &&
+          (dequant_abscoeff_nuq(
+              abs(x) - 1, dequant_ptr[rc != 0],
+              dequant_val[band_translate[i]]) < abs(coeff[rc]) * mul));
+#else   // CONFIG_NEW_QUANT
+
        if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
            (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
                                                 dequant_ptr[rc != 0]))
          shortcut = 1;
        else
          shortcut = 0;
+#endif   // CONFIG_NEW_QUANT
  
        if (shortcut) {
          sz = -(x < 0);
@@ -232,6 +247,16 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
        base_bits = vp10_get_cost(t0, e0, cat6_high_cost);
  
        if (shortcut) {
+#if CONFIG_NEW_QUANT
+        dx = dequant_coeff_nuq(
+            x, dequant_ptr[rc != 0],
+            dequant_val[band_translate[i]]) - coeff[rc] * mul;
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+          dx >>= xd->bd - 8;
+        }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#else   // CONFIG_NEW_QUANT
  #if CONFIG_VP9_HIGHBITDEPTH
          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
@@ -241,6 +266,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
  #else
          dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
  #endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_NEW_QUANT
          d2 = dx * dx;
        }
        tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
@@ -295,9 +321,15 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
      if (x) {
        final_eob = i;
      }
-
      qcoeff[rc] = x;
+#if CONFIG_NEW_QUANT
+    dqcoeff[rc] = dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0],
+                                       dequant_val[band_translate[i]]);
+    if (shift) dqcoeff[rc] = ROUND_POWER_OF_TWO(dqcoeff[rc], shift);
+    if (x < 0) dqcoeff[rc] = -dqcoeff[rc];
+#else
      dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
+#endif  // CONFIG_NEW_QUANT
  
      next = tokens[i][best].next;
      best = best_index[i][best];
@@ -401,6 +433,469 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
    }
  }
  
+#if CONFIG_NEW_QUANT
+void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+                          int blk_col, BLOCK_SIZE plane_bsize,
+                          TX_SIZE tx_size) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  const scan_order *const scan_order =
+      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int16_t *src_diff;
+  const uint8_t* band = get_band_translate(tx_size);
+
+  FWD_TXFM_PARAM fwd_txfm_param;
+
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_B];
+  fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+  fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+// TODO(sarahparker): register these new-quant quantize functions in
+// quant_func_list; for now they are called directly for this experiment.
+#if CONFIG_VP9_HIGHBITDEPTH
+  fwd_txfm_param.bd = xd->bd;
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+    switch (tx_size) {
+      case TX_32X32:
+        highbd_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                                  p->quant, p->quant_shift, pd->dequant,
+                                  (const cuml_bins_type_nuq *)
+                                      p->cuml_bins_nuq,
+                                  (const dequant_val_type_nuq *)
+                                      pd->dequant_val_nuq,
+                                  qcoeff, dqcoeff, eob,
+                                  scan_order->scan, band);
+        break;
+      case TX_16X16:
+        highbd_quantize_nuq(coeff, 256, x->skip_block,
+                            p->quant, p->quant_shift, pd->dequant,
+                            (const cuml_bins_type_nuq *)p->cuml_bins_nuq,
+                            (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                            qcoeff, dqcoeff, eob,
+                            scan_order->scan, band);
+        break;
+      case TX_8X8:
+        highbd_quantize_nuq(coeff, 64, x->skip_block,
+                            p->quant, p->quant_shift, pd->dequant,
+                            (const cuml_bins_type_nuq *)p->cuml_bins_nuq,
+                            (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                            qcoeff, dqcoeff, eob,
+                            scan_order->scan, band);
+        break;
+      case TX_4X4:
+        highbd_quantize_nuq(coeff, 16, x->skip_block,
+                            p->quant, p->quant_shift, pd->dequant,
+                            (const cuml_bins_type_nuq *)p->cuml_bins_nuq,
+                            (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                            qcoeff, dqcoeff, eob,
+                            scan_order->scan, band);
+        break;
+      default:
+        assert(0);
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+  switch (tx_size) {
+    case TX_32X32:
+      quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                         p->quant, p->quant_shift, pd->dequant,
+                         (const cuml_bins_type_nuq *)p->cuml_bins_nuq,
+                         (const dequant_val_type_nuq *)
+                         pd->dequant_val_nuq,
+                         qcoeff, dqcoeff, eob,
+                         scan_order->scan, band);
+      break;
+    case TX_16X16:
+      quantize_nuq(coeff, 256, x->skip_block,
+                   p->quant, p->quant_shift, pd->dequant,
+                   (const cuml_bins_type_nuq *)p->cuml_bins_nuq,
+                   (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                   qcoeff, dqcoeff, eob,
+                   scan_order->scan, band);
+      break;
+    case TX_8X8:
+      quantize_nuq(coeff, 64, x->skip_block,
+                   p->quant, p->quant_shift, pd->dequant,
+                   (const cuml_bins_type_nuq *)p->cuml_bins_nuq,
+                   (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                   qcoeff, dqcoeff, eob,
+                   scan_order->scan, band);
+      break;
+    case TX_4X4:
+      quantize_nuq(coeff, 16, x->skip_block,
+                   p->quant, p->quant_shift, pd->dequant,
+                   (const cuml_bins_type_nuq *)p->cuml_bins_nuq,
+                   (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                   qcoeff, dqcoeff, eob,
+                   scan_order->scan, band);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+void vp10_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+                             int blk_col, BLOCK_SIZE plane_bsize,
+                             TX_SIZE tx_size) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  const scan_order *const scan_order =
+      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int16_t *src_diff;
+  const uint8_t* band = get_band_translate(tx_size);
+
+  FWD_TXFM_PARAM fwd_txfm_param;
+
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_FP];
+  fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+  fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+// TODO(sarahparker): register these new-quant quantize functions in
+// quant_func_list; for now they are called directly for this experiment.
+#if CONFIG_VP9_HIGHBITDEPTH
+  fwd_txfm_param.bd = xd->bd;
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+    switch (tx_size) {
+      case TX_32X32:
+        highbd_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                     p->quant_fp, pd->dequant,
+                                     (const cuml_bins_type_nuq *)
+                                         p->cuml_bins_nuq,
+                                     (const dequant_val_type_nuq *)
+                                         pd->dequant_val_nuq,
+                                     qcoeff, dqcoeff, eob,
+                                     scan_order->scan, band);
+        break;
+      case TX_16X16:
+        highbd_quantize_fp_nuq(coeff, 256, x->skip_block,
+                               p->quant_fp, pd->dequant,
+                               (const cuml_bins_type_nuq *)
+                                  p->cuml_bins_nuq,
+                               (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                               qcoeff, dqcoeff, eob,
+                               scan_order->scan, band);
+        break;
+      case TX_8X8:
+        highbd_quantize_fp_nuq(coeff, 64, x->skip_block,
+                               p->quant_fp, pd->dequant,
+                               (const cuml_bins_type_nuq *)
+                                  p->cuml_bins_nuq,
+                               (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                               qcoeff, dqcoeff, eob,
+                               scan_order->scan, band);
+        break;
+      case TX_4X4:
+        highbd_quantize_fp_nuq(coeff, 16, x->skip_block,
+                               p->quant_fp, pd->dequant,
+                               (const cuml_bins_type_nuq *)
+                                   p->cuml_bins_nuq,
+                               (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                               qcoeff, dqcoeff, eob,
+                               scan_order->scan, band);
+        break;
+      default:
+        assert(0);
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+  switch (tx_size) {
+    case TX_32X32:
+      quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                            p->quant_fp, pd->dequant,
+                            (const cuml_bins_type_nuq *)
+                                p->cuml_bins_nuq,
+                            (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                            qcoeff, dqcoeff, eob,
+                            scan_order->scan, band);
+      break;
+    case TX_16X16:
+      quantize_fp_nuq(coeff, 256, x->skip_block,
+                      p->quant_fp, pd->dequant,
+                      (const cuml_bins_type_nuq *)
+                          p->cuml_bins_nuq,
+                      (const dequant_val_type_nuq *)
+                          pd->dequant_val_nuq,
+                      qcoeff, dqcoeff, eob,
+                      scan_order->scan, band);
+      break;
+    case TX_8X8:
+      quantize_fp_nuq(coeff, 64, x->skip_block,
+                      p->quant_fp, pd->dequant,
+                      (const cuml_bins_type_nuq *)
+                          p->cuml_bins_nuq,
+                      (const dequant_val_type_nuq *)
+                          pd->dequant_val_nuq,
+                      qcoeff, dqcoeff, eob,
+                      scan_order->scan, band);
+      break;
+    case TX_4X4:
+      quantize_fp_nuq(coeff, 16, x->skip_block,
+                      p->quant_fp, pd->dequant,
+                      (const cuml_bins_type_nuq *)
+                          p->cuml_bins_nuq,
+                      (const dequant_val_type_nuq *)
+                          pd->dequant_val_nuq,
+                      qcoeff, dqcoeff, eob,
+                      scan_order->scan, band);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+void vp10_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+                             int blk_col, BLOCK_SIZE plane_bsize,
+                             TX_SIZE tx_size) {  // Forward-transforms one block, then runs the DC-only NUQ quantizer on it.
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];  // per-plane state held in the MACROBLOCK
+  const struct macroblockd_plane *const pd = &xd->plane[plane];  // per-plane state held in the MACROBLOCKD
+  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;  // plane 0 -> Y, any other plane -> UV
+  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);  // receives the transform output
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);  // receives the quantized levels
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);  // receives the dequantized values
+  uint16_t *const eob = &p->eobs[block];  // receives the end-of-block count
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // 4 samples per 4x4 column of the plane block
+  const int16_t *src_diff;
+
+  FWD_TXFM_PARAM fwd_txfm_param;
+
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_DC];  // transform option registered for the DC mode
+  fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+  fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];  // lossless flag of the current segment
+
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];  // blk_row / blk_col are scaled by 4 to sample offsets
+
+// TODO(sarahparker): register all of these new-quant quantize functions in
+// quant_func_list; they are called directly for now to get the experiment working.
+#if CONFIG_VP9_HIGHBITDEPTH
+  fwd_txfm_param.bd = xd->bd;
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // high-bitdepth buffer: use the highbd_* transform and quantizers
+    highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+    switch (tx_size) {
+      case TX_32X32:  // 1024 coefficients, dedicated 32x32 quantizer
+        highbd_quantize_dc_32x32_nuq(coeff, 1024, x->skip_block,
+                                     p->quant[0], p->quant_shift[0],  // entry 0 of each quantizer table
+                                     pd->dequant[0],
+                                     p->cuml_bins_nuq[0],  // entry 0 of the cumulative-bin table
+                                     pd->dequant_val_nuq[0],  // entry 0 of the dequant-value table
+                                     qcoeff, dqcoeff, eob);
+        break;
+      case TX_16X16:  // 256 coefficients
+        highbd_quantize_dc_nuq(coeff, 256, x->skip_block,
+                               p->quant[0], p->quant_shift[0],
+                               pd->dequant[0],
+                               p->cuml_bins_nuq[0],
+                               pd->dequant_val_nuq[0],
+                               qcoeff, dqcoeff, eob);
+        break;
+      case TX_8X8:  // 64 coefficients
+        highbd_quantize_dc_nuq(coeff, 64, x->skip_block,
+                               p->quant[0], p->quant_shift[0],
+                               pd->dequant[0],
+                               p->cuml_bins_nuq[0],
+                               pd->dequant_val_nuq[0],
+                               qcoeff, dqcoeff, eob);
+        break;
+      case TX_4X4:  // 16 coefficients
+        highbd_quantize_dc_nuq(coeff, 16, x->skip_block,
+                               p->quant[0], p->quant_shift[0],
+                               pd->dequant[0],
+                               p->cuml_bins_nuq[0],
+                               pd->dequant_val_nuq[0],
+                               qcoeff, dqcoeff, eob);
+        break;
+      default:
+        assert(0);  // no other transform size is handled here
+    }
+    return;  // skip the low-bitdepth path below
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);  // low-bitdepth forward transform
+  switch (tx_size) {
+    case TX_32X32:  // 1024 coefficients, dedicated 32x32 quantizer
+      quantize_dc_32x32_nuq(coeff, 1024, x->skip_block,
+                            p->quant[0], p->quant_shift[0], pd->dequant[0],
+                            p->cuml_bins_nuq[0],
+                            pd->dequant_val_nuq[0],
+                            qcoeff, dqcoeff, eob);
+      break;
+    case TX_16X16:  // 256 coefficients
+      quantize_dc_nuq(coeff, 256, x->skip_block,
+                      p->quant[0], p->quant_shift[0], pd->dequant[0],
+                      p->cuml_bins_nuq[0],
+                      pd->dequant_val_nuq[0],
+                      qcoeff, dqcoeff, eob);
+      break;
+    case TX_8X8:  // 64 coefficients
+      quantize_dc_nuq(coeff, 64, x->skip_block,
+                      p->quant[0], p->quant_shift[0], pd->dequant[0],
+                      p->cuml_bins_nuq[0],
+                      pd->dequant_val_nuq[0],
+                      qcoeff, dqcoeff, eob);
+      break;
+    case TX_4X4:  // 16 coefficients
+      quantize_dc_nuq(coeff, 16, x->skip_block,
+                      p->quant[0], p->quant_shift[0], pd->dequant[0],
+                      p->cuml_bins_nuq[0],
+                      pd->dequant_val_nuq[0],
+                      qcoeff, dqcoeff, eob);
+      break;
+    default:
+      assert(0);  // no other transform size is handled here
+      break;
+  }
+}
+
+void vp10_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
+                                int blk_row, int blk_col,
+                                BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {  // Forward-transforms one block, then runs the DC-only "fp" NUQ quantizer on it.
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];  // per-plane state held in the MACROBLOCK
+  const struct macroblockd_plane *const pd = &xd->plane[plane];  // per-plane state held in the MACROBLOCKD
+  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;  // plane 0 -> Y, any other plane -> UV
+  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);  // receives the transform output
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);  // receives the quantized levels
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);  // receives the dequantized values
+  uint16_t *const eob = &p->eobs[block];  // receives the end-of-block count
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // 4 samples per 4x4 column of the plane block
+  const int16_t *src_diff;
+
+  FWD_TXFM_PARAM fwd_txfm_param;
+
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_DC];  // transform option registered for the DC mode
+  fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+  fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];  // lossless flag of the current segment
+
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];  // blk_row / blk_col are scaled by 4 to sample offsets
+
+// TODO(sarahparker): register all of these new-quant quantize functions in
+// quant_func_list; they are called directly for now to get the experiment working.
+#if CONFIG_VP9_HIGHBITDEPTH
+  fwd_txfm_param.bd = xd->bd;
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // high-bitdepth buffer: use the highbd_* transform and quantizers
+    highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+    switch (tx_size) {
+      case TX_32X32:  // 1024 coefficients, dedicated 32x32 quantizer
+        highbd_quantize_dc_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                        p->quant_fp[0], pd->dequant[0],  // fp variant takes quant_fp and no quant_shift
+                                        p->cuml_bins_nuq[0],  // entry 0 of the cumulative-bin table
+                                        pd->dequant_val_nuq[0],  // entry 0 of the dequant-value table
+                                        qcoeff, dqcoeff, eob);
+        break;
+      case TX_16X16:  // 256 coefficients
+        highbd_quantize_dc_fp_nuq(coeff, 256, x->skip_block,
+                                  p->quant_fp[0], pd->dequant[0],
+                                  p->cuml_bins_nuq[0],
+                                  pd->dequant_val_nuq[0],
+                                  qcoeff, dqcoeff, eob);
+        break;
+      case TX_8X8:  // 64 coefficients
+        highbd_quantize_dc_fp_nuq(coeff, 64, x->skip_block,
+                                  p->quant_fp[0], pd->dequant[0],
+                                  p->cuml_bins_nuq[0],
+                                  pd->dequant_val_nuq[0],
+                                  qcoeff, dqcoeff, eob);
+        break;
+      case TX_4X4:  // 16 coefficients
+        highbd_quantize_dc_fp_nuq(coeff, 16, x->skip_block,
+                                  p->quant_fp[0], pd->dequant[0],
+                                  p->cuml_bins_nuq[0],
+                                  pd->dequant_val_nuq[0],
+                                  qcoeff, dqcoeff, eob);
+        break;
+      default:
+        assert(0);  // no other transform size is handled here
+    }
+    return;  // skip the low-bitdepth path below
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);  // low-bitdepth forward transform
+  switch (tx_size) {
+    case TX_32X32:  // 1024 coefficients, dedicated 32x32 quantizer
+      quantize_dc_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                               p->quant_fp[0], pd->dequant[0],
+                               p->cuml_bins_nuq[0],
+                               pd->dequant_val_nuq[0],
+                               qcoeff, dqcoeff, eob);
+      break;
+    case TX_16X16:  // 256 coefficients
+      quantize_dc_fp_nuq(coeff, 256, x->skip_block,
+                         p->quant_fp[0], pd->dequant[0],
+                         p->cuml_bins_nuq[0],
+                         pd->dequant_val_nuq[0],
+                         qcoeff, dqcoeff, eob);
+
+      break;
+    case TX_8X8:  // 64 coefficients
+      quantize_dc_fp_nuq(coeff, 64, x->skip_block,
+                         p->quant_fp[0], pd->dequant[0],
+                         p->cuml_bins_nuq[0],
+                         pd->dequant_val_nuq[0],
+                         qcoeff, dqcoeff, eob);
+      break;
+    case TX_4X4:  // 16 coefficients
+      quantize_dc_fp_nuq(coeff, 16, x->skip_block,
+                         p->quant_fp[0], pd->dequant[0],
+                         p->cuml_bins_nuq[0],
+                         pd->dequant_val_nuq[0],
+                         qcoeff, dqcoeff, eob);
+      break;
+    default:
+      assert(0);  // no other transform size is handled here
+      break;
+  }
+}
+#endif  // CONFIG_NEW_QUANT
+
  static void encode_block(int plane, int block, int blk_row, int blk_col,
                           BLOCK_SIZE plane_bsize,
                           TX_SIZE tx_size, void *arg) {
@@ -448,20 +943,35 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
          *a = *l = 0;
          return;
        } else {
+#if CONFIG_NEW_QUANT
+        vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+                                tx_size);
+#else
          vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
                           tx_size, VP10_XFORM_QUANT_FP);
+#endif
        }
      } else {
        if (max_txsize_lookup[plane_bsize] == tx_size) {
          int blk_index = (block >> (tx_size << 1));
          if (x->skip_txfm[plane][blk_index] == SKIP_TXFM_NONE) {
            // full forward transform and quantization
+#if CONFIG_NEW_QUANT
+          vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+                               tx_size);
+#else
            vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
                             tx_size, VP10_XFORM_QUANT_B);
+#endif  // CONFIG_NEW_QUANT
          } else if (x->skip_txfm[plane][blk_index] == SKIP_TXFM_AC_ONLY) {
            // fast path forward transform and quantization
+#if CONFIG_NEW_QUANT
+          vp10_xform_quant_dc_nuq(x, plane, block, blk_row, blk_col,
+                                  plane_bsize, tx_size);
+#else
            vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
                             tx_size, VP10_XFORM_QUANT_DC);
+#endif  // CONFIG_NEW_QUANT
          } else {
            // skip forward transform
            p->eobs[block] = 0;
@@ -471,8 +981,13 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
  #endif
          }
        } else {
+#if CONFIG_NEW_QUANT
+        vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+                             tx_size);
+#else
          vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
                           tx_size, VP10_XFORM_QUANT_B);
+#endif  // CONFIG_NEW_QUANT
        }
      }
    }
@@ -603,8 +1118,13 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
    uint8_t *dst;
    dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
  
+#if CONFIG_NEW_QUANT
+  vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+                       tx_size);
+#else
    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
                     tx_size, VP10_XFORM_QUANT_B);
+#endif  // CONFIG_NEW_QUANT
  
    if (p->eobs[block] > 0) {
  #if CONFIG_VP9_HIGHBITDEPTH
@@ -733,7 +1253,6 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
    uint16_t *eob = &p->eobs[block];
    const int src_stride = p->src.stride;
    const int dst_stride = pd->dst.stride;
-
    const int tx1d_size = get_tx1d_size(tx_size);
  
    INV_TXFM_PARAM inv_txfm_param;
@@ -758,8 +1277,13 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
                       src_stride, dst, dst_stride);
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  
+#if CONFIG_NEW_QUANT
+  vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+                       tx_size);
+#else  // CONFIG_NEW_QUANT
    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                     VP10_XFORM_QUANT_B);
+#endif  // CONFIG_NEW_QUANT
  
    if (args->ctx != NULL) {
      struct optimize_ctx *const ctx = args->ctx;
diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h

index cbe15aadfe876ca90faf773043804180b68dbf7b..eae1db799752690f29196141ae8f1c3a03384e99 100644 (file)
--- a/vp10/encoder/encodemb.h
+++ b/vp10/encoder/encodemb.h
@@ -41,6 +41,20 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block,
                        int blk_row, int blk_col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                        VP10_XFORM_QUANT xform_quant_idx);
+#if CONFIG_NEW_QUANT  // transform + quantize entry points of the new-quant experiment
+void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row,  // called where vp10_xform_quant(..., VP10_XFORM_QUANT_B) is used otherwise
+                          int blk_col, BLOCK_SIZE plane_bsize,
+                          TX_SIZE tx_size);
+void vp10_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row,  // DC-only variant (quant / quant_shift tables)
+                             int blk_col, BLOCK_SIZE plane_bsize,
+                             TX_SIZE tx_size);
+void vp10_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row,  // "fp" variant (quant_fp table)
+                             int blk_col, BLOCK_SIZE plane_bsize,
+                             TX_SIZE tx_size);
+void vp10_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,  // DC-only "fp" variant
+                                int blk_row, int blk_col,
+                                BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+#endif  // CONFIG_NEW_QUANT
  
  void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
  
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h

index 7b7bd7d52812c231745addca412b1173979b2be1..4878c0052ae03d75789afb27767d58a62fa548f4 100644 (file)
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -336,6 +336,12 @@ typedef struct VP10_COMP {
    MB_MODE_INFO_EXT *mbmi_ext_base;
    DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]);   // 8: SIMD width
    DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]);  // 8: SIMD width
+#if CONFIG_NEW_QUANT
+  DECLARE_ALIGNED(16, dequant_val_type_nuq,
+                  y_dequant_val_nuq[QINDEX_RANGE][COEF_BANDS]);
+  DECLARE_ALIGNED(16, dequant_val_type_nuq,
+                  uv_dequant_val_nuq[QINDEX_RANGE][COEF_BANDS]);
+#endif  // CONFIG_NEW_QUANT
    VP10_COMMON common;
    VP10EncoderConfig oxcf;
    struct lookahead_ctx    *lookahead;
diff --git a/vp10/encoder/quantize.c b/vp10/encoder/quantize.c

index 2c61de5bd27c63881cd05e554c28f2eb8da85f89..2a8b33f6c2152d758d0db48018f05c6bbbca2f78 100644 (file)
--- a/vp10/encoder/quantize.c
+++ b/vp10/encoder/quantize.c
@@ -22,6 +22,405 @@
  #include "vp10/encoder/quantize.h"
  #include "vp10/encoder/rd.h"
  
+#if CONFIG_NEW_QUANT
+static INLINE int quantize_coeff_nuq(const tran_low_t coeffv,  // Quantizes one coefficient; returns 1 when the level is nonzero.
+                                     const int16_t quant,
+                                     const int16_t quant_shift,
+                                     const int16_t dequant,
+                                     const tran_low_t *cuml_bins_ptr,  // thresholds; entries 0..NUQ_KNOTS-1 are read
+                                     const tran_low_t *dequant_val,  // forwarded to dequant_abscoeff_nuq()
+                                     tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                     tran_low_t *dqcoeff_ptr) {  // out: signed dequantized value
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1 mask; assumes 32-bit int and an arithmetic right shift
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // magnitude of coeff via the sign mask
+  int i, q;  // q is assigned either in the loop or in the i == NUQ_KNOTS branch
+  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);  // magnitude limited to the int16 range
+  for (i = 0; i < NUQ_KNOTS; i++) {
+    if (tmp < cuml_bins_ptr[i]) {  // the first threshold above the magnitude gives the level
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTS) {  // magnitude is not below any of the NUQ_KNOTS thresholds
+    tmp -= cuml_bins_ptr[NUQ_KNOTS - 1];  // excess over the last threshold
+    q = NUQ_KNOTS + (((((tmp * quant) >> 16) + tmp) * quant_shift) >> 16);  // multiply/shift quantization of the excess
+  }
+  if (q) {
+    *dqcoeff_ptr =
+        dequant_abscoeff_nuq(q, dequant, dequant_val);  // reconstruction magnitude (helper defined elsewhere)
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the input sign to the level
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;  // and to the reconstruction
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);
+}
+
+static INLINE int quantize_coeff_bigtx_nuq(const tran_low_t coeffv,  // Variant of quantize_coeff_nuq with thresholds and output rescaled by logsizeby32.
+                                           const int16_t quant,
+                                           const int16_t quant_shift,
+                                           const int16_t dequant,
+                                           const tran_low_t *cuml_bins_ptr,  // thresholds; entries 0..NUQ_KNOTS-1 are read
+                                           const tran_low_t *dequant_val,  // forwarded to dequant_abscoeff_nuq()
+                                           tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                           tran_low_t *dqcoeff_ptr,  // out: signed dequantized value
+                                           int logsizeby32) {  // callers in this file pass 0 (32x32 transforms)
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1 mask; assumes 32-bit int and an arithmetic right shift
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // magnitude of coeff via the sign mask
+  int i, q;  // q is assigned either in the loop or in the i == NUQ_KNOTS branch
+  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);  // magnitude limited to the int16 range
+  for (i = 0; i < NUQ_KNOTS; i++) {
+    if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], 1 + logsizeby32)) {  // threshold scaled by 1 + logsizeby32 bits (macro defined elsewhere)
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTS) {  // magnitude is not below any scaled threshold
+    tmp -= ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], 1 + logsizeby32);  // excess over the last scaled threshold
+    q = NUQ_KNOTS +
+        (((((tmp * quant) >> 16) + tmp) * quant_shift) >> (15 - logsizeby32));  // final shift is 15 - logsizeby32 instead of 16
+  }
+  if (q) {
+    *dqcoeff_ptr =
+         ROUND_POWER_OF_TWO(dequant_abscoeff_nuq(q, dequant, dequant_val),
+                            1 + logsizeby32);  // reconstruction magnitude, scaled like the thresholds
+    // Alternative without the rounding macro, kept for reference:
+    //   dequant_abscoeff_nuq(q, dequant, dequant_val) >> (1 + logsizeby32)
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the input sign to the level
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;  // and to the reconstruction
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);
+}
+
+static INLINE int quantize_coeff_fp_nuq(const tran_low_t coeffv,  // "fp" single-coefficient quantizer; returns 1 when the level is nonzero.
+                                        const int16_t quant,  // single multiplier; there is no quant_shift in this variant
+                                        const int16_t dequant,
+                                        const tran_low_t *cuml_bins_ptr,  // thresholds; entries 0..NUQ_KNOTS-1 are read
+                                        const tran_low_t *dequant_val,  // forwarded to dequant_abscoeff_nuq()
+                                        tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                        tran_low_t *dqcoeff_ptr) {  // out: signed dequantized value
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1 mask; assumes 32-bit int and an arithmetic right shift
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // magnitude of coeff via the sign mask
+  int i, q;  // q is assigned either in the loop or in the i == NUQ_KNOTS branch
+  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);  // magnitude limited to the int16 range
+  for (i = 0; i < NUQ_KNOTS; i++) {
+    if (tmp < cuml_bins_ptr[i]) {  // the first threshold above the magnitude gives the level
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTS) {  // magnitude is not below any of the NUQ_KNOTS thresholds
+    q = NUQ_KNOTS +
+        ((((int64_t)tmp - cuml_bins_ptr[NUQ_KNOTS - 1]) * quant) >> 16);  // excess over the last threshold, multiplied in 64 bits
+  }
+  if (q) {
+    *dqcoeff_ptr =
+        dequant_abscoeff_nuq(q, dequant, dequant_val);  // reconstruction magnitude (helper defined elsewhere)
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the input sign to the level
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;  // and to the reconstruction
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);
+}
+
+static INLINE int quantize_coeff_bigtx_fp_nuq(const tran_low_t coeffv,  // Variant of quantize_coeff_fp_nuq with thresholds and output rescaled by logsizeby32.
+                                              const int16_t quant,  // single multiplier; there is no quant_shift in this variant
+                                              const int16_t dequant,
+                                              const tran_low_t *cuml_bins_ptr,  // thresholds; entries 0..NUQ_KNOTS-1 are read
+                                              const tran_low_t *dequant_val,  // forwarded to dequant_abscoeff_nuq()
+                                              tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                              tran_low_t *dqcoeff_ptr,  // out: signed dequantized value
+                                              int logsizeby32) {  // callers in this file pass 0 (32x32 transforms)
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1 mask; assumes 32-bit int and an arithmetic right shift
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // magnitude of coeff via the sign mask
+  int i, q;  // q is assigned either in the loop or in the i == NUQ_KNOTS branch
+  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);  // magnitude limited to the int16 range
+  for (i = 0; i < NUQ_KNOTS; i++) {
+    if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], 1 + logsizeby32)) {  // threshold scaled by 1 + logsizeby32 bits (macro defined elsewhere)
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTS) {  // magnitude is not below any scaled threshold
+    q = NUQ_KNOTS +
+        ((((int64_t)tmp - ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1],
+                                             1 + logsizeby32)) * quant) >>
+         (15 - logsizeby32));  // final shift is 15 - logsizeby32 instead of 16
+  }
+  if (q) {
+    *dqcoeff_ptr =
+        ROUND_POWER_OF_TWO(dequant_abscoeff_nuq(q, dequant, dequant_val),
+                           1 + logsizeby32);  // reconstruction magnitude, scaled like the thresholds
+    // Alternative without the rounding macro, kept for reference:
+    //   dequant_abscoeff_nuq(q, dequant, dequant_val) >> (1 + logsizeby32)
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the input sign to the level
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;  // and to the reconstruction
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);
+}
+
+void quantize_dc_nuq(const tran_low_t *coeff_ptr,  // Quantizes only coeff_ptr[0]; every other output entry is left at zero.
+                     intptr_t n_coeffs,  // number of entries cleared in both output arrays
+                     int skip_block,  // nonzero: outputs stay all-zero and *eob_ptr becomes 0
+                     const int16_t quant,
+                     const int16_t quant_shift,
+                     const int16_t dequant,
+                     const tran_low_t *cuml_bins_ptr,
+                     const tran_low_t *dequant_val,
+                     tran_low_t *qcoeff_ptr,  // out: quantized levels
+                     tran_low_t *dqcoeff_ptr,  // out: dequantized values
+                     uint16_t *eob_ptr) {  // out: 1 if the first coefficient quantized to nonzero, else 0
+  int eob = -1;  // -1 means no nonzero level; stored as eob + 1
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // cleared regardless of skip_block
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (!skip_block) {
+    const int rc = 0;  // only coefficient index 0 is processed
+    if (quantize_coeff_nuq(coeff_ptr[rc],
+                           quant,
+                           quant_shift,
+                           dequant,
+                           cuml_bins_ptr,
+                           dequant_val,
+                           qcoeff_ptr,  // results land in entry 0 of each output array
+                           dqcoeff_ptr))
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+
+void quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,  // "fp" counterpart of quantize_dc_nuq: quantizes only coeff_ptr[0].
+                        intptr_t n_coeffs,  // number of entries cleared in both output arrays
+                        int skip_block,  // nonzero: outputs stay all-zero and *eob_ptr becomes 0
+                        const int16_t quant,
+                        const int16_t dequant,
+                        const tran_low_t *cuml_bins_ptr,
+                        const tran_low_t *dequant_val,
+                        tran_low_t *qcoeff_ptr,  // out: quantized levels
+                        tran_low_t *dqcoeff_ptr,  // out: dequantized values
+                        uint16_t *eob_ptr) {  // out: 1 if the first coefficient quantized to nonzero, else 0
+  int eob = -1;  // -1 means no nonzero level; stored as eob + 1
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // cleared regardless of skip_block
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (!skip_block) {
+    const int rc = 0;  // only coefficient index 0 is processed
+    if (quantize_coeff_fp_nuq(coeff_ptr[rc],
+                              quant,
+                              dequant,
+                              cuml_bins_ptr,
+                              dequant_val,
+                              qcoeff_ptr,  // results land in entry 0 of each output array
+                              dqcoeff_ptr))
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+
+void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,  // 32x32 form of quantize_dc_nuq: uses the bigtx coefficient quantizer.
+                           intptr_t n_coeffs,  // number of entries cleared in both output arrays
+                           int skip_block,  // nonzero: outputs stay all-zero and *eob_ptr becomes 0
+                           const int16_t quant,
+                           const int16_t quant_shift,
+                           const int16_t dequant,
+                           const tran_low_t *cuml_bins_ptr,
+                           const tran_low_t *dequant_val,
+                           tran_low_t *qcoeff_ptr,  // out: quantized levels
+                           tran_low_t *dqcoeff_ptr,  // out: dequantized values
+                           uint16_t *eob_ptr) {  // out: 1 if the first coefficient quantized to nonzero, else 0
+  int eob = -1;  // -1 means no nonzero level; stored as eob + 1
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // cleared regardless of skip_block
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (!skip_block) {
+    const int rc = 0;  // only coefficient index 0 is processed
+    if (quantize_coeff_bigtx_nuq(coeff_ptr[rc],
+                                 quant,
+                                 quant_shift,
+                                 dequant,
+                                 cuml_bins_ptr,
+                                 dequant_val,
+                                 qcoeff_ptr,  // results land in entry 0 of each output array
+                                 dqcoeff_ptr,
+                                 0))  // logsizeby32 = 0
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+
+void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,  // 32x32 form of quantize_dc_fp_nuq: uses the bigtx "fp" coefficient quantizer.
+                              intptr_t n_coeffs,  // number of entries cleared in both output arrays
+                              int skip_block,  // nonzero: outputs stay all-zero and *eob_ptr becomes 0
+                              const int16_t quant,
+                              const int16_t dequant,
+                              const tran_low_t *cuml_bins_ptr,
+                              const tran_low_t *dequant_val,
+                              tran_low_t *qcoeff_ptr,  // out: quantized levels
+                              tran_low_t *dqcoeff_ptr,  // out: dequantized values
+                              uint16_t *eob_ptr) {  // out: 1 if the first coefficient quantized to nonzero, else 0
+  int eob = -1;  // -1 means no nonzero level; stored as eob + 1
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // cleared regardless of skip_block
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (!skip_block) {
+    const int rc = 0;  // only coefficient index 0 is processed
+    if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc],
+                                    quant,
+                                    dequant,
+                                    cuml_bins_ptr,
+                                    dequant_val,
+                                    qcoeff_ptr,  // results land in entry 0 of each output array
+                                    dqcoeff_ptr,
+                                    0))  // logsizeby32 = 0
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+
+void quantize_nuq_c(const tran_low_t *coeff_ptr,  // Quantizes n_coeffs coefficients in scan order with per-band NUQ tables.
+                    intptr_t n_coeffs,  // number of coefficients to clear and to visit
+                    int skip_block,  // nonzero: outputs stay all-zero and *eob_ptr becomes 0
+                    const int16_t *quant_ptr,  // two-entry tables: [0] for position 0, [1] for all others
+                    const int16_t *quant_shift_ptr,
+                    const int16_t *dequant_ptr,
+                    const cuml_bins_type_nuq *cuml_bins_ptr,  // indexed by band[i]
+                    const dequant_val_type_nuq *dequant_val,  // indexed by band[i]
+                    tran_low_t *qcoeff_ptr,  // out: quantized levels, indexed by coefficient position
+                    tran_low_t *dqcoeff_ptr,  // out: dequantized values, indexed by coefficient position
+                    uint16_t *eob_ptr,  // out: 1 + last scan index with a nonzero level, or 0
+                    const int16_t *scan,  // scan index -> coefficient position
+                    const uint8_t *band) {  // scan index -> band used to pick the NUQ tables
+  int eob = -1;  // -1 means no nonzero level; stored as eob + 1
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // cleared regardless of skip_block
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (!skip_block) {
+    int i;
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];  // coefficient position visited at scan step i
+      if (quantize_coeff_nuq(coeff_ptr[rc],
+                             quant_ptr[rc != 0],  // entry 0 when rc == 0, entry 1 otherwise
+                             quant_shift_ptr[rc != 0],
+                             dequant_ptr[rc != 0],
+                             cuml_bins_ptr[band[i]],
+                             dequant_val[band[i]],
+                             &qcoeff_ptr[rc],
+                             &dqcoeff_ptr[rc]))
+        eob = i;  // remember the latest scan index that produced a nonzero level
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+
+void quantize_fp_nuq_c(const tran_low_t *coeff_ptr,  // "fp" form of quantize_nuq_c: scan-order quantization without quant_shift.
+                       intptr_t n_coeffs,  // number of coefficients to clear and to visit
+                       int skip_block,  // nonzero: outputs stay all-zero and *eob_ptr becomes 0
+                       const int16_t *quant_ptr,  // two-entry tables: [0] for position 0, [1] for all others
+                       const int16_t *dequant_ptr,
+                       const cuml_bins_type_nuq *cuml_bins_ptr,  // indexed by band[i]
+                       const dequant_val_type_nuq *dequant_val,  // indexed by band[i]
+                       tran_low_t *qcoeff_ptr,  // out: quantized levels, indexed by coefficient position
+                       tran_low_t *dqcoeff_ptr,  // out: dequantized values, indexed by coefficient position
+                       uint16_t *eob_ptr,  // out: 1 + last scan index with a nonzero level, or 0
+                       const int16_t *scan,  // scan index -> coefficient position
+                       const uint8_t *band) {  // scan index -> band used to pick the NUQ tables
+  int eob = -1;  // -1 means no nonzero level; stored as eob + 1
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // cleared regardless of skip_block
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (!skip_block) {
+    int i;
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];  // coefficient position visited at scan step i
+      if (quantize_coeff_fp_nuq(coeff_ptr[rc],
+                                quant_ptr[rc != 0],  // entry 0 when rc == 0, entry 1 otherwise
+                                dequant_ptr[rc != 0],
+                                cuml_bins_ptr[band[i]],
+                                dequant_val[band[i]],
+                                &qcoeff_ptr[rc],
+                                &dqcoeff_ptr[rc]))
+        eob = i;  // remember the latest scan index that produced a nonzero level
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+
+void quantize_32x32_nuq_c(const tran_low_t *coeff_ptr,  // 32x32 form of quantize_nuq_c: uses the bigtx coefficient quantizer.
+                          intptr_t n_coeffs,  // number of coefficients to clear and to visit
+                          int skip_block,  // nonzero: outputs stay all-zero and *eob_ptr becomes 0
+                          const int16_t *quant_ptr,  // two-entry tables: [0] for position 0, [1] for all others
+                          const int16_t *quant_shift_ptr,
+                          const int16_t *dequant_ptr,
+                          const cuml_bins_type_nuq *cuml_bins_ptr,  // indexed by band[i]
+                          const dequant_val_type_nuq *dequant_val,  // indexed by band[i]
+                          tran_low_t *qcoeff_ptr,  // out: quantized levels, indexed by coefficient position
+                          tran_low_t *dqcoeff_ptr,  // out: dequantized values, indexed by coefficient position
+                          uint16_t *eob_ptr,  // out: 1 + last scan index with a nonzero level, or 0
+                          const int16_t *scan,  // scan index -> coefficient position
+                          const uint8_t *band) {  // scan index -> band used to pick the NUQ tables
+  int eob = -1;  // -1 means no nonzero level; stored as eob + 1
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // cleared regardless of skip_block
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (!skip_block) {
+    int i;
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];  // coefficient position visited at scan step i
+      if (quantize_coeff_bigtx_nuq(coeff_ptr[rc],
+                                   quant_ptr[rc != 0],  // entry 0 when rc == 0, entry 1 otherwise
+                                   quant_shift_ptr[rc != 0],
+                                   dequant_ptr[rc != 0],
+                                   cuml_bins_ptr[band[i]],
+                                   dequant_val[band[i]],
+                                   &qcoeff_ptr[rc],
+                                   &dqcoeff_ptr[rc],
+                                   0))  // logsizeby32 = 0
+        eob = i;  // remember the latest scan index that produced a nonzero level
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+
+void quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr,  // 32x32 form of quantize_fp_nuq_c: uses the bigtx "fp" coefficient quantizer.
+                             intptr_t n_coeffs,  // number of coefficients to clear and to visit
+                             int skip_block,  // nonzero: outputs stay all-zero and *eob_ptr becomes 0
+                             const int16_t *quant_ptr,  // two-entry tables: [0] for position 0, [1] for all others
+                             const int16_t *dequant_ptr,
+                             const cuml_bins_type_nuq *cuml_bins_ptr,  // indexed by band[i]
+                             const dequant_val_type_nuq *dequant_val,  // indexed by band[i]
+                             tran_low_t *qcoeff_ptr,  // out: quantized levels, indexed by coefficient position
+                             tran_low_t *dqcoeff_ptr,  // out: dequantized values, indexed by coefficient position
+                             uint16_t *eob_ptr,  // out: 1 + last scan index with a nonzero level, or 0
+                             const int16_t *scan,  // scan index -> coefficient position
+                             const uint8_t *band) {  // scan index -> band used to pick the NUQ tables
+  int eob = -1;  // -1 means no nonzero level; stored as eob + 1
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // cleared regardless of skip_block
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (!skip_block) {
+    int i;
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];  // coefficient position visited at scan step i
+      if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc],
+                                      quant_ptr[rc != 0],  // entry 0 when rc == 0, entry 1 otherwise
+                                      dequant_ptr[rc != 0],
+                                      cuml_bins_ptr[band[i]],
+                                      dequant_val[band[i]],
+                                      &qcoeff_ptr[rc],
+                                      &dqcoeff_ptr[rc],
+                                      0))  // logsizeby32 = 0
+        eob = i;  // remember the latest scan index that produced a nonzero level
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
  void vp10_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
                          tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
    memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
@@ -131,6 +530,403 @@ void vp10_highbd_quantize_dc_facade(
                           p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr,
                           pd->dequant[0], eob_ptr, qparam->log_scale);
  }
+
+#if CONFIG_NEW_QUANT
+// NUQ quantization of a single coefficient. The level is the index of the
+// first cuml_bins_ptr edge above |coeffv|; when no edge is above it, the
+// level is NUQ_KNOTS plus a term scaled from the excess by quant/quant_shift.
+// Stores the signed level and its reconstruction; returns 1 iff level != 0.
+static INLINE int highbd_quantize_coeff_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift,
+    const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr) {
+  const int value = coeffv;
+  const int sign_mask = value >> 31;  // -1 for negatives if shift is arithmetic
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  int64_t excess = clamp(magnitude, INT32_MIN, INT32_MAX);
+  int level = 0;
+  tran_low_t signed_level, recon;
+  // Step past every bin edge that the magnitude reaches or exceeds.
+  while (level < NUQ_KNOTS && excess >= cuml_bins_ptr[level]) ++level;
+  if (level == NUQ_KNOTS) {
+    excess -= cuml_bins_ptr[NUQ_KNOTS - 1];
+    level = NUQ_KNOTS +
+            (((((excess * quant) >> 16) + excess) * quant_shift) >> 16);
+  }
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+  // The reconstruction is computed on the magnitude, then given the sign.
+  recon = dequant_abscoeff_nuq(level, dequant, dequant_val);
+  signed_level = (level ^ sign_mask) - sign_mask;
+  *qcoeff_ptr = signed_level;
+  *dqcoeff_ptr = signed_level < 0 ? -recon : recon;
+  return 1;
+}
+
+// Fixed-point NUQ quantization of a single coefficient. The level is the
+// index of the first cuml_bins_ptr edge above |coeffv|; when no edge is above
+// it, the level is NUQ_KNOTS plus ((excess over the last edge) * quant) >> 16.
+// Stores the signed level and its reconstruction; returns 1 iff level != 0.
+static INLINE int highbd_quantize_coeff_fp_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t dequant,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr) {
+  const int value = coeffv;
+  const int sign_mask = value >> 31;  // -1 for negatives if shift is arithmetic
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  const int64_t abs_val = clamp(magnitude, INT32_MIN, INT32_MAX);
+  int level = 0;
+  tran_low_t signed_level, recon;
+  // Step past every bin edge that the magnitude reaches or exceeds.
+  while (level < NUQ_KNOTS && abs_val >= cuml_bins_ptr[level]) ++level;
+  if (level == NUQ_KNOTS) {
+    // No edge is above the magnitude: derive the level from the excess.
+    level = NUQ_KNOTS +
+            (((abs_val - cuml_bins_ptr[NUQ_KNOTS - 1]) * quant) >> 16);
+  }
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+  // The reconstruction is computed on the magnitude, then given the sign.
+  recon = dequant_abscoeff_nuq(level, dequant, dequant_val);
+  signed_level = (level ^ sign_mask) - sign_mask;
+  *qcoeff_ptr = signed_level;
+  *dqcoeff_ptr = signed_level < 0 ? -recon : recon;
+  return 1;
+}
+
+// Fixed-point NUQ quantization of a single coefficient for large transforms.
+// Same scheme as the plain fixed-point helper, except that the bin edges and
+// the reconstruction are rounded down by (1 + logsizeby32) bits and the
+// multiplier result is shifted by (15 - logsizeby32). Returns 1 iff nonzero.
+static INLINE int highbd_quantize_coeff_bigtx_fp_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t dequant,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, int logsizeby32) {
+  // Bit count used when rounding the bin edges and the reconstruction.
+  const int shift = 1 + logsizeby32;
+  const int value = coeffv;
+  const int sign_mask = value >> 31;  // -1 for negatives if shift is arithmetic
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  const int64_t abs_val = clamp(magnitude, INT32_MIN, INT32_MAX);
+  int level = 0;
+  tran_low_t signed_level, recon;
+  // Step past every scaled bin edge that the magnitude reaches or exceeds.
+  while (level < NUQ_KNOTS &&
+         abs_val >= ROUND_POWER_OF_TWO(cuml_bins_ptr[level], shift))
+    ++level;
+  if (level == NUQ_KNOTS) {
+    const int64_t excess =
+        abs_val - ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], shift);
+    level = NUQ_KNOTS + ((excess * quant) >> (15 - logsizeby32));
+  }
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+  // The reconstruction is computed on the magnitude, then given the sign.
+  recon = ROUND_POWER_OF_TWO(dequant_abscoeff_nuq(level, dequant, dequant_val),
+                             shift);
+  signed_level = (level ^ sign_mask) - sign_mask;
+  *qcoeff_ptr = signed_level;
+  *dqcoeff_ptr = signed_level < 0 ? -recon : recon;
+  return 1;
+}
+
+// NUQ quantization of a single coefficient for large transforms. Bin edges
+// and the reconstruction are rounded down by (1 + logsizeby32) bits; past the
+// last edge the level is NUQ_KNOTS plus the excess scaled by quant and
+// quant_shift, shifted by (15 - logsizeby32). Returns 1 iff level != 0.
+static INLINE int highbd_quantize_coeff_bigtx_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift,
+    const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, int logsizeby32) {
+  const int shift = 1 + logsizeby32;
+  const int value = coeffv;
+  const int sign_mask = value >> 31;  // -1 for negatives if shift is arithmetic
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  int64_t excess = clamp(magnitude, INT32_MIN, INT32_MAX);
+  int level = 0;
+  tran_low_t signed_level, recon;
+  // Step past every scaled bin edge that the magnitude reaches or exceeds.
+  while (level < NUQ_KNOTS &&
+         excess >= ROUND_POWER_OF_TWO(cuml_bins_ptr[level], shift))
+    ++level;
+  if (level == NUQ_KNOTS) {
+    excess -= ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], shift);
+    level = NUQ_KNOTS +
+            (((((excess * quant) >> 16) + excess) * quant_shift) >>
+             (15 - logsizeby32));
+  }
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+  recon = ROUND_POWER_OF_TWO(dequant_abscoeff_nuq(level, dequant, dequant_val),
+                             shift);
+  signed_level = (level ^ sign_mask) - sign_mask;
+  *qcoeff_ptr = signed_level;
+  *dqcoeff_ptr = signed_level < 0 ? -recon : recon;
+  return 1;
+}
+
+// Quantizes only the coefficient at index 0 with highbd_quantize_coeff_nuq.
+// All n_coeffs entries of both output buffers are zeroed beforehand, so every
+// other position stays zero. *eob_ptr is set to 1 when the quantized level
+// is nonzero and to 0 otherwise (including when skip_block is set).
+void highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr,
+                            intptr_t n_coeffs, int skip_block,
+                            const int16_t quant,
+                            const int16_t quant_shift,
+                            const int16_t dequant,
+                            const tran_low_t *cuml_bins_ptr,
+                            const tran_low_t *dequant_val,
+                            tran_low_t *qcoeff_ptr,
+                            tran_low_t *dqcoeff_ptr,
+                            uint16_t *eob_ptr) {
+  int has_nonzero;
+  // Clear the whole block up front.
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  // Results for index 0 land in the first entry of each output buffer.
+  has_nonzero = highbd_quantize_coeff_nuq(coeff_ptr[0], quant, quant_shift,
+                                          dequant, cuml_bins_ptr, dequant_val,
+                                          qcoeff_ptr, dqcoeff_ptr);
+  *eob_ptr = has_nonzero ? 1 : 0;
+}
+
+// Quantizes only the coefficient at index 0 with highbd_quantize_coeff_fp_nuq.
+// Both output buffers are zeroed over n_coeffs entries first. *eob_ptr is set
+// to 1 when the level is nonzero, else 0 (also when skip_block is set).
+void highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
+                               intptr_t n_coeffs, int skip_block,
+                               const int16_t quant,
+                               const int16_t dequant,
+                               const tran_low_t *cuml_bins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr) {
+  int has_nonzero;
+  // Clear the whole block up front.
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  // Results for index 0 land in the first entry of each output buffer.
+  has_nonzero = highbd_quantize_coeff_fp_nuq(coeff_ptr[0], quant, dequant,
+                                             cuml_bins_ptr, dequant_val,
+                                             qcoeff_ptr, dqcoeff_ptr);
+  *eob_ptr = has_nonzero ? 1 : 0;
+}
+
+// Runs highbd_quantize_coeff_nuq over n_coeffs coefficients in scan order.
+// *eob_ptr receives one plus the last scan index that quantized to a nonzero
+// level, or 0 when every level is zero or skip_block is set.
+void highbd_quantize_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                           int skip_block, const int16_t *quant_ptr,
+                           const int16_t *quant_shift_ptr,
+                           const int16_t *dequant_ptr,
+                           const cuml_bins_type_nuq *cuml_bins_ptr,
+                           const dequant_val_type_nuq *dequant_val,
+                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                           uint16_t *eob_ptr, const int16_t *scan,
+                           const uint8_t *band) {
+  int idx;
+  int end_of_block = 0;
+  // Both outputs are cleared first, so a skipped block is left all zero.
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    // Entry 0 of the quantizer arrays is for position 0, entry 1 otherwise.
+    const int is_ac = (pos != 0);
+    const int nonzero = highbd_quantize_coeff_nuq(
+        coeff_ptr[pos], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cuml_bins_ptr[band[idx]], dequant_val[band[idx]],
+        &qcoeff_ptr[pos], &dqcoeff_ptr[pos]);
+    if (nonzero) end_of_block = idx + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// Runs highbd_quantize_coeff_bigtx_nuq (logsizeby32 = 0) over n_coeffs
+// coefficients in scan order. *eob_ptr receives one plus the last scan index
+// with a nonzero level, or 0 when there is none or skip_block is set.
+void highbd_quantize_32x32_nuq_c(const tran_low_t *coeff_ptr,
+                                 intptr_t n_coeffs, int skip_block,
+                                 const int16_t *quant_ptr,
+                                 const int16_t *quant_shift_ptr,
+                                 const int16_t *dequant_ptr,
+                                 const cuml_bins_type_nuq *cuml_bins_ptr,
+                                 const dequant_val_type_nuq *dequant_val,
+                                 tran_low_t *qcoeff_ptr,
+                                 tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+                                 const int16_t *scan, const uint8_t *band) {
+  int idx;
+  int end_of_block = 0;
+  // Both outputs are cleared first, so a skipped block is left all zero.
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    // Entry 0 of the quantizer arrays is for position 0, entry 1 otherwise.
+    const int is_ac = (pos != 0);
+    const int nonzero = highbd_quantize_coeff_bigtx_nuq(
+        coeff_ptr[pos], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cuml_bins_ptr[band[idx]], dequant_val[band[idx]],
+        &qcoeff_ptr[pos], &dqcoeff_ptr[pos], 0);
+    if (nonzero) end_of_block = idx + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// Runs highbd_quantize_coeff_bigtx_fp_nuq (logsizeby32 = 0) in scan order;
+// *eob_ptr gets one plus the last scan index with a nonzero level, else 0.
+void highbd_quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr,
+                                    intptr_t n_coeffs, int skip_block,
+                                    const int16_t *quant_ptr,
+                                    const int16_t *dequant_ptr,
+                                    const cuml_bins_type_nuq *cuml_bins_ptr,
+                                    const dequant_val_type_nuq *dequant_val,
+                                    tran_low_t *qcoeff_ptr,
+                                    tran_low_t *dqcoeff_ptr,
+                                    uint16_t *eob_ptr, const int16_t *scan,
+                                    const uint8_t *band) {
+  int idx;
+  int end_of_block = 0;
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    // Entry 0 of the quantizer arrays is for position 0, entry 1 otherwise.
+    const int is_ac = (pos != 0);
+    const int nonzero = highbd_quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[pos], quant_ptr[is_ac], dequant_ptr[is_ac],
+        cuml_bins_ptr[band[idx]], dequant_val[band[idx]], &qcoeff_ptr[pos],
+        &dqcoeff_ptr[pos], 0);
+    if (nonzero) end_of_block = idx + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// Runs highbd_quantize_coeff_fp_nuq over n_coeffs coefficients in scan order.
+// *eob_ptr receives one plus the last scan index that quantized to a nonzero
+// level, or 0 when every level is zero or skip_block is set.
+void highbd_quantize_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                              int skip_block, const int16_t *quant_ptr,
+                              const int16_t *dequant_ptr,
+                              const cuml_bins_type_nuq *cuml_bins_ptr,
+                              const dequant_val_type_nuq *dequant_val,
+                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                              uint16_t *eob_ptr, const int16_t *scan,
+                              const uint8_t *band) {
+  int idx;
+  int end_of_block = 0;
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    // Entry 0 of the quantizer arrays is for position 0, entry 1 otherwise.
+    const int is_ac = (pos != 0);
+    const int nonzero = highbd_quantize_coeff_fp_nuq(
+        coeff_ptr[pos], quant_ptr[is_ac], dequant_ptr[is_ac],
+        cuml_bins_ptr[band[idx]], dequant_val[band[idx]], &qcoeff_ptr[pos],
+        &dqcoeff_ptr[pos]);
+    if (nonzero) end_of_block = idx + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// Quantizes only the coefficient at index 0 with the big-transform helper
+// highbd_quantize_coeff_bigtx_nuq. Both output buffers are zeroed over
+// n_coeffs entries first, so every other position stays zero. *eob_ptr is
+// set to 1 when the level is nonzero, else 0 (also when skip_block is set).
+void highbd_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
+                                  intptr_t n_coeffs, int skip_block,
+                                  const int16_t quant,
+                                  const int16_t quant_shift,
+                                  const int16_t dequant,
+                                  const tran_low_t *cuml_bins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr) {
+  int has_nonzero;
+  // Clear the whole block up front.
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  // The trailing 0 is the helper's logsizeby32 argument; results for index 0
+  // land in the first entry of each output buffer.
+  has_nonzero = highbd_quantize_coeff_bigtx_nuq(
+      coeff_ptr[0], quant, quant_shift, dequant, cuml_bins_ptr, dequant_val,
+      qcoeff_ptr, dqcoeff_ptr, 0);
+  *eob_ptr = has_nonzero ? 1 : 0;
+}
+
+// Quantizes only the coefficient at index 0 with the big-transform helper
+// highbd_quantize_coeff_bigtx_fp_nuq. Both output buffers are zeroed over
+// n_coeffs entries first, so every other position stays zero. *eob_ptr is
+// set to 1 when the level is nonzero, else 0 (also when skip_block is set).
+void highbd_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
+                                     intptr_t n_coeffs, int skip_block,
+                                     const int16_t quant,
+                                     const int16_t dequant,
+                                     const tran_low_t *cuml_bins_ptr,
+                                     const tran_low_t *dequant_val,
+                                     tran_low_t *qcoeff_ptr,
+                                     tran_low_t *dqcoeff_ptr,
+                                     uint16_t *eob_ptr) {
+  int has_nonzero;
+  // Clear the whole block up front.
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  // The trailing 0 is the helper's logsizeby32 argument.
+  has_nonzero = highbd_quantize_coeff_bigtx_fp_nuq(
+      coeff_ptr[0], quant, dequant, cuml_bins_ptr, dequant_val, qcoeff_ptr,
+      dqcoeff_ptr, 0);
+  *eob_ptr = has_nonzero ? 1 : 0;
+}
+#endif  // CONFIG_NEW_QUANT
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  
  void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -186,7 +982,7 @@ void vp10_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
                                const int16_t *dequant_ptr,
                                uint16_t *eob_ptr,
                                const int16_t *scan,
-                              const int16_t *iscan, int log_scale) {
+                              const int16_t *iscan, const int log_scale) {
    int i;
    int eob = -1;
    const int scale = 1 << log_scale;
@@ -219,7 +1015,8 @@ void vp10_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
    }
    *eob_ptr = eob + 1;
  }
-#endif
+
+#endif  // CONFIG_VP9_HIGHBITDEPTH
  
  // TODO(jingning) Refactor this file and combine functions with similar
  // operations.
@@ -272,7 +1069,7 @@ void vp10_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                                tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                                const int16_t *dequant_ptr,
                                uint16_t *eob_ptr, const int16_t *scan,
-                              const int16_t *iscan, int log_scale) {
+                              const int16_t *iscan, const int log_scale) {
    int i, non_zero_count = (int)n_coeffs, eob = -1;
    int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
    int round[2] = {round_ptr[0], round_ptr[1]};
@@ -452,6 +1249,20 @@ void vp10_init_quantizer(VP10_COMP *cpi) {
        cpi->uv_dequant[q][i] = quant;
      }
  
+#if CONFIG_NEW_QUANT
+    // TODO(sarahparker) do this for multiple profiles once they are added
+    for (i = 0; i < COEF_BANDS; i++) {
+      const int quant = cpi->y_dequant[q][i != 0];
+      const int uvquant = cpi->uv_dequant[q][i != 0];
+      get_dequant_val_nuq(quant, q == 0, i,
+                          cpi->y_dequant_val_nuq[q][i],
+                          quants->y_cuml_bins_nuq[q][i]);
+      get_dequant_val_nuq(uvquant, q == 0, i,
+                          cpi->uv_dequant_val_nuq[q][i],
+                          quants->uv_cuml_bins_nuq[q][i]);
+    }
+#endif  // CONFIG_NEW_QUANT
+
      for (i = 2; i < 8; i++) {  // 8: SIMD width
        quants->y_quant[q][i] = quants->y_quant[q][1];
        quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
@@ -489,6 +1300,12 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) {
    x->plane[0].zbin = quants->y_zbin[qindex];
    x->plane[0].round = quants->y_round[qindex];
    xd->plane[0].dequant = cpi->y_dequant[qindex];
+#if CONFIG_NEW_QUANT
+  x->plane[0].cuml_bins_nuq = quants->y_cuml_bins_nuq[qindex];
+  xd->plane[0].dequant_val_nuq = (const dequant_val_type_nuq*)
+                                 cpi->y_dequant_val_nuq[qindex];
+#endif  // CONFIG_NEW_QUANT
+
  
    x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0];
    x->plane[0].quant_thred[1] = x->plane[0].zbin[1] * x->plane[0].zbin[1];
@@ -502,6 +1319,11 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) {
      x->plane[i].zbin = quants->uv_zbin[qindex];
      x->plane[i].round = quants->uv_round[qindex];
      xd->plane[i].dequant = cpi->uv_dequant[qindex];
+#if CONFIG_NEW_QUANT
+    x->plane[i].cuml_bins_nuq = quants->uv_cuml_bins_nuq[qindex];
+    xd->plane[i].dequant_val_nuq = (const dequant_val_type_nuq*)
+                                   cpi->uv_dequant_val_nuq[qindex];
+#endif  // CONFIG_NEW_QUANT
  
      x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0];
      x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1];
diff --git a/vp10/encoder/quantize.h b/vp10/encoder/quantize.h

index 5e62eb25e3b71e143aa2972a64599e5436c11405..dd10528b389a16146af2191ed1fc54e509b72704 100644 (file)
--- a/vp10/encoder/quantize.h
+++ b/vp10/encoder/quantize.h
@@ -32,6 +32,14 @@ typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr,
                                    const QUANT_PARAM *qparam);
  
  typedef struct {
+#if CONFIG_NEW_QUANT
+  DECLARE_ALIGNED(16, tran_low_t,
+                  y_cuml_bins_nuq[QINDEX_RANGE][COEF_BANDS]
+                               [NUQ_KNOTS]);
+  DECLARE_ALIGNED(16, tran_low_t,
+                  uv_cuml_bins_nuq[QINDEX_RANGE][COEF_BANDS]
+                                [NUQ_KNOTS]);
+#endif  // CONFIG_NEW_QUANT
    // 0: dc 1: ac 2-8: ac repeated to SIMD width
    DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
    DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
@@ -88,6 +96,52 @@ void vp10_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                               const MACROBLOCKD_PLANE *pd,
                               tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
                               const scan_order *sc, const QUANT_PARAM *qparam);
+
+#if CONFIG_NEW_QUANT
+void quantize_dc_nuq(const tran_low_t *coeff_ptr,  // NOTE(review): definitions are in quantize.c
+                     intptr_t n_coeffs,
+                     int skip_block,
+                     const int16_t quant,
+                     const int16_t quant_shift,
+                     const int16_t dequant,
+                     const tran_low_t *cuml_bins_ptr,
+                     const tran_low_t *dequant_val,
+                     tran_low_t *qcoeff_ptr,
+                     tran_low_t *dqcoeff_ptr,
+                     uint16_t *eob_ptr);  // presumably an output; verify
+void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,  // same parameter list as quantize_dc_nuq
+                           intptr_t n_coeffs,
+                           int skip_block,
+                           const int16_t quant,
+                           const int16_t quant_shift,
+                           const int16_t dequant,
+                           const tran_low_t *cuml_bins_ptr,
+                           const tran_low_t *dequant_val,
+                           tran_low_t *qcoeff_ptr,
+                           tran_low_t *dqcoeff_ptr,
+                           uint16_t *eob_ptr);  // presumably an output; verify
+void quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,  // "fp" variants take no quant_shift
+                        intptr_t n_coeffs,
+                        int skip_block,
+                        const int16_t quant,
+                        const int16_t dequant,
+                        const tran_low_t *cuml_bins_ptr,
+                        const tran_low_t *dequant_val,
+                        tran_low_t *qcoeff_ptr,
+                        tran_low_t *dqcoeff_ptr,
+                        uint16_t *eob_ptr);  // presumably an output; verify
+void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,  // same parameter list as quantize_dc_fp_nuq
+                              intptr_t n_coeffs,
+                              int skip_block,
+                              const int16_t quant,
+                              const int16_t dequant,
+                              const tran_low_t *cuml_bins_ptr,
+                              const tran_low_t *dequant_val,
+                              tran_low_t *qcoeff_ptr,
+                              tran_low_t *dqcoeff_ptr,
+                              uint16_t *eob_ptr);  // presumably an output; verify
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_VP9_HIGHBITDEPTH
  void vp10_highbd_quantize_fp_facade(
      const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
@@ -115,6 +169,51 @@ void vp10_highbd_quantize_dc(const tran_low_t *coeff_ptr,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t dequant_ptr, uint16_t *eob_ptr,
                              const int log_scale);
+#if CONFIG_NEW_QUANT
+void highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr,  // only [0] is read
+                            intptr_t n_coeffs,  // entries zeroed per output
+                            int skip_block,  // nonzero: outputs stay zeroed
+                            const int16_t quant,
+                            const int16_t quant_shift,
+                            const int16_t dequant,
+                            const tran_low_t *cuml_bins_ptr,  // NUQ_KNOTS bin edges
+                            const tran_low_t *dequant_val,
+                            tran_low_t *qcoeff_ptr,  // [0] gets the level
+                            tran_low_t *dqcoeff_ptr,  // [0] gets the reconstruction
+                            uint16_t *eob_ptr);  // receives 0 or 1
+void highbd_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,  // only [0] is read
+                                  intptr_t n_coeffs,  // entries zeroed per output
+                                  int skip_block,  // nonzero: outputs stay zeroed
+                                  const int16_t quant,
+                                  const int16_t quant_shift,
+                                  const int16_t dequant,
+                                  const tran_low_t *cuml_bins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr);  // receives 0 or 1
+void highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,  // only [0] is read
+                               intptr_t n_coeffs,  // entries zeroed per output
+                               int skip_block,  // nonzero: outputs stay zeroed
+                               const int16_t quant,
+                               const int16_t dequant,
+                               const tran_low_t *cuml_bins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr);  // receives 0 or 1
+void highbd_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,  // only [0] is read
+                                     intptr_t n_coeffs,  // entries zeroed per output
+                                     int skip_block,  // nonzero: outputs stay zeroed
+                                     const int16_t quant,
+                                     const int16_t dequant,
+                                     const tran_low_t *cuml_bins_ptr,
+                                     const tran_low_t *dequant_val,
+                                     tran_low_t *qcoeff_ptr,
+                                     tran_low_t *dqcoeff_ptr,
+                                     uint16_t *eob_ptr);  // receives 0 or 1
+
+#endif  // CONFIG_NEW_QUANT
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  
  #ifdef __cplusplus
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c

index 8c4b2020af583c2c498d78230e101d5bafa9c4ee..640a409c94f84baa434db4083827337bba44d632 100644 (file)
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1259,8 +1259,13 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
      if (x->skip_txfm[plane][block >> (tx_size << 1)] ==
          SKIP_TXFM_NONE) {
        // full forward transform and quantization
+#if CONFIG_NEW_QUANT
+      vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col,
+                           plane_bsize, tx_size);
+#else
        vp10_xform_quant(x, plane, block, blk_row, blk_col,
                         plane_bsize, tx_size, VP10_XFORM_QUANT_B);
+#endif  // CONFIG_NEW_QUANT
        dist_block(args->cpi, x, plane, block, blk_row, blk_col,
                   tx_size, &dist, &sse);
      } else if (x->skip_txfm[plane][block >> (tx_size << 1)] ==
@@ -1268,8 +1273,17 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
        // compute DC coefficient
        tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
+#if CONFIG_NEW_QUANT
+      if (x->quant_fp)
+        vp10_xform_quant_dc_fp_nuq(x, plane, block, blk_row, blk_col,
+                                   plane_bsize, tx_size);
+      else
+        vp10_xform_quant_dc_nuq(x, plane, block, blk_row, blk_col,
+                                plane_bsize, tx_size);
+#else
        vp10_xform_quant(x, plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, VP10_XFORM_QUANT_DC);
+#endif  // CONFIG_NEW_QUANT
        sse  = x->bsse[plane][block >> (tx_size << 1)] << 4;
        dist = sse;
        if (x->plane[plane].eobs[block]) {
@@ -1295,8 +1309,17 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
      }
    } else {
      // full forward transform and quantization
+#if CONFIG_NEW_QUANT
+    if (x->quant_fp)
+      vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+                              tx_size);
+    else
+      vp10_xform_quant_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+                           tx_size);
+#else
      vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                       VP10_XFORM_QUANT_B);
+#endif  // CONFIG_NEW_QUANT
      dist_block(args->cpi, x, plane, block, blk_row, blk_col,
                 tx_size, &dist, &sse);
    }
diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c

index 3f411b77ec438fd9c9e19bfe240404ab24f0cf03..bd0cb818908e513811297e0b9a5a0e28fc58a5cd 100644 (file)
--- a/vp10/encoder/speed_features.c
+++ b/vp10/encoder/speed_features.c
@@ -239,7 +239,6 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
  static void set_rt_speed_feature_framesize_dependent(VP10_COMP *cpi,
      SPEED_FEATURES *sf, int speed) {
    VP10_COMMON *const cm = &cpi->common;
-
    if (speed >= 1) {
      if (VPXMIN(cm->width, cm->height) >= 720) {
        sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -309,6 +308,7 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
      sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
    }
  
+
    if (speed >= 2) {
      sf->mode_search_skip_flags = (cm->frame_type == KEY_FRAME) ? 0 :
                                   FLAG_SKIP_INTRA_DIRMISMATCH |
author	Sarah Parker <sarahparker@google.com>
	Tue, 10 May 2016 22:32:42 +0000 (15:32 -0700)
committer	Sarah Parker <sarahparker@google.com>
	Fri, 10 Jun 2016 15:06:22 +0000 (08:06 -0700)
vp10/common/blockd.h		patch \| blob \| history
vp10/common/onyxc_int.h		patch \| blob \| history
vp10/common/quant_common.c		patch \| blob \| history
vp10/common/quant_common.h		patch \| blob \| history
vp10/common/vp10_rtcd_defs.pl		patch \| blob \| history
vp10/decoder/decodeframe.c		patch \| blob \| history
vp10/decoder/detokenize.c		patch \| blob \| history
vp10/encoder/block.h		patch \| blob \| history
vp10/encoder/encodemb.c		patch \| blob \| history
vp10/encoder/encodemb.h		patch \| blob \| history
vp10/encoder/encoder.h		patch \| blob \| history
vp10/encoder/quantize.c		patch \| blob \| history
vp10/encoder/quantize.h		patch \| blob \| history
vp10/encoder/rdopt.c		patch \| blob \| history
vp10/encoder/speed_features.c		patch \| blob \| history