Non-uniform quantization experiment

author Deb Mukherjee <debargha@google.com>

Wed, 4 Mar 2015 22:04:11 +0000 (14:04 -0800)

committer Deb Mukherjee <debargha@google.com>

Wed, 18 Mar 2015 04:42:55 +0000 (21:42 -0700)
author Deb Mukherjee <debargha@google.com>
Wed, 4 Mar 2015 22:04:11 +0000 (14:04 -0800)
committer Deb Mukherjee <debargha@google.com>
Wed, 18 Mar 2015 04:42:55 +0000 (21:42 -0700)
diff --git a/configure b/configure

index 3323b3229e512291047199c9ff4b20e1c54f6ca8..4593d252fb6d974084b0be1b2c6b2e08507f27cc 100755 (executable)
--- a/configure
+++ b/configure
@@ -294,6 +294,7 @@ EXPERIMENT_LIST="
      global_motion
      palette
      newmvref_sub8x8
+    new_quant
  "
  CONFIG_LIST="
      external_build
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h

index e659d10c2f34d38e3c2f35dab3235f554ef75c46..0f647ceeaf283d18813c94c0fcff14996bd6bb09 100644 (file)
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -20,6 +20,7 @@
  #include "vp9/common/vp9_common_data.h"
  #include "vp9/common/vp9_filter.h"
  #include "vp9/common/vp9_mv.h"
+#include "vp9/common/vp9_quant_common.h"
  #include "vp9/common/vp9_scale.h"
  
  #ifdef __cplusplus
@@ -306,6 +307,9 @@ struct macroblockd_plane {
    struct buf_2d dst;
    struct buf_2d pre[2];
    const int16_t *dequant;
+#if CONFIG_NEW_QUANT
+  const dequant_val_type_nuq *dequant_val_nuq;
+#endif
    ENTROPY_CONTEXT *above_context;
    ENTROPY_CONTEXT *left_context;
  #if CONFIG_PALETTE
@@ -548,6 +552,20 @@ static inline int get_wedge_bits(BLOCK_SIZE sb_type) {
  }
  #endif  // CONFIG_WEDGE_PARTITION
  
+#if CONFIG_NEW_QUANT && CONFIG_TX_SKIP
+// Returns nonzero when both of the following hold for the given plane:
+//  - mbmi->tx_skip[] is set for the plane class (index 0 for plane 0,
+//    index 1 for every other plane), and
+//  - the matching intra mode (mbmi->mode for plane 0, mbmi->uv_mode
+//    otherwise) is V_PRED, H_PRED or TM_PRED.
+// In the callers touched by this change, a nonzero result selects the plain
+// `val * dequant` path instead of the non-uniform (NUQ) dequantizer.
+static inline int is_rect_quant_used(const MB_MODE_INFO *mbmi,
+                                     int plane) {
+  return
+      mbmi->tx_skip[plane != 0] &&
+      ((plane == 0 && (mbmi->mode == V_PRED ||
+                       mbmi->mode == H_PRED ||
+                       mbmi->mode == TM_PRED)) ||
+       (plane != 0 && (mbmi->uv_mode == V_PRED ||
+                       mbmi->uv_mode == H_PRED ||
+                       mbmi->uv_mode == TM_PRED)));
+}
+#endif
+
  #ifdef __cplusplus
  }  // extern "C"
  #endif
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h

index 2562a86684ed413c76b6fbca90d8736b42fbf57c..f142584be0bba31fafa36bf0ade109ff93c30c4d 100644 (file)
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -65,9 +65,14 @@ typedef struct {
  
  typedef struct VP9Common {
    struct vpx_internal_error_info  error;
-
    DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]);
    DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]);
+#if CONFIG_NEW_QUANT
+  DECLARE_ALIGNED(16, dequant_val_type_nuq,
+                  y_dequant_val_nuq[QINDEX_RANGE][COEF_BANDS]);
+  DECLARE_ALIGNED(16, dequant_val_type_nuq,
+                  uv_dequant_val_nuq[QINDEX_RANGE][COEF_BANDS]);
+#endif  // CONFIG_NEW_QUANT
  
    vpx_color_space_t color_space;
  
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c

index 564a3eb0ce3d255389301a3b829f663d225d4289..055ffe90851ffb3200b167a21d04b7f2eb6cec2e 100644 (file)
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -8,10 +8,145 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
+#include <stdio.h>
+#include <math.h>
  #include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_entropy.h"
  #include "vp9/common/vp9_quant_common.h"
  #include "vp9/common/vp9_seg_common.h"
  
+#if CONFIG_NEW_QUANT
+// Bin widths expressed as a fraction over 128 of the quant stepsize,
+// for the quantization bins 0-4.
+// So a value x indicates the bin is actually factor x/128 of the
+// nominal quantization step.  For the zero bin, the width is only
+// for one side of zero, so the actual width is twice that.
+// There are five sets of values for 5 different quantizer ranges.
+//
+// TODO(debargha): Optimize these tables
+static const uint8_t vp9_nuq_knotes_tiny[COEF_BANDS][NUQ_KNOTES] = {
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_low[COEF_BANDS][NUQ_KNOTES] = {
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_mid[COEF_BANDS][NUQ_KNOTES] = {
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_high[COEF_BANDS][NUQ_KNOTES] = {
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_huge[COEF_BANDS][NUQ_KNOTES] = {
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+
+static const uint8_t vp9_nuq_doff_tiny[COEF_BANDS] = { 8, 16, 17, 22, 23, 24 };
+static const uint8_t vp9_nuq_doff_low[COEF_BANDS] =  { 8, 16, 17, 22, 23, 24 };
+static const uint8_t vp9_nuq_doff_mid[COEF_BANDS] =  { 8, 16, 17, 22, 23, 24 };
+static const uint8_t vp9_nuq_doff_high[COEF_BANDS] = { 8, 16, 17, 22, 23, 24 };
+static const uint8_t vp9_nuq_doff_huge[COEF_BANDS] = { 8, 16, 17, 22, 23, 24 };
+
+// Allow different quantization profiles in different q ranges,
+// to enable entropy-constraints in scalar quantization.
+
+// Returns the row of NUQ_KNOTES bin widths for `band`, taken from the table
+// that matches the size of `quant`. The thresholds 64/128/256/512 are
+// scaled up by (bd - 8) bits, so the same ranges apply at higher bit depths.
+// NOTE: `band` is used as an index without a range check.
+static const uint8_t *get_nuq_knotes(int16_t quant, int band, int bd) {
+  const int shift = bd - 8;
+  if (quant > (512 << shift))
+    return vp9_nuq_knotes_huge[band];
+  else if (quant > (256 << shift))
+    return vp9_nuq_knotes_high[band];
+  else if (quant > (128 << shift))
+    return vp9_nuq_knotes_mid[band];
+  else if (quant > (64 << shift))
+    return vp9_nuq_knotes_low[band];
+  else
+    return vp9_nuq_knotes_tiny[band];
+}
+
+// Returns the doff table entry for `band`, taken from the table that matches
+// the size of `quant`. It uses the same bit-depth-scaled thresholds
+// (64/128/256/512 << (bd - 8)) as get_nuq_knotes().
+// NOTE: `band` is used as an index without a range check.
+static INLINE int16_t quant_to_doff_fixed(int16_t quant, int band, int bd) {
+  const int shift = bd - 8;
+  if (quant > (512 << shift))
+    return vp9_nuq_doff_huge[band];
+  else if (quant > (256 << shift))
+    return vp9_nuq_doff_high[band];
+  else if (quant > (128 << shift))
+    return vp9_nuq_doff_mid[band];
+  else if (quant > (64 << shift))
+    return vp9_nuq_doff_low[band];
+  else
+    return vp9_nuq_doff_tiny[band];
+}
+
+// Fills cumbins[0..NUQ_KNOTES-1] with the cumulative upper edges of the
+// first NUQ_KNOTES quantization bins for step size `q` in `band`.
+// The knot widths are in units of q/128, so each running sum is scaled by
+// q and divided by 128 with rounding: (sum * q + 64) >> 7.
+static INLINE void get_cumbins_nuq(int q, int band, int bd,
+                                   tran_low_t *cumbins) {
+  // q is narrowed to int16_t by get_nuq_knotes()'s parameter type.
+  const uint8_t *knotes = get_nuq_knotes(q, band, bd);
+  int16_t cumknotes[NUQ_KNOTES];
+  int i;
+  // Running sum of the per-bin widths.
+  cumknotes[0] = knotes[0];
+  for (i = 1; i < NUQ_KNOTES; ++i)
+    cumknotes[i] = cumknotes[i - 1] + knotes[i];
+  for (i = 0; i < NUQ_KNOTES; ++i)
+    cumbins[i] = (cumknotes[i] * q + 64) >> 7;
+}
+
+// Computes the reconstruction values for quantized magnitudes
+// 0..NUQ_KNOTES for step size `q` in `band`.
+//   dq      - output; NUQ_KNOTES + 1 entries are written (dq[0] is 0).
+//   cumbins - optional output for the cumulative bin edges (NUQ_KNOTES
+//             entries); pass NULL to have a local scratch array used.
+// NOTE: the code divides by 2 * q, so q must be nonzero.
+void vp9_get_dequant_val_nuq(int q, int band, int bd,
+                             tran_low_t *dq, tran_low_t *cumbins) {
+  const uint8_t *knotes = get_nuq_knotes(q, band, bd);
+  tran_low_t cumbins_[NUQ_KNOTES], *cumbins_ptr;
+  tran_low_t doff;
+  int i;
+  cumbins_ptr = (cumbins ? cumbins : cumbins_);
+  get_cumbins_nuq(q, band, bd, cumbins_ptr);
+  dq[0] = 0;
+  for (i = 1; i < NUQ_KNOTES; ++i) {
+    // Width of bin i, in the same units as q.
+    const int16_t qstep = (knotes[i] * q + 64) >> 7;
+    doff = quant_to_doff_fixed(qstep, band, bd);
+    // Rescale the looked-up offset by qstep / q, with rounding.
+    doff = (2 * doff * qstep + q) / (2 * q);
+    // Lower bin edge + half the bin width - doff * q / 128.
+    dq[i] = cumbins_ptr[i - 1] + (((knotes[i] - doff * 2) * q + 128) >> 8);
+  }
+  // Last table entry: edge of the final knot bin + q / 2 - doff * q / 128,
+  // with doff looked up for the full step q.
+  doff = quant_to_doff_fixed(q, band, bd);
+  dq[NUQ_KNOTES] =
+      cumbins_ptr[NUQ_KNOTES - 1] + (((64 - doff) * q + 64) >> 7);
+}
+
+// Dequantizes the magnitude `v` using the table `dq` (NUQ_KNOTES + 1
+// entries). Magnitudes up to NUQ_KNOTES are read straight from the table;
+// larger ones extend linearly from dq[NUQ_KNOTES] in steps of q.
+// NOTE(review): v is used as an index with no lower-bound check; the callers
+// in this change pass non-negative values -- confirm for any new caller.
+tran_low_t vp9_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq) {
+  if (v <= NUQ_KNOTES)
+    return dq[v];
+  else
+    return dq[NUQ_KNOTES] + (v - NUQ_KNOTES) * q;
+}
+
+// Signed variant of vp9_dequant_abscoeff_nuq(): dequantizes |v| and
+// re-applies the sign of v to the result.
+tran_low_t vp9_dequant_coeff_nuq(int v, int q, const tran_low_t *dq) {
+  tran_low_t dqmag = vp9_dequant_abscoeff_nuq(abs(v), q, dq);
+  return (v < 0 ? -dqmag : dqmag);
+}
+#endif  // CONFIG_NEW_QUANT
+
  static const int16_t dc_qlookup[QINDEX_RANGE] = {
    4,       8,    8,    9,   10,   11,   12,   12,
    13,     14,   15,   16,   17,   18,   19,   19,
@@ -275,4 +410,3 @@ int vp9_get_qindex(const struct segmentation *seg, int segment_id,
      return base_qindex;
    }
  }
-
diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h

index af3bdb82f93265d49a9b1a89f8406bddda93be5f..ccbc0c964c203f18b14adddde308ba52cd1f8630 100644 (file)
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@@ -11,6 +11,8 @@
  #ifndef VP9_COMMON_VP9_QUANT_COMMON_H_
  #define VP9_COMMON_VP9_QUANT_COMMON_H_
  
+#include <stdio.h>
+
  #include "vpx/vpx_codec.h"
  #include "vp9/common/vp9_seg_common.h"
  
@@ -34,6 +36,21 @@ int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
  int vp9_get_qindex(const struct segmentation *seg, int segment_id,
                     int base_qindex);
  
+// Returns (round_factor * quant) >> 7, i.e. round_factor is treated as a
+// fraction over 128 of quant. The result is truncated to int16_t.
+static INLINE int16_t vp9_round_factor_to_round(int16_t quant,
+                                                int16_t round_factor) {
+  return (round_factor * quant) >> 7;
+}
+
+#if CONFIG_NEW_QUANT
+#define NUQ_KNOTES 5
+typedef tran_low_t dequant_val_type_nuq[NUQ_KNOTES + 1];
+typedef tran_low_t cumbins_type_nuq[NUQ_KNOTES];
+void vp9_get_dequant_val_nuq(int q, int band, int bd,
+                             tran_low_t *dq, tran_low_t *cumbins);
+tran_low_t vp9_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq);
+tran_low_t vp9_dequant_coeff_nuq(int v, int q, const tran_low_t *dq);
+#endif  // CONFIG_NEW_QUANT
+
  #ifdef __cplusplus
  }  // extern "C"
  #endif
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl

index 4ae64847d2d27d906448f53d9a721e3ff69aa9df..c9648d968c98b440782c141cc007eabbb112b456 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -7,6 +7,7 @@ print <<EOF
  #include "vpx/vpx_integer.h"
  #include "vp9/common/vp9_common.h"
  #include "vp9/common/vp9_enums.h"
+#include "vp9/common/vp9_quant_common.h"
  
  struct macroblockd;
  
@@ -1402,6 +1403,27 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    }
  }
  
+if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+  add_proto qw/void vp9_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_nuq/;
+
+  add_proto qw/void vp9_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_fp_nuq/;
+
+  add_proto qw/void vp9_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_32x32_nuq/;
+
+  add_proto qw/void vp9_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_32x32_fp_nuq/;
+
+  if (vpx_config("CONFIG_TX64X64") eq "yes") {
+    add_proto qw/void vp9_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/vp9_quantize_64x64_nuq/;
+
+    add_proto qw/void vp9_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/vp9_quantize_64x64_fp_nuq/;
+  }
+}
  #
  # Structured Similarity (SSIM)
  #
@@ -2131,6 +2153,28 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  
      add_proto qw/void vp9_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
      specialize qw/vp9_highbd_quantize_b_64x64/;
+
+    if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+      add_proto qw/void vp9_highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_nuq/;
+
+      add_proto qw/void vp9_highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_fp_nuq/;
+
+      add_proto qw/void vp9_highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_32x32_nuq/;
+
+      add_proto qw/void vp9_highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_32x32_fp_nuq/;
+
+      if (vpx_config("CONFIG_TX64X64") eq "yes") {
+        add_proto qw/void vp9_highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+        specialize qw/vp9_highbd_quantize_64x64_nuq/;
+
+        add_proto qw/void vp9_highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+        specialize qw/vp9_highbd_quantize_64x64_fp_nuq/;
+      }
+    }
    }
  
    #
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c

index 1b9edc603d6abeb6c1cbd5a700f13805e851f243..7dc9708ce354917b38533de6696a2aab151a7487 100644 (file)
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -197,9 +197,18 @@ static void read_mv_probs(nmv_context *ctx, int allow_hp, vp9_reader *r) {
  static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) {
+  // Points every plane at the dequant tables for q_index: plane 0 uses the
+  // y tables, planes 1..MAX_MB_PLANE-1 share the uv tables. With
+  // CONFIG_NEW_QUANT the per-band NUQ value tables are attached as well.
    int i;
    xd->plane[0].dequant = cm->y_dequant[q_index];
+#if CONFIG_NEW_QUANT
+  xd->plane[0].dequant_val_nuq =
+      (const dequant_val_type_nuq *)cm->y_dequant_val_nuq[q_index];
+#endif  // CONFIG_NEW_QUANT
  
-  for (i = 1; i < MAX_MB_PLANE; i++)
+  for (i = 1; i < MAX_MB_PLANE; i++) {
      xd->plane[i].dequant = cm->uv_dequant[q_index];
+#if CONFIG_NEW_QUANT
+    xd->plane[i].dequant_val_nuq =
+        (const dequant_val_type_nuq *)cm->uv_dequant_val_nuq[q_index];
+#endif  // CONFIG_NEW_QUANT
+  }
  }
  
  #if CONFIG_TX_SKIP
@@ -2520,11 +2529,24 @@ void vp9_init_dequantizer(VP9_COMMON *cm) {
    int q;
  
    for (q = 0; q < QINDEX_RANGE; q++) {
+    int b;
      cm->y_dequant[q][0] = vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth);
      cm->y_dequant[q][1] = vp9_ac_quant(q, 0, cm->bit_depth);
  
      cm->uv_dequant[q][0] = vp9_dc_quant(q, cm->uv_dc_delta_q, cm->bit_depth);
      cm->uv_dequant[q][1] = vp9_ac_quant(q, cm->uv_ac_delta_q, cm->bit_depth);
+
+#if CONFIG_NEW_QUANT
+    for (b = 0; b < COEF_BANDS; ++b) {
+      vp9_get_dequant_val_nuq(
+          cm->y_dequant[q][b != 0], b, cm->bit_depth,
+          cm->y_dequant_val_nuq[q][b], NULL);
+      vp9_get_dequant_val_nuq(
+          cm->uv_dequant[q][b != 0], b, cm->bit_depth,
+          cm->uv_dequant_val_nuq[q][b], NULL);
+    }
+#endif  // CONFIG_NEW_QUANT
+    (void) b;
    }
  }
  
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c

index c8202347954628694e94f832bf5b803eeaee3e9a..d14cd33c880d1a4c2d36488dc7cf066a2a1cdf97 100644 (file)
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -54,7 +54,11 @@ static const vp9_tree_index coeff_subtree_high[TREE_SIZE(ENTROPY_TOKENS)] = {
  };
  
  static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
-                        tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
+                        tran_low_t *dqcoeff, TX_SIZE tx_size,
+                        const int16_t *dq,
+#if CONFIG_NEW_QUANT
+                        const dequant_val_type_nuq *dq_val,
+#endif  // CONFIG_NEW_QUANT
                          int ctx, const int16_t *scan, const int16_t *nb,
                          vp9_reader *r) {
    const int max_eob = 16 << (tx_size << 1);
@@ -74,6 +78,12 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
    const int dq_shift = (tx_size > TX_16X16) ? tx_size - TX_16X16 : 0;
    int v, token;
    int16_t dqv = dq[0];
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+  const int use_rect_quant = is_rect_quant_used(&xd->mi[0].src_mi->mbmi, type);
+#endif
+  const tran_low_t *dqv_val = &dq_val[0][0];
+#endif  // CONFIG_NEW_QUANT
    const uint8_t *cat1_prob;
    const uint8_t *cat2_prob;
    const uint8_t *cat3_prob;
@@ -125,6 +135,9 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
        INCREMENT_COUNT(EOB_MODEL_TOKEN);
        break;
      }
+#if CONFIG_NEW_QUANT
+    dqv_val = &dq_val[band][0];
+#endif  // CONFIG_NEW_QUANT
  
      while (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
        INCREMENT_COUNT(ZERO_TOKEN);
@@ -136,6 +149,9 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
        ctx = get_coef_context(nb, token_cache, c);
        band = *band_translate++;
        prob = coef_probs[band][ctx];
+#if CONFIG_NEW_QUANT
+      dqv_val = &dq_val[band][0];
+#endif  // CONFIG_NEW_QUANT
      }
  
      if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) {
@@ -191,7 +207,22 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
            break;
        }
      }
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+    if (use_rect_quant) {
+      v = (val * dqv) >> dq_shift;
+    } else {
+      v = vp9_dequant_abscoeff_nuq(val, dqv, dqv_val);
+      v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
+    }
+#else
+    v = vp9_dequant_abscoeff_nuq(val, dqv, dqv_val);
+    v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
+#endif  // CONFIG_TX_SKIP
+#else   // CONFIG_NEW_QUANT
      v = (val * dqv) >> dq_shift;
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_COEFFICIENT_RANGE_CHECKING
      dqcoeff[scan[c]] = check_range(vp9_read_bit(r) ? -v : v);
  #else
@@ -213,9 +244,15 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
    const int ctx = get_entropy_context(tx_size, pd->above_context + x,
                                                 pd->left_context + y);
    const scan_order *so = get_scan(xd, tx_size, pd->plane_type, block);
-  const int eob = decode_coefs(cm, xd, pd->plane_type,
-                               BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
-                               pd->dequant, ctx, so->scan, so->neighbors, r);
+  int eob;
+  eob = decode_coefs(cm, xd, pd->plane_type,
+                     BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
+                     pd->dequant,
+#if CONFIG_NEW_QUANT
+                     pd->dequant_val_nuq,
+#endif
+                     ctx, so->scan,
+                     so->neighbors, r);
  #if CONFIG_TX64X64
    if (plane > 0) assert(tx_size != TX_64X64);
  #endif
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h

index c5d30b3c1ceecfd84f847231f810f6020749029a..f931cab568abe29de6ca3e87b62a8e88063acd14 100644 (file)
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -38,6 +38,10 @@ struct macroblock_plane {
    int16_t *quant_shift;
    int16_t *zbin;
    int16_t *round;
+#if CONFIG_NEW_QUANT
+  dequant_val_type_nuq *dequant_val_nuq;
+  cumbins_type_nuq *cumbins_nuq;
+#endif
  
    int64_t quant_thred[2];
  };
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c

index 4f14490aa81ed9fe6e4c35366eacf4f291145e9c..3742f729285ef84ea226e4fd8d840e5de50d3a7b 100644 (file)
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -144,8 +144,15 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
    const int eob = p->eobs[block];
    const PLANE_TYPE type = pd->plane_type;
    const int default_eob = 16 << (tx_size << 1);
-  const int mul = 1 << (tx_size >= TX_32X32 ? tx_size - TX_16X16 : 0);
+  const int shift = (tx_size >= TX_32X32 ? tx_size - TX_16X16 : 0);
+  const int mul = 1 << shift;
    const int16_t *dequant_ptr = pd->dequant;
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+  const int use_rect_quant = is_rect_quant_used(&xd->mi[0].src_mi->mbmi, plane);
+#endif
+  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq;
+#endif  // CONFIG_NEW_QUANT
    const uint8_t *const band_translate = get_band_translate(tx_size);
    const scan_order *const so = get_scan(xd, tx_size, type, block);
    const int16_t *const scan = so->scan;
@@ -236,12 +243,34 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
        rate0 = tokens[next][0].rate;
        rate1 = tokens[next][1].rate;
  
-      if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
-          (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
-                                               dequant_ptr[rc != 0]))
-        shortcut = 1;
-      else
-        shortcut = 0;
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+      if (use_rect_quant) {
+        shortcut =
+            ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
+             ((abs(x) - 1) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul));
+      } else {
+        shortcut = (
+            (vp9_dequant_abscoeff_nuq(
+                abs(x), dequant_ptr[rc != 0],
+                dequant_val[band_translate[i]]) > abs(coeff[rc]) * mul) &&
+            (vp9_dequant_abscoeff_nuq(
+                abs(x) - 1, dequant_ptr[rc != 0],
+                dequant_val[band_translate[i]]) < abs(coeff[rc]) * mul));
+      }
+#else   // CONFIG_TX_SKIP
+      shortcut = (
+          (vp9_dequant_abscoeff_nuq(
+              abs(x), dequant_ptr[rc != 0],
+              dequant_val[band_translate[i]]) > abs(coeff[rc]) * mul) &&
+          (vp9_dequant_abscoeff_nuq(
+              abs(x) - 1, dequant_ptr[rc != 0],
+              dequant_val[band_translate[i]]) < abs(coeff[rc]) * mul));
+#endif  // CONFIG_TX_SKIP
+#else   // CONFIG_NEW_QUANT
+      shortcut = ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
+                  ((abs(x) - 1) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul));
+#endif  // CONFIG_NEW_QUANT
  
        if (shortcut) {
          sz = -(x < 0);
@@ -278,6 +307,39 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
        base_bits = dct_value_cost[x];
  
        if (shortcut) {
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+        if (use_rect_quant) {
+#if CONFIG_VP9_HIGHBITDEPTH
+          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+            dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
+          } else {
+            dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
+          }
+#else
+          dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        } else {
+          dx = vp9_dequant_coeff_nuq(
+              x, dequant_ptr[rc != 0],
+              dequant_val[band_translate[i]]) - coeff[rc] * mul;
+#if CONFIG_VP9_HIGHBITDEPTH
+          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+            dx >>= xd->bd - 8;
+          }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        }
+#else   // CONFIG_TX_SKIP
+        dx = vp9_dequant_coeff_nuq(
+            x, dequant_ptr[rc != 0],
+            dequant_val[band_translate[i]]) - coeff[rc] * mul;
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+          dx >>= xd->bd - 8;
+        }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_TX_SKIP
+#else   // CONFIG_NEW_QUANT
  #if CONFIG_VP9_HIGHBITDEPTH
          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
@@ -287,6 +349,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
  #else
          dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
  #endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_NEW_QUANT
          d2 = dx * dx;
        }
        tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
@@ -341,10 +404,26 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
      if (x) {
        final_eob = i;
      }
-
      qcoeff[rc] = x;
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+    if (use_rect_quant) {
+      dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
+    } else {
+      dqcoeff[rc] = vp9_dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0],
+                                             dequant_val[band_translate[i]]);
+      if (shift) dqcoeff[rc] = ROUND_POWER_OF_TWO(dqcoeff[rc], shift);
+      if (x < 0) dqcoeff[rc] = -dqcoeff[rc];
+    }
+#else   // CONFIG_TX_SKIP
+    dqcoeff[rc] = vp9_dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0],
+                                           dequant_val[band_translate[i]]);
+    if (shift) dqcoeff[rc] = ROUND_POWER_OF_TWO(dqcoeff[rc], shift);
+    if (x < 0) dqcoeff[rc] = -dqcoeff[rc];
+#endif  // CONFIG_TX_SKIP
+#else
      dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
-
+#endif  // CONFIG_NEW_QUANT
      next = tokens[i][best].next;
      best = best_index[i][best];
    }
@@ -518,92 +597,925 @@ static void forw_tx4x4(MACROBLOCK *x, int plane, int block,
    }
  }
  
-#if CONFIG_VP9_HIGHBITDEPTH
-static void highbd_forw_tx16x16(MACROBLOCK *x, int plane,
-                                const int16_t *src_diff, int diff_stride,
-                                tran_low_t *const coeff) {
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_forw_tx16x16(MACROBLOCK *x, int plane,
+                                const int16_t *src_diff, int diff_stride,
+                                tran_low_t *const coeff) {  // 16x16 high-bitdepth forward transform of src_diff into coeff, dispatched on tx_type
+  MACROBLOCKD *const xd = &x->e_mbd;
+  int16_t src_diff2[256];  // 16 * 16 scratch, written with stride 16 by the copy_flip*() calls below
+  TX_TYPE tx_type = get_tx_type(plane, xd);
+  if (tx_type == DCT_DCT) {  // DCT_DCT: vp9_highbd_fdct16x16 reads src_diff directly
+    vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
+  } else if (tx_type == FLIPADST_DCT) {  // FLIP* types: a copy_flip*() copy is transformed with the non-flip type
+    copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_DCT);
+  } else if (tx_type == DCT_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+    vp9_highbd_fht16x16(src_diff2, coeff, 16, DCT_ADST);
+  } else if (tx_type == FLIPADST_FLIPADST) {
+    copy_fliplrud(src_diff, diff_stride, 16, src_diff2, 16);
+    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
+  } else if (tx_type == ADST_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
+  } else if (tx_type == FLIPADST_ADST) {
+    copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
+  } else {  // any other tx_type is handed to vp9_highbd_fht16x16 unchanged
+    vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+  }
+}
+
+static void highbd_forw_tx8x8(MACROBLOCK *x, int plane,
+                              const int16_t *src_diff, int diff_stride,
+                              tran_low_t *const coeff) {  // 8x8 high-bitdepth forward transform of src_diff into coeff, dispatched on tx_type
+  MACROBLOCKD *const xd = &x->e_mbd;
+  int16_t src_diff2[64];  // 8 * 8 scratch, written with stride 8 by the copy_flip*() calls below
+  TX_TYPE tx_type = get_tx_type(plane, xd);
+  if (tx_type == DCT_DCT) {  // DCT_DCT: vp9_highbd_fdct8x8 reads src_diff directly
+    vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
+  } else if (tx_type == FLIPADST_DCT) {  // FLIP* types: a copy_flip*() copy is transformed with the non-flip type
+    copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_DCT);
+  } else if (tx_type == DCT_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+    vp9_highbd_fht8x8(src_diff2, coeff, 8, DCT_ADST);
+  } else if (tx_type == FLIPADST_FLIPADST) {
+    copy_fliplrud(src_diff, diff_stride, 8, src_diff2, 8);
+    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
+  } else if (tx_type == ADST_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
+  } else if (tx_type == FLIPADST_ADST) {
+    copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
+  } else {  // any other tx_type is handed to vp9_highbd_fht8x8 unchanged
+    vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+  }
+}
+
+static void highbd_forw_tx4x4(MACROBLOCK *x, int plane, int block,
+                              const int16_t *src_diff, int diff_stride,
+                              tran_low_t *const coeff) {  // 4x4 high-bitdepth forward transform of src_diff into coeff, dispatched on tx_type
+  MACROBLOCKD *const xd = &x->e_mbd;
+  int16_t src_diff2[16];  // 4 * 4 scratch, written with stride 4 by the copy_flip*() calls below
+  TX_TYPE tx_type = get_tx_type_4x4(plane, xd, block);  // 4x4 lookup also takes the block index
+  if (tx_type == DCT_DCT) {  // DCT_DCT: goes through the encoder's fwd_txm4x4 function pointer
+    x->fwd_txm4x4(src_diff, coeff, diff_stride);
+  } else if (tx_type == FLIPADST_DCT) {  // FLIP* types: a copy_flip*() copy is transformed with the non-flip type
+    copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
+    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_DCT);
+  } else if (tx_type == DCT_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
+    vp9_highbd_fht4x4(src_diff2, coeff, 4, DCT_ADST);
+  } else if (tx_type == FLIPADST_FLIPADST) {
+    copy_fliplrud(src_diff, diff_stride, 4, src_diff2, 4);
+    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
+  } else if (tx_type == ADST_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
+    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
+  } else if (tx_type == FLIPADST_ADST) {
+    copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
+    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
+  } else {  // any other tx_type is handed to vp9_highbd_fht4x4 unchanged
+    vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_TX
+
+#if CONFIG_NEW_QUANT
+void vp9_xform_quant_nuq(MACROBLOCK *x, int plane, int block,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {  // forward-transforms one block of src_diff and quantizes it with the *_nuq quantizers
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // stride used for src_diff: 4 * lookup value for plane_bsize
+  int i, j;
+  const int16_t *src_diff;
+#if CONFIG_TX_SKIP
+  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  int shift = mbmi->tx_skip_shift;  // forwarded to vp9_tx_identity in the tx_skip path
+#endif
+  const uint8_t* band = get_band_translate(tx_size);  // band table handed to every quantizer call below
+
+  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);  // fills i and j, used next to locate the block in src_diff
+  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+
+#if CONFIG_TX_SKIP
+  if (mbmi->tx_skip[plane != 0]) {  // tx_skip[0] for plane 0, tx_skip[1] otherwise; vp9_tx_identity is used instead of a DCT/ADST
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift);
+        vp9_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                               p->quant, p->quant_shift, pd->dequant,
+                               (const cumbins_type_nuq *)p->cumbins_nuq,
+                               (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                               qcoeff, dqcoeff, eob,
+                               scan_order->scan, band);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
+        vp9_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                               p->quant, p->quant_shift, pd->dequant,
+                               (const cumbins_type_nuq *)p->cumbins_nuq,
+                               (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                               qcoeff, dqcoeff, eob,
+                               scan_order->scan, band);
+        break;
+      case TX_16X16:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
+        vp9_quantize_nuq(coeff, 256, x->skip_block,
+                         p->quant, p->quant_shift, pd->dequant,
+                         (const cumbins_type_nuq *)p->cumbins_nuq,
+                         (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                         qcoeff, dqcoeff, eob,
+                         scan_order->scan, band);
+        break;
+      case TX_8X8:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
+        vp9_quantize_nuq(coeff, 64, x->skip_block,
+                         p->quant, p->quant_shift, pd->dequant,
+                         (const cumbins_type_nuq *)p->cumbins_nuq,
+                         (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                         qcoeff, dqcoeff, eob,
+                         scan_order->scan, band);
+        break;
+      case TX_4X4:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
+        vp9_quantize_nuq(coeff, 16, x->skip_block,
+                         p->quant, p->quant_shift, pd->dequant,
+                         (const cumbins_type_nuq *)p->cumbins_nuq,
+                         (const dequant_val_type_nuq *) pd->dequant_val_nuq,
+                         qcoeff, dqcoeff, eob,
+                         scan_order->scan, band);
+      break;
+      default:
+        assert(0);
+        break;
+    }
+    return;  // tx_skip blocks never reach the transform paths below
+  }
+#endif  // CONFIG_TX_SKIP
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // flagged buffers use the vp9_highbd_* transform and quantizer pairs
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                                      p->quant, p->quant_shift, pd->dequant,
+                                      (const cumbins_type_nuq *)p->cumbins_nuq,
+                                      (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                      qcoeff, dqcoeff, eob,
+                                      scan_order->scan, band);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                                      p->quant, p->quant_shift, pd->dequant,
+                                      (const cumbins_type_nuq *)p->cumbins_nuq,
+                                      (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                      qcoeff, dqcoeff, eob,
+                                      scan_order->scan, band);
+        break;
+      case TX_16X16:
+#if CONFIG_EXT_TX
+        highbd_forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_nuq(coeff, 256, x->skip_block,
+                                p->quant, p->quant_shift, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+        break;
+      case TX_8X8:
+#if CONFIG_EXT_TX
+        highbd_forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_nuq(coeff, 64, x->skip_block,
+                                p->quant, p->quant_shift, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+        break;
+      case TX_4X4:
+#if CONFIG_EXT_TX
+        highbd_forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+        x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_nuq(coeff, 16, x->skip_block,
+                                p->quant, p->quant_shift, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+        break;
+      default:
+        assert(0);
+    }
+    return;  // high-bitdepth path done; the switch below is the non-highbd path
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  switch (tx_size) {  // remaining path: one transform + *_nuq quantizer pair per transform size
+#if CONFIG_TX64X64
+    case TX_64X64:
+      vp9_fdct64x64(src_diff, coeff, diff_stride);
+      vp9_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+      break;
+#endif  // CONFIG_TX64X64
+    case TX_32X32:
+      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+      vp9_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+      break;
+    case TX_16X16:
+#if CONFIG_EXT_TX
+      forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct16x16(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_nuq(coeff, 256, x->skip_block,
+                       p->quant, p->quant_shift, pd->dequant,
+                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                       (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                       qcoeff, dqcoeff, eob,
+                       scan_order->scan, band);
+      break;
+    case TX_8X8:
+#if CONFIG_EXT_TX
+      forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct8x8(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_nuq(coeff, 64, x->skip_block,
+                       p->quant, p->quant_shift, pd->dequant,
+                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                       (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                       qcoeff, dqcoeff, eob,
+                       scan_order->scan, band);
+      break;
+    case TX_4X4:
+#if CONFIG_EXT_TX
+      forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+      x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_nuq(coeff, 16, x->skip_block,
+                       p->quant, p->quant_shift, pd->dequant,
+                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                       (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                       qcoeff, dqcoeff, eob,
+                       scan_order->scan, band);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+void vp9_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block,
+                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {  // forward-transforms one block of src_diff and quantizes it with the *_fp_nuq quantizers (p->quant_fp)
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // stride used for src_diff: 4 * lookup value for plane_bsize
+  int i, j;
+  const int16_t *src_diff;
+#if CONFIG_TX_SKIP
+  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  int shift = mbmi->tx_skip_shift;  // forwarded to vp9_tx_identity in the tx_skip path
+#endif
+  const uint8_t* band = get_band_translate(tx_size);  // band table handed to every quantizer call below
+
+  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);  // fills i and j, used next to locate the block in src_diff
+  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+
+#if CONFIG_TX_SKIP
+  if (mbmi->tx_skip[plane != 0]) {  // tx_skip[0] for plane 0, tx_skip[1] otherwise; vp9_tx_identity is used instead of a DCT/ADST
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift);
+        vp9_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                  p->quant_fp, pd->dequant,
+                                  (const cumbins_type_nuq *)p->cumbins_nuq,
+                                  (const dequant_val_type_nuq *)
+                                      pd->dequant_val_nuq,
+                                  qcoeff, dqcoeff, eob,
+                                  scan_order->scan, band);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
+        vp9_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                  p->quant_fp, pd->dequant,
+                                  (const cumbins_type_nuq *)p->cumbins_nuq,
+                                  (const dequant_val_type_nuq *)
+                                      pd->dequant_val_nuq,
+                                  qcoeff, dqcoeff, eob,
+                                  scan_order->scan, band);
+        break;
+      case TX_16X16:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
+        vp9_quantize_fp_nuq(coeff, 256, x->skip_block,
+                            p->quant_fp, pd->dequant,
+                            (const cumbins_type_nuq *)p->cumbins_nuq,
+                            (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                            qcoeff, dqcoeff, eob,
+                            scan_order->scan, band);
+        break;
+      case TX_8X8:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
+        vp9_quantize_fp_nuq(coeff, 64, x->skip_block,
+                            p->quant_fp, pd->dequant,
+                            (const cumbins_type_nuq *)p->cumbins_nuq,
+                            (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                            qcoeff, dqcoeff, eob,
+                            scan_order->scan, band);
+        break;
+      case TX_4X4:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
+        vp9_quantize_fp_nuq(coeff, 16, x->skip_block,
+                            p->quant_fp, pd->dequant,
+                            (const cumbins_type_nuq *)p->cumbins_nuq,
+                            (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                            qcoeff, dqcoeff, eob,
+                            scan_order->scan, band);
+        break;
+      default:
+        assert(0);
+        break;
+    }
+    return;  // tx_skip blocks never reach the transform paths below
+  }
+#endif  // CONFIG_TX_SKIP
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // flagged buffers use the vp9_highbd_* transform and quantizer pairs
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                         p->quant_fp, pd->dequant,
+                                         (const cumbins_type_nuq *)
+                                             p->cumbins_nuq,
+                                         (const dequant_val_type_nuq *)
+                                             pd->dequant_val_nuq,
+                                         qcoeff, dqcoeff, eob,
+                                         scan_order->scan, band);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                         p->quant_fp, pd->dequant,
+                                         (const cumbins_type_nuq *)
+                                             p->cumbins_nuq,
+                                         (const dequant_val_type_nuq *)
+                                             pd->dequant_val_nuq,
+                                         qcoeff, dqcoeff, eob,
+                                         scan_order->scan, band);
+        break;
+      case TX_16X16:
+#if CONFIG_EXT_TX
+        highbd_forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_fp_nuq(coeff, 256, x->skip_block,
+                                   p->quant_fp, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+        break;
+      case TX_8X8:
+#if CONFIG_EXT_TX
+        highbd_forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_fp_nuq(coeff, 64, x->skip_block,
+                                   p->quant_fp, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+        break;
+      case TX_4X4:
+#if CONFIG_EXT_TX
+        highbd_forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+        x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_fp_nuq(coeff, 16, x->skip_block,
+                                   p->quant_fp, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+        break;
+      default:
+        assert(0);
+    }
+    return;  // high-bitdepth path done; the switch below is the non-highbd path
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  switch (tx_size) {  // remaining path: one transform + *_fp_nuq quantizer pair per transform size
+#if CONFIG_TX64X64
+    case TX_64X64:
+      vp9_fdct64x64(src_diff, coeff, diff_stride);
+      vp9_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+      break;
+#endif  // CONFIG_TX64X64
+    case TX_32X32:
+      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+      vp9_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+      break;
+    case TX_16X16:
+#if CONFIG_EXT_TX
+      forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct16x16(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_fp_nuq(coeff, 256, x->skip_block,
+                          p->quant_fp, pd->dequant,
+                          (const cumbins_type_nuq *)p->cumbins_nuq,
+                          (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                          qcoeff, dqcoeff, eob,
+                          scan_order->scan, band);
+      break;
+    case TX_8X8:
+#if CONFIG_EXT_TX
+      forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct8x8(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_fp_nuq(coeff, 64, x->skip_block,
+                          p->quant_fp, pd->dequant,
+                          (const cumbins_type_nuq *)p->cumbins_nuq,
+                          (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                          qcoeff, dqcoeff, eob,
+                          scan_order->scan, band);
+      break;
+    case TX_4X4:
+#if CONFIG_EXT_TX
+      forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+      x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_fp_nuq(coeff, 16, x->skip_block,
+                          p->quant_fp, pd->dequant,
+                          (const cumbins_type_nuq *)p->cumbins_nuq,
+                          (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                          qcoeff, dqcoeff, eob,
+                          scan_order->scan, band);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+void vp9_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block,
+                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {  // transforms one block of src_diff and quantizes it with the *_dc_nuq quantizers, using only entry [0] of each table
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // stride used for src_diff: 4 * lookup value for plane_bsize
+  int i, j;
+  const int16_t *src_diff;
+#if CONFIG_TX_SKIP
+  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  int shift = mbmi->tx_skip_shift;  // forwarded to vp9_tx_identity in the tx_skip path
+#endif
+
+  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);  // fills i and j, used next to locate the block in src_diff
+  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+
+#if CONFIG_TX_SKIP
+  if (mbmi->tx_skip[plane != 0]) {  // tx_skip[0] for plane 0, tx_skip[1] otherwise; vp9_tx_identity is used instead of a DCT/ADST
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift);
+        vp9_quantize_dc_64x64_nuq(coeff, x->skip_block,
+                                  p->quant[0], p->quant_shift[0],
+                                  pd->dequant[0],
+                                  p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                  qcoeff, dqcoeff, eob);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
+        vp9_quantize_dc_32x32_nuq(coeff, x->skip_block,
+                                  p->quant[0], p->quant_shift[0],
+                                  pd->dequant[0],
+                                  p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                  qcoeff, dqcoeff, eob);
+        break;
+      case TX_16X16:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
+        vp9_quantize_dc_nuq(coeff, x->skip_block,
+                            p->quant[0], p->quant_shift[0], pd->dequant[0],
+                            p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                            qcoeff, dqcoeff, eob);
+        break;
+      case TX_8X8:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
+        vp9_quantize_dc_nuq(coeff, x->skip_block,
+                            p->quant[0], p->quant_shift[0], pd->dequant[0],
+                            p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                            qcoeff, dqcoeff, eob);
+        break;
+      case TX_4X4:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
+        vp9_quantize_dc_nuq(coeff, x->skip_block,
+                            p->quant[0], p->quant_shift[0], pd->dequant[0],
+                            p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                            qcoeff, dqcoeff, eob);
+        break;
+      default:
+        assert(0);
+        break;
+    }
+    return;  // tx_skip blocks never reach the transform paths below
+  }
+#endif  // CONFIG_TX_SKIP
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // flagged buffers use the vp9_highbd_* transform and quantizer pairs
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_highbd_fdct64x64_1(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_dc_64x64_nuq(coeff, x->skip_block,
+                                         p->quant[0], p->quant_shift[0],
+                                         pd->dequant[0],
+                                         p->cumbins_nuq[0],
+                                         pd->dequant_val_nuq[0],
+                                         qcoeff, dqcoeff, eob);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_dc_32x32_nuq(coeff, x->skip_block,
+                                         p->quant[0], p->quant_shift[0],
+                                         pd->dequant[0],
+                                         p->cumbins_nuq[0],
+                                         pd->dequant_val_nuq[0],
+                                         qcoeff, dqcoeff, eob);
+        break;
+      case TX_16X16:  // 16x16 and smaller: CONFIG_EXT_TX runs the full tx_type-aware transform, otherwise the *_1 variant
+#if CONFIG_EXT_TX
+        highbd_forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_dc_nuq(coeff, x->skip_block,
+                                   p->quant[0], p->quant_shift[0],
+                                   pd->dequant[0],
+                                   p->cumbins_nuq[0],
+                                   pd->dequant_val_nuq[0],
+                                   qcoeff, dqcoeff, eob);
+        break;
+      case TX_8X8:
+#if CONFIG_EXT_TX
+        highbd_forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_dc_nuq(coeff, x->skip_block,
+                                   p->quant[0], p->quant_shift[0],
+                                   pd->dequant[0],
+                                   p->cumbins_nuq[0],
+                                   pd->dequant_val_nuq[0],
+                                   qcoeff, dqcoeff, eob);
+        break;
+      case TX_4X4:
+#if CONFIG_EXT_TX
+        highbd_forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+        x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_dc_nuq(coeff, x->skip_block,
+                                   p->quant[0], p->quant_shift[0],
+                                   pd->dequant[0],
+                                   p->cumbins_nuq[0],
+                                   pd->dequant_val_nuq[0],
+                                   qcoeff, dqcoeff, eob);
+        break;
+      default:
+        assert(0);
+    }
+    return;  // high-bitdepth path done; the switch below is the non-highbd path
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  switch (tx_size) {  // remaining path: one transform + *_dc_nuq quantizer pair per transform size
+#if CONFIG_TX64X64
+    case TX_64X64:
+      vp9_fdct64x64_1(src_diff, coeff, diff_stride);
+      vp9_quantize_dc_64x64_nuq(coeff, x->skip_block,
+                                p->quant[0], p->quant_shift[0], pd->dequant[0],
+                                p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                qcoeff, dqcoeff, eob);
+      break;
+#endif  // CONFIG_TX64X64
+    case TX_32X32:
+      vp9_fdct32x32_1(src_diff, coeff, diff_stride);
+      vp9_quantize_dc_32x32_nuq(coeff, x->skip_block,
+                                p->quant[0], p->quant_shift[0], pd->dequant[0],
+                                p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                qcoeff, dqcoeff, eob);
+      break;
+    case TX_16X16:  // 16x16 and smaller: CONFIG_EXT_TX runs the full tx_type-aware transform, otherwise the *_1 variant
+#if CONFIG_EXT_TX
+      forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct16x16_1(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_dc_nuq(coeff, x->skip_block,
+                          p->quant[0], p->quant_shift[0], pd->dequant[0],
+                          p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                          qcoeff, dqcoeff, eob);
+      break;
+    case TX_8X8:
+#if CONFIG_EXT_TX
+      forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct8x8_1(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_dc_nuq(coeff, x->skip_block,
+                          p->quant[0], p->quant_shift[0], pd->dequant[0],
+                          p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                          qcoeff, dqcoeff, eob);
+      break;
+    case TX_4X4:
+#if CONFIG_EXT_TX
+      forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+      x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_dc_nuq(coeff, x->skip_block,
+                          p->quant[0], p->quant_shift[0], pd->dequant[0],
+                          p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                          qcoeff, dqcoeff, eob);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+void vp9_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,  // Transform one block, then quantize it with the *_dc_*fp_nuq quantizers,
+                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {  // which are handed only the index-0 entries of the quantizer tables.
    MACROBLOCKD *const xd = &x->e_mbd;
-  int16_t src_diff2[256];
-  TX_TYPE tx_type = get_tx_type(plane, xd);
-  if (tx_type == DCT_DCT) {
-    vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
-  } else if (tx_type == FLIPADST_DCT) {
-    copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
-    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_DCT);
-  } else if (tx_type == DCT_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
-    vp9_highbd_fht16x16(src_diff2, coeff, 16, DCT_ADST);
-  } else if (tx_type == FLIPADST_FLIPADST) {
-    copy_fliplrud(src_diff, diff_stride, 16, src_diff2, 16);
-    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
-  } else if (tx_type == ADST_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
-    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
-  } else if (tx_type == FLIPADST_ADST) {
-    copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
-    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
-  } else {
-    vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);  // transform output for this block
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);  // passed to the quantizer as qcoeff
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);  // passed to the quantizer as dqcoeff
+  uint16_t *const eob = &p->eobs[block];  // handed to the quantizer as its eob argument
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // presumably the src_diff row stride in samples (4 per 4x4 column)
+  int i, j;
+  const int16_t *src_diff;
+#if CONFIG_TX_SKIP
+  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  int shift = mbmi->tx_skip_shift;  // forwarded unchanged to vp9_tx_identity() below
+#endif
+
+  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);  // fills i and j, which are scaled by 4 on the next line
+  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+
+#if CONFIG_TX_SKIP
+  if (mbmi->tx_skip[plane != 0]) {  // tx_skip[] index: 0 for plane 0, 1 for any other plane
+    switch (tx_size) {  // tx-skip path: vp9_tx_identity() is called in place of a forward transform
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift);
+        vp9_quantize_dc_64x64_fp_nuq(coeff, x->skip_block,
+                                     p->quant_fp[0], pd->dequant[0],
+                                     p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                     qcoeff, dqcoeff, eob);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
+        vp9_quantize_dc_32x32_fp_nuq(coeff, x->skip_block,
+                                     p->quant_fp[0], pd->dequant[0],
+                                     p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                     qcoeff, dqcoeff, eob);
+        break;
+      case TX_16X16:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
+        vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                               p->quant_fp[0], pd->dequant[0],
+                               p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                               qcoeff, dqcoeff, eob);
+        break;
+      case TX_8X8:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
+        vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                               p->quant_fp[0], pd->dequant[0],
+                               p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                               qcoeff, dqcoeff, eob);
+        break;
+      case TX_4X4:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
+        vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                               p->quant_fp[0], pd->dequant[0],
+                               p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                               qcoeff, dqcoeff, eob);
+        break;
+      default:
+        assert(0);
+        break;
+    }
+    return;  // tx-skip path is complete; the transform paths below are not run
    }
-}
+#endif  // CONFIG_TX_SKIP
  
-static void highbd_forw_tx8x8(MACROBLOCK *x, int plane,
-                              const int16_t *src_diff, int diff_stride,
-                              tran_low_t *const coeff) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int16_t src_diff2[64];
-  TX_TYPE tx_type = get_tx_type(plane, xd);
-  if (tx_type == DCT_DCT) {
-    vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
-  } else if (tx_type == FLIPADST_DCT) {
-    copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
-    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_DCT);
-  } else if (tx_type == DCT_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
-    vp9_highbd_fht8x8(src_diff2, coeff, 8, DCT_ADST);
-  } else if (tx_type == FLIPADST_FLIPADST) {
-    copy_fliplrud(src_diff, diff_stride, 8, src_diff2, 8);
-    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
-  } else if (tx_type == ADST_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
-    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
-  } else if (tx_type == FLIPADST_ADST) {
-    copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
-    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
-  } else {
-    vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // buffers flagged high-bitdepth use the vp9_highbd_ transform and quantizer variants
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_highbd_fdct64x64_1(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_dc_64x64_fp_nuq(coeff, x->skip_block,
+                                            p->quant_fp[0], pd->dequant[0],
+                                            p->cumbins_nuq[0],
+                                            pd->dequant_val_nuq[0],
+                                            qcoeff, dqcoeff, eob);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_dc_32x32_fp_nuq(coeff, x->skip_block,
+                                            p->quant_fp[0], pd->dequant[0],
+                                            p->cumbins_nuq[0],
+                                            pd->dequant_val_nuq[0],
+                                            qcoeff, dqcoeff, eob);
+        break;
+      case TX_16X16:
+#if CONFIG_EXT_TX  // with EXT_TX the highbd_forw_tx helper replaces the _1 transform call
+        highbd_forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_dc_fp_nuq(coeff, x->skip_block,
+                                      p->quant_fp[0], pd->dequant[0],
+                                      p->cumbins_nuq[0],
+                                      pd->dequant_val_nuq[0],
+                                      qcoeff, dqcoeff, eob);
+        break;
+      case TX_8X8:
+#if CONFIG_EXT_TX
+        highbd_forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_dc_fp_nuq(coeff, x->skip_block,
+                                      p->quant_fp[0], pd->dequant[0],
+                                      p->cumbins_nuq[0],
+                                      pd->dequant_val_nuq[0],
+                                      qcoeff, dqcoeff, eob);
+        break;
+      case TX_4X4:
+#if CONFIG_EXT_TX
+        highbd_forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+        x->fwd_txm4x4(src_diff, coeff, diff_stride);  // same function pointer as the 4x4 case of the final switch below
+#endif
+        vp9_highbd_quantize_dc_fp_nuq(coeff, x->skip_block,
+                                      p->quant_fp[0], pd->dequant[0],
+                                      p->cumbins_nuq[0],
+                                      pd->dequant_val_nuq[0],
+                                      qcoeff, dqcoeff, eob);
+        break;
+      default:
+        assert(0);  // unexpected tx_size; last label in the switch, so control leaves it without a break
+    }
+    return;  // high-bitdepth path is complete; the final switch below is not run
    }
-}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
  
-static void highbd_forw_tx4x4(MACROBLOCK *x, int plane, int block,
-                              const int16_t *src_diff, int diff_stride,
-                              tran_low_t *const coeff) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int16_t src_diff2[16];
-  TX_TYPE tx_type = get_tx_type_4x4(plane, xd, block);
-  if (tx_type == DCT_DCT) {
-    x->fwd_txm4x4(src_diff, coeff, diff_stride);
-  } else if (tx_type == FLIPADST_DCT) {
-    copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
-    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_DCT);
-  } else if (tx_type == DCT_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
-    vp9_highbd_fht4x4(src_diff2, coeff, 4, DCT_ADST);
-  } else if (tx_type == FLIPADST_FLIPADST) {
-    copy_fliplrud(src_diff, diff_stride, 4, src_diff2, 4);
-    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
-  } else if (tx_type == ADST_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
-    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
-  } else if (tx_type == FLIPADST_ADST) {
-    copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
-    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
-  } else {
-    vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+  switch (tx_size) {  // reached only when neither early-return path above was taken
+#if CONFIG_TX64X64
+    case TX_64X64:
+      vp9_fdct64x64_1(src_diff, coeff, diff_stride);
+      vp9_quantize_dc_64x64_fp_nuq(coeff, x->skip_block,
+                                   p->quant_fp[0], pd->dequant[0],
+                                   p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                   qcoeff, dqcoeff, eob);
+      break;
+#endif  // CONFIG_TX64X64
+    case TX_32X32:
+      vp9_fdct32x32_1(src_diff, coeff, diff_stride);
+      vp9_quantize_dc_32x32_fp_nuq(coeff, x->skip_block,
+                                   p->quant_fp[0], pd->dequant[0],
+                                   p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                   qcoeff, dqcoeff, eob);
+      break;
+    case TX_16X16:
+#if CONFIG_EXT_TX  // with EXT_TX the forw_tx helper replaces the _1 transform call
+      forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct16x16_1(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                             p->quant_fp[0], pd->dequant[0],
+                             p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                             qcoeff, dqcoeff, eob);
+      break;
+    case TX_8X8:
+#if CONFIG_EXT_TX
+      forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct8x8_1(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                             p->quant_fp[0], pd->dequant[0],
+                             p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                             qcoeff, dqcoeff, eob);
+      break;
+    case TX_4X4:
+#if CONFIG_EXT_TX
+      forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+      x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                             p->quant_fp[0], pd->dequant[0],
+                             p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                             qcoeff, dqcoeff, eob);
+      break;
+    default:
+      assert(0);
+      break;
    }
  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // CONFIG_EXT_TX
+#endif  // CONFIG_NEW_QUANT
  
  void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
@@ -662,9 +1574,9 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
        case TX_4X4:
          vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
          vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
-                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                      pd->dequant, eob,
-                      scan_order->scan, scan_order->iscan);
+                        p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+                        pd->dequant, eob,
+                        scan_order->scan, scan_order->iscan);
        break;
        default:
          assert(0);
@@ -747,10 +1659,10 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
  #endif  // CONFIG_TX64X64
      case TX_32X32:
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
-      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
-                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                            pd->dequant, eob, scan_order->scan,
-                            scan_order->iscan);
+      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
+                            p->round_fp, p->quant_fp, p->quant_shift,
+                            qcoeff, dqcoeff, pd->dequant, eob,
+                            scan_order->scan, scan_order->iscan);
        break;
      case TX_16X16:
  #if CONFIG_EXT_TX
@@ -1181,13 +2093,27 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
      if (max_txsize_lookup[plane_bsize] == tx_size) {
        if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
          // full forward transform and quantization
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+        else
+          vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else
          if (x->quant_fp)
            vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
          else
            vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif  // CONFIG_NEW_QUANT
        } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
          // fast path forward transform and quantization
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_xform_quant_dc_fp_nuq(x, plane, block, plane_bsize, tx_size);
+        else
+          vp9_xform_quant_dc_nuq(x, plane, block, plane_bsize, tx_size);
+#else
          vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+#endif
        } else {
          // skip forward transform
          p->eobs[block] = 0;
@@ -1195,7 +2121,17 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
          return;
        }
      } else {
-      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_NEW_QUANT
+      if (x->quant_fp)
+        vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else
+      if (x->quant_fp)
+        vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif
      }
    }
  
@@ -1369,7 +2305,17 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
    txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
    dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
  
-  vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_NEW_QUANT
+  if (x->quant_fp)
+    vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+  else
+    vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else
+  if (x->quant_fp)
+    vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+  else
+    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif
  
    if (p->eobs[block] > 0) {
  #if CONFIG_VP9_HIGHBITDEPTH
@@ -1447,8 +2393,12 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride,
                            tran_low_t *dqcoeff, struct macroblock_plane *p,
                            struct macroblockd_plane *pd,
                            const scan_order *scan_order, PREDICTION_MODE mode,
-                          int bs, int shift, int logsizeby32) {
+                          TX_SIZE tx_size, int shift, int logsizeby32) {
    int i, j, eob, temp;
+  const int bs = 4 << tx_size;
+#if CONFIG_NEW_QUANT
+  // const uint8_t* band = get_band_translate(tx_size);
+#endif
    vpx_memset(qcoeff, 0, bs * bs * sizeof(*qcoeff));
    vpx_memset(dqcoeff, 0, bs * bs * sizeof(*dqcoeff));
  
@@ -1567,6 +2517,9 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
    const int src_stride = p->src.stride;
    const int dst_stride = pd->dst.stride;
    int i, j;
+#if CONFIG_NEW_QUANT
+  const uint8_t* band = get_band_translate(tx_size);
+#endif
    txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
    dst = &pd->dst.buf[4 * (j * dst_stride + i)];
    src = &p->src.buf[4 * (j * src_stride + i)];
@@ -1598,10 +2551,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
            vp9_subtract_block(64, 64, src_diff, diff_stride,
                               src, src_stride, dst, dst_stride);
            vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                      p->quant_fp, pd->dequant,
+                                      (const cumbins_type_nuq *)p->cumbins_nuq,
+                                      (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                      qcoeff, dqcoeff, eob,
+                                      scan_order->scan, band);
+          else
+            vp9_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                                   p->quant, p->quant_shift, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+#else
            vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin,
                                 p->round, p->quant, p->quant_shift, qcoeff,
                                 dqcoeff, pd->dequant, eob,
-                               scan_order->scan, scan_order->iscan);
+                               scan_order->scan,  scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
          }
          if (!x->skip_encode && *eob)
            vp9_tx_identity_add(dqcoeff, dst, dst_stride, 64, shift);
@@ -1623,7 +2595,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
              *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
                                    src_diff, diff_stride,
                                    coeff, qcoeff, dqcoeff, p, pd,
-                                  scan_order, mode, 32, shift, 0);
+                                  scan_order, mode, TX_32X32, shift, 0);
              break;
            }
  
@@ -1631,10 +2603,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                               src, src_stride, dst, dst_stride);
            vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
  
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                      p->quant_fp, pd->dequant,
+                                      (const cumbins_type_nuq *)p->cumbins_nuq,
+                                      (const dequant_val_type_nuq *)
+                                      pd->dequant_val_nuq,
+                                      qcoeff, dqcoeff, eob,
+                                      scan_order->scan, band);
+          else
+            vp9_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                                   p->quant, p->quant_shift, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+#else
            vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                 p->round, p->quant, p->quant_shift, qcoeff,
                                 dqcoeff, pd->dequant, eob,
                                 scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
          }
          if (!x->skip_encode && *eob) {
            vp9_tx_identity_add(dqcoeff, dst, dst_stride, 32, shift);
@@ -1656,17 +2647,36 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
              *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
                                    src_diff, diff_stride,
                                    coeff, qcoeff, dqcoeff, p, pd,
-                                  scan_order, mode, 16, shift, -1);
+                                  scan_order, mode, TX_16X16, shift, -1);
              break;
            }
  
            vp9_subtract_block(16, 16, src_diff, diff_stride,
                               src, src_stride, dst, dst_stride);
            vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_quantize_fp_nuq(coeff, 256, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+          else
+            vp9_quantize_nuq(coeff, 256, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)
+                             pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+#else
            vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, eob, scan_order->scan,
                           scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
          }
          if (!x->skip_encode && *eob) {
            vp9_tx_identity_add(dqcoeff, dst, dst_stride, 16, shift);
@@ -1688,17 +2698,36 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
              *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
                                    src_diff, diff_stride,
                                    coeff, qcoeff, dqcoeff, p, pd,
-                                  scan_order, mode, 8, shift, -1);
+                                  scan_order, mode, TX_8X8, shift, -1);
              break;
            }
  
            vp9_subtract_block(8, 8, src_diff, diff_stride,
                               src, src_stride, dst, dst_stride);
            vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_quantize_fp_nuq(coeff, 64, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+          else
+            vp9_quantize_nuq(coeff, 64, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)
+                             pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+#else
            vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, eob, scan_order->scan,
                           scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
          }
          if (!x->skip_encode && *eob) {
            vp9_tx_identity_add(dqcoeff, dst, dst_stride, 8, shift);
@@ -1722,17 +2751,36 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
              *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
                                    src_diff, diff_stride,
                                    coeff, qcoeff, dqcoeff, p, pd,
-                                  scan_order, mode, 4, shift, -1);
+                                  scan_order, mode, TX_4X4, shift, -1);
              break;
            }
  
            vp9_subtract_block(4, 4, src_diff, diff_stride,
                               src, src_stride, dst, dst_stride);
            vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_quantize_fp_nuq(coeff, 16, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+          else
+            vp9_quantize_nuq(coeff, 16, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)
+                             pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+#else
            vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, eob, scan_order->scan,
                           scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
          }
  
          if (!x->skip_encode && *eob) {
@@ -1767,10 +2815,32 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
            vp9_highbd_subtract_block(64, 64, src_diff, diff_stride,
                                      src, src_stride, dst, dst_stride, xd->bd);
            vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_highbd_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                             p->quant_fp, pd->dequant,
+                                             (const cumbins_type_nuq *)
+                                             p->cumbins_nuq,
+                                             (const dequant_val_type_nuq *)
+                                             pd->dequant_val_nuq,
+                                             qcoeff, dqcoeff, eob,
+                                             scan_order->scan,
+                                             band);
+          else
+            vp9_highbd_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                                          p->quant, p->quant_shift, pd->dequant,
+                                          (const cumbins_type_nuq *)
+                                          p->cumbins_nuq,
+                                          (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                          qcoeff, dqcoeff, eob,
+                                          scan_order->scan, band);
+#else
            vp9_highbd_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin,
                                        p->round, p->quant, p->quant_shift,
                                        qcoeff, dqcoeff, pd->dequant, eob,
                                        scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
            if (!x->skip_encode && *eob) {
              vp9_highbd_idct64x64_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
            }
@@ -1791,10 +2861,32 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
            vp9_highbd_subtract_block(32, 32, src_diff, diff_stride,
                                      src, src_stride, dst, dst_stride, xd->bd);
            highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_highbd_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                             p->quant_fp, pd->dequant,
+                                             (const cumbins_type_nuq *)
+                                             p->cumbins_nuq,
+                                             (const dequant_val_type_nuq *)
+                                             pd->dequant_val_nuq,
+                                             qcoeff, dqcoeff, eob,
+                                             scan_order->scan,
+                                             band);
+          else
+            vp9_highbd_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                                          p->quant, p->quant_shift, pd->dequant,
+                                          (const cumbins_type_nuq *)
+                                          p->cumbins_nuq,
+                                          (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                          qcoeff, dqcoeff, eob,
+                                          scan_order->scan, band);
+#else
            vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                        p->round, p->quant, p->quant_shift,
                                        qcoeff, dqcoeff, pd->dequant, eob,
                                        scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
          }
          if (!x->skip_encode && *eob) {
            vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
@@ -1815,10 +2907,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
            vp9_highbd_subtract_block(16, 16, src_diff, diff_stride,
                                      src, src_stride, dst, dst_stride, xd->bd);
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_highbd_quantize_fp_nuq(coeff, 256, x->skip_block,
+                                       p->quant_fp, pd->dequant,
+                                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                                       (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                       qcoeff, dqcoeff, eob,
+                                       scan_order->scan, band);
+          else
+            vp9_highbd_quantize_nuq(coeff, 256, x->skip_block,
+                                    p->quant, p->quant_shift, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+#else
            vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                                  p->quant, p->quant_shift, qcoeff, dqcoeff,
                                  pd->dequant, eob,
                                  scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
          }
          if (!x->skip_encode && *eob) {
            vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride,
@@ -1840,10 +2951,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
            vp9_highbd_subtract_block(8, 8, src_diff, diff_stride,
                                      src, src_stride, dst, dst_stride, xd->bd);
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_highbd_quantize_fp_nuq(coeff, 64, x->skip_block,
+                                       p->quant_fp, pd->dequant,
+                                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                                       (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                       qcoeff, dqcoeff, eob,
+                                       scan_order->scan, band);
+          else
+            vp9_highbd_quantize_nuq(coeff, 64, x->skip_block,
+                                    p->quant, p->quant_shift, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+#else
            vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                                  p->quant, p->quant_shift, qcoeff, dqcoeff,
                                  pd->dequant, eob,
                                  scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
          }
          if (!x->skip_encode && *eob) {
            vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
@@ -1869,10 +2999,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
              vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
            else
              x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_highbd_quantize_fp_nuq(coeff, 16, x->skip_block,
+                                       p->quant_fp, pd->dequant,
+                                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                                       (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                       qcoeff, dqcoeff, eob,
+                                       scan_order->scan, band);
+          else
+            vp9_highbd_quantize_nuq(coeff, 16, x->skip_block,
+                                    p->quant, p->quant_shift, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+#else
            vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                                  p->quant, p->quant_shift, qcoeff, dqcoeff,
                                  pd->dequant, eob,
                                  scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
          }
  
          if (!x->skip_encode && *eob) {
@@ -1913,10 +3062,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
          vp9_subtract_block(64, 64, src_diff, diff_stride,
                             src, src_stride, dst, dst_stride);
          vp9_fdct64x64(src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                    p->quant_fp, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+        else
+          vp9_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                                 p->quant, p->quant_shift, pd->dequant,
+                                 (const cumbins_type_nuq *)p->cumbins_nuq,
+                                 (const dequant_val_type_nuq *)
+                                 pd->dequant_val_nuq,
+                                 qcoeff, dqcoeff, eob,
+                                 scan_order->scan, band);
+#else
          vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round,
                               p->quant, p->quant_shift, qcoeff, dqcoeff,
                               pd->dequant, eob, scan_order->scan,
                               scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
        }
        if (!x->skip_encode && *eob)
          vp9_idct64x64_add(dqcoeff, dst, dst_stride, *eob);
@@ -1936,10 +3104,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
          vp9_subtract_block(32, 32, src_diff, diff_stride,
                             src, src_stride, dst, dst_stride);
          fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                    p->quant_fp, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+        else
+          vp9_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                                 p->quant, p->quant_shift, pd->dequant,
+                                 (const cumbins_type_nuq *)p->cumbins_nuq,
+                                 (const dequant_val_type_nuq *)
+                                 pd->dequant_val_nuq,
+                                 qcoeff, dqcoeff, eob,
+                                 scan_order->scan, band);
+#else
          vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                               p->quant, p->quant_shift, qcoeff, dqcoeff,
                               pd->dequant, eob, scan_order->scan,
                               scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
        }
        if (!x->skip_encode && *eob)
          vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
@@ -1959,10 +3146,27 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
          vp9_subtract_block(16, 16, src_diff, diff_stride,
                             src, src_stride, dst, dst_stride);
          vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_quantize_fp_nuq(coeff, 256, x->skip_block,
+                              p->quant_fp, pd->dequant,
+                              (const cumbins_type_nuq *)p->cumbins_nuq,
+                              (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                              qcoeff, dqcoeff, eob,
+                              scan_order->scan, band);
+        else
+          vp9_quantize_nuq(coeff, 256, x->skip_block,
+                           p->quant, p->quant_shift, pd->dequant,
+                           (const cumbins_type_nuq *)p->cumbins_nuq,
+                           (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                           qcoeff, dqcoeff, eob,
+                           scan_order->scan, band);
+#else
          vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                         p->quant, p->quant_shift, qcoeff, dqcoeff,
                         pd->dequant, eob, scan_order->scan,
                         scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
        }
        if (!x->skip_encode && *eob)
          vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
@@ -1982,10 +3186,27 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
          vp9_subtract_block(8, 8, src_diff, diff_stride,
                             src, src_stride, dst, dst_stride);
          vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_quantize_fp_nuq(coeff, 64, x->skip_block,
+                              p->quant_fp, pd->dequant,
+                              (const cumbins_type_nuq *)p->cumbins_nuq,
+                              (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                              qcoeff, dqcoeff, eob,
+                              scan_order->scan, band);
+        else
+          vp9_quantize_nuq(coeff, 64, x->skip_block,
+                           p->quant, p->quant_shift, pd->dequant,
+                           (const cumbins_type_nuq *)p->cumbins_nuq,
+                           (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                           qcoeff, dqcoeff, eob,
+                           scan_order->scan, band);
+#else
          vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                         p->quant_shift, qcoeff, dqcoeff,
                         pd->dequant, eob, scan_order->scan,
                         scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
        }
        if (!x->skip_encode && *eob)
          vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
@@ -2009,10 +3230,27 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
            vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
          else
            x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_quantize_fp_nuq(coeff, 16, x->skip_block,
+                              p->quant_fp, pd->dequant,
+                              (const cumbins_type_nuq *)p->cumbins_nuq,
+                              (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                              qcoeff, dqcoeff, eob,
+                              scan_order->scan, band);
+        else
+          vp9_quantize_nuq(coeff, 16, x->skip_block,
+                           p->quant, p->quant_shift, pd->dequant,
+                           (const cumbins_type_nuq *)p->cumbins_nuq,
+                           (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                           qcoeff, dqcoeff, eob,
+                           scan_order->scan, band);
+#else
          vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                         p->quant_shift, qcoeff, dqcoeff,
                         pd->dequant, eob, scan_order->scan,
                         scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
        }
  
        if (!x->skip_encode && *eob) {
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h

index 8d11c35400f281c8f30ae7b1a3b77c7c6c921462..6974871d8be1aab0ae61a0d240fa212a276b7533 100644 (file)
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -29,6 +29,16 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
  void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
                       BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+#if CONFIG_NEW_QUANT  // _nuq variants; same signature as vp9_xform_quant.
+void vp9_xform_quant_nuq(MACROBLOCK *x, int plane, int block,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block,
+                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block,
+                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
+                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+#endif  // CONFIG_NEW_QUANT
  
  void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
  
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c

index 6353728be478e19d0faeec96db5f2badd5333861..1f12c56f262d32b163eb4142c2bb2ea426682609 100644 (file)
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -40,6 +40,357 @@ void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
    *eob_ptr = eob + 1;
  }
  
+#if CONFIG_NEW_QUANT
+// Non-uniform quantization (NUQ) of a single coefficient.
+//
+// The magnitude of |coeffv|, clamped to the int16 range, is compared against
+// the NUQ_KNOTES cumulative bin edges in |cumbins_ptr|; the index of the
+// first edge it falls below is the quantized level. A magnitude at or beyond
+// the last edge is quantized with |quant| / |quant_shift| relative to that
+// edge and offset by NUQ_KNOTES.
+//
+// On return |*qcoeff_ptr| holds the signed level and |*dqcoeff_ptr| the
+// signed reconstruction obtained from vp9_dequant_abscoeff_nuq().
+// Returns 1 when the level is non-zero, 0 otherwise.
+static INLINE int quantize_coeff_nuq(const tran_low_t coeffv,
+                                     const int16_t quant,
+                                     const int16_t quant_shift,
+                                     const int16_t dequant,
+                                     const tran_low_t *cumbins_ptr,
+                                     const tran_low_t *dequant_val,
+                                     tran_low_t *qcoeff_ptr,
+                                     tran_low_t *dqcoeff_ptr) {
+  const int value = coeffv;
+  // Sign mask used for the branch-free abs/negate below (relies on the
+  // right shift of a negative int being arithmetic).
+  const int sign = value >> 31;
+  int mag = clamp((value ^ sign) - sign, INT16_MIN, INT16_MAX);
+  int level = 0;
+
+  // Locate the first cumulative bin edge that exceeds the magnitude.
+  while (level < NUQ_KNOTES && mag >= cumbins_ptr[level])
+    ++level;
+
+  if (level == NUQ_KNOTES) {
+    // Past the last knot: quantize the excess over the final edge.
+    mag -= cumbins_ptr[NUQ_KNOTES - 1];
+    level += ((((mag * quant) >> 16) + mag) * quant_shift) >> 16;
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = vp9_dequant_abscoeff_nuq(level, dequant, dequant_val);
+  *qcoeff_ptr = (level ^ sign) - sign;
+  if (*qcoeff_ptr < 0)
+    *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// Non-uniform quantization (NUQ) of a single coefficient for the large
+// transforms. The DC wrappers in this file pass |logsizeby32| = 0 for 32x32
+// and 1 for 64x64.
+//
+// Same scheme as the regular NUQ path, except that every cumulative bin edge
+// and the reconstructed magnitude are scaled down with
+// ROUND_POWER_OF_TWO(., 1 + logsizeby32), and the final quantizer shift is
+// (15 - logsizeby32) instead of 16.
+//
+// On return |*qcoeff_ptr| holds the signed level and |*dqcoeff_ptr| the
+// signed, scaled reconstruction. Returns 1 when the level is non-zero.
+static INLINE int quantize_coeff_bigtx_nuq(const tran_low_t coeffv,
+                                           const int16_t quant,
+                                           const int16_t quant_shift,
+                                           const int16_t dequant,
+                                           const tran_low_t *cumbins_ptr,
+                                           const tran_low_t *dequant_val,
+                                           tran_low_t *qcoeff_ptr,
+                                           tran_low_t *dqcoeff_ptr,
+                                           int logsizeby32) {
+  const int value = coeffv;
+  // Sign mask for the branch-free abs/negate (assumes arithmetic shift).
+  const int sign = value >> 31;
+  int mag = clamp((value ^ sign) - sign, INT16_MIN, INT16_MAX);
+  int level = 0;
+
+  // Locate the first scaled bin edge that exceeds the magnitude.
+  while (level < NUQ_KNOTES &&
+         mag >= ROUND_POWER_OF_TWO(cumbins_ptr[level], 1 + logsizeby32))
+    ++level;
+
+  if (level == NUQ_KNOTES) {
+    // Past the last knot: quantize the excess over the final scaled edge.
+    mag -= ROUND_POWER_OF_TWO(cumbins_ptr[NUQ_KNOTES - 1], 1 + logsizeby32);
+    level += ((((mag * quant) >> 16) + mag) * quant_shift) >>
+             (15 - logsizeby32);
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr =
+      ROUND_POWER_OF_TWO(vp9_dequant_abscoeff_nuq(level, dequant, dequant_val),
+                         1 + logsizeby32);
+  *qcoeff_ptr = (level ^ sign) - sign;
+  if (*qcoeff_ptr < 0)
+    *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bitdepth non-uniform quantization (NUQ) of a single coefficient.
+//
+// Mirrors the low-bitdepth NUQ path but keeps the magnitude in 64 bits
+// (clamped to the int32 range) so the quantizer products are evaluated in
+// int64_t. The level is the index of the first cumulative bin edge in
+// |cumbins_ptr| above the magnitude, or NUQ_KNOTES plus the uniformly
+// quantized excess over the last edge.
+//
+// On return |*qcoeff_ptr| holds the signed level and |*dqcoeff_ptr| the
+// signed reconstruction. Returns 1 when the level is non-zero, 0 otherwise.
+static INLINE int highbd_quantize_coeff_nuq(const tran_low_t coeffv,
+                                            const int16_t quant,
+                                            const int16_t quant_shift,
+                                            const int16_t dequant,
+                                            const tran_low_t *cumbins_ptr,
+                                            const tran_low_t *dequant_val,
+                                            tran_low_t *qcoeff_ptr,
+                                            tran_low_t *dqcoeff_ptr) {
+  const int value = coeffv;
+  // Sign mask for the branch-free abs/negate (assumes arithmetic shift).
+  const int sign = value >> 31;
+  int64_t mag = clamp((value ^ sign) - sign, INT32_MIN, INT32_MAX);
+  int level = 0;
+
+  // Locate the first cumulative bin edge that exceeds the magnitude.
+  while (level < NUQ_KNOTES && mag >= cumbins_ptr[level])
+    ++level;
+
+  if (level == NUQ_KNOTES) {
+    // Past the last knot: quantize the excess over the final edge.
+    mag -= cumbins_ptr[NUQ_KNOTES - 1];
+    level += ((((mag * quant) >> 16) + mag) * quant_shift) >> 16;
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = vp9_dequant_abscoeff_nuq(level, dequant, dequant_val);
+  *qcoeff_ptr = (level ^ sign) - sign;
+  if (*qcoeff_ptr < 0)
+    *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// High-bitdepth non-uniform quantization (NUQ) of a single coefficient for
+// the large transforms, parameterized by |logsizeby32|.
+//
+// Bin edges and the reconstructed magnitude are scaled down with
+// ROUND_POWER_OF_TWO(., 1 + logsizeby32); the final quantizer shift is
+// (15 - logsizeby32). The magnitude is held in int64_t so the quantizer
+// products are evaluated in 64 bits.
+//
+// On return |*qcoeff_ptr| holds the signed level and |*dqcoeff_ptr| the
+// signed, scaled reconstruction. Returns 1 when the level is non-zero.
+static INLINE int highbd_quantize_coeff_bigtx_nuq(const tran_low_t coeffv,
+                                                  const int16_t quant,
+                                                  const int16_t quant_shift,
+                                                  const int16_t dequant,
+                                                  const tran_low_t *cumbins_ptr,
+                                                  const tran_low_t *dequant_val,
+                                                  tran_low_t *qcoeff_ptr,
+                                                  tran_low_t *dqcoeff_ptr,
+                                                  int logsizeby32) {
+  const int value = coeffv;
+  // Sign mask for the branch-free abs/negate (assumes arithmetic shift).
+  const int sign = value >> 31;
+  int64_t mag = clamp((value ^ sign) - sign, INT32_MIN, INT32_MAX);
+  int level = 0;
+
+  // Locate the first scaled bin edge that exceeds the magnitude.
+  while (level < NUQ_KNOTES &&
+         mag >= ROUND_POWER_OF_TWO(cumbins_ptr[level], 1 + logsizeby32))
+    ++level;
+
+  if (level == NUQ_KNOTES) {
+    // Past the last knot: quantize the excess over the final scaled edge.
+    mag -= ROUND_POWER_OF_TWO(cumbins_ptr[NUQ_KNOTES - 1], 1 + logsizeby32);
+    level += ((((mag * quant) >> 16) + mag) * quant_shift) >>
+             (15 - logsizeby32);
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr =
+      ROUND_POWER_OF_TWO(vp9_dequant_abscoeff_nuq(level, dequant, dequant_val),
+                         1 + logsizeby32);
+  *qcoeff_ptr = (level ^ sign) - sign;
+  if (*qcoeff_ptr < 0)
+    *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+// Non-uniform quantization (NUQ) of a single coefficient, "fp" flavour: the
+// region past the last knot uses a single multiply by |quant| and a shift by
+// 16 (no quant_shift stage).
+//
+// The magnitude of |coeffv|, clamped to the int16 range, is compared against
+// the NUQ_KNOTES cumulative bin edges in |cumbins_ptr|; the index of the
+// first edge it falls below is the level, otherwise the level is NUQ_KNOTES
+// plus the quantized excess over the last edge.
+//
+// On return |*qcoeff_ptr| holds the signed level and |*dqcoeff_ptr| the
+// signed reconstruction. Returns 1 when the level is non-zero, 0 otherwise.
+static INLINE int quantize_coeff_fp_nuq(const tran_low_t coeffv,
+                                        const int16_t quant,
+                                        const int16_t dequant,
+                                        const tran_low_t *cumbins_ptr,
+                                        const tran_low_t *dequant_val,
+                                        tran_low_t *qcoeff_ptr,
+                                        tran_low_t *dqcoeff_ptr) {
+  const int value = coeffv;
+  // Sign mask for the branch-free abs/negate (assumes arithmetic shift).
+  const int sign = value >> 31;
+  const int mag = clamp((value ^ sign) - sign, INT16_MIN, INT16_MAX);
+  int level = 0;
+
+  // Locate the first cumulative bin edge that exceeds the magnitude.
+  while (level < NUQ_KNOTES && mag >= cumbins_ptr[level])
+    ++level;
+
+  if (level == NUQ_KNOTES) {
+    // Past the last knot: quantize the excess in 64-bit arithmetic.
+    level += (((int64_t)mag - cumbins_ptr[NUQ_KNOTES - 1]) * quant) >> 16;
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = vp9_dequant_abscoeff_nuq(level, dequant, dequant_val);
+  *qcoeff_ptr = (level ^ sign) - sign;
+  if (*qcoeff_ptr < 0)
+    *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// "fp" non-uniform quantization (NUQ) of a single coefficient for the large
+// transforms. The DC wrappers in this file pass |logsizeby32| = 0 for 32x32
+// and 1 for 64x64.
+//
+// Bin edges and the reconstructed magnitude are scaled down with
+// ROUND_POWER_OF_TWO(., 1 + logsizeby32). Past the last knot the excess is
+// multiplied by |quant| and shifted right by (15 - logsizeby32).
+//
+// On return |*qcoeff_ptr| holds the signed level and |*dqcoeff_ptr| the
+// signed, scaled reconstruction. Returns 1 when the level is non-zero.
+static INLINE int quantize_coeff_bigtx_fp_nuq(const tran_low_t coeffv,
+                                              const int16_t quant,
+                                              const int16_t dequant,
+                                              const tran_low_t *cumbins_ptr,
+                                              const tran_low_t *dequant_val,
+                                              tran_low_t *qcoeff_ptr,
+                                              tran_low_t *dqcoeff_ptr,
+                                              int logsizeby32) {
+  const int value = coeffv;
+  // Sign mask for the branch-free abs/negate (assumes arithmetic shift).
+  const int sign = value >> 31;
+  const int mag = clamp((value ^ sign) - sign, INT16_MIN, INT16_MAX);
+  int level = 0;
+
+  // Locate the first scaled bin edge that exceeds the magnitude.
+  while (level < NUQ_KNOTES &&
+         mag >= ROUND_POWER_OF_TWO(cumbins_ptr[level], 1 + logsizeby32))
+    ++level;
+
+  if (level == NUQ_KNOTES) {
+    // Past the last knot: quantize the excess in 64-bit arithmetic.
+    level += (((int64_t)mag -
+               ROUND_POWER_OF_TWO(cumbins_ptr[NUQ_KNOTES - 1],
+                                  1 + logsizeby32)) * quant) >>
+             (15 - logsizeby32);
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr =
+      ROUND_POWER_OF_TWO(vp9_dequant_abscoeff_nuq(level, dequant, dequant_val),
+                         1 + logsizeby32);
+  *qcoeff_ptr = (level ^ sign) - sign;
+  if (*qcoeff_ptr < 0)
+    *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bitdepth "fp" non-uniform quantization (NUQ) of a single coefficient.
+//
+// The magnitude (clamped to the int32 range, held in int64_t) is compared
+// against the NUQ_KNOTES cumulative bin edges in |cumbins_ptr|. The level is
+// the index of the first edge above the magnitude, or NUQ_KNOTES plus the
+// excess over the last edge multiplied by |quant| and shifted right by 16.
+//
+// On return |*qcoeff_ptr| holds the signed level and |*dqcoeff_ptr| the
+// signed reconstruction. Returns 1 when the level is non-zero, 0 otherwise.
+static INLINE int highbd_quantize_coeff_fp_nuq(const tran_low_t coeffv,
+                                               const int16_t quant,
+                                               const int16_t dequant,
+                                               const tran_low_t *cumbins_ptr,
+                                               const tran_low_t *dequant_val,
+                                               tran_low_t *qcoeff_ptr,
+                                               tran_low_t *dqcoeff_ptr) {
+  const int value = coeffv;
+  // Sign mask for the branch-free abs/negate (assumes arithmetic shift).
+  const int sign = value >> 31;
+  const int64_t mag = clamp((value ^ sign) - sign, INT32_MIN, INT32_MAX);
+  int level = 0;
+
+  // Locate the first cumulative bin edge that exceeds the magnitude.
+  while (level < NUQ_KNOTES && mag >= cumbins_ptr[level])
+    ++level;
+
+  if (level == NUQ_KNOTES) {
+    // Past the last knot: quantize the excess over the final edge.
+    level += ((mag - cumbins_ptr[NUQ_KNOTES - 1]) * quant) >> 16;
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = vp9_dequant_abscoeff_nuq(level, dequant, dequant_val);
+  *qcoeff_ptr = (level ^ sign) - sign;
+  if (*qcoeff_ptr < 0)
+    *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// High-bitdepth "fp" non-uniform quantization (NUQ) of a single coefficient
+// for the large transforms, parameterized by |logsizeby32|.
+//
+// Bin edges and the reconstructed magnitude are scaled down with
+// ROUND_POWER_OF_TWO(., 1 + logsizeby32). Past the last knot the excess is
+// multiplied by |quant| and shifted right by (15 - logsizeby32). The
+// magnitude is held in int64_t so that product is evaluated in 64 bits.
+//
+// On return |*qcoeff_ptr| holds the signed level and |*dqcoeff_ptr| the
+// signed, scaled reconstruction. Returns 1 when the level is non-zero.
+static INLINE int highbd_quantize_coeff_bigtx_fp_nuq(
+    const tran_low_t coeffv,
+    const int16_t quant,
+    const int16_t dequant,
+    const tran_low_t *cumbins_ptr,
+    const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr,
+    int logsizeby32) {
+  const int value = coeffv;
+  // Sign mask for the branch-free abs/negate (assumes arithmetic shift).
+  const int sign = value >> 31;
+  const int64_t mag = clamp((value ^ sign) - sign, INT32_MIN, INT32_MAX);
+  int level = 0;
+
+  // Locate the first scaled bin edge that exceeds the magnitude.
+  while (level < NUQ_KNOTES &&
+         mag >= ROUND_POWER_OF_TWO(cumbins_ptr[level], 1 + logsizeby32))
+    ++level;
+
+  if (level == NUQ_KNOTES) {
+    // Past the last knot: quantize the excess over the final scaled edge.
+    level += ((mag - ROUND_POWER_OF_TWO(cumbins_ptr[NUQ_KNOTES - 1],
+                                        1 + logsizeby32)) * quant) >>
+             (15 - logsizeby32);
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr =
+      ROUND_POWER_OF_TWO(vp9_dequant_abscoeff_nuq(level, dequant, dequant_val),
+                         1 + logsizeby32);
+  *qcoeff_ptr = (level ^ sign) - sign;
+  if (*qcoeff_ptr < 0)
+    *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+// DC-only NUQ quantization: runs quantize_coeff_nuq() on coeff_ptr[0] alone.
+// |*eob_ptr| is set to 1 when the resulting level is non-zero and to 0
+// otherwise. When |skip_block| is set nothing is quantized, the output
+// coefficient buffers are left untouched and |*eob_ptr| is 0.
+void vp9_quantize_dc_nuq(const tran_low_t *coeff_ptr,
+                         int skip_block,
+                         const int16_t quant,
+                         const int16_t quant_shift,
+                         const int16_t dequant,
+                         const tran_low_t *cumbins_ptr,
+                         const tran_low_t *dequant_val,
+                         tran_low_t *qcoeff_ptr,
+                         tran_low_t *dqcoeff_ptr,
+                         uint16_t *eob_ptr) {
+  int nonzero = 0;
+  if (!skip_block) {
+    nonzero = quantize_coeff_nuq(coeff_ptr[0], quant, quant_shift, dequant,
+                                 cumbins_ptr, dequant_val,
+                                 qcoeff_ptr, dqcoeff_ptr);
+  }
+  *eob_ptr = nonzero ? 1 : 0;
+}
+
+// DC-only "fp" NUQ quantization: runs quantize_coeff_fp_nuq() on
+// coeff_ptr[0] alone. |*eob_ptr| is set to 1 when the resulting level is
+// non-zero and to 0 otherwise. When |skip_block| is set nothing is
+// quantized, the output buffers are left untouched and |*eob_ptr| is 0.
+void vp9_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
+                            int skip_block,
+                            const int16_t quant,
+                            const int16_t dequant,
+                            const tran_low_t *cumbins_ptr,
+                            const tran_low_t *dequant_val,
+                            tran_low_t *qcoeff_ptr,
+                            tran_low_t *dqcoeff_ptr,
+                            uint16_t *eob_ptr) {
+  int nonzero = 0;
+  if (!skip_block) {
+    nonzero = quantize_coeff_fp_nuq(coeff_ptr[0], quant, dequant,
+                                    cumbins_ptr, dequant_val,
+                                    qcoeff_ptr, dqcoeff_ptr);
+  }
+  *eob_ptr = nonzero ? 1 : 0;
+}
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_VP9_HIGHBITDEPTH
  void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
                              const int16_t *round_ptr, const int16_t quant,
@@ -63,7 +414,59 @@ void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
    }
    *eob_ptr = eob + 1;
  }
-#endif
+
+#if CONFIG_NEW_QUANT
+// High-bitdepth DC-only NUQ quantization: runs highbd_quantize_coeff_nuq()
+// on coeff_ptr[0] alone. |*eob_ptr| is set to 1 when the resulting level is
+// non-zero and to 0 otherwise. When |skip_block| is set nothing is
+// quantized, the output buffers are left untouched and |*eob_ptr| is 0.
+void vp9_highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr,
+                                int skip_block,
+                                const int16_t quant,
+                                const int16_t quant_shift,
+                                const int16_t dequant,
+                                const tran_low_t *cumbins_ptr,
+                                const tran_low_t *dequant_val,
+                                tran_low_t *qcoeff_ptr,
+                                tran_low_t *dqcoeff_ptr,
+                                uint16_t *eob_ptr) {
+  int nonzero = 0;
+  if (!skip_block) {
+    nonzero = highbd_quantize_coeff_nuq(coeff_ptr[0], quant, quant_shift,
+                                        dequant, cumbins_ptr, dequant_val,
+                                        qcoeff_ptr, dqcoeff_ptr);
+  }
+  *eob_ptr = nonzero ? 1 : 0;
+}
+
+// High-bitdepth DC-only "fp" NUQ quantization: runs
+// highbd_quantize_coeff_fp_nuq() on coeff_ptr[0] alone. |*eob_ptr| is set to
+// 1 when the resulting level is non-zero and to 0 otherwise. When
+// |skip_block| is set nothing is quantized, the output buffers are left
+// untouched and |*eob_ptr| is 0.
+void vp9_highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
+                                   int skip_block,
+                                   const int16_t quant,
+                                   const int16_t dequant,
+                                   const tran_low_t *cumbins_ptr,
+                                   const tran_low_t *dequant_val,
+                                   tran_low_t *qcoeff_ptr,
+                                   tran_low_t *dqcoeff_ptr,
+                                   uint16_t *eob_ptr) {
+  int nonzero = 0;
+  if (!skip_block) {
+    nonzero = highbd_quantize_coeff_fp_nuq(coeff_ptr[0], quant, dequant,
+                                           cumbins_ptr, dequant_val,
+                                           qcoeff_ptr, dqcoeff_ptr);
+  }
+  *eob_ptr = nonzero ? 1 : 0;
+}
+#endif  // CONFIG_NEW_QUANT
+#endif  // CONFIG_VP9_HIGHBITDEPTH
  
  static INLINE void quantize_dc_bigtx(const tran_low_t *coeff_ptr,
                                       int skip_block,
@@ -101,6 +504,60 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                      qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 0);
  }
  
+#if CONFIG_NEW_QUANT
+// DC-only NUQ quantization for 32x32 transforms: runs
+// quantize_coeff_bigtx_nuq() on coeff_ptr[0] with logsizeby32 = 0.
+// |*eob_ptr| is set to 1 when the resulting level is non-zero and to 0
+// otherwise. When |skip_block| is set nothing is quantized, the output
+// buffers are left untouched and |*eob_ptr| is 0.
+void vp9_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
+                               int skip_block,
+                               const int16_t quant,
+                               const int16_t quant_shift,
+                               const int16_t dequant,
+                               const tran_low_t *cumbins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr) {
+  int nonzero = 0;
+  if (!skip_block) {
+    nonzero = quantize_coeff_bigtx_nuq(coeff_ptr[0], quant, quant_shift,
+                                       dequant, cumbins_ptr, dequant_val,
+                                       qcoeff_ptr, dqcoeff_ptr, 0);
+  }
+  *eob_ptr = nonzero ? 1 : 0;
+}
+
+// DC-only "fp" NUQ quantization for 32x32 transforms: runs
+// quantize_coeff_bigtx_fp_nuq() on coeff_ptr[0] with logsizeby32 = 0.
+// |*eob_ptr| is set to 1 when the resulting level is non-zero and to 0
+// otherwise. When |skip_block| is set nothing is quantized, the output
+// buffers are left untouched and |*eob_ptr| is 0.
+void vp9_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
+                                  int skip_block,
+                                  const int16_t quant,
+                                  const int16_t dequant,
+                                  const tran_low_t *cumbins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr) {
+  int nonzero = 0;
+  if (!skip_block) {
+    nonzero = quantize_coeff_bigtx_fp_nuq(coeff_ptr[0], quant, dequant,
+                                          cumbins_ptr, dequant_val,
+                                          qcoeff_ptr, dqcoeff_ptr, 0);
+  }
+  *eob_ptr = nonzero ? 1 : 0;
+}
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_TX64X64
  void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
                             const int16_t *round_ptr, const int16_t quant,
@@ -109,6 +566,60 @@ void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
    quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
                      qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 1);
  }
+
+#if CONFIG_NEW_QUANT
+// DC-only quantizer built on quantize_coeff_bigtx_nuq().
+// Only coeff_ptr[0] is quantized.  *eob_ptr receives 1 when the helper
+// returns nonzero and 0 otherwise, which covers every skip_block call.
+// The trailing helper argument is 1 here and 0 in the 32x32 variant
+// (presumably the helper's logsizeby32 -- confirm against its definition).
+void vp9_quantize_dc_64x64_nuq(
+    const tran_low_t *coeff_ptr, int skip_block,
+    const int16_t quant, const int16_t quant_shift, const int16_t dequant,
+    const tran_low_t *cumbins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+    uint16_t *eob_ptr) {
+  int nonzero = 0;
+
+  // A skipped block never reaches the helper.
+  if (!skip_block) {
+    nonzero = quantize_coeff_bigtx_nuq(
+        coeff_ptr[0],
+        quant, quant_shift, dequant,
+        cumbins_ptr, dequant_val,
+        qcoeff_ptr, dqcoeff_ptr,
+        1) != 0;
+  }
+
+  // With a single coefficient the end-of-block is either 1 or 0.
+  *eob_ptr = (uint16_t)nonzero;
+}
+
+// DC-only quantizer built on quantize_coeff_bigtx_fp_nuq().
+// Only coeff_ptr[0] is quantized.  *eob_ptr receives 1 when the helper
+// returns nonzero and 0 otherwise, which covers every skip_block call.
+void vp9_quantize_dc_64x64_fp_nuq(
+    const tran_low_t *coeff_ptr, int skip_block,
+    const int16_t quant, const int16_t dequant,
+    const tran_low_t *cumbins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+    uint16_t *eob_ptr) {
+  int nonzero = 0;
+
+  // A skipped block never reaches the helper.
+  if (!skip_block) {
+    nonzero = quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[0],
+        quant, dequant,
+        cumbins_ptr, dequant_val,
+        qcoeff_ptr, dqcoeff_ptr,
+        1) != 0;  // 1 here; the 32x32 variant passes 0.
+  }
+
+  // With a single coefficient the end-of-block is either 1 or 0.
+  *eob_ptr = (uint16_t)nonzero;
+}
+#endif  // CONFIG_NEW_QUANT
  #endif  // CONFIG_TX64X64
  
  #if CONFIG_VP9_HIGHBITDEPTH
@@ -128,11 +639,11 @@ static INLINE void highbd_quantize_dc_bigtx(const tran_low_t *coeff_ptr,
      const int coeff_sign = (coeff >> 31);
      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
  
-    int64_t tmp =
-        clamp(abs_coeff +
-              ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1 + logsizeby32),
-              INT32_MIN, INT32_MAX);
-    tmp = (tmp * quant) >> (15 - logsizeby32);
+    const int64_t tmp =
+        (clamp(abs_coeff +
+               ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1 + logsizeby32),
+               INT32_MIN, INT32_MAX) *
+         quant) >> (15 - logsizeby32);
      qcoeff_ptr[rc]  = (tmp ^ coeff_sign) - coeff_sign;
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / (2 << logsizeby32);
      if (tmp)
@@ -153,6 +664,60 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
                             qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 0);
  }
  
+#if CONFIG_NEW_QUANT
+// DC-only quantizer built on highbd_quantize_coeff_bigtx_nuq().
+// Only coeff_ptr[0] is quantized.  *eob_ptr receives 1 when the helper
+// returns nonzero and 0 otherwise, which covers every skip_block call.
+// The trailing helper argument is 0 here and 1 in the 64x64 variant
+// (presumably the helper's logsizeby32 -- confirm against its definition).
+void vp9_highbd_quantize_dc_32x32_nuq(
+    const tran_low_t *coeff_ptr, int skip_block,
+    const int16_t quant, const int16_t quant_shift, const int16_t dequant,
+    const tran_low_t *cumbins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+    uint16_t *eob_ptr) {
+  int nonzero = 0;
+
+  // A skipped block never reaches the helper.
+  if (!skip_block) {
+    nonzero = highbd_quantize_coeff_bigtx_nuq(
+        coeff_ptr[0],
+        quant, quant_shift, dequant,
+        cumbins_ptr, dequant_val,
+        qcoeff_ptr, dqcoeff_ptr,
+        0) != 0;
+  }
+
+  // With a single coefficient the end-of-block is either 1 or 0.
+  *eob_ptr = (uint16_t)nonzero;
+}
+
+// DC-only quantizer built on highbd_quantize_coeff_bigtx_fp_nuq().
+// Only coeff_ptr[0] is quantized.  *eob_ptr receives 1 when the helper
+// returns nonzero and 0 otherwise, which covers every skip_block call.
+void vp9_highbd_quantize_dc_32x32_fp_nuq(
+    const tran_low_t *coeff_ptr, int skip_block,
+    const int16_t quant, const int16_t dequant,
+    const tran_low_t *cumbins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+    uint16_t *eob_ptr) {
+  int nonzero = 0;
+
+  // A skipped block never reaches the helper.
+  if (!skip_block) {
+    nonzero = highbd_quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[0],
+        quant, dequant,
+        cumbins_ptr, dequant_val,
+        qcoeff_ptr, dqcoeff_ptr,
+        0) != 0;  // 0 here; the 64x64 variant passes 1.
+  }
+
+  // With a single coefficient the end-of-block is either 1 or 0.
+  *eob_ptr = (uint16_t)nonzero;
+}
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_TX64X64
  void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
                                    int skip_block,
@@ -165,6 +730,60 @@ void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
    highbd_quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
                             qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 1);
  }
+
+#if CONFIG_NEW_QUANT
+// DC-only quantizer built on highbd_quantize_coeff_bigtx_nuq().
+// Only coeff_ptr[0] is quantized.  *eob_ptr receives 1 when the helper
+// returns nonzero and 0 otherwise, which covers every skip_block call.
+// The trailing helper argument is 1 here and 0 in the 32x32 variant
+// (presumably the helper's logsizeby32 -- confirm against its definition).
+void vp9_highbd_quantize_dc_64x64_nuq(
+    const tran_low_t *coeff_ptr, int skip_block,
+    const int16_t quant, const int16_t quant_shift, const int16_t dequant,
+    const tran_low_t *cumbins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+    uint16_t *eob_ptr) {
+  int nonzero = 0;
+
+  // A skipped block never reaches the helper.
+  if (!skip_block) {
+    nonzero = highbd_quantize_coeff_bigtx_nuq(
+        coeff_ptr[0],
+        quant, quant_shift, dequant,
+        cumbins_ptr, dequant_val,
+        qcoeff_ptr, dqcoeff_ptr,
+        1) != 0;
+  }
+
+  // With a single coefficient the end-of-block is either 1 or 0.
+  *eob_ptr = (uint16_t)nonzero;
+}
+
+// DC-only quantizer built on highbd_quantize_coeff_bigtx_fp_nuq().
+// Only coeff_ptr[0] is quantized.  *eob_ptr receives 1 when the helper
+// returns nonzero and 0 otherwise, which covers every skip_block call.
+void vp9_highbd_quantize_dc_64x64_fp_nuq(
+    const tran_low_t *coeff_ptr, int skip_block,
+    const int16_t quant, const int16_t dequant,
+    const tran_low_t *cumbins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+    uint16_t *eob_ptr) {
+  int nonzero = 0;
+
+  // A skipped block never reaches the helper.
+  if (!skip_block) {
+    nonzero = highbd_quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[0],
+        quant, dequant,
+        cumbins_ptr, dequant_val,
+        qcoeff_ptr, dqcoeff_ptr,
+        1) != 0;  // 1 here; the 32x32 variant passes 0.
+  }
+
+  // With a single coefficient the end-of-block is either 1 or 0.
+  *eob_ptr = (uint16_t)nonzero;
+}
+#endif  // CONFIG_NEW_QUANT
  #endif  // CONFIG_TX64X64
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  
@@ -208,6 +827,74 @@ void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
    *eob_ptr = eob + 1;
  }
  
+#if CONFIG_NEW_QUANT
+// Block-level NUQ quantizer built on quantize_coeff_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_quantize_nuq_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+    const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const uint8_t *band) {
+  // A skipped block is only cleared, so its scan length is forced to 0.
+  const intptr_t scan_len = skip_block ? 0 : n_coeffs;
+  // Scan position of the latest nonzero result; -1 means none so far.
+  int last_nz = -1;
+  int pos;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  for (pos = 0; pos < scan_len; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    const int nonzero = quantize_coeff_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cumbins_ptr[b], dequant_val[b], qcoeff_ptr + rc,
+        dqcoeff_ptr + rc) != 0;
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+
+// Block-level NUQ quantizer built on quantize_coeff_fp_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_quantize_fp_nuq_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *quant_ptr, const int16_t *dequant_ptr,
+    const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const uint8_t *band) {
+  // A skipped block is only cleared, so its scan length is forced to 0.
+  const intptr_t scan_len = skip_block ? 0 : n_coeffs;
+  int last_nz = -1;  // Latest scan position with a nonzero result.
+  int pos;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  for (pos = 0; pos < scan_len; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    const int nonzero = quantize_coeff_fp_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], dequant_ptr[is_ac], cumbins_ptr[b],
+        dequant_val[b], qcoeff_ptr + rc, dqcoeff_ptr + rc) != 0;
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_VP9_HIGHBITDEPTH
  void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
                                intptr_t count,
@@ -255,7 +942,77 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
    }
    *eob_ptr = eob + 1;
  }
-#endif
+
+#if CONFIG_NEW_QUANT
+// Block-level NUQ quantizer built on highbd_quantize_coeff_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// scan[] gives the coefficient index for each scan position and band[]
+// gives the index into cumbins_ptr / dequant_val for that position.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_highbd_quantize_nuq_c(const tran_low_t *coeff_ptr,
+                               intptr_t n_coeffs,
+                               int skip_block,
+                               const int16_t *quant_ptr,
+                               const int16_t *quant_shift_ptr,
+                               const int16_t *dequant_ptr,
+                               const cumbins_type_nuq *cumbins_ptr,
+                               const dequant_val_type_nuq *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr,
+                               const int16_t *scan,
+                               const uint8_t *band) {
+  int eob = -1;  // Latest scan position with a nonzero result; -1 if none.
+  int i;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  for (i = 0; !skip_block && i < n_coeffs; i++) {
+    const int rc = scan[i];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    if (highbd_quantize_coeff_nuq(
+            coeff_ptr[rc], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+            dequant_ptr[is_ac], cumbins_ptr[band[i]], dequant_val[band[i]],
+            &qcoeff_ptr[rc], &dqcoeff_ptr[rc])) eob = i;
+  }
+  *eob_ptr = eob + 1;
+}
+
+// Block-level NUQ quantizer built on highbd_quantize_coeff_fp_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// scan[] gives the coefficient index for each scan position and band[]
+// gives the index into cumbins_ptr / dequant_val for that position.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_highbd_quantize_fp_nuq_c(const tran_low_t *coeff_ptr,
+                                  intptr_t n_coeffs,
+                                  int skip_block,
+                                  const int16_t *quant_ptr,
+                                  const int16_t *dequant_ptr,
+                                  const cumbins_type_nuq *cumbins_ptr,
+                                  const dequant_val_type_nuq *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr,
+                                  const int16_t *scan,
+                                  const uint8_t *band) {
+  int eob = -1;  // Latest scan position with a nonzero result; -1 if none.
+  int i;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  for (i = 0; !skip_block && i < n_coeffs; i++) {
+    const int rc = scan[i];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    if (highbd_quantize_coeff_fp_nuq(
+            coeff_ptr[rc], quant_ptr[is_ac], dequant_ptr[is_ac],
+            cumbins_ptr[band[i]], dequant_val[band[i]],
+            &qcoeff_ptr[rc], &dqcoeff_ptr[rc])) eob = i;
+  }
+  *eob_ptr = eob + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+#endif  // CONFIG_VP9_HIGHBITDEPTH
  
  // TODO(jingning) Refactor this file and combine functions with similar
  // operations.
@@ -324,6 +1081,76 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
                      eob_ptr, scan, iscan, 0);
  }
  
+#if CONFIG_NEW_QUANT
+// Block-level NUQ quantizer built on quantize_coeff_bigtx_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_quantize_32x32_nuq_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+    const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const uint8_t *band) {
+  // A skipped block is only cleared, so its scan length is forced to 0.
+  const intptr_t scan_len = skip_block ? 0 : n_coeffs;
+  // Scan position of the latest nonzero result; -1 means none so far.
+  int last_nz = -1;
+  int pos;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  for (pos = 0; pos < scan_len; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    // Trailing 0: the 64x64 variant passes 1 (presumably logsizeby32).
+    const int nonzero = quantize_coeff_bigtx_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cumbins_ptr[b], dequant_val[b], qcoeff_ptr + rc,
+        dqcoeff_ptr + rc, 0) != 0;
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+
+// Block-level NUQ quantizer built on quantize_coeff_bigtx_fp_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_quantize_32x32_fp_nuq_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *quant_ptr, const int16_t *dequant_ptr,
+    const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const uint8_t *band) {
+  // A skipped block is only cleared, so its scan length is forced to 0.
+  const intptr_t scan_len = skip_block ? 0 : n_coeffs;
+  int last_nz = -1;  // Latest scan position with a nonzero result.
+  int pos;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  for (pos = 0; pos < scan_len; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    // Trailing 0: the 64x64 variant passes 1 (presumably logsizeby32).
+    const int nonzero = quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], dequant_ptr[is_ac], cumbins_ptr[b],
+        dequant_val[b], qcoeff_ptr + rc, dqcoeff_ptr + rc, 0) != 0;
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_TX64X64
  void vp9_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
                               intptr_t n_coeffs,
@@ -343,6 +1170,76 @@ void vp9_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
                      qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
                      eob_ptr, scan, iscan, 1);
  }
+
+#if CONFIG_NEW_QUANT
+// Block-level NUQ quantizer built on quantize_coeff_bigtx_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_quantize_64x64_nuq_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+    const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const uint8_t *band) {
+  // A skipped block is only cleared, so its scan length is forced to 0.
+  const intptr_t scan_len = skip_block ? 0 : n_coeffs;
+  // Scan position of the latest nonzero result; -1 means none so far.
+  int last_nz = -1;
+  int pos;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  for (pos = 0; pos < scan_len; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    // Trailing 1: the 32x32 variant passes 0 (presumably logsizeby32).
+    const int nonzero = quantize_coeff_bigtx_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cumbins_ptr[b], dequant_val[b], qcoeff_ptr + rc,
+        dqcoeff_ptr + rc, 1) != 0;
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+
+// Block-level NUQ quantizer built on quantize_coeff_bigtx_fp_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_quantize_64x64_fp_nuq_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *quant_ptr, const int16_t *dequant_ptr,
+    const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const uint8_t *band) {
+  // A skipped block is only cleared, so its scan length is forced to 0.
+  const intptr_t scan_len = skip_block ? 0 : n_coeffs;
+  int last_nz = -1;  // Latest scan position with a nonzero result.
+  int pos;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  for (pos = 0; pos < scan_len; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    // Trailing 1: the 32x32 variant passes 0 (presumably logsizeby32).
+    const int nonzero = quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], dequant_ptr[is_ac], cumbins_ptr[b],
+        dequant_val[b], qcoeff_ptr + rc, dqcoeff_ptr + rc, 1) != 0;
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
  #endif  // CONFIG_TX64X64
  
  #if CONFIG_VP9_HIGHBITDEPTH
@@ -385,7 +1282,6 @@ static INLINE void highbd_quantize_fp_bigtx(const tran_low_t *coeff_ptr,
          dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] /
                            (2 << logsizeby32);
        }
-
        if (tmp)
          eob = i;
      }
@@ -412,6 +1308,76 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
                             eob_ptr, scan, iscan, 0);
  }
  
+#if CONFIG_NEW_QUANT
+// Block-level NUQ quantizer built on highbd_quantize_coeff_bigtx_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_highbd_quantize_32x32_nuq_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+    const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const uint8_t *band) {
+  // A skipped block is only cleared, so its scan length is forced to 0.
+  const intptr_t scan_len = skip_block ? 0 : n_coeffs;
+  // Scan position of the latest nonzero result; -1 means none so far.
+  int last_nz = -1;
+  int pos;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  for (pos = 0; pos < scan_len; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    // Trailing 0: the 64x64 variant passes 1 (presumably logsizeby32).
+    const int nonzero = highbd_quantize_coeff_bigtx_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cumbins_ptr[b], dequant_val[b], qcoeff_ptr + rc,
+        dqcoeff_ptr + rc, 0) != 0;
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+
+// Block-level NUQ quantizer built on highbd_quantize_coeff_bigtx_fp_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_highbd_quantize_32x32_fp_nuq_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *quant_ptr, const int16_t *dequant_ptr,
+    const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const uint8_t *band) {
+  // A skipped block is only cleared, so its scan length is forced to 0.
+  const intptr_t scan_len = skip_block ? 0 : n_coeffs;
+  int last_nz = -1;  // Latest scan position with a nonzero result.
+  int pos;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  for (pos = 0; pos < scan_len; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    // Trailing 0: the 64x64 variant passes 1 (presumably logsizeby32).
+    const int nonzero = highbd_quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], dequant_ptr[is_ac], cumbins_ptr[b],
+        dequant_val[b], qcoeff_ptr + rc, dqcoeff_ptr + rc, 0) != 0;
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_TX64X64
  void vp9_highbd_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
                                      intptr_t n_coeffs,
@@ -431,6 +1397,76 @@ void vp9_highbd_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
                             qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
                             eob_ptr, scan, iscan, 1);
  }
+
+#if CONFIG_NEW_QUANT
+// Block-level NUQ quantizer built on highbd_quantize_coeff_bigtx_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_highbd_quantize_64x64_nuq_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+    const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const uint8_t *band) {
+  // A skipped block is only cleared, so its scan length is forced to 0.
+  const intptr_t scan_len = skip_block ? 0 : n_coeffs;
+  // Scan position of the latest nonzero result; -1 means none so far.
+  int last_nz = -1;
+  int pos;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  for (pos = 0; pos < scan_len; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    // Trailing 1: the 32x32 variant passes 0 (presumably logsizeby32).
+    const int nonzero = highbd_quantize_coeff_bigtx_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cumbins_ptr[b], dequant_val[b], qcoeff_ptr + rc,
+        dqcoeff_ptr + rc, 1) != 0;
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+
+// Block-level NUQ quantizer built on highbd_quantize_coeff_bigtx_fp_nuq().
+// Both output buffers are zeroed over n_coeffs entries first; unless
+// skip_block is set, every scan position is then handed to the helper.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_highbd_quantize_64x64_fp_nuq_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *quant_ptr, const int16_t *dequant_ptr,
+    const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan,
+    const uint8_t *band) {
+  // A skipped block is only cleared, so its scan length is forced to 0.
+  const intptr_t scan_len = skip_block ? 0 : n_coeffs;
+  int last_nz = -1;  // Latest scan position with a nonzero result.
+  int pos;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  for (pos = 0; pos < scan_len; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    // Trailing 1: the 32x32 variant passes 0 (presumably logsizeby32).
+    const int nonzero = highbd_quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], dequant_ptr[is_ac], cumbins_ptr[b],
+        dequant_val[b], qcoeff_ptr + rc, dqcoeff_ptr + rc, 1) != 0;
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
  #endif  // CONFIG_TX64X64
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  
@@ -530,6 +1566,56 @@ void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col,
      }
  }
  
+#if CONFIG_NEW_QUANT
+// Strided NUQ quantizer for a row x col block.
+// Each of the row * col scan positions is mapped from its scan[] value rc to
+// the buffer offset (rc / col) * stride + (rc % col); that output entry is
+// zeroed and then handed to quantize_coeff_bigtx_nuq() when logsizeby32 >= 0
+// or to quantize_coeff_nuq() otherwise.  Entries outside the visited
+// offsets are not touched here, and there is no skip_block shortcut.
+// *eob_ptr receives one more than the last scan position for which the
+// helper returned nonzero, or 0 when there was none.
+void vp9_quantize_rect_nuq(
+    const tran_low_t *coeff_ptr, int row, int col, int stride,
+    const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+    const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr,
+    const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, int logsizeby32,
+    const int16_t *scan, const uint8_t *band) {
+  const int total = row * col;
+  const int use_bigtx = logsizeby32 >= 0;
+  int last_nz = -1;  // Latest scan position with a nonzero result.
+  int pos;
+
+  for (pos = 0; pos < total; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = rc != 0;  // Entry 0 is used only when rc == 0.
+    const int b = band[pos];    // Picks the cumbins/dequant_val entry.
+    // rc counts within a col-wide block; the buffers use a stride-wide one.
+    const int offset = (rc / col) * stride + (rc % col);
+    int nonzero;
+
+    // Only the visited entry is cleared; there is no bulk memset.
+    dqcoeff_ptr[offset] = 0;
+    qcoeff_ptr[offset] = 0;
+    // A negative logsizeby32 selects the plain (non-bigtx) helper.
+    if (use_bigtx) {
+      nonzero = quantize_coeff_bigtx_nuq(
+          coeff_ptr[offset], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+          dequant_ptr[is_ac], cumbins_ptr[b], dequant_val[b],
+          qcoeff_ptr + offset, dqcoeff_ptr + offset, logsizeby32);
+    } else {
+      nonzero = quantize_coeff_nuq(
+          coeff_ptr[offset], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+          dequant_ptr[is_ac], cumbins_ptr[b], dequant_val[b],
+          qcoeff_ptr + offset, dqcoeff_ptr + offset);
+    }
+    if (nonzero) last_nz = pos;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
  int get_eob(tran_low_t *qcoeff_ptr, intptr_t n_coeffs, const int16_t *scan) {
    int i, rc, eob = -1;
  
@@ -873,18 +1959,18 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
      const int qrounding_factor = q == 0 ? 64 : 48;
  
      for (i = 0; i < 2; ++i) {
-      int qrounding_factor_fp = i == 0 ? 48 : 42;
-      if (q == 0)
-        qrounding_factor_fp = 64;
+      const int qrounding_factor_fp = q == 0 ? 64 : (i == 0 ? 48 : 42);
  
        // y
        quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth)
                       : vp9_ac_quant(q, 0, cm->bit_depth);
        invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant);
        quants->y_quant_fp[q][i] = (1 << 16) / quant;
-      quants->y_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
+      quants->y_round_fp[q][i] =
+          vp9_round_factor_to_round(quant, qrounding_factor_fp);
        quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
-      quants->y_round[q][i] = (qrounding_factor * quant) >> 7;
+      quants->y_round[q][i] =
+          vp9_round_factor_to_round(quant, qrounding_factor);
        cm->y_dequant[q][i] = quant;
  
        // uv
@@ -893,12 +1979,27 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
        invert_quant(&quants->uv_quant[q][i],
                     &quants->uv_quant_shift[q][i], quant);
        quants->uv_quant_fp[q][i] = (1 << 16) / quant;
-      quants->uv_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
+      quants->uv_round_fp[q][i] =
+          vp9_round_factor_to_round(quant, qrounding_factor_fp);
        quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
-      quants->uv_round[q][i] = (qrounding_factor * quant) >> 7;
+      quants->uv_round[q][i] =
+          vp9_round_factor_to_round(quant, qrounding_factor);
        cm->uv_dequant[q][i] = quant;
      }
  
+#if CONFIG_NEW_QUANT
+    for (i = 0; i < COEF_BANDS; i++) {
+      const int quant = cm->y_dequant[q][i != 0];
+      const int uvquant = cm->uv_dequant[q][i != 0];
+      vp9_get_dequant_val_nuq(quant, i, cm->bit_depth,
+                              cm->y_dequant_val_nuq[q][i],
+                              quants->y_cumbins_nuq[q][i]);
+      vp9_get_dequant_val_nuq(uvquant, i, cm->bit_depth,
+                              cm->uv_dequant_val_nuq[q][i],
+                              quants->uv_cumbins_nuq[q][i]);
+    }
+#endif  // CONFIG_NEW_QUANT
+
      for (i = 2; i < 8; i++) {
        quants->y_quant[q][i] = quants->y_quant[q][1];
        quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
@@ -936,6 +2037,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
    x->plane[0].zbin = quants->y_zbin[qindex];
    x->plane[0].round = quants->y_round[qindex];
    xd->plane[0].dequant = cm->y_dequant[qindex];
+#if CONFIG_NEW_QUANT
+  x->plane[0].cumbins_nuq = quants->y_cumbins_nuq[qindex];
+  xd->plane[0].dequant_val_nuq = cm->y_dequant_val_nuq[qindex];
+#endif
  
    x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0];
    x->plane[0].quant_thred[1] = x->plane[0].zbin[1] * x->plane[0].zbin[1];
@@ -949,6 +2054,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
      x->plane[i].zbin = quants->uv_zbin[qindex];
      x->plane[i].round = quants->uv_round[qindex];
      xd->plane[i].dequant = cm->uv_dequant[qindex];
+#if CONFIG_NEW_QUANT
+    x->plane[i].cumbins_nuq = quants->uv_cumbins_nuq[qindex];
+    xd->plane[i].dequant_val_nuq = cm->uv_dequant_val_nuq[qindex];
+#endif
  
      x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0];
      x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1];
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h

index fad9c7d6e997d45648b1da36e859b3a9681095c8..7ac28b8fd41dec63f87efcc332a4597556a86b56 100644 (file)
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -19,6 +19,13 @@ extern "C" {
  #endif
  
  typedef struct {
+#if CONFIG_NEW_QUANT
+  DECLARE_ALIGNED(16, tran_low_t,
+                  y_cumbins_nuq[QINDEX_RANGE][COEF_BANDS][NUQ_KNOTES]);
+  DECLARE_ALIGNED(16, tran_low_t,
+                  uv_cumbins_nuq[QINDEX_RANGE][COEF_BANDS][NUQ_KNOTES]);
+#endif  // CONFIG_NEW_QUANT
+
    DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
    DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
    DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]);
@@ -45,12 +52,75 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                             const int16_t *round_ptr, const int16_t quant_ptr,
                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                             const int16_t dequant_ptr, uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_quantize_dc_nuq(const tran_low_t *coeff_ptr,
+                         int skip_block,
+                         const int16_t quant,
+                         const int16_t quant_shift,
+                         const int16_t dequant,
+                         const tran_low_t *cumbins_ptr,
+                         const tran_low_t *dequant_val,
+                         tran_low_t *qcoeff_ptr,
+                         tran_low_t *dqcoeff_ptr,
+                         uint16_t *eob_ptr);
+void vp9_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
+                               int skip_block,
+                               const int16_t quant,
+                               const int16_t quant_shift,
+                               const int16_t dequant,
+                               const tran_low_t *cumbins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr);
+void vp9_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
+                            int skip_block,
+                            const int16_t quant,
+                            const int16_t dequant,
+                            const tran_low_t *cumbins_ptr,
+                            const tran_low_t *dequant_val,
+                            tran_low_t *qcoeff_ptr,
+                            tran_low_t *dqcoeff_ptr,
+                            uint16_t *eob_ptr);
+void vp9_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
+                                  int skip_block,
+                                  const int16_t quant,
+                                  const int16_t dequant,
+                                  const tran_low_t *cumbins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
+
  #if CONFIG_TX64X64
  void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
                             const int16_t *round_ptr, const int16_t quant_ptr,
                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                             const int16_t dequant_ptr, uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr,
+                               int skip_block,
+                               const int16_t quant,
+                               const int16_t quant_shift,
+                               const int16_t dequant,
+                               const tran_low_t *cumbins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr);
+void vp9_quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr,
+                                  int skip_block,
+                                  const int16_t quant,
+                                  const int16_t dequant,
+                                  const tran_low_t *cumbins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
  #endif  // CONFIG_TX64X64
+
  void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
                                  const int16_t *scan, const int16_t *iscan);
  
@@ -67,6 +137,46 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
                                    tran_low_t *dqcoeff_ptr,
                                    const int16_t dequant_ptr,
                                    uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr,
+                                int skip_block,
+                                const int16_t quant,
+                                const int16_t quant_shift,
+                                const int16_t dequant,
+                                const tran_low_t *cumbins_ptr,
+                                const tran_low_t *dequant_val,
+                                tran_low_t *qcoeff_ptr,
+                                tran_low_t *dqcoeff_ptr,
+                                uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
+                                      int skip_block,
+                                      const int16_t quant,
+                                      const int16_t quant_shift,
+                                      const int16_t dequant,
+                                      const tran_low_t *cumbins_ptr,
+                                      const tran_low_t *dequant_val,
+                                      tran_low_t *qcoeff_ptr,
+                                      tran_low_t *dqcoeff_ptr,
+                                      uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
+                                   int skip_block,
+                                   const int16_t quant,
+                                   const int16_t dequant,
+                                   const tran_low_t *cumbins_ptr,
+                                   const tran_low_t *dequant_val,
+                                   tran_low_t *qcoeff_ptr,
+                                   tran_low_t *dqcoeff_ptr,
+                                   uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
+                                         int skip_block,
+                                         const int16_t quant,
+                                         const int16_t dequant,
+                                         const tran_low_t *cumbins_ptr,
+                                         const tran_low_t *dequant_val,
+                                         tran_low_t *qcoeff_ptr,
+                                         tran_low_t *dqcoeff_ptr,
+                                         uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
  #if CONFIG_TX64X64
  void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
                                    int skip_block,
@@ -76,6 +186,27 @@ void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
                                    tran_low_t *dqcoeff_ptr,
                                    const int16_t dequant_ptr,
                                    uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_highbd_quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr,
+                                      int skip_block,
+                                      const int16_t quant,
+                                      const int16_t quant_shift,
+                                      const int16_t dequant,
+                                      const tran_low_t *cumbins_ptr,
+                                      const tran_low_t *dequant_val,
+                                      tran_low_t *qcoeff_ptr,
+                                      tran_low_t *dqcoeff_ptr,
+                                      uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr,
+                                         int skip_block,
+                                         const int16_t quant,
+                                         const int16_t dequant,
+                                         const tran_low_t *cumbins_ptr,
+                                         const tran_low_t *dequant_val,
+                                         tran_low_t *qcoeff_ptr,
+                                         tran_low_t *dqcoeff_ptr,
+                                         uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
  #endif  // CONFIG_TX64X64
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  
@@ -101,7 +232,23 @@ void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col,
                         tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                         const int16_t *dequant_ptr,
                         int logsizeby32, int stride, int has_dc);
-
+#if CONFIG_NEW_QUANT
+void vp9_quantize_rect_nuq(const tran_low_t *coeff_ptr,
+                           int row,
+                           int col,
+                           int stride,
+                           const int16_t *quant_ptr,
+                           const int16_t *quant_shift_ptr,
+                           const int16_t *dequant_ptr,
+                           const cumbins_type_nuq *cumbins_ptr,
+                           const dequant_val_type_nuq *dequant_val,
+                           tran_low_t *qcoeff_ptr,
+                           tran_low_t *dqcoeff_ptr,
+                           uint16_t *eob_ptr,
+                           int logsizeby32,
+                           const int16_t *scan,
+                           const uint8_t *band);
+#endif  // CONFIG_NEW_QUANT
  int get_eob(tran_low_t *qcoeff_ptr, intptr_t n_coeffs, const int16_t *scan);
  #endif
  
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c

index 9dbfad5d6332c4acd1bc0696117dbfc5a0daa5c3..1d446993e9d96d2f5440f2b99f10613026f3a71c 100644 (file)
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -559,7 +559,17 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
    } else if (max_txsize_lookup[plane_bsize] == tx_size) {
      if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
        // full forward transform and quantization
-      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_NEW_QUANT
+      if (x->quant_fp)
+        vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else
+      if (x->quant_fp)
+        vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif
  #if CONFIG_VP9_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dist_block(plane, block, tx_size, args, xd->bd);
@@ -573,7 +583,14 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
        // compute DC coefficient
        tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
+#if CONFIG_NEW_QUANT
+      if (x->quant_fp)
+        vp9_xform_quant_dc_fp_nuq(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant_dc_nuq(x, plane, block, plane_bsize, tx_size);
+#else
        vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+#endif
        args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
        args->dist = args->sse;
        if (x->plane[plane].eobs[block]) {
@@ -598,7 +615,17 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
      }
    } else {
      // full forward transform and quantization
-    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_NEW_QUANT
+    if (x->quant_fp)
+      vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+    else
+      vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else
+    if (x->quant_fp)
+      vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+    else
+      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif  // CONFIG_NEW_QUANT
  #if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dist_block(plane, block, tx_size, args, xd->bd);
author	Deb Mukherjee <debargha@google.com>
	Wed, 4 Mar 2015 22:04:11 +0000 (14:04 -0800)
committer	Deb Mukherjee <debargha@google.com>
	Wed, 18 Mar 2015 04:42:55 +0000 (21:42 -0700)
configure		patch | blob | history
vp9/common/vp9_blockd.h		patch | blob | history
vp9/common/vp9_onyxc_int.h		patch | blob | history
vp9/common/vp9_quant_common.c		patch | blob | history
vp9/common/vp9_quant_common.h		patch | blob | history
vp9/common/vp9_rtcd_defs.pl		patch | blob | history
vp9/decoder/vp9_decodeframe.c		patch | blob | history
vp9/decoder/vp9_detokenize.c		patch | blob | history
vp9/encoder/vp9_block.h		patch | blob | history
vp9/encoder/vp9_encodemb.c		patch | blob | history
vp9/encoder/vp9_encodemb.h		patch | blob | history
vp9/encoder/vp9_quantize.c		patch | blob | history
vp9/encoder/vp9_quantize.h		patch | blob | history
vp9/encoder/vp9_rdopt.c		patch | blob | history