]> granicus.if.org Git - libvpx/commitdiff
Non-uniform quantization experiment
authorDeb Mukherjee <debargha@google.com>
Wed, 4 Mar 2015 22:04:11 +0000 (14:04 -0800)
committerDeb Mukherjee <debargha@google.com>
Wed, 18 Mar 2015 04:42:55 +0000 (21:42 -0700)
This framework allows lower quantization bins to be shrunk down or
expanded to more closely match the source distribution (assuming a
generalized gaussian-like central peaky model for the coefficients) in an
entropy-constrained sense. Specifically, the widths of bins 0-4 are
modified as a factor of the nominal quantization step size, and from bin 5
onwards all bins are as wide as the nominal quantization step size.
Further, different bin width profiles as well as reconstruction values
can be used based on the coefficient band as well as the quantization step
size divided into 5 ranges.

A small gain currently on derflr of about 0.16% is observed with the
same parameters for all q values.
Optimizing the parameters based on qstep value is left as a TODO for now.

Results on derflr with all expts on is +6.08% (up from 5.88%).

Experiments are in progress to tune the parameters for different
coefficient bands and quantization step ranges.

Change-Id: I88429d8cb0777021bfbb689ef69b764eafb3a1de

14 files changed:
configure
vp9/common/vp9_blockd.h
vp9/common/vp9_onyxc_int.h
vp9/common/vp9_quant_common.c
vp9/common/vp9_quant_common.h
vp9/common/vp9_rtcd_defs.pl
vp9/decoder/vp9_decodeframe.c
vp9/decoder/vp9_detokenize.c
vp9/encoder/vp9_block.h
vp9/encoder/vp9_encodemb.c
vp9/encoder/vp9_encodemb.h
vp9/encoder/vp9_quantize.c
vp9/encoder/vp9_quantize.h
vp9/encoder/vp9_rdopt.c

index 3323b3229e512291047199c9ff4b20e1c54f6ca8..4593d252fb6d974084b0be1b2c6b2e08507f27cc 100755 (executable)
--- a/configure
+++ b/configure
@@ -294,6 +294,7 @@ EXPERIMENT_LIST="
     global_motion
     palette
     newmvref_sub8x8
+    new_quant
 "
 CONFIG_LIST="
     external_build
index e659d10c2f34d38e3c2f35dab3235f554ef75c46..0f647ceeaf283d18813c94c0fcff14996bd6bb09 100644 (file)
@@ -20,6 +20,7 @@
 #include "vp9/common/vp9_common_data.h"
 #include "vp9/common/vp9_filter.h"
 #include "vp9/common/vp9_mv.h"
+#include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_scale.h"
 
 #ifdef __cplusplus
@@ -306,6 +307,9 @@ struct macroblockd_plane {
   struct buf_2d dst;
   struct buf_2d pre[2];
   const int16_t *dequant;
+#if CONFIG_NEW_QUANT
+  const dequant_val_type_nuq *dequant_val_nuq;
+#endif
   ENTROPY_CONTEXT *above_context;
   ENTROPY_CONTEXT *left_context;
 #if CONFIG_PALETTE
@@ -548,6 +552,20 @@ static inline int get_wedge_bits(BLOCK_SIZE sb_type) {
 }
 #endif  // CONFIG_WEDGE_PARTITION
 
+#if CONFIG_NEW_QUANT && CONFIG_TX_SKIP
+static inline int is_rect_quant_used(const MB_MODE_INFO *mbmi,  // Nonzero iff tx_skip is set for the plane and its mode is V_PRED, H_PRED or TM_PRED.
+                                     int plane) {
+  return
+      mbmi->tx_skip[plane != 0] &&  // index 0 for plane 0, index 1 for every other plane
+      ((plane == 0 && (mbmi->mode == V_PRED ||  // plane 0 is tested against mbmi->mode
+                       mbmi->mode == H_PRED ||
+                       mbmi->mode == TM_PRED)) ||
+       (plane != 0 && (mbmi->uv_mode == V_PRED ||  // other planes are tested against mbmi->uv_mode
+                       mbmi->uv_mode == H_PRED ||
+                       mbmi->uv_mode == TM_PRED)));
+}
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
index 2562a86684ed413c76b6fbca90d8736b42fbf57c..f142584be0bba31fafa36bf0ade109ff93c30c4d 100644 (file)
@@ -65,9 +65,14 @@ typedef struct {
 
 typedef struct VP9Common {
   struct vpx_internal_error_info  error;
-
   DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]);
   DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]);
+#if CONFIG_NEW_QUANT
+  DECLARE_ALIGNED(16, dequant_val_type_nuq,
+                  y_dequant_val_nuq[QINDEX_RANGE][COEF_BANDS]);
+  DECLARE_ALIGNED(16, dequant_val_type_nuq,
+                  uv_dequant_val_nuq[QINDEX_RANGE][COEF_BANDS]);
+#endif  // CONFIG_NEW_QUANT
 
   vpx_color_space_t color_space;
 
index 564a3eb0ce3d255389301a3b829f663d225d4289..055ffe90851ffb3200b167a21d04b7f2eb6cec2e 100644 (file)
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <stdio.h>
+#include <math.h>
 #include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_seg_common.h"
 
+#if CONFIG_NEW_QUANT
+// Bin widths expressed as a fraction over 128 of the quant stepsize,
+// for the quantization bins 0-4.
+// So a value x indicates the bin is actually factor x/128 of the
+// nominal quantization step.  For the zero bin, the width is only
+// for one side of zero, so the actual width is twice that.
+// There are five sets of values for 5 different quantizer ranges.
+//
+// TODO(debargha): Optimize these tables
+static const uint8_t vp9_nuq_knotes_tiny[COEF_BANDS][NUQ_KNOTES] = {
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_low[COEF_BANDS][NUQ_KNOTES] = {
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_mid[COEF_BANDS][NUQ_KNOTES] = {
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_high[COEF_BANDS][NUQ_KNOTES] = {
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+static const uint8_t vp9_nuq_knotes_huge[COEF_BANDS][NUQ_KNOTES] = {
+  {84, 124, 128, 128, 128},  // dc, band 0
+  {84, 124, 128, 128, 128},  // band 1
+  {84, 124, 128, 128, 128},  // band 2
+  {86, 124, 128, 128, 128},  // band 3
+  {86, 124, 128, 128, 128},  // band 4
+  {86, 124, 128, 128, 128},  // band 5
+};
+
+static const uint8_t vp9_nuq_doff_tiny[COEF_BANDS] = { 8, 16, 17, 22, 23, 24 };
+static const uint8_t vp9_nuq_doff_low[COEF_BANDS] =  { 8, 16, 17, 22, 23, 24 };
+static const uint8_t vp9_nuq_doff_mid[COEF_BANDS] =  { 8, 16, 17, 22, 23, 24 };
+static const uint8_t vp9_nuq_doff_high[COEF_BANDS] = { 8, 16, 17, 22, 23, 24 };
+static const uint8_t vp9_nuq_doff_huge[COEF_BANDS] = { 8, 16, 17, 22, 23, 24 };
+
+// Allow different quantization profiles in different q ranges,
+// to enable entropy-constraints in scalar quantization.
+
+static const uint8_t *get_nuq_knotes(int16_t quant, int band, int bd) {  // Returns the NUQ_KNOTES bin-width row for `band` from the table chosen by `quant`.
+  const int shift = bd - 8;  // thresholds are written for bd == 8 and scaled by 2^(bd - 8); assumes bd >= 8
+  if (quant > (512 << shift))
+    return vp9_nuq_knotes_huge[band];
+  else if (quant > (256 << shift))
+    return vp9_nuq_knotes_high[band];
+  else if (quant > (128 << shift))
+    return vp9_nuq_knotes_mid[band];
+  else if (quant > (64 << shift))
+    return vp9_nuq_knotes_low[band];
+  else
+    return vp9_nuq_knotes_tiny[band];  // quant <= (64 << shift)
+}
+
+static INLINE int16_t quant_to_doff_fixed(int16_t quant, int band, int bd) {  // Returns the offset entry for `band` from the doff table chosen by `quant`.
+  const int shift = bd - 8;  // same range thresholds as get_nuq_knotes(); assumes bd >= 8
+  if (quant > (512 << shift))
+    return vp9_nuq_doff_huge[band];
+  else if (quant > (256 << shift))
+    return vp9_nuq_doff_high[band];
+  else if (quant > (128 << shift))
+    return vp9_nuq_doff_mid[band];
+  else if (quant > (64 << shift))
+    return vp9_nuq_doff_low[band];
+  else
+    return vp9_nuq_doff_tiny[band];  // quant <= (64 << shift)
+}
+
+static INLINE void get_cumbins_nuq(int q, int band, int bd,  // Fills cumbins[0..NUQ_KNOTES-1] with the cumulative bin widths for step q.
+                                   tran_low_t *cumbins) {
+  const uint8_t *knotes = get_nuq_knotes(q, band, bd);
+  int16_t cumknotes[NUQ_KNOTES];  // running sum of knotes, still in 1/128 units of q
+  int i;
+  cumknotes[0] = knotes[0];
+  for (i = 1; i < NUQ_KNOTES; ++i)
+    cumknotes[i] = cumknotes[i - 1] + knotes[i];
+  for (i = 0; i < NUQ_KNOTES; ++i)
+    cumbins[i] = (cumknotes[i] * q + 64) >> 7;  // multiply by q / 128; the +64 rounds to nearest
+}
+
+void vp9_get_dequant_val_nuq(int q, int band, int bd,  // Fills dq[0..NUQ_KNOTES] with the reconstruction magnitudes for levels 0..NUQ_KNOTES.
+                             tran_low_t *dq, tran_low_t *cumbins) {  // cumbins may be NULL; a local scratch array is used then
+  const uint8_t *knotes = get_nuq_knotes(q, band, bd);
+  tran_low_t cumbins_[NUQ_KNOTES], *cumbins_ptr;
+  tran_low_t doff;
+  int i;
+  cumbins_ptr = (cumbins ? cumbins : cumbins_);
+  get_cumbins_nuq(q, band, bd, cumbins_ptr);
+  dq[0] = 0;  // level 0 reconstructs to 0
+  for (i = 1; i < NUQ_KNOTES; ++i) {
+    const int16_t qstep = (knotes[i] * q + 64) >> 7;  // width of bin i: knotes[i] / 128 of q, rounded
+    doff = quant_to_doff_fixed(qstep, band, bd);  // offset table is chosen by the bin width, not by q
+    doff = (2 * doff * qstep + q) / (2 * q);  // rescale the offset by qstep / q with rounding; assumes q > 0
+    dq[i] = cumbins_ptr[i - 1] + (((knotes[i] - doff * 2) * q + 128) >> 8);  // lower edge + (knotes[i] / 2 - doff) / 128 of q
+  }
+  doff = quant_to_doff_fixed(q, band, bd);
+  dq[NUQ_KNOTES] =
+      cumbins_ptr[NUQ_KNOTES - 1] + (((64 - doff) * q + 64) >> 7);  // last cumulative edge + (64 - doff) / 128 of q
+}
+
+tran_low_t vp9_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq) {  // Maps level v to a magnitude; v indexes dq without a lower-bound check, so assumes v >= 0.
+  if (v <= NUQ_KNOTES)
+    return dq[v];  // table lookup for levels up to NUQ_KNOTES
+  else
+    return dq[NUQ_KNOTES] + (v - NUQ_KNOTES) * q;  // past the table, each further level adds one full step q
+}
+
+tran_low_t vp9_dequant_coeff_nuq(int v, int q, const tran_low_t *dq) {  // Signed variant: dequantizes abs(v) and returns it with the sign of v.
+  tran_low_t dqmag = vp9_dequant_abscoeff_nuq(abs(v), q, dq);
+  return (v < 0 ? -dqmag : dqmag);
+}
+#endif  // CONFIG_NEW_QUANT
+
 static const int16_t dc_qlookup[QINDEX_RANGE] = {
   4,       8,    8,    9,   10,   11,   12,   12,
   13,     14,   15,   16,   17,   18,   19,   19,
@@ -275,4 +410,3 @@ int vp9_get_qindex(const struct segmentation *seg, int segment_id,
     return base_qindex;
   }
 }
-
index af3bdb82f93265d49a9b1a89f8406bddda93be5f..ccbc0c964c203f18b14adddde308ba52cd1f8630 100644 (file)
@@ -11,6 +11,8 @@
 #ifndef VP9_COMMON_VP9_QUANT_COMMON_H_
 #define VP9_COMMON_VP9_QUANT_COMMON_H_
 
+#include <stdio.h>
+
 #include "vpx/vpx_codec.h"
 #include "vp9/common/vp9_seg_common.h"
 
@@ -34,6 +36,21 @@ int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
 int vp9_get_qindex(const struct segmentation *seg, int segment_id,
                    int base_qindex);
 
+static INLINE int16_t vp9_round_factor_to_round(int16_t quant,  // Returns (round_factor * quant) / 128, i.e. round_factor is in 1/128 units of quant.
+                                                int16_t round_factor) {
+  return (round_factor * quant) >> 7;  // plain right shift; no rounding term is added
+}
+
+#if CONFIG_NEW_QUANT
+#define NUQ_KNOTES 5
+typedef tran_low_t dequant_val_type_nuq[NUQ_KNOTES + 1];
+typedef tran_low_t cumbins_type_nuq[NUQ_KNOTES];
+void vp9_get_dequant_val_nuq(int q, int band, int bd,
+                             tran_low_t *dq, tran_low_t *cumbins);
+tran_low_t vp9_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq);
+tran_low_t vp9_dequant_coeff_nuq(int v, int q, const tran_low_t *dq);
+#endif  // CONFIG_NEW_QUANT
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
index 4ae64847d2d27d906448f53d9a721e3ff69aa9df..c9648d968c98b440782c141cc007eabbb112b456 100644 (file)
@@ -7,6 +7,7 @@ print <<EOF
 #include "vpx/vpx_integer.h"
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_enums.h"
+#include "vp9/common/vp9_quant_common.h"
 
 struct macroblockd;
 
@@ -1402,6 +1403,27 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   }
 }
 
+if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+  add_proto qw/void vp9_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_nuq/;
+
+  add_proto qw/void vp9_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_fp_nuq/;
+
+  add_proto qw/void vp9_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_32x32_nuq/;
+
+  add_proto qw/void vp9_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+  specialize qw/vp9_quantize_32x32_fp_nuq/;
+
+  if (vpx_config("CONFIG_TX64X64") eq "yes") {
+    add_proto qw/void vp9_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/vp9_quantize_64x64_nuq/;
+
+    add_proto qw/void vp9_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+    specialize qw/vp9_quantize_64x64_fp_nuq/;
+  }
+}
 #
 # Structured Similarity (SSIM)
 #
@@ -2131,6 +2153,28 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 
     add_proto qw/void vp9_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
     specialize qw/vp9_highbd_quantize_b_64x64/;
+
+    if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+      add_proto qw/void vp9_highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_nuq/;
+
+      add_proto qw/void vp9_highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_fp_nuq/;
+
+      add_proto qw/void vp9_highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_32x32_nuq/;
+
+      add_proto qw/void vp9_highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+      specialize qw/vp9_highbd_quantize_32x32_fp_nuq/;
+
+      if (vpx_config("CONFIG_TX64X64") eq "yes") {
+        add_proto qw/void vp9_highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+        specialize qw/vp9_highbd_quantize_64x64_nuq/;
+
+        add_proto qw/void vp9_highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cumbins_type_nuq *cumbins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+        specialize qw/vp9_highbd_quantize_64x64_fp_nuq/;
+      }
+    }
   }
 
   #
index 1b9edc603d6abeb6c1cbd5a700f13805e851f243..7dc9708ce354917b38533de6696a2aab151a7487 100644 (file)
@@ -197,9 +197,18 @@ static void read_mv_probs(nmv_context *ctx, int allow_hp, vp9_reader *r) {
 static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) {
   int i;
   xd->plane[0].dequant = cm->y_dequant[q_index];
+#if CONFIG_NEW_QUANT
+  xd->plane[0].dequant_val_nuq =
+      (const dequant_val_type_nuq *)cm->y_dequant_val_nuq[q_index];
+#endif  // CONFIG_NEW_QUANT
 
-  for (i = 1; i < MAX_MB_PLANE; i++)
+  for (i = 1; i < MAX_MB_PLANE; i++) {
     xd->plane[i].dequant = cm->uv_dequant[q_index];
+#if CONFIG_NEW_QUANT
+    xd->plane[i].dequant_val_nuq =
+        (const dequant_val_type_nuq *)cm->uv_dequant_val_nuq[q_index];
+#endif  // CONFIG_NEW_QUANT
+  }
 }
 
 #if CONFIG_TX_SKIP
@@ -2520,11 +2529,24 @@ void vp9_init_dequantizer(VP9_COMMON *cm) {
   int q;
 
   for (q = 0; q < QINDEX_RANGE; q++) {
+    int b;
     cm->y_dequant[q][0] = vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth);
     cm->y_dequant[q][1] = vp9_ac_quant(q, 0, cm->bit_depth);
 
     cm->uv_dequant[q][0] = vp9_dc_quant(q, cm->uv_dc_delta_q, cm->bit_depth);
     cm->uv_dequant[q][1] = vp9_ac_quant(q, cm->uv_ac_delta_q, cm->bit_depth);
+
+#if CONFIG_NEW_QUANT
+    for (b = 0; b < COEF_BANDS; ++b) {
+      vp9_get_dequant_val_nuq(
+          cm->y_dequant[q][b != 0], b, cm->bit_depth,
+          cm->y_dequant_val_nuq[q][b], NULL);
+      vp9_get_dequant_val_nuq(
+          cm->uv_dequant[q][b != 0], b, cm->bit_depth,
+          cm->uv_dequant_val_nuq[q][b], NULL);
+    }
+#endif  // CONFIG_NEW_QUANT
+    (void) b;
   }
 }
 
index c8202347954628694e94f832bf5b803eeaee3e9a..d14cd33c880d1a4c2d36488dc7cf066a2a1cdf97 100644 (file)
@@ -54,7 +54,11 @@ static const vp9_tree_index coeff_subtree_high[TREE_SIZE(ENTROPY_TOKENS)] = {
 };
 
 static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
-                        tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
+                        tran_low_t *dqcoeff, TX_SIZE tx_size,
+                        const int16_t *dq,
+#if CONFIG_NEW_QUANT
+                        const dequant_val_type_nuq *dq_val,
+#endif  // CONFIG_NEW_QUANT
                         int ctx, const int16_t *scan, const int16_t *nb,
                         vp9_reader *r) {
   const int max_eob = 16 << (tx_size << 1);
@@ -74,6 +78,12 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
   const int dq_shift = (tx_size > TX_16X16) ? tx_size - TX_16X16 : 0;
   int v, token;
   int16_t dqv = dq[0];
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+  const int use_rect_quant = is_rect_quant_used(&xd->mi[0].src_mi->mbmi, type);
+#endif
+  const tran_low_t *dqv_val = &dq_val[0][0];
+#endif  // CONFIG_NEW_QUANT
   const uint8_t *cat1_prob;
   const uint8_t *cat2_prob;
   const uint8_t *cat3_prob;
@@ -125,6 +135,9 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
       INCREMENT_COUNT(EOB_MODEL_TOKEN);
       break;
     }
+#if CONFIG_NEW_QUANT
+    dqv_val = &dq_val[band][0];
+#endif  // CONFIG_NEW_QUANT
 
     while (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
       INCREMENT_COUNT(ZERO_TOKEN);
@@ -136,6 +149,9 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
       ctx = get_coef_context(nb, token_cache, c);
       band = *band_translate++;
       prob = coef_probs[band][ctx];
+#if CONFIG_NEW_QUANT
+      dqv_val = &dq_val[band][0];
+#endif  // CONFIG_NEW_QUANT
     }
 
     if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) {
@@ -191,7 +207,22 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
           break;
       }
     }
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+    if (use_rect_quant) {
+      v = (val * dqv) >> dq_shift;
+    } else {
+      v = vp9_dequant_abscoeff_nuq(val, dqv, dqv_val);
+      v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
+    }
+#else
+    v = vp9_dequant_abscoeff_nuq(val, dqv, dqv_val);
+    v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
+#endif  // CONFIG_TX_SKIP
+#else   // CONFIG_NEW_QUANT
     v = (val * dqv) >> dq_shift;
+#endif  // CONFIG_NEW_QUANT
+
 #if CONFIG_COEFFICIENT_RANGE_CHECKING
     dqcoeff[scan[c]] = check_range(vp9_read_bit(r) ? -v : v);
 #else
@@ -213,9 +244,15 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
   const int ctx = get_entropy_context(tx_size, pd->above_context + x,
                                                pd->left_context + y);
   const scan_order *so = get_scan(xd, tx_size, pd->plane_type, block);
-  const int eob = decode_coefs(cm, xd, pd->plane_type,
-                               BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
-                               pd->dequant, ctx, so->scan, so->neighbors, r);
+  int eob;
+  eob = decode_coefs(cm, xd, pd->plane_type,
+                     BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
+                     pd->dequant,
+#if CONFIG_NEW_QUANT
+                     pd->dequant_val_nuq,
+#endif
+                     ctx, so->scan,
+                     so->neighbors, r);
 #if CONFIG_TX64X64
   if (plane > 0) assert(tx_size != TX_64X64);
 #endif
index c5d30b3c1ceecfd84f847231f810f6020749029a..f931cab568abe29de6ca3e87b62a8e88063acd14 100644 (file)
@@ -38,6 +38,10 @@ struct macroblock_plane {
   int16_t *quant_shift;
   int16_t *zbin;
   int16_t *round;
+#if CONFIG_NEW_QUANT
+  dequant_val_type_nuq *dequant_val_nuq;
+  cumbins_type_nuq *cumbins_nuq;
+#endif
 
   int64_t quant_thred[2];
 };
index 4f14490aa81ed9fe6e4c35366eacf4f291145e9c..3742f729285ef84ea226e4fd8d840e5de50d3a7b 100644 (file)
@@ -144,8 +144,15 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
   const int eob = p->eobs[block];
   const PLANE_TYPE type = pd->plane_type;
   const int default_eob = 16 << (tx_size << 1);
-  const int mul = 1 << (tx_size >= TX_32X32 ? tx_size - TX_16X16 : 0);
+  const int shift = (tx_size >= TX_32X32 ? tx_size - TX_16X16 : 0);
+  const int mul = 1 << shift;
   const int16_t *dequant_ptr = pd->dequant;
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+  const int use_rect_quant = is_rect_quant_used(&xd->mi[0].src_mi->mbmi, plane);
+#endif
+  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq;
+#endif  // CONFIG_NEW_QUANT
   const uint8_t *const band_translate = get_band_translate(tx_size);
   const scan_order *const so = get_scan(xd, tx_size, type, block);
   const int16_t *const scan = so->scan;
@@ -236,12 +243,34 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
       rate0 = tokens[next][0].rate;
       rate1 = tokens[next][1].rate;
 
-      if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
-          (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
-                                               dequant_ptr[rc != 0]))
-        shortcut = 1;
-      else
-        shortcut = 0;
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+      if (use_rect_quant) {
+        shortcut =
+            ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
+             ((abs(x) - 1) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul));
+      } else {
+        shortcut = (
+            (vp9_dequant_abscoeff_nuq(
+                abs(x), dequant_ptr[rc != 0],
+                dequant_val[band_translate[i]]) > abs(coeff[rc]) * mul) &&
+            (vp9_dequant_abscoeff_nuq(
+                abs(x) - 1, dequant_ptr[rc != 0],
+                dequant_val[band_translate[i]]) < abs(coeff[rc]) * mul));
+      }
+#else   // CONFIG_TX_SKIP
+      shortcut = (
+          (vp9_dequant_abscoeff_nuq(
+              abs(x), dequant_ptr[rc != 0],
+              dequant_val[band_translate[i]]) > abs(coeff[rc]) * mul) &&
+          (vp9_dequant_abscoeff_nuq(
+              abs(x) - 1, dequant_ptr[rc != 0],
+              dequant_val[band_translate[i]]) < abs(coeff[rc]) * mul));
+#endif  // CONFIG_TX_SKIP
+#else   // CONFIG_NEW_QUANT
+      shortcut = ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
+                  ((abs(x) - 1) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul));
+#endif  // CONFIG_NEW_QUANT
 
       if (shortcut) {
         sz = -(x < 0);
@@ -278,6 +307,39 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
       base_bits = dct_value_cost[x];
 
       if (shortcut) {
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+        if (use_rect_quant) {
+#if CONFIG_VP9_HIGHBITDEPTH
+          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+            dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
+          } else {
+            dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
+          }
+#else
+          dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        } else {
+          dx = vp9_dequant_coeff_nuq(
+              x, dequant_ptr[rc != 0],
+              dequant_val[band_translate[i]]) - coeff[rc] * mul;
+#if CONFIG_VP9_HIGHBITDEPTH
+          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+            dx >>= xd->bd - 8;
+          }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        }
+#else   // CONFIG_TX_SKIP
+        dx = vp9_dequant_coeff_nuq(
+            x, dequant_ptr[rc != 0],
+            dequant_val[band_translate[i]]) - coeff[rc] * mul;
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+          dx >>= xd->bd - 8;
+        }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_TX_SKIP
+#else   // CONFIG_NEW_QUANT
 #if CONFIG_VP9_HIGHBITDEPTH
         if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
           dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
@@ -287,6 +349,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
 #else
         dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_NEW_QUANT
         d2 = dx * dx;
       }
       tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
@@ -341,10 +404,26 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
     if (x) {
       final_eob = i;
     }
-
     qcoeff[rc] = x;
+#if CONFIG_NEW_QUANT
+#if CONFIG_TX_SKIP
+    if (use_rect_quant) {
+      dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
+    } else {
+      dqcoeff[rc] = vp9_dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0],
+                                             dequant_val[band_translate[i]]);
+      if (shift) dqcoeff[rc] = ROUND_POWER_OF_TWO(dqcoeff[rc], shift);
+      if (x < 0) dqcoeff[rc] = -dqcoeff[rc];
+    }
+#else   // CONFIG_TX_SKIP
+    dqcoeff[rc] = vp9_dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0],
+                                           dequant_val[band_translate[i]]);
+    if (shift) dqcoeff[rc] = ROUND_POWER_OF_TWO(dqcoeff[rc], shift);
+    if (x < 0) dqcoeff[rc] = -dqcoeff[rc];
+#endif  // CONFIG_TX_SKIP
+#else
     dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
-
+#endif  // CONFIG_NEW_QUANT
     next = tokens[i][best].next;
     best = best_index[i][best];
   }
@@ -518,92 +597,925 @@ static void forw_tx4x4(MACROBLOCK *x, int plane, int block,
   }
 }
 
-#if CONFIG_VP9_HIGHBITDEPTH
-static void highbd_forw_tx16x16(MACROBLOCK *x, int plane,
-                                const int16_t *src_diff, int diff_stride,
-                                tran_low_t *const coeff) {
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_forw_tx16x16(MACROBLOCK *x, int plane,  // High-bitdepth 16x16 forward transform, dispatched on get_tx_type(plane, xd).
+                                const int16_t *src_diff, int diff_stride,  // src_diff: input samples, read with stride diff_stride
+                                tran_low_t *const coeff) {  // coeff: buffer handed to the transform as its output argument
+  MACROBLOCKD *const xd = &x->e_mbd;
+  int16_t src_diff2[256];  // 16 * 16 scratch buffer used by the FLIP* branches (stride 16)
+  TX_TYPE tx_type = get_tx_type(plane, xd);
+  if (tx_type == DCT_DCT) {
+    vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
+  } else if (tx_type == FLIPADST_DCT) {  // FLIP* types: copy_flip* into src_diff2, then run the non-flip hybrid type on it
+    copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_DCT);
+  } else if (tx_type == DCT_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+    vp9_highbd_fht16x16(src_diff2, coeff, 16, DCT_ADST);
+  } else if (tx_type == FLIPADST_FLIPADST) {
+    copy_fliplrud(src_diff, diff_stride, 16, src_diff2, 16);
+    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
+  } else if (tx_type == ADST_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
+  } else if (tx_type == FLIPADST_ADST) {
+    copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
+  } else {  // every other tx_type goes to the hybrid transform directly on src_diff
+    vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+  }
+}
+
+static void highbd_forw_tx8x8(MACROBLOCK *x, int plane,  // High-bitdepth 8x8 forward transform, dispatched on get_tx_type(plane, xd).
+                              const int16_t *src_diff, int diff_stride,  // src_diff: input samples, read with stride diff_stride
+                              tran_low_t *const coeff) {  // coeff: buffer handed to the transform as its output argument
+  MACROBLOCKD *const xd = &x->e_mbd;
+  int16_t src_diff2[64];  // 8 * 8 scratch buffer used by the FLIP* branches (stride 8)
+  TX_TYPE tx_type = get_tx_type(plane, xd);
+  if (tx_type == DCT_DCT) {
+    vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
+  } else if (tx_type == FLIPADST_DCT) {  // FLIP* types: copy_flip* into src_diff2, then run the non-flip hybrid type on it
+    copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_DCT);
+  } else if (tx_type == DCT_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+    vp9_highbd_fht8x8(src_diff2, coeff, 8, DCT_ADST);
+  } else if (tx_type == FLIPADST_FLIPADST) {
+    copy_fliplrud(src_diff, diff_stride, 8, src_diff2, 8);
+    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
+  } else if (tx_type == ADST_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
+  } else if (tx_type == FLIPADST_ADST) {
+    copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
+  } else {  // every other tx_type goes to the hybrid transform directly on src_diff
+    vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+  }
+}
+
+static void highbd_forw_tx4x4(MACROBLOCK *x, int plane, int block,  // High-bitdepth 4x4 forward transform, dispatched on get_tx_type_4x4(plane, xd, block).
+                              const int16_t *src_diff, int diff_stride,  // src_diff: input samples, read with stride diff_stride
+                              tran_low_t *const coeff) {  // coeff: buffer handed to the transform as its output argument
+  MACROBLOCKD *const xd = &x->e_mbd;
+  int16_t src_diff2[16];  // 4 * 4 scratch buffer used by the FLIP* branches (stride 4)
+  TX_TYPE tx_type = get_tx_type_4x4(plane, xd, block);
+  if (tx_type == DCT_DCT) {
+    x->fwd_txm4x4(src_diff, coeff, diff_stride);  // DCT_DCT goes through the macroblock's fwd_txm4x4 function pointer
+  } else if (tx_type == FLIPADST_DCT) {  // FLIP* types: copy_flip* into src_diff2, then run the non-flip hybrid type on it
+    copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
+    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_DCT);
+  } else if (tx_type == DCT_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
+    vp9_highbd_fht4x4(src_diff2, coeff, 4, DCT_ADST);
+  } else if (tx_type == FLIPADST_FLIPADST) {
+    copy_fliplrud(src_diff, diff_stride, 4, src_diff2, 4);
+    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
+  } else if (tx_type == ADST_FLIPADST) {
+    copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
+    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
+  } else if (tx_type == FLIPADST_ADST) {
+    copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
+    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
+  } else {  // every other tx_type goes to the hybrid transform directly on src_diff
+    vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_TX
+
+#if CONFIG_NEW_QUANT
+void vp9_xform_quant_nuq(MACROBLOCK *x, int plane, int block,  // Forward-transforms one transform block of `plane` and feeds it to the *_nuq quantizer for tx_size.
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);  // transform output, then quantizer input
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // 4 samples per 4x4 block across plane_bsize
+  int i, j;
+  const int16_t *src_diff;
+#if CONFIG_TX_SKIP
+  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  int shift = mbmi->tx_skip_shift;  // forwarded to vp9_tx_identity below
+#endif
+  const uint8_t* band = get_band_translate(tx_size);  // passed to every quantizer call as its last argument
+
+  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);  // fills i, j for this block index
+  src_diff = &p->src_diff[4 * (j * diff_stride + i)];  // i, j are scaled by 4 to reach the block's first sample
+
+#if CONFIG_TX_SKIP
+  if (mbmi->tx_skip[plane != 0]) {  // index 0 for plane 0, 1 for any other plane; when set, vp9_tx_identity replaces the transform
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift);
+        vp9_quantize_64x64_nuq(coeff, 4096, x->skip_block,  // 4096 = 64 * 64 coefficients
+                               p->quant, p->quant_shift, pd->dequant,
+                               (const cumbins_type_nuq *)p->cumbins_nuq,
+                               (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                               qcoeff, dqcoeff, eob,
+                               scan_order->scan, band);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
+        vp9_quantize_32x32_nuq(coeff, 1024, x->skip_block,  // 1024 = 32 * 32 coefficients
+                               p->quant, p->quant_shift, pd->dequant,
+                               (const cumbins_type_nuq *)p->cumbins_nuq,
+                               (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                               qcoeff, dqcoeff, eob,
+                               scan_order->scan, band);
+        break;
+      case TX_16X16:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
+        vp9_quantize_nuq(coeff, 256, x->skip_block,  // 256 = 16 * 16 coefficients
+                         p->quant, p->quant_shift, pd->dequant,
+                         (const cumbins_type_nuq *)p->cumbins_nuq,
+                         (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                         qcoeff, dqcoeff, eob,
+                         scan_order->scan, band);
+        break;
+      case TX_8X8:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
+        vp9_quantize_nuq(coeff, 64, x->skip_block,  // 64 = 8 * 8 coefficients
+                         p->quant, p->quant_shift, pd->dequant,
+                         (const cumbins_type_nuq *)p->cumbins_nuq,
+                         (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                         qcoeff, dqcoeff, eob,
+                         scan_order->scan, band);
+        break;
+      case TX_4X4:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
+        vp9_quantize_nuq(coeff, 16, x->skip_block,  // 16 = 4 * 4 coefficients
+                         p->quant, p->quant_shift, pd->dequant,
+                         (const cumbins_type_nuq *)p->cumbins_nuq,
+                         (const dequant_val_type_nuq *) pd->dequant_val_nuq,
+                         qcoeff, dqcoeff, eob,
+                         scan_order->scan, band);
+      break;
+      default:
+        assert(0);
+        break;
+    }
+    return;  // tx_skip case is complete; the transform paths below are not run
+  }
+#endif  // CONFIG_TX_SKIP
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // buffers flagged high-bitdepth use the vp9_highbd_* transforms and quantizers
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                                      p->quant, p->quant_shift, pd->dequant,
+                                      (const cumbins_type_nuq *)p->cumbins_nuq,
+                                      (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                      qcoeff, dqcoeff, eob,
+                                      scan_order->scan, band);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                                      p->quant, p->quant_shift, pd->dequant,
+                                      (const cumbins_type_nuq *)p->cumbins_nuq,
+                                      (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                      qcoeff, dqcoeff, eob,
+                                      scan_order->scan, band);
+        break;
+      case TX_16X16:
+#if CONFIG_EXT_TX
+        highbd_forw_tx16x16(x, plane, src_diff, diff_stride, coeff);  // EXT_TX: transform chosen by tx_type
+#else
+        vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_nuq(coeff, 256, x->skip_block,
+                                p->quant, p->quant_shift, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+        break;
+      case TX_8X8:
+#if CONFIG_EXT_TX
+        highbd_forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_nuq(coeff, 64, x->skip_block,
+                                p->quant, p->quant_shift, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+        break;
+      case TX_4X4:
+#if CONFIG_EXT_TX
+        highbd_forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+        x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_nuq(coeff, 16, x->skip_block,
+                                p->quant, p->quant_shift, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+        break;
+      default:
+        assert(0);
+    }
+    return;  // high-bitdepth case is complete
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  switch (tx_size) {  // remaining case: non-highbd transforms and quantizers
+#if CONFIG_TX64X64
+    case TX_64X64:
+      vp9_fdct64x64(src_diff, coeff, diff_stride);
+      vp9_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+      break;
+#endif  // CONFIG_TX64X64
+    case TX_32X32:
+      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+      vp9_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+      break;
+    case TX_16X16:
+#if CONFIG_EXT_TX
+      forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct16x16(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_nuq(coeff, 256, x->skip_block,
+                       p->quant, p->quant_shift, pd->dequant,
+                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                       (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                       qcoeff, dqcoeff, eob,
+                       scan_order->scan, band);
+      break;
+    case TX_8X8:
+#if CONFIG_EXT_TX
+      forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct8x8(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_nuq(coeff, 64, x->skip_block,
+                       p->quant, p->quant_shift, pd->dequant,
+                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                       (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                       qcoeff, dqcoeff, eob,
+                       scan_order->scan, band);
+      break;
+    case TX_4X4:
+#if CONFIG_EXT_TX
+      forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+      x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_nuq(coeff, 16, x->skip_block,
+                       p->quant, p->quant_shift, pd->dequant,
+                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                       (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                       qcoeff, dqcoeff, eob,
+                       scan_order->scan, band);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+void vp9_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block,  // Same transform dispatch as vp9_xform_quant_nuq, but calls the *_fp_nuq quantizers with p->quant_fp.
+                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);  // transform output, then quantizer input
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // 4 samples per 4x4 block across plane_bsize
+  int i, j;
+  const int16_t *src_diff;
+#if CONFIG_TX_SKIP
+  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  int shift = mbmi->tx_skip_shift;  // forwarded to vp9_tx_identity below
+#endif
+  const uint8_t* band = get_band_translate(tx_size);  // passed to every quantizer call as its last argument
+
+  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);  // fills i, j for this block index
+  src_diff = &p->src_diff[4 * (j * diff_stride + i)];  // i, j are scaled by 4 to reach the block's first sample
+
+#if CONFIG_TX_SKIP
+  if (mbmi->tx_skip[plane != 0]) {  // index 0 for plane 0, 1 for any other plane; when set, vp9_tx_identity replaces the transform
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift);
+        vp9_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,  // 4096 = 64 * 64 coefficients
+                                  p->quant_fp, pd->dequant,
+                                  (const cumbins_type_nuq *)p->cumbins_nuq,
+                                  (const dequant_val_type_nuq *)
+                                      pd->dequant_val_nuq,
+                                  qcoeff, dqcoeff, eob,
+                                  scan_order->scan, band);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
+        vp9_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,  // 1024 = 32 * 32 coefficients
+                                  p->quant_fp, pd->dequant,
+                                  (const cumbins_type_nuq *)p->cumbins_nuq,
+                                  (const dequant_val_type_nuq *)
+                                      pd->dequant_val_nuq,
+                                  qcoeff, dqcoeff, eob,
+                                  scan_order->scan, band);
+        break;
+      case TX_16X16:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
+        vp9_quantize_fp_nuq(coeff, 256, x->skip_block,  // 256 = 16 * 16 coefficients
+                            p->quant_fp, pd->dequant,
+                            (const cumbins_type_nuq *)p->cumbins_nuq,
+                            (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                            qcoeff, dqcoeff, eob,
+                            scan_order->scan, band);
+        break;
+      case TX_8X8:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
+        vp9_quantize_fp_nuq(coeff, 64, x->skip_block,  // 64 = 8 * 8 coefficients
+                            p->quant_fp, pd->dequant,
+                            (const cumbins_type_nuq *)p->cumbins_nuq,
+                            (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                            qcoeff, dqcoeff, eob,
+                            scan_order->scan, band);
+        break;
+      case TX_4X4:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
+        vp9_quantize_fp_nuq(coeff, 16, x->skip_block,  // 16 = 4 * 4 coefficients
+                            p->quant_fp, pd->dequant,
+                            (const cumbins_type_nuq *)p->cumbins_nuq,
+                            (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                            qcoeff, dqcoeff, eob,
+                            scan_order->scan, band);
+        break;
+      default:
+        assert(0);
+        break;
+    }
+    return;  // tx_skip case is complete; the transform paths below are not run
+  }
+#endif  // CONFIG_TX_SKIP
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // buffers flagged high-bitdepth use the vp9_highbd_* transforms and quantizers
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                         p->quant_fp, pd->dequant,
+                                         (const cumbins_type_nuq *)
+                                             p->cumbins_nuq,
+                                         (const dequant_val_type_nuq *)
+                                             pd->dequant_val_nuq,
+                                         qcoeff, dqcoeff, eob,
+                                         scan_order->scan, band);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                         p->quant_fp, pd->dequant,
+                                         (const cumbins_type_nuq *)
+                                             p->cumbins_nuq,
+                                         (const dequant_val_type_nuq *)
+                                             pd->dequant_val_nuq,
+                                         qcoeff, dqcoeff, eob,
+                                         scan_order->scan, band);
+        break;
+      case TX_16X16:
+#if CONFIG_EXT_TX
+        highbd_forw_tx16x16(x, plane, src_diff, diff_stride, coeff);  // EXT_TX: transform chosen by tx_type
+#else
+        vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_fp_nuq(coeff, 256, x->skip_block,
+                                   p->quant_fp, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+        break;
+      case TX_8X8:
+#if CONFIG_EXT_TX
+        highbd_forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_fp_nuq(coeff, 64, x->skip_block,
+                                   p->quant_fp, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+        break;
+      case TX_4X4:
+#if CONFIG_EXT_TX
+        highbd_forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+        x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_fp_nuq(coeff, 16, x->skip_block,
+                                   p->quant_fp, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+        break;
+      default:
+        assert(0);
+    }
+    return;  // high-bitdepth case is complete
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  switch (tx_size) {  // remaining case: non-highbd transforms and quantizers
+#if CONFIG_TX64X64
+    case TX_64X64:
+      vp9_fdct64x64(src_diff, coeff, diff_stride);
+      vp9_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+      break;
+#endif  // CONFIG_TX64X64
+    case TX_32X32:
+      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+      vp9_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+      break;
+    case TX_16X16:
+#if CONFIG_EXT_TX
+      forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct16x16(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_fp_nuq(coeff, 256, x->skip_block,
+                          p->quant_fp, pd->dequant,
+                          (const cumbins_type_nuq *)p->cumbins_nuq,
+                          (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                          qcoeff, dqcoeff, eob,
+                          scan_order->scan, band);
+      break;
+    case TX_8X8:
+#if CONFIG_EXT_TX
+      forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct8x8(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_fp_nuq(coeff, 64, x->skip_block,
+                          p->quant_fp, pd->dequant,
+                          (const cumbins_type_nuq *)p->cumbins_nuq,
+                          (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                          qcoeff, dqcoeff, eob,
+                          scan_order->scan, band);
+      break;
+    case TX_4X4:
+#if CONFIG_EXT_TX
+      forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+      x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_fp_nuq(coeff, 16, x->skip_block,
+                          p->quant_fp, pd->dequant,
+                          (const cumbins_type_nuq *)p->cumbins_nuq,
+                          (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                          qcoeff, dqcoeff, eob,
+                          scan_order->scan, band);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+void vp9_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block,  // DC variant: calls the vp9_*quantize_dc_*_nuq quantizers with entry [0] of each quant/dequant table.
+                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);  // transform output, then quantizer input
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // 4 samples per 4x4 block across plane_bsize
+  int i, j;
+  const int16_t *src_diff;
+#if CONFIG_TX_SKIP
+  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  int shift = mbmi->tx_skip_shift;  // forwarded to vp9_tx_identity below
+#endif
+
+  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);  // fills i, j for this block index
+  src_diff = &p->src_diff[4 * (j * diff_stride + i)];  // i, j are scaled by 4 to reach the block's first sample
+
+#if CONFIG_TX_SKIP
+  if (mbmi->tx_skip[plane != 0]) {  // index 0 for plane 0, 1 for any other plane; when set, vp9_tx_identity replaces the transform
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift);
+        vp9_quantize_dc_64x64_nuq(coeff, x->skip_block,
+                                  p->quant[0], p->quant_shift[0],
+                                  pd->dequant[0],
+                                  p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                  qcoeff, dqcoeff, eob);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
+        vp9_quantize_dc_32x32_nuq(coeff, x->skip_block,
+                                  p->quant[0], p->quant_shift[0],
+                                  pd->dequant[0],
+                                  p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                  qcoeff, dqcoeff, eob);
+        break;
+      case TX_16X16:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
+        vp9_quantize_dc_nuq(coeff, x->skip_block,
+                            p->quant[0], p->quant_shift[0], pd->dequant[0],
+                            p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                            qcoeff, dqcoeff, eob);
+        break;
+      case TX_8X8:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
+        vp9_quantize_dc_nuq(coeff, x->skip_block,
+                            p->quant[0], p->quant_shift[0], pd->dequant[0],
+                            p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                            qcoeff, dqcoeff, eob);
+        break;
+      case TX_4X4:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
+        vp9_quantize_dc_nuq(coeff, x->skip_block,
+                            p->quant[0], p->quant_shift[0], pd->dequant[0],
+                            p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                            qcoeff, dqcoeff, eob);
+        break;
+      default:
+        assert(0);
+        break;
+    }
+    return;  // tx_skip case is complete; the transform paths below are not run
+  }
+#endif  // CONFIG_TX_SKIP
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // buffers flagged high-bitdepth use the vp9_highbd_* transforms and quantizers
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_highbd_fdct64x64_1(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_dc_64x64_nuq(coeff, x->skip_block,
+                                         p->quant[0], p->quant_shift[0],
+                                         pd->dequant[0],
+                                         p->cumbins_nuq[0],
+                                         pd->dequant_val_nuq[0],
+                                         qcoeff, dqcoeff, eob);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_dc_32x32_nuq(coeff, x->skip_block,
+                                         p->quant[0], p->quant_shift[0],
+                                         pd->dequant[0],
+                                         p->cumbins_nuq[0],
+                                         pd->dequant_val_nuq[0],
+                                         qcoeff, dqcoeff, eob);
+        break;
+      case TX_16X16:
+#if CONFIG_EXT_TX
+        highbd_forw_tx16x16(x, plane, src_diff, diff_stride, coeff);  // EXT_TX builds call the tx_type-dispatched transform here, not a *_1 function
+#else
+        vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_dc_nuq(coeff, x->skip_block,
+                                   p->quant[0], p->quant_shift[0],
+                                   pd->dequant[0],
+                                   p->cumbins_nuq[0],
+                                   pd->dequant_val_nuq[0],
+                                   qcoeff, dqcoeff, eob);
+        break;
+      case TX_8X8:
+#if CONFIG_EXT_TX
+        highbd_forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_dc_nuq(coeff, x->skip_block,
+                                   p->quant[0], p->quant_shift[0],
+                                   pd->dequant[0],
+                                   p->cumbins_nuq[0],
+                                   pd->dequant_val_nuq[0],
+                                   qcoeff, dqcoeff, eob);
+        break;
+      case TX_4X4:
+#if CONFIG_EXT_TX
+        highbd_forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+        x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+        vp9_highbd_quantize_dc_nuq(coeff, x->skip_block,
+                                   p->quant[0], p->quant_shift[0],
+                                   pd->dequant[0],
+                                   p->cumbins_nuq[0],
+                                   pd->dequant_val_nuq[0],
+                                   qcoeff, dqcoeff, eob);
+        break;
+      default:
+        assert(0);
+    }
+    return;  // high-bitdepth case is complete
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  switch (tx_size) {  // remaining case: non-highbd transforms and quantizers
+#if CONFIG_TX64X64
+    case TX_64X64:
+      vp9_fdct64x64_1(src_diff, coeff, diff_stride);
+      vp9_quantize_dc_64x64_nuq(coeff, x->skip_block,
+                                p->quant[0], p->quant_shift[0], pd->dequant[0],
+                                p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                qcoeff, dqcoeff, eob);
+      break;
+#endif  // CONFIG_TX64X64
+    case TX_32X32:
+      vp9_fdct32x32_1(src_diff, coeff, diff_stride);
+      vp9_quantize_dc_32x32_nuq(coeff, x->skip_block,
+                                p->quant[0], p->quant_shift[0], pd->dequant[0],
+                                p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                qcoeff, dqcoeff, eob);
+      break;
+    case TX_16X16:
+#if CONFIG_EXT_TX
+      forw_tx16x16(x, plane, src_diff, diff_stride, coeff);  // EXT_TX builds call forw_tx16x16 here, not a *_1 function
+#else
+      vp9_fdct16x16_1(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_dc_nuq(coeff, x->skip_block,
+                          p->quant[0], p->quant_shift[0], pd->dequant[0],
+                          p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                          qcoeff, dqcoeff, eob);
+      break;
+    case TX_8X8:
+#if CONFIG_EXT_TX
+      forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct8x8_1(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_dc_nuq(coeff, x->skip_block,
+                          p->quant[0], p->quant_shift[0], pd->dequant[0],
+                          p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                          qcoeff, dqcoeff, eob);
+      break;
+    case TX_4X4:
+#if CONFIG_EXT_TX
+      forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+      x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif
+      vp9_quantize_dc_nuq(coeff, x->skip_block,
+                          p->quant[0], p->quant_shift[0], pd->dequant[0],
+                          p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                          qcoeff, dqcoeff, eob);
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+void vp9_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
+                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
   MACROBLOCKD *const xd = &x->e_mbd;
-  int16_t src_diff2[256];
-  TX_TYPE tx_type = get_tx_type(plane, xd);
-  if (tx_type == DCT_DCT) {
-    vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
-  } else if (tx_type == FLIPADST_DCT) {
-    copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
-    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_DCT);
-  } else if (tx_type == DCT_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
-    vp9_highbd_fht16x16(src_diff2, coeff, 16, DCT_ADST);
-  } else if (tx_type == FLIPADST_FLIPADST) {
-    copy_fliplrud(src_diff, diff_stride, 16, src_diff2, 16);
-    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
-  } else if (tx_type == ADST_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
-    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
-  } else if (tx_type == FLIPADST_ADST) {
-    copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
-    vp9_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
-  } else {
-    vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  int i, j;  // raster position of the block, in units of 4 samples
+  const int16_t *src_diff;
+#if CONFIG_TX_SKIP
+  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  int shift = mbmi->tx_skip_shift;
+#endif  // CONFIG_TX_SKIP
+
+  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+
+#if CONFIG_TX_SKIP
+  if (mbmi->tx_skip[plane != 0]) {  // entry 0: plane 0, entry 1: other planes
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift);
+        vp9_quantize_dc_64x64_fp_nuq(coeff, x->skip_block,
+                                     p->quant_fp[0], pd->dequant[0],
+                                     p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                     qcoeff, dqcoeff, eob);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
+        vp9_quantize_dc_32x32_fp_nuq(coeff, x->skip_block,
+                                     p->quant_fp[0], pd->dequant[0],
+                                     p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                     qcoeff, dqcoeff, eob);
+        break;
+      case TX_16X16:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
+        vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                               p->quant_fp[0], pd->dequant[0],
+                               p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                               qcoeff, dqcoeff, eob);
+        break;
+      case TX_8X8:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
+        vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                               p->quant_fp[0], pd->dequant[0],
+                               p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                               qcoeff, dqcoeff, eob);
+        break;
+      case TX_4X4:
+        vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
+        vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                               p->quant_fp[0], pd->dequant[0],
+                               p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                               qcoeff, dqcoeff, eob);
+        break;
+      default:
+        assert(0);
+        break;
+    }
+    return;  // tx_skip path done; do not fall into the transforms below
   }
-}
+#endif  // CONFIG_TX_SKIP
 
-static void highbd_forw_tx8x8(MACROBLOCK *x, int plane,
-                              const int16_t *src_diff, int diff_stride,
-                              tran_low_t *const coeff) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int16_t src_diff2[64];
-  TX_TYPE tx_type = get_tx_type(plane, xd);
-  if (tx_type == DCT_DCT) {
-    vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
-  } else if (tx_type == FLIPADST_DCT) {
-    copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
-    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_DCT);
-  } else if (tx_type == DCT_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
-    vp9_highbd_fht8x8(src_diff2, coeff, 8, DCT_ADST);
-  } else if (tx_type == FLIPADST_FLIPADST) {
-    copy_fliplrud(src_diff, diff_stride, 8, src_diff2, 8);
-    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
-  } else if (tx_type == ADST_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
-    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
-  } else if (tx_type == FLIPADST_ADST) {
-    copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
-    vp9_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
-  } else {
-    vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    switch (tx_size) {
+#if CONFIG_TX64X64
+      case TX_64X64:
+        vp9_highbd_fdct64x64_1(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_dc_64x64_fp_nuq(coeff, x->skip_block,
+                                            p->quant_fp[0], pd->dequant[0],
+                                            p->cumbins_nuq[0],
+                                            pd->dequant_val_nuq[0],
+                                            qcoeff, dqcoeff, eob);
+        break;
+#endif  // CONFIG_TX64X64
+      case TX_32X32:
+        vp9_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
+        vp9_highbd_quantize_dc_32x32_fp_nuq(coeff, x->skip_block,
+                                            p->quant_fp[0], pd->dequant[0],
+                                            p->cumbins_nuq[0],
+                                            pd->dequant_val_nuq[0],
+                                            qcoeff, dqcoeff, eob);
+        break;
+      case TX_16X16:
+#if CONFIG_EXT_TX
+        highbd_forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
+#endif  // CONFIG_EXT_TX
+        vp9_highbd_quantize_dc_fp_nuq(coeff, x->skip_block,
+                                      p->quant_fp[0], pd->dequant[0],
+                                      p->cumbins_nuq[0],
+                                      pd->dequant_val_nuq[0],
+                                      qcoeff, dqcoeff, eob);
+        break;
+      case TX_8X8:
+#if CONFIG_EXT_TX
+        highbd_forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+        vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
+#endif  // CONFIG_EXT_TX
+        vp9_highbd_quantize_dc_fp_nuq(coeff, x->skip_block,
+                                      p->quant_fp[0], pd->dequant[0],
+                                      p->cumbins_nuq[0],
+                                      pd->dequant_val_nuq[0],
+                                      qcoeff, dqcoeff, eob);
+        break;
+      case TX_4X4:
+#if CONFIG_EXT_TX
+        highbd_forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+        x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif  // CONFIG_EXT_TX
+        vp9_highbd_quantize_dc_fp_nuq(coeff, x->skip_block,
+                                      p->quant_fp[0], pd->dequant[0],
+                                      p->cumbins_nuq[0],
+                                      pd->dequant_val_nuq[0],
+                                      qcoeff, dqcoeff, eob);
+        break;
+      default:
+        assert(0);
+    }
+    return;  // high-bitdepth path done; skip the non-highbd path below
   }
-}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 
-static void highbd_forw_tx4x4(MACROBLOCK *x, int plane, int block,
-                              const int16_t *src_diff, int diff_stride,
-                              tran_low_t *const coeff) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int16_t src_diff2[16];
-  TX_TYPE tx_type = get_tx_type_4x4(plane, xd, block);
-  if (tx_type == DCT_DCT) {
-    x->fwd_txm4x4(src_diff, coeff, diff_stride);
-  } else if (tx_type == FLIPADST_DCT) {
-    copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
-    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_DCT);
-  } else if (tx_type == DCT_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
-    vp9_highbd_fht4x4(src_diff2, coeff, 4, DCT_ADST);
-  } else if (tx_type == FLIPADST_FLIPADST) {
-    copy_fliplrud(src_diff, diff_stride, 4, src_diff2, 4);
-    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
-  } else if (tx_type == ADST_FLIPADST) {
-    copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
-    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
-  } else if (tx_type == FLIPADST_ADST) {
-    copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
-    vp9_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
-  } else {
-    vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
+  switch (tx_size) {
+#if CONFIG_TX64X64
+    case TX_64X64:
+      vp9_fdct64x64_1(src_diff, coeff, diff_stride);
+      vp9_quantize_dc_64x64_fp_nuq(coeff, x->skip_block,
+                                   p->quant_fp[0], pd->dequant[0],
+                                   p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                   qcoeff, dqcoeff, eob);
+      break;
+#endif  // CONFIG_TX64X64
+    case TX_32X32:
+      vp9_fdct32x32_1(src_diff, coeff, diff_stride);
+      vp9_quantize_dc_32x32_fp_nuq(coeff, x->skip_block,
+                                   p->quant_fp[0], pd->dequant[0],
+                                   p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                                   qcoeff, dqcoeff, eob);
+      break;
+    case TX_16X16:
+#if CONFIG_EXT_TX
+      forw_tx16x16(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct16x16_1(src_diff, coeff, diff_stride);
+#endif  // CONFIG_EXT_TX
+      vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                             p->quant_fp[0], pd->dequant[0],
+                             p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                             qcoeff, dqcoeff, eob);
+      break;
+    case TX_8X8:
+#if CONFIG_EXT_TX
+      forw_tx8x8(x, plane, src_diff, diff_stride, coeff);
+#else
+      vp9_fdct8x8_1(src_diff, coeff, diff_stride);
+#endif  // CONFIG_EXT_TX
+      vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                             p->quant_fp[0], pd->dequant[0],
+                             p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                             qcoeff, dqcoeff, eob);
+      break;
+    case TX_4X4:
+#if CONFIG_EXT_TX
+      forw_tx4x4(x, plane, block, src_diff, diff_stride, coeff);
+#else
+      x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#endif  // CONFIG_EXT_TX
+      vp9_quantize_dc_fp_nuq(coeff, x->skip_block,
+                             p->quant_fp[0], pd->dequant[0],
+                             p->cumbins_nuq[0], pd->dequant_val_nuq[0],
+                             qcoeff, dqcoeff, eob);
+      break;
+    default:
+      assert(0);
+      break;
   }
 }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // CONFIG_EXT_TX
+#endif  // CONFIG_NEW_QUANT
 
 void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
@@ -662,9 +1574,9 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
       case TX_4X4:
         vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
         vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
-                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                      pd->dequant, eob,
-                      scan_order->scan, scan_order->iscan);
+                        p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+                        pd->dequant, eob,
+                        scan_order->scan, scan_order->iscan);
       break;
       default:
         assert(0);
@@ -747,10 +1659,10 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
 #endif  // CONFIG_TX64X64
     case TX_32X32:
       fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
-      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
-                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
-                            pd->dequant, eob, scan_order->scan,
-                            scan_order->iscan);
+      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
+                            p->round_fp, p->quant_fp, p->quant_shift,
+                            qcoeff, dqcoeff, pd->dequant, eob,
+                            scan_order->scan, scan_order->iscan);
       break;
     case TX_16X16:
 #if CONFIG_EXT_TX
@@ -1181,13 +2093,27 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
     if (max_txsize_lookup[plane_bsize] == tx_size) {
       if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
         // full forward transform and quantization
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+        else
+          vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else
         if (x->quant_fp)
           vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
         else
           vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif  // CONFIG_NEW_QUANT
       } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
         // fast path forward transform and quantization
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_xform_quant_dc_fp_nuq(x, plane, block, plane_bsize, tx_size);
+        else
+          vp9_xform_quant_dc_nuq(x, plane, block, plane_bsize, tx_size);
+#else
         vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+#endif  // CONFIG_NEW_QUANT
       } else {
         // skip forward transform
         p->eobs[block] = 0;
@@ -1195,7 +2121,17 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
         return;
       }
     } else {
-      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_NEW_QUANT
+      if (x->quant_fp)
+        vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else  // NOTE(review): baseline always used vp9_xform_quant(); confirm
+      if (x->quant_fp)
+        vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif  // CONFIG_NEW_QUANT
     }
   }
 
@@ -1369,7 +2305,17 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
   dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
 
-  vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_NEW_QUANT
+  if (x->quant_fp)
+    vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+  else
+    vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else  // NOTE(review): baseline always used vp9_xform_quant(); confirm
+  if (x->quant_fp)
+    vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+  else
+    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif  // CONFIG_NEW_QUANT
 
   if (p->eobs[block] > 0) {
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -1447,8 +2393,12 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride,
                           tran_low_t *dqcoeff, struct macroblock_plane *p,
                           struct macroblockd_plane *pd,
                           const scan_order *scan_order, PREDICTION_MODE mode,
-                          int bs, int shift, int logsizeby32) {
+                          TX_SIZE tx_size, int shift, int logsizeby32) {
   int i, j, eob, temp;
+  const int bs = 4 << tx_size;
+#if CONFIG_NEW_QUANT
+  // const uint8_t* band = get_band_translate(tx_size);
+#endif
   vpx_memset(qcoeff, 0, bs * bs * sizeof(*qcoeff));
   vpx_memset(dqcoeff, 0, bs * bs * sizeof(*dqcoeff));
 
@@ -1567,6 +2517,9 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
   const int src_stride = p->src.stride;
   const int dst_stride = pd->dst.stride;
   int i, j;
+#if CONFIG_NEW_QUANT
+  const uint8_t *band = get_band_translate(tx_size);  // for *_nuq calls
+#endif  // CONFIG_NEW_QUANT
   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
   dst = &pd->dst.buf[4 * (j * dst_stride + i)];
   src = &p->src.buf[4 * (j * src_stride + i)];
@@ -1598,10 +2551,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
           vp9_subtract_block(64, 64, src_diff, diff_stride,
                              src, src_stride, dst, dst_stride);
           vp9_tx_identity(src_diff, coeff, diff_stride, 64, shift);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                      p->quant_fp, pd->dequant,
+                                      (const cumbins_type_nuq *)p->cumbins_nuq,
+                                      (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                      qcoeff, dqcoeff, eob,
+                                      scan_order->scan, band);
+          else
+            vp9_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                                   p->quant, p->quant_shift, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+#else
           vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin,
                                p->round, p->quant, p->quant_shift, qcoeff,
                                dqcoeff, pd->dequant, eob,
-                               scan_order->scan, scan_order->iscan);
+                               scan_order->scan,  scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
         }
         if (!x->skip_encode && *eob)
           vp9_tx_identity_add(dqcoeff, dst, dst_stride, 64, shift);
@@ -1623,7 +2595,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
             *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
                                   src_diff, diff_stride,
                                   coeff, qcoeff, dqcoeff, p, pd,
-                                  scan_order, mode, 32, shift, 0);
+                                  scan_order, mode, TX_32X32, shift, 0);
             break;
           }
 
@@ -1631,10 +2603,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                              src, src_stride, dst, dst_stride);
           vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
 
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                      p->quant_fp, pd->dequant,
+                                      (const cumbins_type_nuq *)p->cumbins_nuq,
+                                      (const dequant_val_type_nuq *)
+                                      pd->dequant_val_nuq,
+                                      qcoeff, dqcoeff, eob,
+                                      scan_order->scan, band);
+          else
+            vp9_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                                   p->quant, p->quant_shift, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+#else
           vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                p->round, p->quant, p->quant_shift, qcoeff,
                                dqcoeff, pd->dequant, eob,
                                scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
         }
         if (!x->skip_encode && *eob) {
           vp9_tx_identity_add(dqcoeff, dst, dst_stride, 32, shift);
@@ -1656,17 +2647,36 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
             *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
                                   src_diff, diff_stride,
                                   coeff, qcoeff, dqcoeff, p, pd,
-                                  scan_order, mode, 16, shift, -1);
+                                  scan_order, mode, TX_16X16, shift, -1);
             break;
           }
 
           vp9_subtract_block(16, 16, src_diff, diff_stride,
                              src, src_stride, dst, dst_stride);
           vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_quantize_fp_nuq(coeff, 256, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+          else
+            vp9_quantize_nuq(coeff, 256, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)
+                             pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+#else
           vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                          p->quant, p->quant_shift, qcoeff, dqcoeff,
                          pd->dequant, eob, scan_order->scan,
                          scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
         }
         if (!x->skip_encode && *eob) {
           vp9_tx_identity_add(dqcoeff, dst, dst_stride, 16, shift);
@@ -1688,17 +2698,36 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
             *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
                                   src_diff, diff_stride,
                                   coeff, qcoeff, dqcoeff, p, pd,
-                                  scan_order, mode, 8, shift, -1);
+                                  scan_order, mode, TX_8X8, shift, -1);
             break;
           }
 
           vp9_subtract_block(8, 8, src_diff, diff_stride,
                              src, src_stride, dst, dst_stride);
           vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_quantize_fp_nuq(coeff, 64, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+          else
+            vp9_quantize_nuq(coeff, 64, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)
+                             pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+#else
           vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                          p->quant, p->quant_shift, qcoeff, dqcoeff,
                          pd->dequant, eob, scan_order->scan,
                          scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
         }
         if (!x->skip_encode && *eob) {
           vp9_tx_identity_add(dqcoeff, dst, dst_stride, 8, shift);
@@ -1722,17 +2751,36 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
             *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
                                   src_diff, diff_stride,
                                   coeff, qcoeff, dqcoeff, p, pd,
-                                  scan_order, mode, 4, shift, -1);
+                                  scan_order, mode, TX_4X4, shift, -1);
             break;
           }
 
           vp9_subtract_block(4, 4, src_diff, diff_stride,
                              src, src_stride, dst, dst_stride);
           vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_quantize_fp_nuq(coeff, 16, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+          else
+            vp9_quantize_nuq(coeff, 16, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)
+                             pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+#else
           vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                          p->quant, p->quant_shift, qcoeff, dqcoeff,
                          pd->dequant, eob, scan_order->scan,
                          scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
         }
 
         if (!x->skip_encode && *eob) {
@@ -1767,10 +2815,32 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
           vp9_highbd_subtract_block(64, 64, src_diff, diff_stride,
                                     src, src_stride, dst, dst_stride, xd->bd);
           vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_highbd_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                             p->quant_fp, pd->dequant,
+                                             (const cumbins_type_nuq *)
+                                             p->cumbins_nuq,
+                                             (const dequant_val_type_nuq *)
+                                             pd->dequant_val_nuq,
+                                             qcoeff, dqcoeff, eob,
+                                             scan_order->scan,
+                                             band);
+          else
+            vp9_highbd_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                                          p->quant, p->quant_shift, pd->dequant,
+                                          (const cumbins_type_nuq *)
+                                          p->cumbins_nuq,
+                                          (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                          qcoeff, dqcoeff, eob,
+                                          scan_order->scan, band);
+#else
           vp9_highbd_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin,
                                       p->round, p->quant, p->quant_shift,
                                       qcoeff, dqcoeff, pd->dequant, eob,
                                       scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
           if (!x->skip_encode && *eob) {
             vp9_highbd_idct64x64_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
           }
@@ -1791,10 +2861,32 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
           vp9_highbd_subtract_block(32, 32, src_diff, diff_stride,
                                     src, src_stride, dst, dst_stride, xd->bd);
           highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_highbd_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                             p->quant_fp, pd->dequant,
+                                             (const cumbins_type_nuq *)
+                                             p->cumbins_nuq,
+                                             (const dequant_val_type_nuq *)
+                                             pd->dequant_val_nuq,
+                                             qcoeff, dqcoeff, eob,
+                                             scan_order->scan,
+                                             band);
+          else
+            vp9_highbd_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                                          p->quant, p->quant_shift, pd->dequant,
+                                          (const cumbins_type_nuq *)
+                                          p->cumbins_nuq,
+                                          (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                          qcoeff, dqcoeff, eob,
+                                          scan_order->scan, band);
+#else
           vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                       p->round, p->quant, p->quant_shift,
                                       qcoeff, dqcoeff, pd->dequant, eob,
                                       scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
         }
         if (!x->skip_encode && *eob) {
           vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
@@ -1815,10 +2907,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
           vp9_highbd_subtract_block(16, 16, src_diff, diff_stride,
                                     src, src_stride, dst, dst_stride, xd->bd);
           vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_highbd_quantize_fp_nuq(coeff, 256, x->skip_block,
+                                       p->quant_fp, pd->dequant,
+                                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                                       (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                       qcoeff, dqcoeff, eob,
+                                       scan_order->scan, band);
+          else
+            vp9_highbd_quantize_nuq(coeff, 256, x->skip_block,
+                                    p->quant, p->quant_shift, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+#else
           vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
                                 pd->dequant, eob,
                                 scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
         }
         if (!x->skip_encode && *eob) {
           vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride,
@@ -1840,10 +2951,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
           vp9_highbd_subtract_block(8, 8, src_diff, diff_stride,
                                     src, src_stride, dst, dst_stride, xd->bd);
           vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_highbd_quantize_fp_nuq(coeff, 64, x->skip_block,
+                                       p->quant_fp, pd->dequant,
+                                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                                       (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                       qcoeff, dqcoeff, eob,
+                                       scan_order->scan, band);
+          else
+            vp9_highbd_quantize_nuq(coeff, 64, x->skip_block,
+                                    p->quant, p->quant_shift, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+#else
           vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
                                 pd->dequant, eob,
                                 scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
         }
         if (!x->skip_encode && *eob) {
           vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
@@ -1869,10 +2999,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
             vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
           else
             x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+          if (x->quant_fp)
+            vp9_highbd_quantize_fp_nuq(coeff, 16, x->skip_block,
+                                       p->quant_fp, pd->dequant,
+                                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                                       (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                       qcoeff, dqcoeff, eob,
+                                       scan_order->scan, band);
+          else
+            vp9_highbd_quantize_nuq(coeff, 16, x->skip_block,
+                                    p->quant, p->quant_shift, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+#else
           vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
                                 pd->dequant, eob,
                                 scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
         }
 
         if (!x->skip_encode && *eob) {
@@ -1913,10 +3062,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
         vp9_subtract_block(64, 64, src_diff, diff_stride,
                            src, src_stride, dst, dst_stride);
         vp9_fdct64x64(src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_quantize_64x64_fp_nuq(coeff, 4096, x->skip_block,
+                                    p->quant_fp, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+        else
+          vp9_quantize_64x64_nuq(coeff, 4096, x->skip_block,
+                                 p->quant, p->quant_shift, pd->dequant,
+                                 (const cumbins_type_nuq *)p->cumbins_nuq,
+                                 (const dequant_val_type_nuq *)
+                                 pd->dequant_val_nuq,
+                                 qcoeff, dqcoeff, eob,
+                                 scan_order->scan, band);
+#else
         vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, scan_order->scan,
                              scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
       }
       if (!x->skip_encode && *eob)
         vp9_idct64x64_add(dqcoeff, dst, dst_stride, *eob);
@@ -1936,10 +3104,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
         vp9_subtract_block(32, 32, src_diff, diff_stride,
                            src, src_stride, dst, dst_stride);
         fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_quantize_32x32_fp_nuq(coeff, 1024, x->skip_block,
+                                    p->quant_fp, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+        else
+          vp9_quantize_32x32_nuq(coeff, 1024, x->skip_block,
+                                 p->quant, p->quant_shift, pd->dequant,
+                                 (const cumbins_type_nuq *)p->cumbins_nuq,
+                                 (const dequant_val_type_nuq *)
+                                 pd->dequant_val_nuq,
+                                 qcoeff, dqcoeff, eob,
+                                 scan_order->scan, band);
+#else
         vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, scan_order->scan,
                              scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
       }
       if (!x->skip_encode && *eob)
         vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
@@ -1959,10 +3146,27 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
         vp9_subtract_block(16, 16, src_diff, diff_stride,
                            src, src_stride, dst, dst_stride);
         vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_quantize_fp_nuq(coeff, 256, x->skip_block,
+                              p->quant_fp, pd->dequant,
+                              (const cumbins_type_nuq *)p->cumbins_nuq,
+                              (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                              qcoeff, dqcoeff, eob,
+                              scan_order->scan, band);
+        else
+          vp9_quantize_nuq(coeff, 256, x->skip_block,
+                           p->quant, p->quant_shift, pd->dequant,
+                           (const cumbins_type_nuq *)p->cumbins_nuq,
+                           (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                           qcoeff, dqcoeff, eob,
+                           scan_order->scan, band);
+#else
         vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                        p->quant, p->quant_shift, qcoeff, dqcoeff,
                        pd->dequant, eob, scan_order->scan,
                        scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
       }
       if (!x->skip_encode && *eob)
         vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
@@ -1982,10 +3186,27 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
         vp9_subtract_block(8, 8, src_diff, diff_stride,
                            src, src_stride, dst, dst_stride);
         vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_quantize_fp_nuq(coeff, 64, x->skip_block,
+                              p->quant_fp, pd->dequant,
+                              (const cumbins_type_nuq *)p->cumbins_nuq,
+                              (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                              qcoeff, dqcoeff, eob,
+                              scan_order->scan, band);
+        else
+          vp9_quantize_nuq(coeff, 64, x->skip_block,
+                           p->quant, p->quant_shift, pd->dequant,
+                           (const cumbins_type_nuq *)p->cumbins_nuq,
+                           (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                           qcoeff, dqcoeff, eob,
+                           scan_order->scan, band);
+#else
         vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                        p->quant_shift, qcoeff, dqcoeff,
                        pd->dequant, eob, scan_order->scan,
                        scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
       }
       if (!x->skip_encode && *eob)
         vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
@@ -2009,10 +3230,27 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
           vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
         else
           x->fwd_txm4x4(src_diff, coeff, diff_stride);
+#if CONFIG_NEW_QUANT
+        if (x->quant_fp)
+          vp9_quantize_fp_nuq(coeff, 16, x->skip_block,
+                              p->quant_fp, pd->dequant,
+                              (const cumbins_type_nuq *)p->cumbins_nuq,
+                              (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                              qcoeff, dqcoeff, eob,
+                              scan_order->scan, band);
+        else
+          vp9_quantize_nuq(coeff, 16, x->skip_block,
+                           p->quant, p->quant_shift, pd->dequant,
+                           (const cumbins_type_nuq *)p->cumbins_nuq,
+                           (const dequant_val_type_nuq *)pd->dequant_val_nuq,
+                           qcoeff, dqcoeff, eob,
+                           scan_order->scan, band);
+#else
         vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                        p->quant_shift, qcoeff, dqcoeff,
                        pd->dequant, eob, scan_order->scan,
                        scan_order->iscan);
+#endif  // CONFIG_NEW_QUANT
       }
 
       if (!x->skip_encode && *eob) {
index 8d11c35400f281c8f30ae7b1a3b77c7c6c921462..6974871d8be1aab0ae61a0d240fa212a276b7533 100644 (file)
@@ -29,6 +29,16 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
 void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+#if CONFIG_NEW_QUANT  // NUQ variants; same parameter list as vp9_xform_quant above
+void vp9_xform_quant_nuq(MACROBLOCK *x, int plane, int block,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block,
+                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block,
+                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
+                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+#endif  // CONFIG_NEW_QUANT
 
 void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
 
index 6353728be478e19d0faeec96db5f2badd5333861..1f12c56f262d32b163eb4142c2bb2ea426682609 100644 (file)
@@ -40,6 +40,357 @@ void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
   *eob_ptr = eob + 1;
 }
 
+#if CONFIG_NEW_QUANT
+static INLINE int quantize_coeff_nuq(const tran_low_t coeffv,  // Quantize one coefficient with non-uniform low bins.
+                                     const int16_t quant,
+                                     const int16_t quant_shift,
+                                     const int16_t dequant,
+                                     const tran_low_t *cumbins_ptr,  // NUQ_KNOTES increasing thresholds (presumably cumulative bin widths)
+                                     const tran_low_t *dequant_val,  // passed through to vp9_dequant_abscoeff_nuq
+                                     tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                     tran_low_t *dqcoeff_ptr) {  // out: signed reconstruction value
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1, assuming an arithmetic shift on 32-bit int
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // branchless |coeff|
+  int i, q;  // q is assigned by exactly one of the two paths below
+  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+  for (i = 0; i < NUQ_KNOTES; i++) {  // find the first threshold the magnitude falls under
+    if (tmp < cumbins_ptr[i]) {
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTES) {  // no break: magnitude is at or past the last threshold
+    tmp -= cumbins_ptr[NUQ_KNOTES - 1];
+    q = NUQ_KNOTES + (((((tmp * quant) >> 16) + tmp) * quant_shift) >> 16);  // levels past the knots via quant/quant_shift
+  }
+  if (q) {
+    *dqcoeff_ptr =
+        vp9_dequant_abscoeff_nuq(q, dequant, dequant_val);
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the sign of coeff
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);  // 1 iff the coefficient quantized to a nonzero level
+}
+
+static INLINE int quantize_coeff_bigtx_nuq(const tran_low_t coeffv,  // As quantize_coeff_nuq, with scaling by logsizeby32.
+                                           const int16_t quant,
+                                           const int16_t quant_shift,
+                                           const int16_t dequant,
+                                           const tran_low_t *cumbins_ptr,
+                                           const tran_low_t *dequant_val,
+                                           tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                           tran_low_t *dqcoeff_ptr,  // out: signed, scaled reconstruction value
+                                           int logsizeby32) {  // 0 from the 32x32 callers, 1 from the 64x64 callers in this file
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1, assuming an arithmetic shift on 32-bit int
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // branchless |coeff|
+  int i, q;  // q is assigned by exactly one of the two paths below
+  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+  for (i = 0; i < NUQ_KNOTES; i++) {
+    if (tmp < ROUND_POWER_OF_TWO(cumbins_ptr[i], 1 + logsizeby32)) {  // thresholds reduced by 1 + logsizeby32 bits
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTES) {  // no break: magnitude is at or past the last scaled threshold
+    tmp -= ROUND_POWER_OF_TWO(cumbins_ptr[NUQ_KNOTES - 1], 1 + logsizeby32);
+    q = NUQ_KNOTES +
+        (((((tmp * quant) >> 16) + tmp) * quant_shift) >> (15 - logsizeby32));  // final shift is smaller than the 16 used by quantize_coeff_nuq
+  }
+  if (q) {
+    *dqcoeff_ptr =
+         ROUND_POWER_OF_TWO(vp9_dequant_abscoeff_nuq(q, dequant, dequant_val),
+                            1 + logsizeby32);
+    // Note: the scaling above uses ROUND_POWER_OF_TWO rather than a plain
+    // ">> (1 + logsizeby32)" shift of the dequantized value.
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the sign of coeff
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);  // 1 iff the coefficient quantized to a nonzero level
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE int highbd_quantize_coeff_nuq(const tran_low_t coeffv,  // quantize_coeff_nuq with 64-bit intermediate math.
+                                            const int16_t quant,
+                                            const int16_t quant_shift,
+                                            const int16_t dequant,
+                                            const tran_low_t *cumbins_ptr,
+                                            const tran_low_t *dequant_val,
+                                            tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                            tran_low_t *dqcoeff_ptr) {  // out: signed reconstruction value
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1, assuming an arithmetic shift on 32-bit int
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // branchless |coeff|
+  int i, q;  // q is assigned by exactly one of the two paths below
+  int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);  // int64_t, so the products below are computed in 64 bits
+  for (i = 0; i < NUQ_KNOTES; i++) {  // find the first threshold the magnitude falls under
+    if (tmp < cumbins_ptr[i]) {
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTES) {  // no break: magnitude is at or past the last threshold
+    tmp -= cumbins_ptr[NUQ_KNOTES - 1];
+    q = NUQ_KNOTES + (((((tmp * quant) >> 16) + tmp) * quant_shift) >> 16);  // 64-bit result narrowed to int on assignment
+  }
+  if (q) {
+    *dqcoeff_ptr =
+        vp9_dequant_abscoeff_nuq(q, dequant, dequant_val);
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the sign of coeff
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);  // 1 iff the coefficient quantized to a nonzero level
+}
+
+static INLINE int highbd_quantize_coeff_bigtx_nuq(const tran_low_t coeffv,  // 64-bit-math form of quantize_coeff_bigtx_nuq.
+                                                  const int16_t quant,
+                                                  const int16_t quant_shift,
+                                                  const int16_t dequant,
+                                                  const tran_low_t *cumbins_ptr,
+                                                  const tran_low_t *dequant_val,
+                                                  tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                                  tran_low_t *dqcoeff_ptr,  // out: signed, scaled reconstruction value
+                                                  int logsizeby32) {  // presumably 0 for 32x32 and 1 for 64x64 -- confirm at callers
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1, assuming an arithmetic shift on 32-bit int
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // branchless |coeff|
+  int i, q;  // q is assigned by exactly one of the two paths below
+  int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);  // int64_t, so the products below are computed in 64 bits
+  for (i = 0; i < NUQ_KNOTES; i++) {
+    if (tmp < ROUND_POWER_OF_TWO(cumbins_ptr[i], 1 + logsizeby32)) {  // thresholds reduced by 1 + logsizeby32 bits
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTES) {  // no break: magnitude is at or past the last scaled threshold
+    tmp -= ROUND_POWER_OF_TWO(cumbins_ptr[NUQ_KNOTES - 1], 1 + logsizeby32);
+    q = NUQ_KNOTES +
+        (((((tmp * quant) >> 16) + tmp) * quant_shift) >> (15 - logsizeby32));  // 64-bit result narrowed to int on assignment
+  }
+  if (q) {
+    *dqcoeff_ptr =
+        ROUND_POWER_OF_TWO(vp9_dequant_abscoeff_nuq(q, dequant, dequant_val),
+                           1 + logsizeby32);
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the sign of coeff
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);  // 1 iff the coefficient quantized to a nonzero level
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+static INLINE int quantize_coeff_fp_nuq(const tran_low_t coeffv,  // Like quantize_coeff_nuq but with a single multiply by quant.
+                                        const int16_t quant,
+                                        const int16_t dequant,
+                                        const tran_low_t *cumbins_ptr,
+                                        const tran_low_t *dequant_val,
+                                        tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                        tran_low_t *dqcoeff_ptr) {  // out: signed reconstruction value
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1, assuming an arithmetic shift on 32-bit int
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // branchless |coeff|
+  int i, q;  // q is assigned by exactly one of the two paths below
+  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+  for (i = 0; i < NUQ_KNOTES; i++) {  // find the first threshold the magnitude falls under
+    if (tmp < cumbins_ptr[i]) {
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTES) {  // no break: magnitude is at or past the last threshold
+    q = NUQ_KNOTES +
+        ((((int64_t)tmp - cumbins_ptr[NUQ_KNOTES - 1]) * quant) >> 16);  // cast makes the product 64-bit; no quant_shift stage
+  }
+  if (q) {
+    *dqcoeff_ptr =
+        vp9_dequant_abscoeff_nuq(q, dequant, dequant_val);
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the sign of coeff
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);  // 1 iff the coefficient quantized to a nonzero level
+}
+
+static INLINE int quantize_coeff_bigtx_fp_nuq(const tran_low_t coeffv,  // quantize_coeff_fp_nuq with scaling by logsizeby32.
+                                              const int16_t quant,
+                                              const int16_t dequant,
+                                              const tran_low_t *cumbins_ptr,
+                                              const tran_low_t *dequant_val,
+                                              tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                              tran_low_t *dqcoeff_ptr,  // out: signed, scaled reconstruction value
+                                              int logsizeby32) {  // 0 from the 32x32 callers, 1 from the 64x64 callers in this file
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1, assuming an arithmetic shift on 32-bit int
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // branchless |coeff|
+  int i, q;  // q is assigned by exactly one of the two paths below
+  int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+  for (i = 0; i < NUQ_KNOTES; i++) {
+    if (tmp < ROUND_POWER_OF_TWO(cumbins_ptr[i], 1 + logsizeby32)) {  // thresholds reduced by 1 + logsizeby32 bits
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTES) {  // no break: magnitude is at or past the last scaled threshold
+    q = NUQ_KNOTES +
+        ((((int64_t)tmp - ROUND_POWER_OF_TWO(cumbins_ptr[NUQ_KNOTES - 1],
+                                             1 + logsizeby32)) * quant) >>
+         (15 - logsizeby32));  // cast makes the product 64-bit
+  }
+  if (q) {
+    *dqcoeff_ptr =
+        ROUND_POWER_OF_TWO(vp9_dequant_abscoeff_nuq(q, dequant, dequant_val),
+                           1 + logsizeby32);
+    // Note: the scaling above uses ROUND_POWER_OF_TWO rather than a plain
+    // ">> (1 + logsizeby32)" shift of the dequantized value.
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the sign of coeff
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);  // 1 iff the coefficient quantized to a nonzero level
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE int highbd_quantize_coeff_fp_nuq(const tran_low_t coeffv,  // quantize_coeff_fp_nuq with a 64-bit magnitude.
+                                               const int16_t quant,
+                                               const int16_t dequant,
+                                               const tran_low_t *cumbins_ptr,
+                                               const tran_low_t *dequant_val,
+                                               tran_low_t *qcoeff_ptr,  // out: signed quantized level
+                                               tran_low_t *dqcoeff_ptr) {  // out: signed reconstruction value
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1, assuming an arithmetic shift on 32-bit int
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // branchless |coeff|
+  int i, q;  // q is assigned by exactly one of the two paths below
+  int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);  // int64_t, so the product below is computed in 64 bits
+  for (i = 0; i < NUQ_KNOTES; i++) {  // find the first threshold the magnitude falls under
+    if (tmp < cumbins_ptr[i]) {
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTES) {  // no break: magnitude is at or past the last threshold
+    q = NUQ_KNOTES +
+        (((tmp - cumbins_ptr[NUQ_KNOTES - 1]) * quant) >> 16);  // single multiply, no quant_shift stage
+  }
+  if (q) {
+    *dqcoeff_ptr =
+        vp9_dequant_abscoeff_nuq(q, dequant, dequant_val);
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the sign of coeff
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);  // 1 iff the coefficient quantized to a nonzero level
+}
+
+static INLINE int highbd_quantize_coeff_bigtx_fp_nuq(  // 64-bit-magnitude form of quantize_coeff_bigtx_fp_nuq.
+    const tran_low_t coeffv,
+    const int16_t quant,
+    const int16_t dequant,
+    const tran_low_t *cumbins_ptr,
+    const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr,  // out: signed quantized level
+    tran_low_t *dqcoeff_ptr,  // out: signed, scaled reconstruction value
+    int logsizeby32) {  // presumably 0 for 32x32 and 1 for 64x64 -- confirm at callers
+  const int coeff = coeffv;
+  const int coeff_sign = (coeff >> 31);  // 0 or -1, assuming an arithmetic shift on 32-bit int
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // branchless |coeff|
+  int i, q;  // q is assigned by exactly one of the two paths below
+  int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX);  // int64_t, so the product below is computed in 64 bits
+  for (i = 0; i < NUQ_KNOTES; i++) {
+    if (tmp < ROUND_POWER_OF_TWO(cumbins_ptr[i], 1 + logsizeby32)) {  // thresholds reduced by 1 + logsizeby32 bits
+      q = i;
+      break;
+    }
+  }
+  if (i == NUQ_KNOTES) {  // no break: magnitude is at or past the last scaled threshold
+    q = NUQ_KNOTES +
+        (((tmp - ROUND_POWER_OF_TWO(cumbins_ptr[NUQ_KNOTES - 1],
+                                    1 + logsizeby32)) * quant) >>
+         (15 - logsizeby32));  // 64-bit result narrowed to int on assignment
+  }
+  if (q) {
+    *dqcoeff_ptr =
+        ROUND_POWER_OF_TWO(vp9_dequant_abscoeff_nuq(q, dequant, dequant_val),
+                           1 + logsizeby32);
+    *qcoeff_ptr  = (q ^ coeff_sign) - coeff_sign;  // reapply the sign of coeff
+    *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr;
+  } else {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+  }
+  return (q != 0);  // 1 iff the coefficient quantized to a nonzero level
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+void vp9_quantize_dc_nuq(const tran_low_t *coeff_ptr,  // NUQ-quantize only the coefficient at index 0.
+                         int skip_block,
+                         const int16_t quant,
+                         const int16_t quant_shift,
+                         const int16_t dequant,
+                         const tran_low_t *cumbins_ptr,
+                         const tran_low_t *dequant_val,
+                         tran_low_t *qcoeff_ptr,
+                         tran_low_t *dqcoeff_ptr,
+                         uint16_t *eob_ptr) {  // out: 1 if index 0 quantized to nonzero, else 0
+  int eob = -1;  // -1 so that eob + 1 is 0 when nothing nonzero was produced
+  if (!skip_block) {  // when skip_block is set, qcoeff/dqcoeff are not written here
+    const int rc = 0;
+    if (quantize_coeff_nuq(coeff_ptr[rc],
+                           quant,
+                           quant_shift,
+                           dequant,
+                           cumbins_ptr,
+                           dequant_val,
+                           qcoeff_ptr,
+                           dqcoeff_ptr))
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+
+void vp9_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,  // "fp" NUQ quantization of only the coefficient at index 0.
+                            int skip_block,
+                            const int16_t quant,
+                            const int16_t dequant,
+                            const tran_low_t *cumbins_ptr,
+                            const tran_low_t *dequant_val,
+                            tran_low_t *qcoeff_ptr,
+                            tran_low_t *dqcoeff_ptr,
+                            uint16_t *eob_ptr) {  // out: 1 if index 0 quantized to nonzero, else 0
+  int eob = -1;  // -1 so that eob + 1 is 0 when nothing nonzero was produced
+  if (!skip_block) {  // when skip_block is set, qcoeff/dqcoeff are not written here
+    const int rc = 0;
+    if (quantize_coeff_fp_nuq(coeff_ptr[rc],
+                              quant,
+                              dequant,
+                              cumbins_ptr,
+                              dequant_val,
+                              qcoeff_ptr,
+                              dqcoeff_ptr))
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
 #if CONFIG_VP9_HIGHBITDEPTH
 void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
                             const int16_t *round_ptr, const int16_t quant,
@@ -63,7 +414,59 @@ void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
   }
   *eob_ptr = eob + 1;
 }
-#endif
+
+#if CONFIG_NEW_QUANT
+void vp9_highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr,  // As vp9_quantize_dc_nuq, via the highbd helper.
+                                int skip_block,
+                                const int16_t quant,
+                                const int16_t quant_shift,
+                                const int16_t dequant,
+                                const tran_low_t *cumbins_ptr,
+                                const tran_low_t *dequant_val,
+                                tran_low_t *qcoeff_ptr,
+                                tran_low_t *dqcoeff_ptr,
+                                uint16_t *eob_ptr) {  // out: 1 if index 0 quantized to nonzero, else 0
+  int eob = -1;  // -1 so that eob + 1 is 0 when nothing nonzero was produced
+  if (!skip_block) {  // when skip_block is set, qcoeff/dqcoeff are not written here
+    const int rc = 0;
+    if (highbd_quantize_coeff_nuq(coeff_ptr[rc],
+                                  quant,
+                                  quant_shift,
+                                  dequant,
+                                  cumbins_ptr,
+                                  dequant_val,
+                                  qcoeff_ptr,
+                                  dqcoeff_ptr))
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+
+void vp9_highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,  // As vp9_quantize_dc_fp_nuq, via the highbd helper.
+                                   int skip_block,
+                                   const int16_t quant,
+                                   const int16_t dequant,
+                                   const tran_low_t *cumbins_ptr,
+                                   const tran_low_t *dequant_val,
+                                   tran_low_t *qcoeff_ptr,
+                                   tran_low_t *dqcoeff_ptr,
+                                   uint16_t *eob_ptr) {  // out: 1 if index 0 quantized to nonzero, else 0
+  int eob = -1;  // -1 so that eob + 1 is 0 when nothing nonzero was produced
+  if (!skip_block) {  // when skip_block is set, qcoeff/dqcoeff are not written here
+    const int rc = 0;
+    if (highbd_quantize_coeff_fp_nuq(coeff_ptr[rc],
+                                     quant,
+                                     dequant,
+                                     cumbins_ptr,
+                                     dequant_val,
+                                     qcoeff_ptr,
+                                     dqcoeff_ptr))
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 
 static INLINE void quantize_dc_bigtx(const tran_low_t *coeff_ptr,
                                      int skip_block,
@@ -101,6 +504,60 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 0);
 }
 
+#if CONFIG_NEW_QUANT
+void vp9_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,  // NUQ-quantize index 0 via the bigtx helper, logsizeby32 = 0.
+                               int skip_block,
+                               const int16_t quant,
+                               const int16_t quant_shift,
+                               const int16_t dequant,
+                               const tran_low_t *cumbins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr) {  // out: 1 if index 0 quantized to nonzero, else 0
+  int eob = -1;  // -1 so that eob + 1 is 0 when nothing nonzero was produced
+  if (!skip_block) {  // when skip_block is set, qcoeff/dqcoeff are not written here
+    const int rc = 0;
+    if (quantize_coeff_bigtx_nuq(coeff_ptr[rc],
+                                 quant,
+                                 quant_shift,
+                                 dequant,
+                                 cumbins_ptr,
+                                 dequant_val,
+                                 qcoeff_ptr,
+                                 dqcoeff_ptr,
+                                 0))  // logsizeby32 = 0
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+
+void vp9_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,  // "fp" NUQ of index 0 via the bigtx helper, logsizeby32 = 0.
+                                  int skip_block,
+                                  const int16_t quant,
+                                  const int16_t dequant,
+                                  const tran_low_t *cumbins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr) {  // out: 1 if index 0 quantized to nonzero, else 0
+  int eob = -1;  // -1 so that eob + 1 is 0 when nothing nonzero was produced
+  if (!skip_block) {  // when skip_block is set, qcoeff/dqcoeff are not written here
+    const int rc = 0;
+    if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc],
+                                    quant,
+                                    dequant,
+                                    cumbins_ptr,
+                                    dequant_val,
+                                    qcoeff_ptr,
+                                    dqcoeff_ptr,
+                                    0))  // logsizeby32 = 0
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
 #if CONFIG_TX64X64
 void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
                            const int16_t *round_ptr, const int16_t quant,
@@ -109,6 +566,60 @@ void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
   quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
                     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 1);
 }
+
+#if CONFIG_NEW_QUANT
+void vp9_quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr,  // NUQ-quantize index 0 via the bigtx helper, logsizeby32 = 1.
+                               int skip_block,
+                               const int16_t quant,
+                               const int16_t quant_shift,
+                               const int16_t dequant,
+                               const tran_low_t *cumbins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr) {  // out: 1 if index 0 quantized to nonzero, else 0
+  int eob = -1;  // -1 so that eob + 1 is 0 when nothing nonzero was produced
+  if (!skip_block) {  // when skip_block is set, qcoeff/dqcoeff are not written here
+    const int rc = 0;
+    if (quantize_coeff_bigtx_nuq(coeff_ptr[rc],
+                                 quant,
+                                 quant_shift,
+                                 dequant,
+                                 cumbins_ptr,
+                                 dequant_val,
+                                 qcoeff_ptr,
+                                 dqcoeff_ptr,
+                                 1))  // logsizeby32 = 1
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+
+void vp9_quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr,  // "fp" NUQ of index 0 via the bigtx helper, logsizeby32 = 1.
+                                  int skip_block,
+                                  const int16_t quant,
+                                  const int16_t dequant,
+                                  const tran_low_t *cumbins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr) {  // out: 1 if index 0 quantized to nonzero, else 0
+  int eob = -1;  // -1 so that eob + 1 is 0 when nothing nonzero was produced
+  if (!skip_block) {  // when skip_block is set, qcoeff/dqcoeff are not written here
+    const int rc = 0;
+    if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc],
+                                    quant,
+                                    dequant,
+                                    cumbins_ptr,
+                                    dequant_val,
+                                    qcoeff_ptr,
+                                    dqcoeff_ptr,
+                                    1))  // logsizeby32 = 1
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+#endif  // CONFIG_NEW_QUANT
 #endif  // CONFIG_TX64X64
 
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -128,11 +639,11 @@ static INLINE void highbd_quantize_dc_bigtx(const tran_low_t *coeff_ptr,
     const int coeff_sign = (coeff >> 31);
     const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
 
-    int64_t tmp =
-        clamp(abs_coeff +
-              ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1 + logsizeby32),
-              INT32_MIN, INT32_MAX);
-    tmp = (tmp * quant) >> (15 - logsizeby32);
+    // Widen to 64 bits before multiplying by quant so the product cannot
+    // overflow a 32-bit intermediate (abs_coeff is an int and the clamp
+    // bounds are INT32_MIN/INT32_MAX).
+    const int64_t tmp =
+        ((int64_t)clamp(abs_coeff +
+                        ROUND_POWER_OF_TWO(round_ptr[rc != 0],
+                                           1 + logsizeby32),
+                        INT32_MIN, INT32_MAX) *
+         quant) >> (15 - logsizeby32);
     qcoeff_ptr[rc]  = (tmp ^ coeff_sign) - coeff_sign;
     dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / (2 << logsizeby32);
     if (tmp)
@@ -153,6 +664,60 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
                            qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 0);
 }
 
+#if CONFIG_NEW_QUANT
+// DC-only entry point of the high-bitdepth non-uniform quantizer (NUQ) for
+// 32x32 transforms. Only coeff_ptr[0] is looked at: it is passed to
+// highbd_quantize_coeff_bigtx_nuq() with a final argument of 0 (presumably
+// logsizeby32, i.e. the 32x32 case). *eob_ptr is set to 1 when that helper
+// returns nonzero and to 0 otherwise. When skip_block is nonzero the helper
+// is never called and *eob_ptr is 0.
+void vp9_highbd_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
+                                      int skip_block,
+                                      const int16_t quant,
+                                      const int16_t quant_shift,
+                                      const int16_t dequant,
+                                      const tran_low_t *cumbins_ptr,
+                                      const tran_low_t *dequant_val,
+                                      tran_low_t *qcoeff_ptr,
+                                      tran_low_t *dqcoeff_ptr,
+                                      uint16_t *eob_ptr) {
+  uint16_t dc_eob = 0;
+  // Short-circuit keeps the helper from running on skipped blocks.
+  if (!skip_block &&
+      highbd_quantize_coeff_bigtx_nuq(coeff_ptr[0], quant, quant_shift,
+                                      dequant, cumbins_ptr, dequant_val,
+                                      qcoeff_ptr, dqcoeff_ptr, 0)) {
+    dc_eob = 1;
+  }
+  *eob_ptr = dc_eob;
+}
+
+// DC-only entry point of the high-bitdepth "fp" non-uniform quantizer (NUQ)
+// for 32x32 transforms. Only coeff_ptr[0] is looked at: it is passed to
+// highbd_quantize_coeff_bigtx_fp_nuq() with a final argument of 0
+// (presumably logsizeby32, i.e. the 32x32 case). *eob_ptr is set to 1 when
+// that helper returns nonzero and to 0 otherwise. When skip_block is nonzero
+// the helper is never called and *eob_ptr is 0.
+void vp9_highbd_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
+                                         int skip_block,
+                                         const int16_t quant,
+                                         const int16_t dequant,
+                                         const tran_low_t *cumbins_ptr,
+                                         const tran_low_t *dequant_val,
+                                         tran_low_t *qcoeff_ptr,
+                                         tran_low_t *dqcoeff_ptr,
+                                         uint16_t *eob_ptr) {
+  uint16_t dc_eob = 0;
+  // Short-circuit keeps the helper from running on skipped blocks.
+  if (!skip_block &&
+      highbd_quantize_coeff_bigtx_fp_nuq(coeff_ptr[0], quant, dequant,
+                                         cumbins_ptr, dequant_val,
+                                         qcoeff_ptr, dqcoeff_ptr, 0)) {
+    dc_eob = 1;
+  }
+  *eob_ptr = dc_eob;
+}
+#endif  // CONFIG_NEW_QUANT
+
 #if CONFIG_TX64X64
 void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
                                   int skip_block,
@@ -165,6 +730,60 @@ void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
   highbd_quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
                            qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 1);
 }
+
+#if CONFIG_NEW_QUANT
+// DC-only entry point of the high-bitdepth non-uniform quantizer (NUQ) for
+// 64x64 transforms. Only coeff_ptr[0] is looked at: it is passed to
+// highbd_quantize_coeff_bigtx_nuq() with a final argument of 1 (presumably
+// logsizeby32, i.e. the 64x64 case). *eob_ptr is set to 1 when that helper
+// returns nonzero and to 0 otherwise. When skip_block is nonzero the helper
+// is never called and *eob_ptr is 0.
+void vp9_highbd_quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr,
+                                      int skip_block,
+                                      const int16_t quant,
+                                      const int16_t quant_shift,
+                                      const int16_t dequant,
+                                      const tran_low_t *cumbins_ptr,
+                                      const tran_low_t *dequant_val,
+                                      tran_low_t *qcoeff_ptr,
+                                      tran_low_t *dqcoeff_ptr,
+                                      uint16_t *eob_ptr) {
+  uint16_t dc_eob = 0;
+  // Short-circuit keeps the helper from running on skipped blocks.
+  if (!skip_block &&
+      highbd_quantize_coeff_bigtx_nuq(coeff_ptr[0], quant, quant_shift,
+                                      dequant, cumbins_ptr, dequant_val,
+                                      qcoeff_ptr, dqcoeff_ptr, 1)) {
+    dc_eob = 1;
+  }
+  *eob_ptr = dc_eob;
+}
+
+// DC-only entry point of the high-bitdepth "fp" non-uniform quantizer (NUQ)
+// for 64x64 transforms. Only coeff_ptr[0] is looked at: it is passed to
+// highbd_quantize_coeff_bigtx_fp_nuq() with a final argument of 1
+// (presumably logsizeby32, i.e. the 64x64 case). *eob_ptr is set to 1 when
+// that helper returns nonzero and to 0 otherwise. When skip_block is nonzero
+// the helper is never called and *eob_ptr is 0.
+void vp9_highbd_quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr,
+                                         int skip_block,
+                                         const int16_t quant,
+                                         const int16_t dequant,
+                                         const tran_low_t *cumbins_ptr,
+                                         const tran_low_t *dequant_val,
+                                         tran_low_t *qcoeff_ptr,
+                                         tran_low_t *dqcoeff_ptr,
+                                         uint16_t *eob_ptr) {
+  uint16_t dc_eob = 0;
+  // Short-circuit keeps the helper from running on skipped blocks.
+  if (!skip_block &&
+      highbd_quantize_coeff_bigtx_fp_nuq(coeff_ptr[0], quant, dequant,
+                                         cumbins_ptr, dequant_val,
+                                         qcoeff_ptr, dqcoeff_ptr, 1)) {
+    dc_eob = 1;
+  }
+  *eob_ptr = dc_eob;
+}
+#endif  // CONFIG_NEW_QUANT
 #endif  // CONFIG_TX64X64
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
@@ -208,6 +827,74 @@ void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
   *eob_ptr = eob + 1;
 }
 
+#if CONFIG_NEW_QUANT
+// Non-uniform quantizer (NUQ) for a block of n_coeffs coefficients.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// coefficients are then visited in scan order and each one is handed to
+// quantize_coeff_nuq() together with:
+//   - entry 0 (DC position) or entry 1 (any other position) of quant_ptr,
+//     quant_shift_ptr and dequant_ptr;
+//   - the cumbins_ptr / dequant_val rows selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_quantize_nuq_c(const tran_low_t *coeff_ptr,
+                        intptr_t n_coeffs,
+                        int skip_block,
+                        const int16_t *quant_ptr,
+                        const int16_t *quant_shift_ptr,
+                        const int16_t *dequant_ptr,
+                        const cumbins_type_nuq *cumbins_ptr,
+                        const dequant_val_type_nuq *dequant_val,
+                        tran_low_t *qcoeff_ptr,
+                        tran_low_t *dqcoeff_ptr,
+                        uint16_t *eob_ptr,
+                        const int16_t *scan,
+                        const uint8_t *band) {
+  int last_nz = -1;
+  int idx;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    if (quantize_coeff_nuq(coeff_ptr[pos], quant_ptr[is_ac],
+                           quant_shift_ptr[is_ac], dequant_ptr[is_ac],
+                           cumbins_ptr[b], dequant_val[b],
+                           &qcoeff_ptr[pos], &dqcoeff_ptr[pos]))
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+
+// "fp" variant of the non-uniform quantizer (NUQ) for a block of n_coeffs
+// coefficients; unlike vp9_quantize_nuq_c it takes no quant_shift table.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// coefficients are then visited in scan order and each one is handed to
+// quantize_coeff_fp_nuq() together with entry 0 (DC position) or entry 1
+// (any other position) of quant_ptr/dequant_ptr and the
+// cumbins_ptr/dequant_val rows selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_quantize_fp_nuq_c(const tran_low_t *coeff_ptr,
+                           intptr_t n_coeffs,
+                           int skip_block,
+                           const int16_t *quant_ptr,
+                           const int16_t *dequant_ptr,
+                           const cumbins_type_nuq *cumbins_ptr,
+                           const dequant_val_type_nuq *dequant_val,
+                           tran_low_t *qcoeff_ptr,
+                           tran_low_t *dqcoeff_ptr,
+                           uint16_t *eob_ptr,
+                           const int16_t *scan,
+                           const uint8_t *band) {
+  int last_nz = -1;
+  int idx;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    if (quantize_coeff_fp_nuq(coeff_ptr[pos], quant_ptr[is_ac],
+                              dequant_ptr[is_ac],
+                              cumbins_ptr[b], dequant_val[b],
+                              &qcoeff_ptr[pos], &dqcoeff_ptr[pos]))
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
 #if CONFIG_VP9_HIGHBITDEPTH
 void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
                               intptr_t count,
@@ -255,7 +942,77 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
   }
   *eob_ptr = eob + 1;
 }
-#endif
+
+#if CONFIG_NEW_QUANT
+// High-bitdepth non-uniform quantizer (NUQ) for a block of n_coeffs
+// coefficients.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// coefficients are then visited in scan order and each one is handed to
+// highbd_quantize_coeff_nuq() together with:
+//   - entry 0 (DC position) or entry 1 (any other position) of quant_ptr,
+//     quant_shift_ptr and dequant_ptr;
+//   - the cumbins_ptr / dequant_val rows selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_highbd_quantize_nuq_c(const tran_low_t *coeff_ptr,
+                               intptr_t n_coeffs,
+                               int skip_block,
+                               const int16_t *quant_ptr,
+                               const int16_t *quant_shift_ptr,
+                               const int16_t *dequant_ptr,
+                               const cumbins_type_nuq *cumbins_ptr,
+                               const dequant_val_type_nuq *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr,
+                               const int16_t *scan,
+                               const uint8_t *band) {
+  int eob = -1;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (!skip_block) {
+    int i;
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];
+      // rc != 0 picks the DC (0) or non-DC (1) table entry.
+      if (highbd_quantize_coeff_nuq(coeff_ptr[rc],
+                                    quant_ptr[rc != 0],
+                                    quant_shift_ptr[rc != 0],
+                                    dequant_ptr[rc != 0],
+                                    cumbins_ptr[band[i]],
+                                    dequant_val[band[i]],
+                                    &qcoeff_ptr[rc],
+                                    &dqcoeff_ptr[rc]))
+        eob = i;
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+
+// High-bitdepth "fp" variant of the non-uniform quantizer (NUQ) for a block
+// of n_coeffs coefficients; it takes no quant_shift table.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// coefficients are then visited in scan order and each one is handed to
+// highbd_quantize_coeff_fp_nuq() together with entry 0 (DC position) or
+// entry 1 (any other position) of quant_ptr/dequant_ptr and the
+// cumbins_ptr/dequant_val rows selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_highbd_quantize_fp_nuq_c(const tran_low_t *coeff_ptr,
+                                  intptr_t n_coeffs,
+                                  int skip_block,
+                                  const int16_t *quant_ptr,
+                                  const int16_t *dequant_ptr,
+                                  const cumbins_type_nuq *cumbins_ptr,
+                                  const dequant_val_type_nuq *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr,
+                                  const int16_t *scan,
+                                  const uint8_t *band) {
+  int eob = -1;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (!skip_block) {
+    int i;
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];
+      // rc != 0 picks the DC (0) or non-DC (1) table entry.
+      if (highbd_quantize_coeff_fp_nuq(coeff_ptr[rc],
+                                       quant_ptr[rc != 0],
+                                       dequant_ptr[rc != 0],
+                                       cumbins_ptr[band[i]],
+                                       dequant_val[band[i]],
+                                       &qcoeff_ptr[rc],
+                                       &dqcoeff_ptr[rc]))
+        eob = i;
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 
 // TODO(jingning) Refactor this file and combine functions with similar
 // operations.
@@ -324,6 +1081,76 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
                     eob_ptr, scan, iscan, 0);
 }
 
+#if CONFIG_NEW_QUANT
+// Non-uniform quantizer (NUQ) for 32x32 transform blocks.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// n_coeffs coefficients are then visited in scan order and each one is
+// handed to quantize_coeff_bigtx_nuq() with a final argument of 0 (the
+// logsizeby32 slot), entry 0 (DC position) or entry 1 (any other position)
+// of quant_ptr/quant_shift_ptr/dequant_ptr, and the cumbins_ptr/dequant_val
+// rows selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_quantize_32x32_nuq_c(const tran_low_t *coeff_ptr,
+                              intptr_t n_coeffs,
+                              int skip_block,
+                              const int16_t *quant_ptr,
+                              const int16_t *quant_shift_ptr,
+                              const int16_t *dequant_ptr,
+                              const cumbins_type_nuq *cumbins_ptr,
+                              const dequant_val_type_nuq *dequant_val,
+                              tran_low_t *qcoeff_ptr,
+                              tran_low_t *dqcoeff_ptr,
+                              uint16_t *eob_ptr,
+                              const int16_t *scan,
+                              const uint8_t *band) {
+  int last_nz = -1;
+  int idx;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    if (quantize_coeff_bigtx_nuq(coeff_ptr[pos], quant_ptr[is_ac],
+                                 quant_shift_ptr[is_ac], dequant_ptr[is_ac],
+                                 cumbins_ptr[b], dequant_val[b],
+                                 &qcoeff_ptr[pos], &dqcoeff_ptr[pos],
+                                 0))
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+
+// "fp" variant of the non-uniform quantizer (NUQ) for 32x32 transform
+// blocks; it takes no quant_shift table.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// n_coeffs coefficients are then visited in scan order and each one is
+// handed to quantize_coeff_bigtx_fp_nuq() with a final argument of 0
+// (presumably logsizeby32), entry 0 (DC position) or entry 1 (any other
+// position) of quant_ptr/dequant_ptr, and the cumbins_ptr/dequant_val rows
+// selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr,
+                                 intptr_t n_coeffs,
+                                 int skip_block,
+                                 const int16_t *quant_ptr,
+                                 const int16_t *dequant_ptr,
+                                 const cumbins_type_nuq *cumbins_ptr,
+                                 const dequant_val_type_nuq *dequant_val,
+                                 tran_low_t *qcoeff_ptr,
+                                 tran_low_t *dqcoeff_ptr,
+                                 uint16_t *eob_ptr,
+                                 const int16_t *scan,
+                                 const uint8_t *band) {
+  int last_nz = -1;
+  int idx;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[pos], quant_ptr[is_ac],
+                                    dequant_ptr[is_ac],
+                                    cumbins_ptr[b], dequant_val[b],
+                                    &qcoeff_ptr[pos], &dqcoeff_ptr[pos],
+                                    0))
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
 #if CONFIG_TX64X64
 void vp9_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
                              intptr_t n_coeffs,
@@ -343,6 +1170,76 @@ void vp9_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
                     qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
                     eob_ptr, scan, iscan, 1);
 }
+
+#if CONFIG_NEW_QUANT
+// Non-uniform quantizer (NUQ) for 64x64 transform blocks.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// n_coeffs coefficients are then visited in scan order and each one is
+// handed to quantize_coeff_bigtx_nuq() with a final argument of 1 (the
+// logsizeby32 slot), entry 0 (DC position) or entry 1 (any other position)
+// of quant_ptr/quant_shift_ptr/dequant_ptr, and the cumbins_ptr/dequant_val
+// rows selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_quantize_64x64_nuq_c(const tran_low_t *coeff_ptr,
+                              intptr_t n_coeffs,
+                              int skip_block,
+                              const int16_t *quant_ptr,
+                              const int16_t *quant_shift_ptr,
+                              const int16_t *dequant_ptr,
+                              const cumbins_type_nuq *cumbins_ptr,
+                              const dequant_val_type_nuq *dequant_val,
+                              tran_low_t *qcoeff_ptr,
+                              tran_low_t *dqcoeff_ptr,
+                              uint16_t *eob_ptr,
+                              const int16_t *scan,
+                              const uint8_t *band) {
+  int last_nz = -1;
+  int idx;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    if (quantize_coeff_bigtx_nuq(coeff_ptr[pos], quant_ptr[is_ac],
+                                 quant_shift_ptr[is_ac], dequant_ptr[is_ac],
+                                 cumbins_ptr[b], dequant_val[b],
+                                 &qcoeff_ptr[pos], &dqcoeff_ptr[pos],
+                                 1))
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+
+// "fp" variant of the non-uniform quantizer (NUQ) for 64x64 transform
+// blocks; it takes no quant_shift table.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// n_coeffs coefficients are then visited in scan order and each one is
+// handed to quantize_coeff_bigtx_fp_nuq() with a final argument of 1
+// (presumably logsizeby32), entry 0 (DC position) or entry 1 (any other
+// position) of quant_ptr/dequant_ptr, and the cumbins_ptr/dequant_val rows
+// selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_quantize_64x64_fp_nuq_c(const tran_low_t *coeff_ptr,
+                                 intptr_t n_coeffs,
+                                 int skip_block,
+                                 const int16_t *quant_ptr,
+                                 const int16_t *dequant_ptr,
+                                 const cumbins_type_nuq *cumbins_ptr,
+                                 const dequant_val_type_nuq *dequant_val,
+                                 tran_low_t *qcoeff_ptr,
+                                 tran_low_t *dqcoeff_ptr,
+                                 uint16_t *eob_ptr,
+                                 const int16_t *scan,
+                                 const uint8_t *band) {
+  int last_nz = -1;
+  int idx;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[pos], quant_ptr[is_ac],
+                                    dequant_ptr[is_ac],
+                                    cumbins_ptr[b], dequant_val[b],
+                                    &qcoeff_ptr[pos], &dqcoeff_ptr[pos],
+                                    1))
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
 #endif  // CONFIG_TX64X64
 
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -385,7 +1282,6 @@ static INLINE void highbd_quantize_fp_bigtx(const tran_low_t *coeff_ptr,
         dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] /
                           (2 << logsizeby32);
       }
-
       if (tmp)
         eob = i;
     }
@@ -412,6 +1308,76 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
                            eob_ptr, scan, iscan, 0);
 }
 
+#if CONFIG_NEW_QUANT
+// High-bitdepth non-uniform quantizer (NUQ) for 32x32 transform blocks.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// n_coeffs coefficients are then visited in scan order and each one is
+// handed to highbd_quantize_coeff_bigtx_nuq() with a final argument of 0
+// (presumably logsizeby32), entry 0 (DC position) or entry 1 (any other
+// position) of quant_ptr/quant_shift_ptr/dequant_ptr, and the
+// cumbins_ptr/dequant_val rows selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_highbd_quantize_32x32_nuq_c(const tran_low_t *coeff_ptr,
+                                     intptr_t n_coeffs,
+                                     int skip_block,
+                                     const int16_t *quant_ptr,
+                                     const int16_t *quant_shift_ptr,
+                                     const int16_t *dequant_ptr,
+                                     const cumbins_type_nuq *cumbins_ptr,
+                                     const dequant_val_type_nuq *dequant_val,
+                                     tran_low_t *qcoeff_ptr,
+                                     tran_low_t *dqcoeff_ptr,
+                                     uint16_t *eob_ptr,
+                                     const int16_t *scan,
+                                     const uint8_t *band) {
+  int last_nz = -1;
+  int idx;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    if (highbd_quantize_coeff_bigtx_nuq(coeff_ptr[pos],
+                                        quant_ptr[is_ac],
+                                        quant_shift_ptr[is_ac],
+                                        dequant_ptr[is_ac],
+                                        cumbins_ptr[b], dequant_val[b],
+                                        &qcoeff_ptr[pos],
+                                        &dqcoeff_ptr[pos],
+                                        0))
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+
+// High-bitdepth "fp" variant of the non-uniform quantizer (NUQ) for 32x32
+// transform blocks; it takes no quant_shift table.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// n_coeffs coefficients are then visited in scan order and each one is
+// handed to highbd_quantize_coeff_bigtx_fp_nuq() with a final argument of 0
+// (presumably logsizeby32), entry 0 (DC position) or entry 1 (any other
+// position) of quant_ptr/dequant_ptr, and the cumbins_ptr/dequant_val rows
+// selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_highbd_quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr,
+                                        intptr_t n_coeffs,
+                                        int skip_block,
+                                        const int16_t *quant_ptr,
+                                        const int16_t *dequant_ptr,
+                                        const cumbins_type_nuq *cumbins_ptr,
+                                        const dequant_val_type_nuq *dequant_val,
+                                        tran_low_t *qcoeff_ptr,
+                                        tran_low_t *dqcoeff_ptr,
+                                        uint16_t *eob_ptr,
+                                        const int16_t *scan,
+                                        const uint8_t *band) {
+  int last_nz = -1;
+  int idx;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    if (highbd_quantize_coeff_bigtx_fp_nuq(coeff_ptr[pos],
+                                           quant_ptr[is_ac],
+                                           dequant_ptr[is_ac],
+                                           cumbins_ptr[b], dequant_val[b],
+                                           &qcoeff_ptr[pos],
+                                           &dqcoeff_ptr[pos],
+                                           0))
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
 #if CONFIG_TX64X64
 void vp9_highbd_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
                                     intptr_t n_coeffs,
@@ -431,6 +1397,76 @@ void vp9_highbd_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
                            qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
                            eob_ptr, scan, iscan, 1);
 }
+
+#if CONFIG_NEW_QUANT
+// High-bitdepth non-uniform quantizer (NUQ) for 64x64 transform blocks.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// n_coeffs coefficients are then visited in scan order and each one is
+// handed to highbd_quantize_coeff_bigtx_nuq() with a final argument of 1
+// (presumably logsizeby32), entry 0 (DC position) or entry 1 (any other
+// position) of quant_ptr/quant_shift_ptr/dequant_ptr, and the
+// cumbins_ptr/dequant_val rows selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_highbd_quantize_64x64_nuq_c(const tran_low_t *coeff_ptr,
+                                     intptr_t n_coeffs,
+                                     int skip_block,
+                                     const int16_t *quant_ptr,
+                                     const int16_t *quant_shift_ptr,
+                                     const int16_t *dequant_ptr,
+                                     const cumbins_type_nuq *cumbins_ptr,
+                                     const dequant_val_type_nuq *dequant_val,
+                                     tran_low_t *qcoeff_ptr,
+                                     tran_low_t *dqcoeff_ptr,
+                                     uint16_t *eob_ptr,
+                                     const int16_t *scan,
+                                     const uint8_t *band) {
+  int last_nz = -1;
+  int idx;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    if (highbd_quantize_coeff_bigtx_nuq(coeff_ptr[pos],
+                                        quant_ptr[is_ac],
+                                        quant_shift_ptr[is_ac],
+                                        dequant_ptr[is_ac],
+                                        cumbins_ptr[b], dequant_val[b],
+                                        &qcoeff_ptr[pos],
+                                        &dqcoeff_ptr[pos],
+                                        1))
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+
+// High-bitdepth "fp" variant of the non-uniform quantizer (NUQ) for 64x64
+// transform blocks; it takes no quant_shift table.
+// Both output buffers are zeroed first. Unless skip_block is nonzero, the
+// n_coeffs coefficients are then visited in scan order and each one is
+// handed to highbd_quantize_coeff_bigtx_fp_nuq() with a final argument of 1
+// (presumably logsizeby32), entry 0 (DC position) or entry 1 (any other
+// position) of quant_ptr/dequant_ptr, and the cumbins_ptr/dequant_val rows
+// selected by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none (or the block was skipped).
+void vp9_highbd_quantize_64x64_fp_nuq_c(const tran_low_t *coeff_ptr,
+                                        intptr_t n_coeffs,
+                                        int skip_block,
+                                        const int16_t *quant_ptr,
+                                        const int16_t *dequant_ptr,
+                                        const cumbins_type_nuq *cumbins_ptr,
+                                        const dequant_val_type_nuq *dequant_val,
+                                        tran_low_t *qcoeff_ptr,
+                                        tran_low_t *dqcoeff_ptr,
+                                        uint16_t *eob_ptr,
+                                        const int16_t *scan,
+                                        const uint8_t *band) {
+  int last_nz = -1;
+  int idx;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+  for (idx = 0; idx < n_coeffs; ++idx) {
+    const int pos = scan[idx];
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    if (highbd_quantize_coeff_bigtx_fp_nuq(coeff_ptr[pos],
+                                           quant_ptr[is_ac],
+                                           dequant_ptr[is_ac],
+                                           cumbins_ptr[b], dequant_val[b],
+                                           &qcoeff_ptr[pos],
+                                           &dqcoeff_ptr[pos],
+                                           1))
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
 #endif  // CONFIG_TX64X64
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
@@ -530,6 +1566,56 @@ void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col,
     }
 }
 
+#if CONFIG_NEW_QUANT
+// Non-uniform quantizer (NUQ) for a row x col coefficient block stored in
+// buffers whose rows are `stride` elements apart.
+// scan[] positions are interpreted on the dense row x col grid and remapped
+// to strided offsets before every buffer access. For each visited position
+// the qcoeff/dqcoeff outputs are zeroed and the coefficient is quantized
+// with quantize_coeff_bigtx_nuq() when logsizeby32 >= 0, or with
+// quantize_coeff_nuq() otherwise. Table entry 0 of quant_ptr,
+// quant_shift_ptr and dequant_ptr is used for position 0 and entry 1 for
+// all others; the cumbins_ptr/dequant_val rows are chosen by band[i].
+// *eob_ptr receives one plus the last scan index for which the helper
+// returned nonzero, or 0 when there was none.
+void vp9_quantize_rect_nuq(const tran_low_t *coeff_ptr,
+                           int row,
+                           int col,
+                           int stride,
+                           const int16_t *quant_ptr,
+                           const int16_t *quant_shift_ptr,
+                           const int16_t *dequant_ptr,
+                           const cumbins_type_nuq *cumbins_ptr,
+                           const dequant_val_type_nuq *dequant_val,
+                           tran_low_t *qcoeff_ptr,
+                           tran_low_t *dqcoeff_ptr,
+                           uint16_t *eob_ptr,
+                           int logsizeby32,
+                           const int16_t *scan,
+                           const uint8_t *band) {
+  const int total = row * col;
+  int last_nz = -1;
+  int idx;
+  for (idx = 0; idx < total; ++idx) {
+    const int pos = scan[idx];
+    // Dense (col-wide) position -> offset in the strided buffers.
+    const int off = (pos / col) * stride + (pos % col);
+    const int is_ac = (pos != 0);  // 0 selects the DC table entry
+    const int b = band[idx];
+    int nonzero;
+    dqcoeff_ptr[off] = 0;
+    qcoeff_ptr[off] = 0;
+    if (logsizeby32 < 0) {
+      nonzero = quantize_coeff_nuq(coeff_ptr[off], quant_ptr[is_ac],
+                                   quant_shift_ptr[is_ac],
+                                   dequant_ptr[is_ac],
+                                   cumbins_ptr[b], dequant_val[b],
+                                   &qcoeff_ptr[off], &dqcoeff_ptr[off]);
+    } else {
+      nonzero = quantize_coeff_bigtx_nuq(coeff_ptr[off], quant_ptr[is_ac],
+                                         quant_shift_ptr[is_ac],
+                                         dequant_ptr[is_ac],
+                                         cumbins_ptr[b], dequant_val[b],
+                                         &qcoeff_ptr[off], &dqcoeff_ptr[off],
+                                         logsizeby32);
+    }
+    if (nonzero)
+      last_nz = idx;
+  }
+  *eob_ptr = last_nz + 1;
+}
+#endif  // CONFIG_NEW_QUANT
+
 int get_eob(tran_low_t *qcoeff_ptr, intptr_t n_coeffs, const int16_t *scan) {
   int i, rc, eob = -1;
 
@@ -873,18 +1959,18 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
     const int qrounding_factor = q == 0 ? 64 : 48;
 
     for (i = 0; i < 2; ++i) {
-      int qrounding_factor_fp = i == 0 ? 48 : 42;
-      if (q == 0)
-        qrounding_factor_fp = 64;
+      const int qrounding_factor_fp = q == 0 ? 64 : (i == 0 ? 48 : 42);
 
       // y
       quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth)
                      : vp9_ac_quant(q, 0, cm->bit_depth);
       invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant);
       quants->y_quant_fp[q][i] = (1 << 16) / quant;
-      quants->y_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
+      quants->y_round_fp[q][i] =
+          vp9_round_factor_to_round(quant, qrounding_factor_fp);
       quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
-      quants->y_round[q][i] = (qrounding_factor * quant) >> 7;
+      quants->y_round[q][i] =
+          vp9_round_factor_to_round(quant, qrounding_factor);
       cm->y_dequant[q][i] = quant;
 
       // uv
@@ -893,12 +1979,27 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
       invert_quant(&quants->uv_quant[q][i],
                    &quants->uv_quant_shift[q][i], quant);
       quants->uv_quant_fp[q][i] = (1 << 16) / quant;
-      quants->uv_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
+      quants->uv_round_fp[q][i] =
+          vp9_round_factor_to_round(quant, qrounding_factor_fp);
       quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
-      quants->uv_round[q][i] = (qrounding_factor * quant) >> 7;
+      quants->uv_round[q][i] =
+          vp9_round_factor_to_round(quant, qrounding_factor);
       cm->uv_dequant[q][i] = quant;
     }
 
+#if CONFIG_NEW_QUANT
+    for (i = 0; i < COEF_BANDS; i++) {
+      const int quant = cm->y_dequant[q][i != 0];
+      const int uvquant = cm->uv_dequant[q][i != 0];
+      vp9_get_dequant_val_nuq(quant, i, cm->bit_depth,
+                              cm->y_dequant_val_nuq[q][i],
+                              quants->y_cumbins_nuq[q][i]);
+      vp9_get_dequant_val_nuq(uvquant, i, cm->bit_depth,
+                              cm->uv_dequant_val_nuq[q][i],
+                              quants->uv_cumbins_nuq[q][i]);
+    }
+#endif  // CONFIG_NEW_QUANT
+
     for (i = 2; i < 8; i++) {
       quants->y_quant[q][i] = quants->y_quant[q][1];
       quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
@@ -936,6 +2037,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
   x->plane[0].zbin = quants->y_zbin[qindex];
   x->plane[0].round = quants->y_round[qindex];
   xd->plane[0].dequant = cm->y_dequant[qindex];
+#if CONFIG_NEW_QUANT
+  x->plane[0].cumbins_nuq = quants->y_cumbins_nuq[qindex];
+  xd->plane[0].dequant_val_nuq = cm->y_dequant_val_nuq[qindex];
+#endif
 
   x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0];
   x->plane[0].quant_thred[1] = x->plane[0].zbin[1] * x->plane[0].zbin[1];
@@ -949,6 +2054,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
     x->plane[i].zbin = quants->uv_zbin[qindex];
     x->plane[i].round = quants->uv_round[qindex];
     xd->plane[i].dequant = cm->uv_dequant[qindex];
+#if CONFIG_NEW_QUANT
+    x->plane[i].cumbins_nuq = quants->uv_cumbins_nuq[qindex];
+    xd->plane[i].dequant_val_nuq = cm->uv_dequant_val_nuq[qindex];
+#endif  // CONFIG_NEW_QUANT
 
     x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0];
     x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1];
index fad9c7d6e997d45648b1da36e859b3a9681095c8..7ac28b8fd41dec63f87efcc332a4597556a86b56 100644 (file)
@@ -19,6 +19,13 @@ extern "C" {
 #endif
 
 typedef struct {
+#if CONFIG_NEW_QUANT
+  DECLARE_ALIGNED(16, tran_low_t,
+                  y_cumbins_nuq[QINDEX_RANGE][COEF_BANDS][NUQ_KNOTES]);
+  DECLARE_ALIGNED(16, tran_low_t,
+                  uv_cumbins_nuq[QINDEX_RANGE][COEF_BANDS][NUQ_KNOTES]);
+#endif  // CONFIG_NEW_QUANT
+
   DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
   DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
   DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]);
@@ -45,12 +52,75 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                            const int16_t *round_ptr, const int16_t quant_ptr,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t dequant_ptr, uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_quantize_dc_nuq(const tran_low_t *coeff_ptr,
+                         int skip_block,
+                         const int16_t quant,
+                         const int16_t quant_shift,
+                         const int16_t dequant,
+                         const tran_low_t *cumbins_ptr,
+                         const tran_low_t *dequant_val,
+                         tran_low_t *qcoeff_ptr,
+                         tran_low_t *dqcoeff_ptr,
+                         uint16_t *eob_ptr);
+void vp9_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
+                               int skip_block,
+                               const int16_t quant,
+                               const int16_t quant_shift,
+                               const int16_t dequant,
+                               const tran_low_t *cumbins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr);
+void vp9_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
+                            int skip_block,
+                            const int16_t quant,
+                            const int16_t dequant,
+                            const tran_low_t *cumbins_ptr,
+                            const tran_low_t *dequant_val,
+                            tran_low_t *qcoeff_ptr,
+                            tran_low_t *dqcoeff_ptr,
+                            uint16_t *eob_ptr);
+void vp9_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
+                                  int skip_block,
+                                  const int16_t quant,
+                                  const int16_t dequant,
+                                  const tran_low_t *cumbins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
+
 #if CONFIG_TX64X64
 void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
                            const int16_t *round_ptr, const int16_t quant_ptr,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t dequant_ptr, uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr,
+                               int skip_block,
+                               const int16_t quant,
+                               const int16_t quant_shift,
+                               const int16_t dequant,
+                               const tran_low_t *cumbins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr,
+                               tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr);
+void vp9_quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr,
+                                  int skip_block,
+                                  const int16_t quant,
+                                  const int16_t dequant,
+                                  const tran_low_t *cumbins_ptr,
+                                  const tran_low_t *dequant_val,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
 #endif  // CONFIG_TX64X64
+
 void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
                                 const int16_t *scan, const int16_t *iscan);
 
@@ -67,6 +137,46 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
                                   tran_low_t *dqcoeff_ptr,
                                   const int16_t dequant_ptr,
                                   uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr,
+                                int skip_block,
+                                const int16_t quant,
+                                const int16_t quant_shift,
+                                const int16_t dequant,
+                                const tran_low_t *cumbins_ptr,
+                                const tran_low_t *dequant_val,
+                                tran_low_t *qcoeff_ptr,
+                                tran_low_t *dqcoeff_ptr,
+                                uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr,
+                                      int skip_block,
+                                      const int16_t quant,
+                                      const int16_t quant_shift,
+                                      const int16_t dequant,
+                                      const tran_low_t *cumbins_ptr,
+                                      const tran_low_t *dequant_val,
+                                      tran_low_t *qcoeff_ptr,
+                                      tran_low_t *dqcoeff_ptr,
+                                      uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr,
+                                   int skip_block,
+                                   const int16_t quant,
+                                   const int16_t dequant,
+                                   const tran_low_t *cumbins_ptr,
+                                   const tran_low_t *dequant_val,
+                                   tran_low_t *qcoeff_ptr,
+                                   tran_low_t *dqcoeff_ptr,
+                                   uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr,
+                                         int skip_block,
+                                         const int16_t quant,
+                                         const int16_t dequant,
+                                         const tran_low_t *cumbins_ptr,
+                                         const tran_low_t *dequant_val,
+                                         tran_low_t *qcoeff_ptr,
+                                         tran_low_t *dqcoeff_ptr,
+                                         uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
 #if CONFIG_TX64X64
 void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
                                   int skip_block,
@@ -76,6 +186,27 @@ void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
                                   tran_low_t *dqcoeff_ptr,
                                   const int16_t dequant_ptr,
                                   uint16_t *eob_ptr);
+#if CONFIG_NEW_QUANT
+void vp9_highbd_quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr,
+                                      int skip_block,
+                                      const int16_t quant,
+                                      const int16_t quant_shift,
+                                      const int16_t dequant,
+                                      const tran_low_t *cumbins_ptr,
+                                      const tran_low_t *dequant_val,
+                                      tran_low_t *qcoeff_ptr,
+                                      tran_low_t *dqcoeff_ptr,
+                                      uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr,
+                                         int skip_block,
+                                         const int16_t quant,
+                                         const int16_t dequant,
+                                         const tran_low_t *cumbins_ptr,
+                                         const tran_low_t *dequant_val,
+                                         tran_low_t *qcoeff_ptr,
+                                         tran_low_t *dqcoeff_ptr,
+                                         uint16_t *eob_ptr);
+#endif  // CONFIG_NEW_QUANT
 #endif  // CONFIG_TX64X64
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
@@ -101,7 +232,23 @@ void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col,
                        tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                        const int16_t *dequant_ptr,
                        int logsizeby32, int stride, int has_dc);
-
+#if CONFIG_NEW_QUANT
+void vp9_quantize_rect_nuq(const tran_low_t *coeff_ptr,
+                           int row,
+                           int col,
+                           int stride,
+                           const int16_t *quant_ptr,
+                           const int16_t *quant_shift_ptr,
+                           const int16_t *dequant_ptr,
+                           const cumbins_type_nuq *cumbins_ptr,
+                           const dequant_val_type_nuq *dequant_val,
+                           tran_low_t *qcoeff_ptr,
+                           tran_low_t *dqcoeff_ptr,
+                           uint16_t *eob_ptr,
+                           int logsizeby32,
+                           const int16_t *scan,
+                           const uint8_t *band);
+#endif  // CONFIG_NEW_QUANT
 int get_eob(tran_low_t *qcoeff_ptr, intptr_t n_coeffs, const int16_t *scan);
 #endif
 
index 9dbfad5d6332c4acd1bc0696117dbfc5a0daa5c3..1d446993e9d96d2f5440f2b99f10613026f3a71c 100644 (file)
@@ -559,7 +559,17 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
   } else if (max_txsize_lookup[plane_bsize] == tx_size) {
     if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
       // full forward transform and quantization
-      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_NEW_QUANT
+      if (x->quant_fp)
+        vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else
+      if (x->quant_fp)
+        vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif  // CONFIG_NEW_QUANT
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
         dist_block(plane, block, tx_size, args, xd->bd);
@@ -573,7 +583,14 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
       // compute DC coefficient
       tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
       tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
+#if CONFIG_NEW_QUANT
+      if (x->quant_fp)
+        vp9_xform_quant_dc_fp_nuq(x, plane, block, plane_bsize, tx_size);
+      else
+        vp9_xform_quant_dc_nuq(x, plane, block, plane_bsize, tx_size);
+#else
       vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+#endif  // CONFIG_NEW_QUANT
       args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
       args->dist = args->sse;
       if (x->plane[plane].eobs[block]) {
@@ -598,7 +615,17 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
     }
   } else {
     // full forward transform and quantization
-    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_NEW_QUANT
+    if (x->quant_fp)
+      vp9_xform_quant_fp_nuq(x, plane, block, plane_bsize, tx_size);
+    else
+      vp9_xform_quant_nuq(x, plane, block, plane_bsize, tx_size);
+#else
+    if (x->quant_fp)
+      vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+    else
+      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#endif  // CONFIG_NEW_QUANT
 #if CONFIG_VP9_HIGHBITDEPTH
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
       dist_block(plane, block, tx_size, args, xd->bd);