From b69152db79b5f9a3d0f94d615dc2c6fc35f33f1f Mon Sep 17 00:00:00 2001
From: hui su <huisu@google.com>
Date: Fri, 10 Apr 2015 11:25:09 -0700
Subject: [PATCH] Add high bit depth support for tx-skip experiment

+0.3% on 10-bit
+0.3% on 12-bit

Combined with the other high-bit-depth-compatible experiments, on 12-bit:
+12.44% (+0.17) over the 8-bit baseline

Change-Id: I40b4c382fa54ba4640d08d9d01950ea8c1200bc9
---
 vp9/common/vp9_idct.c         |  22 +-
 vp9/common/vp9_idct.h         |   9 +-
 vp9/decoder/vp9_decodeframe.c | 304 +++++++++++---------
 vp9/encoder/vp9_encodemb.c    | 509 +++++++++++++++++++++++++++++-----
 vp9/encoder/vp9_quantize.c    |   7 +-
 vp9/encoder/vp9_quantize.h    |   2 +-
 6 files changed, 651 insertions(+), 202 deletions(-)

diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index 0ca3d60a6..ed86ff19e 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -1608,7 +1608,27 @@ void vp9_tx_identity_add(const tran_low_t *input, uint8_t *dest,
                          int stride, int bs, int shift) {
   vp9_tx_identity_add_rect(input, dest, bs, bs, bs, stride, shift);
 }
-#endif
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_tx_identity_add_rect(const tran_low_t *input, uint8_t *dest8,
+                                     int row, int col, int stride_in,
+                                     int stride_out, int shift, int bd) {
+  int r, c;  // Walks a row x col region; input and dest use separate strides.
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // dest8 accessed as uint16_t
+  for (r = 0; r < row; r++)
+    for (c = 0; c < col; c++) {
+      dest[r * stride_out + c] =  // each destination sample is updated in place
+          highbd_clip_pixel_add(dest[r * stride_out + c],
+                                input[r * stride_in + c] >> shift, bd);
+    }  // NOTE(review): helper presumably clamps to the bd-bit range - confirm
+}
+
+void vp9_highbd_tx_identity_add(const tran_low_t *input, uint8_t *dest8,
+                                int stride, int bs, int shift, int bd) {
+  vp9_highbd_tx_identity_add_rect(input, dest8, bs, bs, bs, stride, shift, bd);
+}  // Square bs x bs case: input is read with stride bs, dest with stride.
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_TX_SKIP
 
 #if CONFIG_TX64X64
 #define DownshiftMultiplyBy2(x) x * 2
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index fb468aed0..eda2e56c9 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -175,4 +175,11 @@ void vp9_tx_identity_add_rect(const tran_low_t *input, uint8_t *dest,
                               int stride_out, int shift);
 void vp9_tx_identity_add(const tran_low_t *input, uint8_t *dest,
                          int stride, int bs, int shift);
-#endif
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_tx_identity_add_rect(const tran_low_t *input, uint8_t *dest,
+                                     int row, int col, int stride_in,
+                                     int stride_out, int shift, int bd);
+void vp9_highbd_tx_identity_add(const tran_low_t *input, uint8_t *dest,
+                                int stride, int bs, int shift, int bd);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_TX_SKIP
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index f2140b529..f5f8e7707 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -289,6 +289,81 @@ static void vp9_intra_dpcm_add_nocoeff(uint8_t *dst, int stride,
       break;
   }
 }
+#if CONFIG_VP9_HIGHBITDEPTH
+static void vp9_highbd_intra_dpcm_add(tran_low_t *dqcoeff, uint8_t *dst8,
+                                      int stride, PREDICTION_MODE mode,
+                                      int bs, int shift, int bd) {
+  int r, c, temp;  // temp: TM_PRED predictor for one interior sample
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);  // dst8 accessed as uint16_t
+
+  switch (mode) {  // residual is dqcoeff[r * bs + c] >> shift in every branch
+    case H_PRED:  // column 0 adds onto dst; later columns chain from the left
+      for (r = 0; r < bs; r++)
+        dst[r * stride] = clip_pixel_highbd(dst[r * stride] +
+                                            (dqcoeff[r * bs] >> shift), bd);
+      for (r = 0; r < bs; r++)
+        for (c = 1; c < bs; c++)
+          dst[r * stride + c] =
+              clip_pixel_highbd(dst[r * stride + c - 1] +
+                                (dqcoeff[r * bs + c] >> shift), bd);
+      break;
+    case V_PRED:  // row 0 adds onto dst; later rows chain from the row above
+      for (c = 0; c < bs; c++)
+        dst[c] = clip_pixel_highbd(dst[c] + (dqcoeff[c] >> shift), bd);
+      for (r = 1; r < bs; r++)
+        for (c = 0; c < bs; c++)
+          dst[r * stride + c] =
+              clip_pixel_highbd(dst[(r - 1) * stride + c] +
+                                (dqcoeff[r * bs + c] >> shift), bd);
+      break;
+    case TM_PRED:  // row 0 and column 0 add the residual onto dst directly
+      for (c = 0; c < bs; c++)
+        dst[c] = clip_pixel_highbd(dst[c] + (dqcoeff[c] >> shift), bd);
+      for (r = 1; r < bs; r++)
+        dst[r * stride] = clip_pixel_highbd(dst[r * stride] +
+                                            (dqcoeff[r * bs] >> shift), bd);
+      for (r = 1; r < bs; r++)
+        for (c = 1; c < bs; c++) {  // interior: left + above - above-left
+          temp = dst[stride * r + c - 1] + dst[stride * (r - 1) + c] -
+              dst[stride * (r - 1) + c - 1];
+          temp = clip_pixel_highbd(temp, bd);  // predictor goes through the clip
+          dst[stride * r + c] =
+              clip_pixel_highbd(temp + (dqcoeff[r * bs + c] >> shift), bd);
+        }
+      break;
+    default:  // any other mode leaves dst untouched
+      break;
+  }
+}
+
+static void vp9_highbd_intra_dpcm_add_nocoeff(uint8_t *dst8, int stride,
+                                       PREDICTION_MODE mode, int bs, int bd) {
+  int r, c, temp;  // temp: TM_PRED predictor before clip_pixel_highbd
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);  // dst8 accessed as uint16_t
+
+  switch (mode) {  // no residual here: row 0 / column 0 of dst seed the block
+    case H_PRED:  // replicate column 0 across each row
+      for (r = 0; r < bs; r++)
+        for (c = 1; c < bs; c++)
+          dst[r * stride + c] = dst[r * stride];
+      break;
+    case V_PRED:  // replicate row 0 into every later row
+      for (r = 1; r < bs; r++)
+        vpx_memcpy(dst + r * stride, dst, bs * sizeof(dst[0]));
+      break;
+    case TM_PRED:  // row 0 and column 0 are kept as they are
+      for (r = 1; r < bs; r++)
+        for (c = 1; c < bs; c++) {  // interior: left + above - above-left
+          temp = dst[stride * r + c - 1] + dst[stride * (r - 1) + c] -
+              dst[stride * (r - 1) + c - 1];
+          dst[stride * r + c] = clip_pixel_highbd(temp, bd);
+        }
+      break;
+    default:  // any other mode leaves dst untouched
+      break;
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_TX_SKIP
 
 static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
@@ -316,153 +391,114 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
         vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
       } else {
         const PLANE_TYPE plane_type = pd->plane_type;
-        switch (tx_size) {
-          case TX_4X4:
-            tx_type = get_tx_type_4x4(plane_type, xd, block);
-#if CONFIG_TX_SKIP
-            if (mbmi->tx_skip[plane != 0]) {
-              vp9_tx_identity_add(dqcoeff, dst, stride, 4, shift);
-            } else {
-              vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
-            }
-#else
-            vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
-#endif
-            break;
-          case TX_8X8:
-            tx_type = get_tx_type(plane_type, xd);
-#if CONFIG_TX_SKIP
-            if (mbmi->tx_skip[plane != 0]) {
-              vp9_tx_identity_add(dqcoeff, dst, stride, 8, shift);
-            } else {
-              vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
-            }
-#else
-            vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
-#endif
-            break;
-          case TX_16X16:
-            tx_type = get_tx_type(plane_type, xd);
 #if CONFIG_TX_SKIP
-            if (mbmi->tx_skip[plane != 0]) {
-              vp9_tx_identity_add(dqcoeff, dst, stride, 16, shift);
-            } else {
+        if (mbmi->tx_skip[plane != 0]) {
+          int bs = 4 << tx_size;
+          if (tx_size <= TX_32X32 &&
+              (mode == V_PRED || mode == H_PRED || mode == TM_PRED))
+            vp9_highbd_intra_dpcm_add(dqcoeff, dst, stride, mode, bs, shift,
+                                      xd->bd);
+          else
+            vp9_highbd_tx_identity_add(dqcoeff, dst, stride, bs, shift,
+                                       xd->bd);
+          tx_type = DCT_DCT;
+          if (tx_size == TX_4X4)
+            tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
+          else if (tx_size <= TX_16X16)
+            tx_type = get_tx_type(pd->plane_type, xd);
+        } else {
+#endif  // CONFIG_TX_SKIP
+          switch (tx_size) {
+            case TX_4X4:
+              tx_type = get_tx_type_4x4(plane_type, xd, block);
+              vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob,
+                                    xd->bd);
+              break;
+            case TX_8X8:
+              tx_type = get_tx_type(plane_type, xd);
+              vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob,
+                                    xd->bd);
+              break;
+            case TX_16X16:
+              tx_type = get_tx_type(plane_type, xd);
               vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob,
                                       xd->bd);
-            }
-#else
-            vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
-#endif
-            break;
-          case TX_32X32:
-            tx_type = DCT_DCT;
-#if CONFIG_TX_SKIP
-            if (mbmi->tx_skip[plane != 0]) {
-              vp9_tx_identity_add(dqcoeff, dst, stride, 32, shift);
-            } else {
+              break;
+            case TX_32X32:
+              tx_type = DCT_DCT;
               vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
-            }
-#else
-            vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
-#endif
-            break;
+              break;
 #if CONFIG_TX64X64
-          case TX_64X64:
-            tx_type = DCT_DCT;
-#if CONFIG_TX_SKIP
-            if (mbmi->tx_skip[plane != 0]) {
-              vp9_tx_identity_add(dqcoeff, dst, stride, 64, shift);
-            } else {
+            case TX_64X64:
+              tx_type = DCT_DCT;
               vp9_highbd_idct64x64_add(dqcoeff, dst, stride, eob, xd->bd);
-            }
-#else
-            vp9_highbd_idct64x64_add(dqcoeff, dst, stride, eob, xd->bd);
-#endif  // CONFIG_TX_SKIP
-            break;
+              break;
 #endif  // CONFIG_TX64X64
-          default:
-            assert(0 && "Invalid transform size");
+            default:
+              assert(0 && "Invalid transform size");
+          }
+#if CONFIG_TX_SKIP
         }
+#endif  // CONFIG_TX_SKIP
       }
     } else {
 #if CONFIG_TX_SKIP
       if (xd->lossless && !mbmi->tx_skip[plane != 0]) {
 #else
-      if (xd->lossless) {
-#endif
-        tx_type = DCT_DCT;
-        vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
-      } else {
-        const PLANE_TYPE plane_type = pd->plane_type;
-        switch (tx_size) {
-          case TX_4X4:
-            tx_type = get_tx_type_4x4(plane_type, xd, block);
-#if CONFIG_TX_SKIP
-            if (mbmi->tx_skip[plane != 0]) {
-              vp9_tx_identity_add(dqcoeff, dst, stride, 4, shift);
-            } else {
-              vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
-            }
-#else
-            vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
+        if (xd->lossless) {
 #endif
-            break;
-          case TX_8X8:
-            tx_type = get_tx_type(plane_type, xd);
-#if CONFIG_TX_SKIP
-            if (mbmi->tx_skip[plane != 0]) {
-              vp9_tx_identity_add(dqcoeff, dst, stride, 8, shift);
-            } else {
-              vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
-            }
-#else
-            vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
-#endif
-            break;
-          case TX_16X16:
-            tx_type = get_tx_type(plane_type, xd);
+          tx_type = DCT_DCT;
+          vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
+        } else {
+          const PLANE_TYPE plane_type = pd->plane_type;
 #if CONFIG_TX_SKIP
-            if (mbmi->tx_skip[plane != 0]) {
-              vp9_tx_identity_add(dqcoeff, dst, stride, 16, shift);
-            } else {
-              vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
-            }
-#else
-            vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
-#endif
-            break;
-          case TX_32X32:
+          if (mbmi->tx_skip[plane != 0]) {
+            int bs = 4 << tx_size;
+            if (tx_size <= TX_32X32 &&
+                (mode == H_PRED || mode == V_PRED || mode == TM_PRED))
+              vp9_intra_dpcm_add(dqcoeff, dst, stride, mode, bs, shift);
+            else
+              vp9_tx_identity_add(dqcoeff, dst, stride, bs, shift);
             tx_type = DCT_DCT;
-#if CONFIG_TX_SKIP
-            if (mbmi->tx_skip[plane != 0]) {
-              vp9_tx_identity_add(dqcoeff, dst, stride, 32, shift);
-            } else {
-              vp9_idct32x32_add(dqcoeff, dst, stride, eob);
-            }
-#else
-            vp9_idct32x32_add(dqcoeff, dst, stride, eob);
-#endif
-            break;
+            if (tx_size == TX_4X4)
+              tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
+            else if (tx_size <= TX_16X16)
+              tx_type = get_tx_type(pd->plane_type, xd);
+          } else {
+#endif  // CONFIG_TX_SKIP
+
+            switch (tx_size) {
+              case TX_4X4:
+                tx_type = get_tx_type_4x4(plane_type, xd, block);
+                vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
+                break;
+              case TX_8X8:
+                tx_type = get_tx_type(plane_type, xd);
+                vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
+                break;
+              case TX_16X16:
+                tx_type = get_tx_type(plane_type, xd);
+                vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
+                break;
+              case TX_32X32:
+                tx_type = DCT_DCT;
+                vp9_idct32x32_add(dqcoeff, dst, stride, eob);
+                break;
 #if CONFIG_TX64X64
-          case TX_64X64:
-            tx_type = DCT_DCT;
-#if CONFIG_TX_SKIP
-            if (mbmi->tx_skip[plane != 0]) {
-              vp9_tx_identity_add(dqcoeff, dst, stride, 64, shift);
-            } else {
-              vp9_idct64x64_add(dqcoeff, dst, stride, eob);
+              case TX_64X64:
+                tx_type = DCT_DCT;
+                vp9_idct64x64_add(dqcoeff, dst, stride, eob);
+                break;
+#endif  // CONFIG_TX64X64
+              default:
+                assert(0 && "Invalid transform size");
+                return;
             }
-#else
-              vp9_idct64x64_add(dqcoeff, dst, stride, eob);
+#if CONFIG_TX_SKIP
+          }
 #endif  // CONFIG_TX_SKIP
-            break;
-#endif  // CONFIG_TX64X64
-          default:
-            assert(0 && "Invalid transform size");
-            return;
         }
       }
-    }
 
 #else  // CONFIG_VP9_HIGHBITDEPTH
 
@@ -593,7 +629,14 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
   if ((mi->mbmi.skip || no_coeff) && mi->mbmi.tx_skip[plane != 0] &&
       mode == TM_PRED && tx_size <= TX_32X32) {
     int bs = 4 * (1 << tx_size);
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+      vp9_highbd_intra_dpcm_add_nocoeff(dst, pd->dst.stride, mode, bs, xd->bd);
+    else
+      vp9_intra_dpcm_add_nocoeff(dst, pd->dst.stride, mode, bs);
+#else
     vp9_intra_dpcm_add_nocoeff(dst, pd->dst.stride, mode, bs);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
   }
 #endif
 
@@ -601,9 +644,16 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
   if ((mi->mbmi.skip || no_coeff) && mi->mbmi.tx_skip[plane != 0] &&
       (mode == H_PRED || mode == V_PRED) && fbit && tx_size <= TX_32X32) {
     int bs = 4 * (1 << tx_size);
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+      vp9_highbd_intra_dpcm_add_nocoeff(dst, pd->dst.stride, mode, bs, xd->bd);
+    else
+      vp9_intra_dpcm_add_nocoeff(dst, pd->dst.stride, mode, bs);
+#else
     vp9_intra_dpcm_add_nocoeff(dst, pd->dst.stride, mode, bs);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
   }
-#endif
+#endif  // CONFIG_TX_SKIP && CONFIG_FILTERINTRA
 }
 
 struct inter_args {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 3d009efa9..55da4706c 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -1460,24 +1460,51 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
 #if CONFIG_TX_SKIP
   if (mbmi->tx_skip[plane != 0]) {
     int bs = 4 << tx_size;
+#if CONFIG_VP9_HIGHBITDEPTH
+    int use_hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
     vp9_tx_identity(src_diff, coeff, diff_stride, bs, shift);
     if (tx_size <= TX_16X16) {
-      vp9_quantize_fp(coeff, bs * bs, x->skip_block, p->zbin, p->round,
-                      p->quant, p->quant_shift, qcoeff, dqcoeff,
-                      pd->dequant, eob,
-                      scan_order->scan, scan_order->iscan);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        vp9_highbd_quantize_fp(coeff, bs * bs, x->skip_block, p->zbin, p->round,
+                               p->quant, p->quant_shift, qcoeff, dqcoeff,
+                               pd->dequant, eob,
+                               scan_order->scan, scan_order->iscan);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vp9_quantize_fp(coeff, bs * bs, x->skip_block, p->zbin, p->round,
+                        p->quant, p->quant_shift, qcoeff, dqcoeff,
+                        pd->dequant, eob,
+                        scan_order->scan, scan_order->iscan);
     } else if (tx_size == TX_32X32) {
-      vp9_quantize_fp_32x32(coeff, bs * bs, x->skip_block, p->zbin, p->round,
-                            p->quant, p->quant_shift, qcoeff, dqcoeff,
-                            pd->dequant, eob, scan_order->scan,
-                            scan_order->iscan);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        vp9_highbd_quantize_fp_32x32(coeff, bs * bs, x->skip_block, p->zbin,
+                                     p->round, p->quant, p->quant_shift,
+                                     qcoeff, dqcoeff, pd->dequant, eob,
+                                     scan_order->scan, scan_order->iscan);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vp9_quantize_fp_32x32(coeff, bs * bs, x->skip_block, p->zbin, p->round,
+                              p->quant, p->quant_shift, qcoeff, dqcoeff,
+                              pd->dequant, eob, scan_order->scan,
+                              scan_order->iscan);
     }
 #if CONFIG_TX64X64
     else if (tx_size == TX_64X64) {
-      vp9_quantize_fp_64x64(coeff, bs * bs, x->skip_block, p->zbin, p->round,
-                            p->quant, p->quant_shift, qcoeff, dqcoeff,
-                            pd->dequant, eob, scan_order->scan,
-                            scan_order->iscan);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        vp9_highbd_quantize_fp_64x64(coeff, bs * bs, x->skip_block, p->zbin,
+                                     p->round, p->quant, p->quant_shift,
+                                     qcoeff, dqcoeff, pd->dequant, eob,
+                                     scan_order->scan, scan_order->iscan);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vp9_quantize_fp_64x64(coeff, bs * bs, x->skip_block, p->zbin, p->round,
+                              p->quant, p->quant_shift, qcoeff, dqcoeff,
+                              pd->dequant, eob, scan_order->scan,
+                              scan_order->iscan);
     }
 #endif  // CONFIG_TX64X64
 
@@ -1625,21 +1652,45 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
 #if CONFIG_TX_SKIP
   if (mbmi->tx_skip[plane != 0]) {
     int bs = 4 << tx_size;
+#if CONFIG_VP9_HIGHBITDEPTH
+    int use_hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
     vp9_tx_identity(src_diff, coeff, diff_stride, bs, shift);
     if (tx_size <= TX_16X16) {
-      vp9_quantize_dc(coeff, x->skip_block, p->round,
-                      p->quant_fp[0], qcoeff, dqcoeff,
-                      pd->dequant[0], eob);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        vp9_highbd_quantize_dc(coeff, x->skip_block, p->round,
+                               p->quant_fp[0], qcoeff, dqcoeff,
+                               pd->dequant[0], eob);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vp9_quantize_dc(coeff, x->skip_block, p->round,
+                        p->quant_fp[0], qcoeff, dqcoeff,
+                        pd->dequant[0], eob);
     } else if (tx_size == TX_32X32) {
-      vp9_quantize_dc_32x32(coeff, x->skip_block, p->round,
-                            p->quant_fp[0], qcoeff, dqcoeff,
-                            pd->dequant[0], eob);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        vp9_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
+                                     p->quant_fp[0], qcoeff, dqcoeff,
+                                     pd->dequant[0], eob);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vp9_quantize_dc_32x32(coeff, x->skip_block, p->round,
+                              p->quant_fp[0], qcoeff, dqcoeff,
+                              pd->dequant[0], eob);
     }
 #if CONFIG_TX64X64
     else if (tx_size == TX_64X64) {
-      vp9_quantize_dc_64x64(coeff, x->skip_block, p->round,
-                            p->quant_fp[0], qcoeff, dqcoeff,
-                            pd->dequant[0], eob);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        vp9_highbd_quantize_dc_64x64(coeff, x->skip_block, p->round,
+                                     p->quant_fp[0], qcoeff, dqcoeff,
+                                     pd->dequant[0], eob);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vp9_quantize_dc_64x64(coeff, x->skip_block, p->round,
+                              p->quant_fp[0], qcoeff, dqcoeff,
+                              pd->dequant[0], eob);
     }
 #endif  // CONFIG_TX64X64
 
@@ -1775,24 +1826,51 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
 #if CONFIG_TX_SKIP
   if (mbmi->tx_skip[plane != 0]) {
     int bs = 4 << tx_size;
+#if CONFIG_VP9_HIGHBITDEPTH
+    int use_hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
     vp9_tx_identity(src_diff, coeff, diff_stride, bs, shift);
     if (tx_size <= TX_16X16) {
-      vp9_quantize_b(coeff, bs * bs, x->skip_block, p->zbin, p->round,
-                     p->quant, p->quant_shift, qcoeff, dqcoeff,
-                     pd->dequant, eob,
-                     scan_order->scan, scan_order->iscan);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        vp9_highbd_quantize_b(coeff, bs * bs, x->skip_block, p->zbin, p->round,
+                              p->quant, p->quant_shift, qcoeff, dqcoeff,
+                              pd->dequant, eob,
+                              scan_order->scan, scan_order->iscan);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vp9_quantize_b(coeff, bs * bs, x->skip_block, p->zbin, p->round,
+                       p->quant, p->quant_shift, qcoeff, dqcoeff,
+                       pd->dequant, eob,
+                       scan_order->scan, scan_order->iscan);
     } else if (tx_size == TX_32X32) {
-      vp9_quantize_b_32x32(coeff, bs * bs, x->skip_block, p->zbin, p->round,
-                           p->quant, p->quant_shift, qcoeff, dqcoeff,
-                           pd->dequant, eob, scan_order->scan,
-                           scan_order->iscan);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        vp9_highbd_quantize_b_32x32(coeff, bs * bs, x->skip_block, p->zbin,
+                                    p->round, p->quant, p->quant_shift,
+                                    qcoeff, dqcoeff, pd->dequant, eob,
+                                    scan_order->scan, scan_order->iscan);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vp9_quantize_b_32x32(coeff, bs * bs, x->skip_block, p->zbin, p->round,
+                             p->quant, p->quant_shift, qcoeff, dqcoeff,
+                             pd->dequant, eob, scan_order->scan,
+                             scan_order->iscan);
     }
 #if CONFIG_TX64X64
     else if (tx_size == TX_64X64) {
-      vp9_quantize_b_64x64(coeff, bs * bs, x->skip_block, p->zbin, p->round,
-                           p->quant, p->quant_shift, qcoeff, dqcoeff,
-                           pd->dequant, eob, scan_order->scan,
-                           scan_order->iscan);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        vp9_highbd_quantize_b_64x64(coeff, bs * bs, x->skip_block, p->zbin,
+                                    p->round, p->quant, p->quant_shift, qcoeff,
+                                    dqcoeff, pd->dequant, eob, scan_order->scan,
+                                    scan_order->iscan);
+      else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        vp9_quantize_b_64x64(coeff, bs * bs, x->skip_block, p->zbin, p->round,
+                             p->quant, p->quant_shift, qcoeff, dqcoeff,
+                             pd->dequant, eob, scan_order->scan,
+                             scan_order->iscan);
     }
 #endif  // CONFIG_TX64X64
 
@@ -2011,28 +2089,15 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
 
 #if CONFIG_TX_SKIP
   if (mbmi->tx_skip[plane != 0]) {
-    switch (tx_size) {
-#if CONFIG_TX64X64
-      case TX_64X64:
-        vp9_tx_identity_add(dqcoeff, dst, pd->dst.stride, 64, shift);
-        break;
-#endif  // CONFIG_TX64X64
-      case TX_32X32:
-        vp9_tx_identity_add(dqcoeff, dst, pd->dst.stride, 32, shift);
-        break;
-      case TX_16X16:
-        vp9_tx_identity_add(dqcoeff, dst, pd->dst.stride, 16, shift);
-        break;
-      case TX_8X8:
-        vp9_tx_identity_add(dqcoeff, dst, pd->dst.stride, 8, shift);
-        break;
-      case TX_4X4:
-        vp9_tx_identity_add(dqcoeff, dst, pd->dst.stride, 4, shift);
-        break;
-      default:
-        assert(0 && "Invalid transform size");
-        break;
-    }
+    int bs = 4 << tx_size;
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+      vp9_highbd_tx_identity_add(dqcoeff, dst, pd->dst.stride, bs, shift,
+                                 xd->bd);
+    else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+      vp9_tx_identity_add(dqcoeff, dst, pd->dst.stride, bs, shift);
+
     return;
   }
 #endif  // CONFIG_TX_SKIP
@@ -2257,9 +2322,7 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride,
                           TX_SIZE tx_size, int shift, int logsizeby32) {
   int i, j, eob, temp;
   const int bs = 4 << tx_size;
-#if CONFIG_NEW_QUANT
-  // const uint8_t* band = get_band_translate(tx_size);
-#endif
+
   vpx_memset(qcoeff, 0, bs * bs * sizeof(*qcoeff));
   vpx_memset(dqcoeff, 0, bs * bs * sizeof(*dqcoeff));
 
@@ -2273,10 +2336,10 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride,
                              diff_stride, bs, shift);
         vp9_quantize_rect(coeff + i, bs, 1, p->zbin, p->round, p->quant,
                           p->quant_shift, qcoeff + i, dqcoeff + i,
-                          pd->dequant, logsizeby32, bs, i == 0);
+                          pd->dequant, logsizeby32, bs, i == 0, 0);
         vp9_tx_identity_add_rect(dqcoeff + i, dst + i, bs, 1,
                                  bs, dst_stride, shift);
-        if ( i < bs - 1 && 1)
+        if (i < bs - 1)
           for (j = 0 ; j < bs; j++)
             *(dst + j * dst_stride + i + 1) =
                 *(dst + j * dst_stride + i);
@@ -2293,7 +2356,7 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride,
                              diff_stride, bs, shift);
         vp9_quantize_rect(coeff + bs * i, 1, bs, p->zbin, p->round, p->quant,
                           p->quant_shift, qcoeff + bs * i, dqcoeff + bs * i,
-                          pd->dequant, logsizeby32, bs, i == 0);
+                          pd->dequant, logsizeby32, bs, i == 0, 0);
         vp9_tx_identity_add_rect(dqcoeff + bs * i, dst + dst_stride * i,
                                  1, bs, bs, dst_stride, shift);
         if (i < bs - 1)
@@ -2307,7 +2370,7 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride,
       vp9_tx_identity_rect(src_diff, coeff, 1, bs, diff_stride, bs, shift);
       vp9_quantize_rect(coeff, 1, bs, p->zbin, p->round, p->quant,
                         p->quant_shift, qcoeff, dqcoeff, pd->dequant,
-                        logsizeby32, bs, 1);
+                        logsizeby32, bs, 1, 0);
       vp9_tx_identity_add_rect(dqcoeff, dst, 1, bs, bs, dst_stride, shift);
 
       vp9_subtract_block_c(bs -1, 1, src_diff + diff_stride, diff_stride,
@@ -2317,7 +2380,7 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride,
                            diff_stride, bs, shift);
       vp9_quantize_rect(coeff + bs, bs - 1, 1, p->zbin, p->round, p->quant,
                         p->quant_shift, qcoeff + bs, dqcoeff + bs,
-                        pd->dequant, logsizeby32, bs, 0);
+                        pd->dequant, logsizeby32, bs, 0, 0);
       vp9_tx_identity_add_rect(dqcoeff + bs, dst + dst_stride, bs - 1, 1,
                                bs, dst_stride, shift);
 
@@ -2337,7 +2400,7 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride,
           vp9_quantize_rect(coeff + bs * i + j, 1, 1, p->zbin, p->round,
                             p->quant, p->quant_shift, qcoeff + bs * i + j,
                             dqcoeff + bs * i + j, pd->dequant,
-                            logsizeby32, bs, 0);
+                            logsizeby32, bs, 0, 0);
           vp9_tx_identity_add_rect(dqcoeff + bs * i + j,
                                    dst + dst_stride * i + j, 1, 1, bs,
                                    dst_stride, shift);
@@ -2351,6 +2414,113 @@ static int vp9_dpcm_intra(uint8_t *src, int src_stride,
   eob = get_eob(qcoeff, bs * bs, scan_order->scan);
   return eob;
 }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static int vp9_highbd_dpcm_intra(uint8_t *src, int src_stride,  // High-bitdepth DPCM intra coding for H_PRED/V_PRED/TM_PRED; returns the eob from get_eob().
+                                 uint8_t *dst, int dst_stride,
+                                 int16_t *src_diff, int diff_stride,
+                                 tran_low_t *coeff, tran_low_t *qcoeff,
+                                 tran_low_t *dqcoeff,
+                                 struct macroblock_plane *p,
+                                 struct macroblockd_plane *pd,
+                                 const scan_order *scan_order,
+                                 PREDICTION_MODE mode, TX_SIZE tx_size,
+                                 int shift, int logsizeby32, int bd) {  // shift/logsizeby32 are forwarded to the identity transform and quantizer, bd to the highbd helpers.
+  int i, j, eob, temp;
+  const int bs = 4 << tx_size;
+  uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);  // 16-bit view of dst for the direct sample reads/writes below.
+
+  vpx_memset(qcoeff, 0, bs * bs * sizeof(*qcoeff));  // Start from all-zero quantized coefficients ...
+  vpx_memset(dqcoeff, 0, bs * bs * sizeof(*dqcoeff));  // ... and all-zero dequantized coefficients.
+
+  switch (mode) {
+    case H_PRED:  // Column by column: code column i, then copy its updated dst samples into column i + 1.
+      for (i = 0 ; i < bs; i++) {
+        vp9_highbd_subtract_block_c(bs, 1, src_diff + i, diff_stride,
+                                    src + i, src_stride, dst + i,
+                                    dst_stride, bd);
+        vp9_tx_identity_rect(src_diff + i, coeff + i, bs, 1,
+                             diff_stride, bs, shift);
+        vp9_quantize_rect(coeff + i, bs, 1, p->zbin, p->round, p->quant,
+                          p->quant_shift, qcoeff + i, dqcoeff + i,
+                          pd->dequant, logsizeby32, bs, i == 0, 1);  // has_dc is set only for the first column; the trailing 1 is the hbd flag.
+        vp9_highbd_tx_identity_add_rect(dqcoeff + i, dst + i, bs, 1,
+                                        bs, dst_stride, shift, bd);
+        if (i < bs - 1)
+          for (j = 0 ; j < bs; j++)
+            *(dst16 + j * dst_stride + i + 1) =
+                *(dst16 + j * dst_stride + i);
+      }
+      break;
+    case V_PRED:  // Row by row: code row i, then copy its updated dst samples into row i + 1.
+      for (i = 0 ; i < bs; i++) {
+        vp9_highbd_subtract_block_c(1, bs, src_diff + diff_stride * i,
+                                    diff_stride, src + src_stride * i,
+                                    src_stride,  dst + dst_stride * i,
+                                    dst_stride, bd);
+        vp9_tx_identity_rect(src_diff + diff_stride * i, coeff + bs * i, 1, bs,
+                             diff_stride, bs, shift);
+        vp9_quantize_rect(coeff + bs * i, 1, bs, p->zbin, p->round, p->quant,
+                          p->quant_shift, qcoeff + bs * i, dqcoeff + bs * i,
+                          pd->dequant, logsizeby32, bs, i == 0, 1);  // has_dc is set only for the first row; the trailing 1 is the hbd flag.
+        vp9_highbd_tx_identity_add_rect(dqcoeff + bs * i, dst + dst_stride * i,
+                                        1, bs, bs, dst_stride, shift, bd);
+        if (i < bs - 1)
+          vpx_memcpy(dst16 + (i + 1) * dst_stride,
+                     dst16 + i * dst_stride, bs * sizeof(dst16[0]));
+      }
+      break;
+    case TM_PRED:  // Three passes: the first row, the rest of the first column, then every remaining sample one at a time.
+      vp9_highbd_subtract_block_c(1, bs, src_diff, diff_stride, src, src_stride,
+                                  dst, dst_stride, bd);
+      vp9_tx_identity_rect(src_diff, coeff, 1, bs, diff_stride, bs, shift);
+      vp9_quantize_rect(coeff, 1, bs, p->zbin, p->round, p->quant,
+                        p->quant_shift, qcoeff, dqcoeff, pd->dequant,
+                        logsizeby32, bs, 1, 1);  // First row: has_dc = 1, hbd = 1.
+      vp9_highbd_tx_identity_add_rect(dqcoeff, dst, 1, bs, bs, dst_stride,
+                                      shift, bd);
+      vp9_highbd_subtract_block_c(bs -1, 1, src_diff + diff_stride, diff_stride,
+                                  src + src_stride, src_stride,
+                                  dst + dst_stride, dst_stride, bd);
+      vp9_tx_identity_rect(src_diff + diff_stride, coeff + bs, bs - 1, 1,
+                           diff_stride, bs, shift);
+      vp9_quantize_rect(coeff + bs, bs - 1, 1, p->zbin, p->round, p->quant,
+                        p->quant_shift, qcoeff + bs, dqcoeff + bs,
+                        pd->dequant, logsizeby32, bs, 0, 1);  // Remaining first-column samples: has_dc = 0, hbd = 1.
+      vp9_highbd_tx_identity_add_rect(dqcoeff + bs, dst + dst_stride, bs - 1, 1,
+                                      bs, dst_stride, shift, bd);
+
+      for (i = 1 ; i < bs; i++) {
+        for (j = 1 ; j < bs; j++) {
+          temp = dst16[(i - 1) * dst_stride + j] +  // above + left - above-left, all read from dst.
+              dst16[i * dst_stride + j - 1] -
+              dst16[(i - 1) * dst_stride + j - 1];
+          dst16[i * dst_stride + j] = clip_pixel_highbd(temp, bd);  // The clipped value is written to dst and used as the prediction in the 1x1 subtract below.
+          vp9_highbd_subtract_block_c(1, 1, src_diff + diff_stride * i + j,
+                                      diff_stride, src + src_stride * i + j,
+                                      src_stride, dst + dst_stride * i + j,
+                                      dst_stride, bd);
+          vp9_tx_identity_rect(src_diff + i * diff_stride + j,
+                               coeff + bs * i + j, 1, 1, diff_stride,
+                               bs, shift);
+          vp9_quantize_rect(coeff + bs * i + j, 1, 1, p->zbin, p->round,
+                            p->quant, p->quant_shift, qcoeff + bs * i + j,
+                            dqcoeff + bs * i + j, pd->dequant,
+                            logsizeby32, bs, 0, 1);
+          vp9_highbd_tx_identity_add_rect(dqcoeff + bs * i + j,
+                                          dst + dst_stride * i + j, 1, 1, bs,
+                                          dst_stride, shift, bd);
+        }
+      }
+      break;
+    default:  // Any other mode: nothing is coded here, so qcoeff/dqcoeff keep the zeros written above.
+      break;
+  }
+
+  eob = get_eob(qcoeff, bs * bs, scan_order->scan);  // eob is computed by get_eob() over all bs * bs quantized coefficients.
+  return eob;
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_TX_SKIP
 
 static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -2397,6 +2567,9 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
   if (mbmi->tx_skip[plane != 0]) {
     int shift = mbmi->tx_skip_shift;
     int bs = 4 << tx_size;
+#if CONFIG_VP9_HIGHBITDEPTH
+    int use_hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #if CONFIG_NEW_QUANT
     band = vp9_coefband_tx_skip;
 #endif  // CONFIG_NEW_QUANT
@@ -2422,24 +2595,89 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                     dst, dst_stride, i, j, plane);
 
     if (!x->skip_recode && tx_size <= TX_32X32 &&
-        (mode == V_PRED || mode == H_PRED || mode == TM_PRED)) {
+        (mode == H_PRED || mode == V_PRED || mode == TM_PRED)) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        *eob = vp9_highbd_dpcm_intra(src, src_stride, dst, dst_stride,
+                                     src_diff, diff_stride,
+                                     coeff, qcoeff, dqcoeff, p, pd,
+                                     scan_order, mode, tx_size, shift,
+                                     tx_size > TX_16X16 ? 0 : -1, xd->bd);
+      else
+        *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
+                              src_diff, diff_stride,
+                              coeff, qcoeff, dqcoeff, p, pd,
+                              scan_order, mode, tx_size, shift,
+                              tx_size > TX_16X16 ? 0 : -1);
+#else
       *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
                             src_diff, diff_stride,
                             coeff, qcoeff, dqcoeff, p, pd,
                             scan_order, mode, tx_size, shift,
                             tx_size > TX_16X16 ? 0 : -1);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       if (*eob)
         *(args->skip) = 0;
       return;
     }
 
+
     if (!x->skip_recode) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd) {
+        vp9_highbd_subtract_block(bs, bs, src_diff, diff_stride,
+                                  src, src_stride, dst, dst_stride, xd->bd);
+        vp9_tx_identity(src_diff, coeff, diff_stride, bs, shift);
+      } else {
+        vp9_subtract_block(bs, bs, src_diff, diff_stride,
+                           src, src_stride, dst, dst_stride);
+        vp9_tx_identity(src_diff, coeff, diff_stride, bs, shift);
+      }
+#else
       vp9_subtract_block(bs, bs, src_diff, diff_stride,
                          src, src_stride, dst, dst_stride);
       vp9_tx_identity(src_diff, coeff, diff_stride, bs, shift);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 
       if (tx_size <= TX_16X16) {
 #if CONFIG_NEW_QUANT
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (use_hbd) {
+          if (x->quant_fp)
+            vp9_highbd_quantize_fp_nuq(coeff, bs * bs, x->skip_block,
+                                       p->quant_fp, pd->dequant,
+                                       (const cumbins_type_nuq *)p->cumbins_nuq,
+                                       (const dequant_val_type_nuq *)
+                                       pd->dequant_val_nuq,
+                                       qcoeff, dqcoeff, eob,
+                                       scan_order->scan, band);
+          else
+            vp9_highbd_quantize_nuq(coeff, bs * bs, x->skip_block,
+                                    p->quant, p->quant_shift, pd->dequant,
+                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const dequant_val_type_nuq *)
+                                    pd->dequant_val_nuq,
+                                    qcoeff, dqcoeff, eob,
+                                    scan_order->scan, band);
+        } else {
+          if (x->quant_fp)
+            vp9_quantize_fp_nuq(coeff, bs * bs, x->skip_block,
+                                p->quant_fp, pd->dequant,
+                                (const cumbins_type_nuq *)p->cumbins_nuq,
+                                (const dequant_val_type_nuq *)
+                                pd->dequant_val_nuq,
+                                qcoeff, dqcoeff, eob,
+                                scan_order->scan, band);
+          else
+            vp9_quantize_nuq(coeff, bs * bs, x->skip_block,
+                             p->quant, p->quant_shift, pd->dequant,
+                             (const cumbins_type_nuq *)p->cumbins_nuq,
+                             (const dequant_val_type_nuq *)
+                             pd->dequant_val_nuq,
+                             qcoeff, dqcoeff, eob,
+                             scan_order->scan, band);
+        }
+#else  // CONFIG_VP9_HIGHBITDEPTH
         if (x->quant_fp)
           vp9_quantize_fp_nuq(coeff, bs * bs, x->skip_block,
                               p->quant_fp, pd->dequant,
@@ -2456,14 +2694,67 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                            pd->dequant_val_nuq,
                            qcoeff, dqcoeff, eob,
                            scan_order->scan, band);
-#else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#else  // CONFIG_NEW_QUANT
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (use_hbd)
+          vp9_highbd_quantize_b(coeff, bs * bs, x->skip_block, p->zbin,
+                                p->round, p->quant, p->quant_shift, qcoeff,
+                                dqcoeff, pd->dequant, eob,
+                                scan_order->scan, scan_order->iscan);
+        else
+          vp9_quantize_b(coeff, bs * bs, x->skip_block, p->zbin, p->round,
+                         p->quant, p->quant_shift, qcoeff, dqcoeff,
+                         pd->dequant, eob, scan_order->scan,
+                         scan_order->iscan);
+#else  // CONFIG_VP9_HIGHBITDEPTH
         vp9_quantize_b(coeff, bs * bs, x->skip_block, p->zbin, p->round,
                        p->quant, p->quant_shift, qcoeff, dqcoeff,
                        pd->dequant, eob, scan_order->scan,
                        scan_order->iscan);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_NEW_QUANT
       } else if (tx_size == TX_32X32) {
 #if CONFIG_NEW_QUANT
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (use_hbd) {
+          if (x->quant_fp)
+            vp9_highbd_quantize_32x32_fp_nuq(coeff, bs * bs, x->skip_block,
+                                             p->quant_fp, pd->dequant,
+                                             (const cumbins_type_nuq *)p->
+                                             cumbins_nuq,
+                                             (const dequant_val_type_nuq *)
+                                             pd->dequant_val_nuq,
+                                             qcoeff, dqcoeff, eob,
+                                             scan_order->scan, band);
+          else
+            vp9_highbd_quantize_32x32_nuq(coeff, bs * bs, x->skip_block,
+                                          p->quant, p->quant_shift, pd->dequant,
+                                          (const cumbins_type_nuq *)
+                                          p->cumbins_nuq,
+                                          (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                          qcoeff, dqcoeff, eob,
+                                          scan_order->scan, band);
+        } else {
+          if (x->quant_fp)
+            vp9_quantize_32x32_fp_nuq(coeff, bs * bs, x->skip_block,
+                                      p->quant_fp, pd->dequant,
+                                      (const cumbins_type_nuq *)p->cumbins_nuq,
+                                      (const dequant_val_type_nuq *)
+                                      pd->dequant_val_nuq,
+                                      qcoeff, dqcoeff, eob,
+                                      scan_order->scan, band);
+          else
+            vp9_quantize_32x32_nuq(coeff, bs * bs, x->skip_block,
+                                   p->quant, p->quant_shift, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+        }
+#else  // CONFIG_VP9_HIGHBITDEPTH
         if (x->quant_fp)
           vp9_quantize_32x32_fp_nuq(coeff, bs * bs, x->skip_block,
                                     p->quant_fp, pd->dequant,
@@ -2480,20 +2771,75 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                  pd->dequant_val_nuq,
                                  qcoeff, dqcoeff, eob,
                                  scan_order->scan, band);
-#else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#else  // CONFIG_NEW_QUANT
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (use_hbd)
+          vp9_highbd_quantize_b_32x32(coeff, bs * bs, x->skip_block, p->zbin,
+                                      p->round, p->quant, p->quant_shift,
+                                      qcoeff, dqcoeff, pd->dequant, eob,
+                                      scan_order->scan, scan_order->iscan);
+        else
+          vp9_quantize_b_32x32(coeff, bs * bs, x->skip_block, p->zbin,
+                               p->round, p->quant, p->quant_shift, qcoeff,
+                               dqcoeff, pd->dequant, eob,
+                               scan_order->scan, scan_order->iscan);
+#else  // CONFIG_VP9_HIGHBITDEPTH
         vp9_quantize_b_32x32(coeff, bs * bs, x->skip_block, p->zbin,
                              p->round, p->quant, p->quant_shift, qcoeff,
                              dqcoeff, pd->dequant, eob,
                              scan_order->scan, scan_order->iscan);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_NEW_QUANT
       }
 #if CONFIG_TX64X64
       else if (tx_size == TX_64X64) {
 #if CONFIG_NEW_QUANT
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (use_hbd) {
+          if (x->quant_fp)
+            vp9_highbd_quantize_64x64_fp_nuq(coeff, bs * bs, x->skip_block,
+                                             p->quant_fp, pd->dequant,
+                                             (const cumbins_type_nuq *)
+                                             p->cumbins_nuq,
+                                             (const dequant_val_type_nuq *)
+                                             pd->dequant_val_nuq,
+                                             qcoeff, dqcoeff, eob,
+                                             scan_order->scan, band);
+          else
+            vp9_highbd_quantize_64x64_nuq(coeff, bs * bs, x->skip_block,
+                                          p->quant, p->quant_shift, pd->dequant,
+                                          (const cumbins_type_nuq *)
+                                          p->cumbins_nuq,
+                                          (const dequant_val_type_nuq *)
+                                          pd->dequant_val_nuq,
+                                          qcoeff, dqcoeff, eob,
+                                          scan_order->scan, band);
+        } else {
+          if (x->quant_fp)
+            vp9_quantize_64x64_fp_nuq(coeff, bs * bs, x->skip_block,
+                                      p->quant_fp, pd->dequant,
+                                      (const cumbins_type_nuq *)
+                                      p->cumbins_nuq,
+                                      (const dequant_val_type_nuq *)
+                                      pd->dequant_val_nuq,
+                                      qcoeff, dqcoeff, eob,
+                                      scan_order->scan, band);
+          else
+            vp9_quantize_64x64_nuq(coeff, bs * bs, x->skip_block,
+                                   p->quant, p->quant_shift, pd->dequant,
+                                   (const cumbins_type_nuq *)p->cumbins_nuq,
+                                   (const dequant_val_type_nuq *)
+                                   pd->dequant_val_nuq,
+                                   qcoeff, dqcoeff, eob,
+                                   scan_order->scan, band);
+        }
+#else
         if (x->quant_fp)
           vp9_quantize_64x64_fp_nuq(coeff, bs * bs, x->skip_block,
                                     p->quant_fp, pd->dequant,
-                                    (const cumbins_type_nuq *)p->cumbins_nuq,
+                                    (const cumbins_type_nuq *)
+                                    p->cumbins_nuq,
                                     (const dequant_val_type_nuq *)
                                     pd->dequant_val_nuq,
                                     qcoeff, dqcoeff, eob,
@@ -2506,18 +2852,41 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                  pd->dequant_val_nuq,
                                  qcoeff, dqcoeff, eob,
                                  scan_order->scan, band);
-#else
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#else  // CONFIG_NEW_QUANT
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (use_hbd)
+          vp9_highbd_quantize_b_64x64(coeff, bs * bs, x->skip_block, p->zbin,
+                                      p->round, p->quant, p->quant_shift,
+                                      qcoeff, dqcoeff, pd->dequant, eob,
+                                      scan_order->scan,  scan_order->iscan);
+        else
+          vp9_quantize_b_64x64(coeff, bs * bs, x->skip_block, p->zbin,
+                               p->round, p->quant, p->quant_shift, qcoeff,
+                               dqcoeff, pd->dequant, eob,
+                               scan_order->scan,  scan_order->iscan);
+#else  // CONFIG_VP9_HIGHBITDEPTH
         vp9_quantize_b_64x64(coeff, bs * bs, x->skip_block, p->zbin,
                              p->round, p->quant, p->quant_shift, qcoeff,
                              dqcoeff, pd->dequant, eob,
                              scan_order->scan,  scan_order->iscan);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_NEW_QUANT
       }
 #endif  // CONFIG_TX64X64
     }
 
-    if (!x->skip_encode && *eob)
+    if (!x->skip_encode && *eob) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (use_hbd)
+        vp9_highbd_tx_identity_add(dqcoeff, dst, dst_stride, 4 << tx_size,
+                                   shift, xd->bd);
+      else
+        vp9_tx_identity_add(dqcoeff, dst, dst_stride, 4 << tx_size, shift);
+#else
       vp9_tx_identity_add(dqcoeff, dst, dst_stride, 4 << tx_size, shift);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    }
 
     if (*eob)
       *(args->skip) = 0;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index b878433ef..c460feaf7 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -1526,7 +1526,7 @@ void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col,
                        const int16_t *quant_shift_ptr,
                        tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                        const int16_t *dequant_ptr,
-                       int logsizeby32, int stride, int has_dc) {
+                       int logsizeby32, int stride, int has_dc, int hbd) {
   int r, c;
   int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0],
                                      1 + (logsizeby32 < 0 ? -1 : logsizeby32)),
@@ -1552,7 +1552,10 @@ void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col,
           abs_coeff += round_ptr[idx];
         else
           abs_coeff += ROUND_POWER_OF_TWO(round_ptr[idx], (1 + logsizeby32));
-        abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+        if (hbd)
+          abs_coeff = clamp(abs_coeff, INT32_MIN, INT32_MAX);
+        else
+          abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
         tmp = ((((abs_coeff * quant_ptr[idx]) >> 16) + abs_coeff) *
               quant_shift_ptr[idx]) >> (15 - logsizeby32);
 
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 7ac28b8fd..67f2ef38f 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -231,7 +231,7 @@ void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col,
                        const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
                        tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                        const int16_t *dequant_ptr,
-                       int logsizeby32, int stride, int has_dc);
+                       int logsizeby32, int stride, int has_dc, int hbd);
 #if CONFIG_NEW_QUANT
 void vp9_quantize_rect_nuq(const tran_low_t *coeff_ptr,
                            int row,
-- 
2.49.0