From 571fdbb05f8f5364b1dd601d5333145238d120b2 Mon Sep 17 00:00:00 2001
From: punksu <huisu@google.com>
Date: Wed, 17 Dec 2014 13:51:27 +0800
Subject: [PATCH] dpcm intra prediction for tx_skip

Implement vertical, horizontal, and TM DPCM intra prediction for
blocks coded in tx_skip mode. The typical coding gain on screen
content video is 2% to 5%.

Change-Id: Idd5bd84ac59daa586ec0cd724680cef695981651
---
 vp9/common/vp9_idct.c         |  18 +++--
 vp9/common/vp9_idct.h         |   3 +
 vp9/decoder/vp9_decodeframe.c | 124 ++++++++++++++++++++++++++++++-
 vp9/decoder/vp9_decodemv.c    |   5 +-
 vp9/encoder/vp9_bitstream.c   |   2 +-
 vp9/encoder/vp9_dct.c         |  16 ++--
 vp9/encoder/vp9_encodeframe.c |   2 +-
 vp9/encoder/vp9_encodemb.c    | 135 ++++++++++++++++++++++++++++++++++
 vp9/encoder/vp9_encodemb.h    |   3 +
 vp9/encoder/vp9_quantize.c    |  61 +++++++++++++++
 vp9/encoder/vp9_quantize.h    |  11 +++
 11 files changed, 362 insertions(+), 18 deletions(-)

diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index dd15d7920..0ca3d60a6 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -1593,15 +1593,21 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
 }
 
 #if CONFIG_TX_SKIP
-void vp9_tx_identity_add(const tran_low_t *input, uint8_t *dest,
-                         int stride, int bs, int shift) {
+void vp9_tx_identity_add_rect(const tran_low_t *input, uint8_t *dest,
+                              int row, int col,  // rectangle height, width
+                              int stride_in, int stride_out, int shift) {
   int r, c, temp;
-  for (r = 0; r < bs; r++)
-    for (c = 0; c < bs; c++) {
-      temp = dest[r * stride + c] + (input[r * bs + c] >> shift);
-      dest[r * stride + c] = clip_pixel(temp);
+  for (r = 0; r < row; r++)
+    for (c = 0; c < col; c++) {  // dest = clip_pixel(dest + (in >> shift))
+      temp = dest[r * stride_out + c] + (input[r * stride_in + c] >> shift);
+      dest[r * stride_out + c] = clip_pixel(temp);
     }
 }
+
+void vp9_tx_identity_add(const tran_low_t *input, uint8_t *dest,
+                         int stride, int bs, int shift) {  // bs x bs wrapper
+  vp9_tx_identity_add_rect(input, dest, bs, bs, bs, stride, shift);
+}
 #endif
 
 #if CONFIG_TX64X64
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 8b476ffdf..fb468aed0 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -170,6 +170,9 @@ void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
 
 #endif  // VP9_COMMON_VP9_IDCT_H_
 #if CONFIG_TX_SKIP
+void vp9_tx_identity_add_rect(const tran_low_t *input, uint8_t *dest,
+                              int row, int col, int stride_in,
+                              int stride_out, int shift);
 void vp9_tx_identity_add(const tran_low_t *input, uint8_t *dest,
                          int stride, int bs, int shift);
 #endif
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 57e4999d9..0fd53f3d6 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -202,6 +202,86 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) {
     xd->plane[i].dequant = cm->uv_dequant[q_index];
 }
 
+#if CONFIG_TX_SKIP
+static void vp9_intra_dpcm_add(tran_low_t *dqcoeff, uint8_t *dst, int stride,
+                               PREDICTION_MODE mode, int bs, int shift) {
+  int r, c, temp;
+  // Rebuilds the bs x bs block in dst in place; residual = dqcoeff >> shift.
+  switch (mode) {
+    case H_PRED:  // each pixel = reconstructed left neighbor + residual
+      for (r = 0; r < bs; r++) {  // column 0 starts from the value in dst
+        temp = dst[r * stride] + (dqcoeff[r * bs] >> shift);
+        dst[r * stride] = clip_pixel(temp);
+      }
+      for (r = 0; r < bs; r++)
+        for (c = 1; c < bs; c++) {
+          temp = dst[r * stride + c - 1] +
+              (dqcoeff[r * bs + c] >> shift);
+          dst[r * stride + c] = clip_pixel(temp);
+        }
+      break;
+    case V_PRED:  // each pixel = reconstructed pixel above + residual
+      for (c = 0; c < bs; c++) {  // row 0 starts from the value in dst
+        temp = dst[c] + (dqcoeff[c] >> shift);
+        dst[c] = clip_pixel(temp);
+      }
+      for (r = 1; r < bs; r++)
+        for (c = 0; c < bs; c++) {
+          temp = dst[(r - 1) * stride + c] +
+              (dqcoeff[r * bs + c] >> shift);
+          dst[r * stride + c] = clip_pixel(temp);
+        }
+      break;
+    case TM_PRED:  // row 0 and column 0 first, then the interior
+      for (c = 0; c < bs; c++) {
+        temp = dst[c] + (dqcoeff[c] >> shift);
+        dst[c] = clip_pixel(temp);
+      }
+      for (r = 1; r < bs; r++) {
+        temp = dst[r * stride] + (dqcoeff[r * bs] >> shift);
+        dst[r * stride] = clip_pixel(temp);
+      }
+      for (r = 1; r < bs; r++)
+        for (c = 1; c < bs; c++) {  // predictor: left + above - above-left
+          temp = dst[stride * r + c - 1] + dst[stride * (r - 1) + c] -
+                 dst[stride * (r - 1) + c - 1];
+          temp = clip_pixel(temp);
+          temp = temp + (dqcoeff[r * bs + c] >> shift);
+          dst[stride * r + c] = clip_pixel(temp);
+        }
+      break;
+    default:  // other modes leave dst unchanged
+      break;
+  }
+}
+
+static void vp9_intra_dpcm_add_nocoeff(uint8_t *dst, int stride,
+                                       PREDICTION_MODE mode, int bs) {
+  int r, c, temp;
+  // DPCM reconstruction with no residual: fills the bs x bs block in place.
+  switch (mode) {
+    case H_PRED:  // copy column 0 across each row
+      for (r = 0; r < bs; r++)
+        memset(dst + r * stride + 1, dst[r * stride], bs - 1);
+      break;
+    case V_PRED:  // copy row 0 into every row below it
+      for (r = 1; r < bs; r++)
+        memcpy(dst + r * stride, dst, bs * sizeof(*dst));
+      break;
+    case TM_PRED:  // row 0 and column 0 are kept; interior is re-predicted
+      for (r = 1; r < bs; r++)
+        for (c = 1; c < bs; c++) {  // clip_pixel(left + above - above-left)
+          temp = dst[stride * r + c - 1] + dst[stride * (r - 1) + c] -
+              dst[stride * (r - 1) + c - 1];
+          dst[stride * r + c] = clip_pixel(temp);
+        }
+      break;
+    default:  // other modes leave dst unchanged
+      break;
+  }
+}
+#endif  // CONFIG_TX_SKIP
+
 static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
                                     TX_SIZE tx_size, uint8_t *dst, int stride,
                                     int eob) {
@@ -209,6 +289,8 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
 #if CONFIG_TX_SKIP
   MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
   int shift = mbmi->tx_skip_shift;
+  PREDICTION_MODE mode = (plane == 0) ? get_y_mode(xd->mi[0].src_mi, block):
+                                        mbmi->uv_mode;
 #endif
   if (eob > 0) {
     TX_TYPE tx_type = DCT_DCT;
@@ -386,7 +468,10 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
           tx_type = get_tx_type_4x4(plane_type, xd, block);
 #if CONFIG_TX_SKIP
           if (mbmi->tx_skip[plane != 0]) {
-            vp9_tx_identity_add(dqcoeff, dst, stride, 4, shift);
+            if (mode == V_PRED || mode == H_PRED || mode == TM_PRED)
+              vp9_intra_dpcm_add(dqcoeff, dst, stride, mode, 4, shift);
+            else
+              vp9_tx_identity_add(dqcoeff, dst, stride, 4, shift);
           } else {
             vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
           }
@@ -398,7 +483,10 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
           tx_type = get_tx_type(plane_type, xd);
 #if CONFIG_TX_SKIP
           if (mbmi->tx_skip[plane != 0]) {
-            vp9_tx_identity_add(dqcoeff, dst, stride, 8, shift);
+            if (mode == V_PRED || mode == H_PRED || mode == TM_PRED)
+              vp9_intra_dpcm_add(dqcoeff, dst, stride, mode, 8, shift);
+            else
+              vp9_tx_identity_add(dqcoeff, dst, stride, 8, shift);
           } else {
             vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
           }
@@ -410,7 +498,10 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
           tx_type = get_tx_type(plane_type, xd);
 #if CONFIG_TX_SKIP
           if (mbmi->tx_skip[plane != 0]) {
-            vp9_tx_identity_add(dqcoeff, dst, stride, 16, shift);
+            if (mode == V_PRED || mode == H_PRED || mode == TM_PRED)
+              vp9_intra_dpcm_add(dqcoeff, dst, stride, mode, 16, shift);
+            else
+              vp9_tx_identity_add(dqcoeff, dst, stride, 16, shift);
           } else {
             vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
           }
@@ -422,7 +513,10 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
           tx_type = DCT_DCT;
 #if CONFIG_TX_SKIP
           if (mbmi->tx_skip[plane != 0]) {
-            vp9_tx_identity_add(dqcoeff, dst, stride, 32, shift);
+            if (mode == V_PRED || mode == H_PRED || mode == TM_PRED)
+              vp9_intra_dpcm_add(dqcoeff, dst, stride, mode, 32, shift);
+            else
+              vp9_tx_identity_add(dqcoeff, dst, stride, 32, shift);
           } else {
             vp9_idct32x32_add(dqcoeff, dst, stride, eob);;
           }
@@ -482,6 +576,9 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
                                             : mi->mbmi.uv_mode;
   int x, y;
   uint8_t *dst;
+#if CONFIG_TX_SKIP
+  int no_coeff = 0;
+#endif
 #if CONFIG_FILTERINTRA
   int fbit;
   if (plane == 0)
@@ -508,7 +605,26 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
                                             args->r);
     inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride,
                             eob);
+#if CONFIG_TX_SKIP
+    no_coeff = !eob;
+#endif
+  }
+
+#if CONFIG_TX_SKIP
+  if ((mi->mbmi.skip || no_coeff) && mi->mbmi.tx_skip[plane != 0] &&
+      mode == TM_PRED && tx_size <= TX_32X32) {
+    int bs = 4 * (1 << tx_size);
+    vp9_intra_dpcm_add_nocoeff(dst, pd->dst.stride, mode, bs);
   }
+#endif
+
+#if CONFIG_TX_SKIP && CONFIG_FILTERINTRA
+  if ((mi->mbmi.skip || no_coeff) && mi->mbmi.tx_skip[plane != 0] &&
+      (mode == H_PRED || mode == V_PRED) && fbit) {
+    int bs = 4 * (1 << tx_size);
+    vp9_intra_dpcm_add_nocoeff(dst, pd->dst.stride, mode, bs);
+  }
+#endif
 }
 
 struct inter_args {
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 4d9665c94..edf4e9098 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -746,6 +746,9 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
     xd->corrupted |= !assign_mv(cm, mbmi->mode, mbmi->mv, nearestmv,
                                 nearestmv, nearmv, is_compound, allow_hp, r);
   }
+#if CONFIG_TX_SKIP
+    mbmi->uv_mode = mbmi->mode;
+#endif
 }
 
 static void read_inter_frame_mode_info(VP9_COMMON *const cm,
@@ -859,7 +862,7 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm,
 #if CONFIG_SUPERTX
     if (try_tx_skip) {
 #else
-    if (try_tx_skip && !mbmi->skip) {
+    if (try_tx_skip && (!mbmi->skip || !inter_block)) {
 #endif  // CONFIG_SUPERTX
       if (xd->lossless) {
 #if CONFIG_SUPERTX
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 59dc6b5b7..e34f86037 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -439,7 +439,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
 #if CONFIG_SUPERTX
     if (try_tx_skip) {
 #else
-    if (try_tx_skip && !skip) {
+    if (try_tx_skip && (!skip || !is_inter)) {
 #endif  // CONFIG_SUPERTX
       if (xd->lossless) {
 #if CONFIG_SUPERTX
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 7f092b59f..706afb26e 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -1440,14 +1440,20 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
 }
 
 #if CONFIG_TX_SKIP
-void vp9_tx_identity(const int16_t *input, tran_low_t *out, int stride,
-                     int bs, int shift) {
+void vp9_tx_identity_rect(const int16_t *input, tran_low_t *out,
+                          int row, int col,  // rectangle height, width
+                          int stride_in, int stride_out, int shift) {
   int r, c;
-  for (r = 0; r < bs; r++)
-    for (c = 0; c < bs; c++) {
-      out[bs * r + c] = input[stride * r + c] << shift;
+  for (r = 0; r < row; r++)
+    for (c = 0; c < col; c++) {  // multiply: '<<' on a negative value is UB
+      out[stride_out * r + c] = input[stride_in * r + c] * (1 << shift);
     }
 }
+
+void vp9_tx_identity(const int16_t *input, tran_low_t *out, int stride,
+                     int bs, int shift) {  // bs x bs wrapper
+  vp9_tx_identity_rect(input, out, bs, bs, stride, bs, shift);
+}
 #endif
 
 #if CONFIG_TX64X64
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index dbca9b0f1..255ac2191 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -4800,7 +4800,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
 #if CONFIG_SUPERTX
       if (try_tx_skip) {
 #else
-      if (try_tx_skip && !(mbmi->skip || seg_skip)) {
+      if (try_tx_skip && (!(mbmi->skip || seg_skip) || !is_inter_block(mbmi))) {
 #endif
         ++cm->counts.y_tx_skip[is_inter_block(mbmi)][mbmi->tx_skip[0]];
         ++cm->counts.uv_tx_skip[mbmi->tx_skip[0]][mbmi->tx_skip[1]];
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index c35c504b7..4b4f36b47 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -1435,6 +1435,109 @@ void vp9_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) {
 }
 #endif
 
+#if CONFIG_TX_SKIP
+static int vp9_dpcm_intra(uint8_t *src, int src_stride,
+                          uint8_t *dst, int dst_stride,
+                          int16_t *src_diff, int diff_stride,
+                          tran_low_t *coeff, tran_low_t *qcoeff,
+                          tran_low_t *dqcoeff, struct macroblock_plane *p,
+                          struct macroblockd_plane *pd,
+                          const scan_order *scan_order, PREDICTION_MODE mode,
+                          int bs, int shift, int logsizeby32) {
+  int i, j, eob, temp;
+  vpx_memset(qcoeff, 0, bs * bs * sizeof(*qcoeff));
+  vpx_memset(dqcoeff, 0, bs * bs * sizeof(*dqcoeff));
+  // dst serves as the predictor and is overwritten with the reconstruction.
+  switch (mode) {
+    case H_PRED:  // one column per step, left to right
+      for (i = 0; i < bs; i++) {
+        vp9_subtract_block_c(bs, 1, src_diff + i, diff_stride,
+                             src + i, src_stride,
+                             dst + i, dst_stride);
+        vp9_tx_identity_rect(src_diff + i, coeff + i, bs, 1,
+                             diff_stride, bs, shift);
+        vp9_quantize_rect(coeff + i, bs, 1, p->zbin, p->round, p->quant,
+                          p->quant_shift, qcoeff + i, dqcoeff + i,
+                          pd->dequant, p->zbin_extra, logsizeby32, bs, i == 0);
+        vp9_tx_identity_add_rect(dqcoeff + i, dst + i, bs, 1,
+                                 bs, dst_stride, shift);
+        if (i < bs - 1)  // seed column i + 1 with the reconstructed column i
+          for (j = 0; j < bs; j++)
+            *(dst + j * dst_stride + i + 1) =
+                *(dst + j * dst_stride + i);
+      }
+      break;
+    case V_PRED:  // one row per step, top to bottom
+      for (i = 0; i < bs; i++) {
+        vp9_subtract_block_c(1, bs, src_diff + diff_stride * i,
+                             diff_stride,
+                             src + src_stride * i, src_stride,
+                             dst + dst_stride * i, dst_stride);
+        vp9_tx_identity_rect(src_diff + diff_stride * i,
+                             coeff + bs * i, 1, bs,
+                             diff_stride, bs, shift);
+        vp9_quantize_rect(coeff + bs * i, 1, bs, p->zbin, p->round, p->quant,
+                          p->quant_shift, qcoeff + bs * i, dqcoeff + bs * i,
+                          pd->dequant, p->zbin_extra, logsizeby32, bs, i == 0);
+        vp9_tx_identity_add_rect(dqcoeff + bs * i, dst + dst_stride * i,
+                                 1, bs, bs, dst_stride, shift);
+        if (i < bs - 1)  // seed row i + 1 with the reconstructed row i
+          vpx_memcpy(dst + (i + 1) * dst_stride,
+                     dst + i * dst_stride, bs * sizeof(dst[0]));
+      }
+      break;
+    case TM_PRED:  // row 0 first; it is the only step quantized with has_dc
+      vp9_subtract_block_c(1, bs, src_diff, diff_stride, src, src_stride,
+                           dst, dst_stride);
+      vp9_tx_identity_rect(src_diff, coeff, 1, bs, diff_stride, bs, shift);
+      vp9_quantize_rect(coeff, 1, bs, p->zbin, p->round, p->quant,
+                        p->quant_shift, qcoeff, dqcoeff, pd->dequant,
+                        p->zbin_extra, logsizeby32, bs, 1);
+      vp9_tx_identity_add_rect(dqcoeff, dst, 1, bs, bs, dst_stride, shift);
+      // Then column 0 of rows 1 .. bs - 1.
+      vp9_subtract_block_c(bs - 1, 1, src_diff + diff_stride, diff_stride,
+                           src + src_stride, src_stride,
+                           dst + dst_stride, dst_stride);
+      vp9_tx_identity_rect(src_diff + diff_stride, coeff + bs, bs - 1, 1,
+                           diff_stride, bs, shift);
+      vp9_quantize_rect(coeff + bs, bs - 1, 1, p->zbin, p->round, p->quant,
+                        p->quant_shift, qcoeff + bs, dqcoeff + bs,
+                        pd->dequant, p->zbin_extra, logsizeby32, bs, 0);
+      vp9_tx_identity_add_rect(dqcoeff + bs, dst + dst_stride, bs - 1, 1,
+                               bs, dst_stride, shift);
+      // Interior pixels, one at a time, predicted from reconstructed neighbors.
+      for (i = 1; i < bs; i++) {
+        for (j = 1; j < bs; j++) {
+          temp = dst[(i - 1) * dst_stride + j] + dst[i * dst_stride + j - 1] -
+                 dst[(i - 1) * dst_stride + j - 1];
+          temp = clip_pixel(temp);
+          dst[i * dst_stride + j] = temp;
+          vp9_subtract_block_c(1, 1, src_diff + diff_stride * i + j,
+                               diff_stride, src + src_stride * i + j,
+                               src_stride, dst + dst_stride * i + j,
+                               dst_stride);
+          vp9_tx_identity_rect(src_diff + i * diff_stride + j,
+                               coeff + bs * i + j, 1, 1, diff_stride,
+                               bs, shift);
+          vp9_quantize_rect(coeff + bs * i + j, 1, 1, p->zbin, p->round,
+                            p->quant, p->quant_shift, qcoeff + bs * i + j,
+                            dqcoeff + bs * i + j, pd->dequant,
+                            p->zbin_extra, logsizeby32, bs, 0);
+          vp9_tx_identity_add_rect(dqcoeff + bs * i + j,
+                                   dst + dst_stride * i + j, 1, 1, bs,
+                                   dst_stride, shift);
+        }
+      }
+      break;
+    default:  // other modes: coefficients stay zero, dst is untouched
+      break;
+  }
+
+  eob = get_eob(qcoeff, bs * bs, scan_order->scan);
+  return eob;  // one past the last nonzero qcoeff in scan order
+}
+#endif  // CONFIG_TX_SKIP
+
 static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                TX_SIZE tx_size, void *arg) {
   struct encode_b_args* const args = arg;
@@ -1512,6 +1615,14 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                     dst, dst_stride, i, j, plane);
 
             if (!x->skip_recode) {
+              if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) {
+                *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
+                                      src_diff, diff_stride,
+                                      coeff, qcoeff, dqcoeff, p, pd,
+                                      scan_order, mode, 32, shift, 0);
+                break;
+              }
+
               vp9_subtract_block(32, 32, src_diff, diff_stride,
                                  src, src_stride, dst, dst_stride);
               vp9_tx_identity(src_diff, coeff, diff_stride, 32, shift);
@@ -1537,6 +1648,14 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                     x->skip_encode ? src_stride : dst_stride,
                                     dst, dst_stride, i, j, plane);
             if (!x->skip_recode) {
+              if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) {
+                *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
+                                      src_diff, diff_stride,
+                                      coeff, qcoeff, dqcoeff, p, pd,
+                                      scan_order, mode, 16, shift, -1);
+                break;
+              }
+
               vp9_subtract_block(16, 16, src_diff, diff_stride,
                                  src, src_stride, dst, dst_stride);
               vp9_tx_identity(src_diff, coeff, diff_stride, 16, shift);
@@ -1561,6 +1680,14 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                     x->skip_encode ? src_stride : dst_stride,
                                     dst, dst_stride, i, j, plane);
             if (!x->skip_recode) {
+              if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) {
+                *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
+                                      src_diff, diff_stride,
+                                      coeff, qcoeff, dqcoeff, p, pd,
+                                      scan_order, mode, 8, shift, -1);
+                break;
+              }
+
               vp9_subtract_block(8, 8, src_diff, diff_stride,
                                  src, src_stride, dst, dst_stride);
               vp9_tx_identity(src_diff, coeff, diff_stride, 8, shift);
@@ -1587,6 +1714,14 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                                     dst, dst_stride, i, j, plane);
 
             if (!x->skip_recode) {
+              if (mode == V_PRED || mode == H_PRED || mode == TM_PRED) {
+                *eob = vp9_dpcm_intra(src, src_stride, dst, dst_stride,
+                                      src_diff, diff_stride,
+                                      coeff, qcoeff, dqcoeff, p, pd,
+                                      scan_order, mode, 4, shift, -1);
+                break;
+              }
+
               vp9_subtract_block(4, 4, src_diff, diff_stride,
                                  src, src_stride, dst, dst_stride);
               vp9_tx_identity(src_diff, coeff, diff_stride, 4, shift);
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 0f0f584e7..8d11c3540 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -39,6 +39,9 @@ void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block,
 void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
 
 #if CONFIG_TX_SKIP
+void vp9_tx_identity_rect(const int16_t *input, tran_low_t *out,
+                          int row, int col,
+                          int stride_in, int stride_out, int shift);
 void vp9_tx_identity(const int16_t *input, tran_low_t *out, int stride,
                      int bs, int shift);
 #endif
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index be6a41897..93c7f76b8 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -495,6 +495,67 @@ void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
   *eob_ptr = eob + 1;
 }
 
+#if CONFIG_TX_SKIP
+void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col,
+                       const int16_t *zbin_ptr, const int16_t *round_ptr,
+                       const int16_t *quant_ptr,
+                       const int16_t *quant_shift_ptr,
+                       tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                       const int16_t *dequant_ptr, int zbin_oq_value,
+                       int logsizeby32, int stride, int has_dc) {
+  int r, c;
+  int zbins[2];  // [0] is used for the DC position, [1] everywhere else
+  if (logsizeby32 < 0) {
+    logsizeby32 = -1;  // any negative value selects the unscaled path
+    zbins[0] = zbin_ptr[0] + zbin_oq_value;
+    zbins[1] = zbin_ptr[1] + zbin_oq_value;
+  } else {  // round only here, where the shift count is at least 1
+    zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1 + logsizeby32);
+    zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1 + logsizeby32);
+  }
+  // Quantizes a row x col rectangle; all three buffers are indexed by stride.
+  for (r = 0; r < row; r++)
+    for (c = 0; c < col; c++) {
+      const int coeff = coeff_ptr[r * stride + c];
+      const int coeff_sign = (coeff >> 31);
+      int tmp;
+      int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      int idx = (r == 0 && c == 0 && has_dc) ? 0 : 1;  // 0 only for the DC
+      qcoeff_ptr[r * stride + c] = dqcoeff_ptr[r * stride + c] = 0;
+
+      if (abs_coeff >= zbins[idx]) {  // below the threshold stays zero
+        if (logsizeby32 < 0)
+          abs_coeff += round_ptr[idx];
+        else
+          abs_coeff += ROUND_POWER_OF_TWO(round_ptr[idx], (1 + logsizeby32));
+        abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+        tmp = ((((abs_coeff * quant_ptr[idx]) >> 16) + abs_coeff) *
+              quant_shift_ptr[idx]) >> (15 - logsizeby32);
+
+        qcoeff_ptr[r * stride + c] = (tmp ^ coeff_sign) - coeff_sign;
+        dqcoeff_ptr[r * stride + c] = qcoeff_ptr[r * stride + c] *
+                                      dequant_ptr[idx] /
+                                      (1 << (logsizeby32 + 1));
+      }
+    }
+}
+
+int get_eob(tran_low_t *qcoeff_ptr, intptr_t n_coeffs, const int16_t *scan) {
+  // Result is one past the last nonzero quantized coefficient among the
+  // first n_coeffs scan positions, or 0 when all of them are zero.
+  int pos = (int)n_coeffs;
+
+  // Step back over trailing zeros in scan order.
+  while (pos > 0) {
+    if (qcoeff_ptr[scan[pos - 1]])
+      break;
+    --pos;
+  }
+
+  return pos > 0 ? pos : 0;
+}
+#endif
+
 #if CONFIG_VP9_HIGHBITDEPTH
 void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                              int skip_block, const int16_t *zbin_ptr,
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index f454724bf..fc7ccf7c8 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -96,6 +96,17 @@ int vp9_quantizer_to_qindex(int quantizer);
 
 int vp9_qindex_to_quantizer(int qindex);
 
+#if CONFIG_TX_SKIP
+void vp9_quantize_rect(const tran_low_t *coeff_ptr, int row, int col,
+                       const int16_t *zbin_ptr, const int16_t *round_ptr,
+                       const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+                       tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                       const int16_t *dequant_ptr, int zbin_oq_value,
+                       int logsizeby32, int stride, int has_dc);
+
+int get_eob(tran_low_t *qcoeff_ptr, intptr_t n_coeffs, const int16_t *scan);
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
-- 
2.40.0