granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_encodemb.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include "./vp9_rtcd.h"
  13 #include "./vpx_config.h"
  14
  15 #include "vpx_mem/vpx_mem.h"
  16 #include "vpx_ports/mem.h"
  17
  18 #include "vp9/common/vp9_idct.h"
  19 #include "vp9/common/vp9_reconinter.h"
  20 #include "vp9/common/vp9_reconintra.h"
  21 #include "vp9/common/vp9_systemdependent.h"
  22
  23 #include "vp9/encoder/vp9_encodemb.h"
  24 #include "vp9/encoder/vp9_quantize.h"
  25 #include "vp9/encoder/vp9_rd.h"
  26 #include "vp9/encoder/vp9_tokenize.h"
  27
     // Entropy contexts consumed by encode_block: ta[plane] is indexed by the
     // i value and tl[plane] by the j value that txfrm_block_to_raster_xy
     // returns.  vp9_encode_sb fills both arrays through
     // vp9_get_entropy_contexts when trellis optimization is enabled.
  28 struct optimize_ctx {
  29   ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  30   ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
  31 };
  32
  33 void vp9_subtract_block_c(int rows, int cols,
  34                           int16_t *diff, ptrdiff_t diff_stride,
  35                           const uint8_t *src, ptrdiff_t src_stride,
  36                           const uint8_t *pred, ptrdiff_t pred_stride) {
  37   int r, c;
  38
  39   for (r = 0; r < rows; r++) {
  40     for (c = 0; c < cols; c++)
  41       diff[c] = src[c] - pred[c];
  42
  43     diff += diff_stride;
  44     pred += pred_stride;
  45     src  += src_stride;
  46   }
  47 }
  48
  49 #if CONFIG_VP9_HIGHBITDEPTH
  50 void vp9_highbd_subtract_block_c(int rows, int cols,
  51                                  int16_t *diff, ptrdiff_t diff_stride,
  52                                  const uint8_t *src8, ptrdiff_t src_stride,
  53                                  const uint8_t *pred8, ptrdiff_t pred_stride,
  54                                  int bd) {
  55   int r, c;
  56   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  57   uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  58   (void) bd;
  59
  60   for (r = 0; r < rows; r++) {
  61     for (c = 0; c < cols; c++) {
  62       diff[c] = src[c] - pred[c];
  63     }
  64
  65     diff += diff_stride;
  66     pred += pred_stride;
  67     src  += src_stride;
  68   }
  69 }
  70 #endif  // CONFIG_VP9_HIGHBITDEPTH
  71
  72 void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  73   struct macroblock_plane *const p = &x->plane[plane];
  74   const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
  75   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  76   const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  77   const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  78
  79 #if CONFIG_VP9_HIGHBITDEPTH
  80   if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  81     vp9_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
  82                               p->src.stride, pd->dst.buf, pd->dst.stride,
  83                               x->e_mbd.bd);
  84     return;
  85   }
  86 #endif  // CONFIG_VP9_HIGHBITDEPTH
  87   vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
  88                      pd->dst.buf, pd->dst.stride);
  89 }
  90
     // Low eight bits of 128 + R * RM.  UPDATE_RD_COST falls back to this value
     // to break ties when two RDCOST results are equal.  The DM and D arguments
     // are accepted but not used.
  91 #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
  92
     // One candidate state of a trellis node built by optimize_b.
     //   rate  - accumulated rate of the path that starts at this node
     //   error - accumulated squared error of that path
     //   next  - scan index of the following node on the path
     //   token - token currently associated with the node; optimize_b rewrites
     //           it to ZERO_TOKEN when it passes a zero coefficient ahead of
     //           the node
     //   qc    - quantized coefficient value chosen for this state
  93 typedef struct vp9_token_state {
  94   int           rate;
  95   int           error;
  96   int           next;
  97   int16_t       token;
  98   short         qc;
  99 } vp9_token_state;
 100
     // Multiplier applied to mb->rdmult in optimize_b, indexed by plane type.
 101 // TODO(jimbankoski): experiment to find optimal RD numbers.
 102 static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 };
 103
 104 #define UPDATE_RD_COST()\
 105 {\
 106   rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
 107   rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
 108   if (rd_cost0 == rd_cost1) {\
 109     rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
 110     rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
 111   }\
 112 }
 113
 114 // This function is a place holder for now but may ultimately need
 115 // to scan previous tokens to work out the correct context.
 116 static int trellis_get_coeff_context(const int16_t *scan,
 117                                      const int16_t *nb,
 118                                      int idx, int token,
 119                                      uint8_t *token_cache) {
 120   int bak = token_cache[scan[idx]], pt;
 121   token_cache[scan[idx]] = vp9_pt_energy_class[token];
 122   pt = get_coef_context(nb, token_cache, idx + 1);
 123   token_cache[scan[idx]] = bak;
 124   return pt;
 125 }
 126
     /* Trellis (Viterbi) refinement of the quantized coefficients of one
      * transform block.
      *
      * The coefficients are visited from the last one before eob back to the
      * first, in scan order.  Every non-zero coefficient gets a trellis node
      * with two candidate states: state 0 keeps the value as quantized, state 1
      * uses the value moved one step toward zero when the "shortcut" test
      * passes (otherwise it keeps the same value).  Each state accumulates rate
      * and squared error; alternatives are compared with UPDATE_RD_COST.  The
      * winning path is written back into qcoeff/dqcoeff and the eob is updated.
      *
      *   mb       encoder macroblock (supplies rdmult, rddiv, token_costs, planes)
      *   plane    plane index
      *   block    block index used with BLOCK_OFFSET and p->eobs
      *   tx_size  transform size; default_eob is 16 << (2 * tx_size)
      *   ctx      entropy context used to cost the first token of the block
      *
      * Returns the new eob, which is also stored in mb->plane[plane].eobs[block].
      */
 127 static int optimize_b(MACROBLOCK *mb, int plane, int block,
 128                       TX_SIZE tx_size, int ctx) {
 129   MACROBLOCKD *const xd = &mb->e_mbd;
 130   struct macroblock_plane *const p = &mb->plane[plane];
 131   struct macroblockd_plane *const pd = &xd->plane[plane];
 132   const int ref = is_inter_block(&xd->mi[0]->mbmi);
       /* Trellis nodes indexed by scan position (index eob holds the sentinel);
        * the second index selects one of the two candidate states.
        */
 133   vp9_token_state tokens[1025][2];
       /* For each node/state, which state of the following node was chosen. */
 134   unsigned best_index[1025][2];
 135   uint8_t token_cache[1024];
 136   const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
 137   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
 138   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 139   const int eob = p->eobs[block];
 140   const PLANE_TYPE type = pd->plane_type;
 141   const int default_eob = 16 << (tx_size << 1);
       /* 2 for TX_32X32, 1 for every other transform size. */
 142   const int mul = 1 + (tx_size == TX_32X32);
 143   const int16_t *dequant_ptr = pd->dequant;
 144   const uint8_t *const band_translate = get_band_translate(tx_size);
 145   const scan_order *const so = get_scan(xd, tx_size, type, block);
 146   const int16_t *const scan = so->scan;
 147   const int16_t *const nb = so->neighbors;
 148   int next = eob, sz = 0;
 149   int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv;
 150   int64_t rd_cost0, rd_cost1;
 151   int rate0, rate1, error0, error1;
 152   int16_t t0, t1;
 153   EXTRABIT e0;
 154   int best, band, pt, i, final_eob;
 155 #if CONFIG_VP9_HIGHBITDEPTH
 156   const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
 157 #else
 158   const int16_t *cat6_high_cost = vp9_get_high_cost_table(8);
 159 #endif
 160
 161   assert((!type && !plane) || (type && plane));
 162   assert(eob <= default_eob);
 163
 164   /* Now set up a Viterbi trellis to evaluate alternative roundings. */
       /* Blocks that are not inter-coded (ref == 0) use rdmult scaled by 9/16. */
 165   if (!ref)
 166     rdmult = (rdmult * 9) >> 4;
 167
 168   /* Initialize the sentinel node of the trellis. */
 169   tokens[eob][0].rate = 0;
 170   tokens[eob][0].error = 0;
 171   tokens[eob][0].next = default_eob;
 172   tokens[eob][0].token = EOB_TOKEN;
 173   tokens[eob][0].qc = 0;
 174   tokens[eob][1] = tokens[eob][0];
 175
       /* Seed token_cache with the energy class of every coefficient as it is
        * currently quantized.
        */
 176   for (i = 0; i < eob; i++)
 177     token_cache[scan[i]] =
 178         vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])];
 179
       /* Walk the coefficients backwards in scan order: i = eob - 1 ... 0. */
 180   for (i = eob; i-- > 0;) {
 181     int base_bits, d2, dx;
 182     const int rc = scan[i];
 183     int x = qcoeff[rc];
 184     /* Only add a trellis state for non-zero coefficients. */
 185     if (x) {
 186       int shortcut = 0;
 187       error0 = tokens[next][0].error;
 188       error1 = tokens[next][1].error;
 189       /* Evaluate the first possibility for this state. */
 190       rate0 = tokens[next][0].rate;
 191       rate1 = tokens[next][1].rate;
 192       vp9_get_token_extra(x, &t0, &e0);
 193       /* Consider both possible successor states. */
 194       if (next < default_eob) {
 195         band = band_translate[i + 1];
 196         pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
 197         rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
 198                                 [tokens[next][0].token];
 199         rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
 200                                 [tokens[next][1].token];
 201       }
 202       UPDATE_RD_COST();
 203       /* And pick the best. */
 204       best = rd_cost1 < rd_cost0;
 205       base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
           /* dx is the reconstruction error of the coefficient as quantized. */
 206       dx = mul * (dqcoeff[rc] - coeff[rc]);
 207 #if CONFIG_VP9_HIGHBITDEPTH
 208       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 209         dx >>= xd->bd - 8;
 210       }
 211 #endif  // CONFIG_VP9_HIGHBITDEPTH
 212       d2 = dx * dx;
 213       tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
 214       tokens[i][0].error = d2 + (best ? error1 : error0);
 215       tokens[i][0].next = next;
 216       tokens[i][0].token = t0;
 217       tokens[i][0].qc = x;
 218       best_index[i][0] = best;
 219
 220       /* Evaluate the second possibility for this state. */
 221       rate0 = tokens[next][0].rate;
 222       rate1 = tokens[next][1].rate;
 223
           /* shortcut is set when |x| * dequant lies strictly between
            * |coeff| * mul and |coeff| * mul + dequant.  Only then is the value
            * one step closer to zero tried.  dequant_ptr[rc != 0] selects entry 0
            * for raster position 0 and entry 1 for every other position.
            */
 224       if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
 225           (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
 226                                                dequant_ptr[rc != 0]))
 227         shortcut = 1;
 228       else
 229         shortcut = 0;
 230
           /* sz is 0 for x > 0 and -1 for x < 0, so 2 * sz + 1 is the sign of x
            * and the subtraction moves x one step toward zero.
            */
 231       if (shortcut) {
 232         sz = -(x < 0);
 233         x -= 2 * sz + 1;
 234       }
 235
 236       /* Consider both possible successor states. */
 237       if (!x) {
 238         /* If we reduced this coefficient to zero, check to see if
 239          *  we need to move the EOB back here.
 240          */
 241         t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
 242         t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
 243         e0 = 0;
 244       } else {
 245         vp9_get_token_extra(x, &t0, &e0);
 246         t1 = t0;
 247       }
 248       if (next < default_eob) {
 249         band = band_translate[i + 1];
 250         if (t0 != EOB_TOKEN) {
 251           pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
 252           rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
 253                                   [tokens[next][0].token];
 254         }
 255         if (t1 != EOB_TOKEN) {
 256           pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
 257           rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
 258                                   [tokens[next][1].token];
 259         }
 260       }
 261
 262       UPDATE_RD_COST();
 263       /* And pick the best. */
 264       best = rd_cost1 < rd_cost0;
 265       base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
 266
           /* (v + sz) ^ sz leaves v unchanged for sz == 0 and negates it for
            * sz == -1, so dx changes by one dequant step (shifted down by
            * bd - 8 on the high-bitdepth path): subtracted when sz == 0, added
            * when sz == -1.  Without the shortcut, d2 from state 0 is reused.
            */
 267       if (shortcut) {
 268 #if CONFIG_VP9_HIGHBITDEPTH
 269         if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 270           dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
 271         } else {
 272           dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
 273         }
 274 #else
 275         dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
 276 #endif  // CONFIG_VP9_HIGHBITDEPTH
 277         d2 = dx * dx;
 278       }
 279       tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
 280       tokens[i][1].error = d2 + (best ? error1 : error0);
 281       tokens[i][1].next = next;
 282       tokens[i][1].token = best ? t1 : t0;
 283       tokens[i][1].qc = x;
 284       best_index[i][1] = best;
 285       /* Finally, make this the new head of the trellis. */
 286       next = i;
 287     } else {
 288       /* There's no choice to make for a zero coefficient, so we don't
 289        *  add a new trellis node, but we do need to update the costs.
 290        */
 291       band = band_translate[i + 1];
 292       t0 = tokens[next][0].token;
 293       t1 = tokens[next][1].token;
 294       /* Update the cost of each path if we're past the EOB token. */
 295       if (t0 != EOB_TOKEN) {
 296         tokens[next][0].rate +=
 297             mb->token_costs[tx_size][type][ref][band][1][0][t0];
 298         tokens[next][0].token = ZERO_TOKEN;
 299       }
 300       if (t1 != EOB_TOKEN) {
 301         tokens[next][1].rate +=
 302             mb->token_costs[tx_size][type][ref][band][1][0][t1];
 303         tokens[next][1].token = ZERO_TOKEN;
 304       }
 305       best_index[i][0] = best_index[i][1] = 0;
 306       /* Don't update next, because we didn't add a new node. */
 307     }
 308   }
 309
 310   /* Now pick the best path through the whole trellis. */
       /* The loop above always exits with i == -1, so this reads
        * band_translate[0].
        */
 311   band = band_translate[i + 1];
 312   rate0 = tokens[next][0].rate;
 313   rate1 = tokens[next][1].rate;
 314   error0 = tokens[next][0].error;
 315   error1 = tokens[next][1].error;
 316   t0 = tokens[next][0].token;
 317   t1 = tokens[next][1].token;
 318   rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
 319   rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
 320   UPDATE_RD_COST();
 321   best = rd_cost1 < rd_cost0;
 322   final_eob = -1;
       /* Clear the whole block, then write back only the coefficients that lie
        * on the chosen path.
        */
 323   memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
 324   memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
       /* Follow the next links from the head node; final_eob tracks the last
        * scan index that kept a non-zero value.
        */
 325   for (i = next; i < eob; i = next) {
 326     const int x = tokens[i][best].qc;
 327     const int rc = scan[i];
 328     if (x) {
 329       final_eob = i;
 330     }
 331
 332     qcoeff[rc] = x;
 333     dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
 334
 335     next = tokens[i][best].next;
 336     best = best_index[i][best];
 337   }
 338   final_eob++;
 339
 340   mb->plane[plane].eobs[block] = final_eob;
 341   return final_eob;
 342 }
 343
 344 static INLINE void fdct32x32(int rd_transform,
 345                              const int16_t *src, tran_low_t *dst,
 346                              int src_stride) {
 347   if (rd_transform)
 348     vp9_fdct32x32_rd(src, dst, src_stride);
 349   else
 350     vp9_fdct32x32(src, dst, src_stride);
 351 }
 352
 353 #if CONFIG_VP9_HIGHBITDEPTH
 354 static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
 355                                     tran_low_t *dst, int src_stride) {
 356   if (rd_transform)
 357     vp9_highbd_fdct32x32_rd(src, dst, src_stride);
 358   else
 359     vp9_highbd_fdct32x32(src, dst, src_stride);
 360 }
 361 #endif  // CONFIG_VP9_HIGHBITDEPTH
 362
 363 void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
 364                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
 365   MACROBLOCKD *const xd = &x->e_mbd;
 366   const struct macroblock_plane *const p = &x->plane[plane];
 367   const struct macroblockd_plane *const pd = &xd->plane[plane];
 368   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
 369   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
 370   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
 371   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 372   uint16_t *const eob = &p->eobs[block];
 373   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 374   int i, j;
 375   const int16_t *src_diff;
 376   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
 377   src_diff = &p->src_diff[4 * (j * diff_stride + i)];
 378
 379 #if CONFIG_VP9_HIGHBITDEPTH
 380   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 381     switch (tx_size) {
 382       case TX_32X32:
 383         highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
 384         vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
 385                                      p->round_fp, p->quant_fp, p->quant_shift,
 386                                      qcoeff, dqcoeff, pd->dequant,
 387                                      eob, scan_order->scan,
 388                                      scan_order->iscan);
 389         break;
 390       case TX_16X16:
 391         vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
 392         vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
 393                                p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
 394                                pd->dequant, eob,
 395                                scan_order->scan, scan_order->iscan);
 396         break;
 397       case TX_8X8:
 398         vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
 399         vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
 400                                p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
 401                                pd->dequant, eob,
 402                                scan_order->scan, scan_order->iscan);
 403         break;
 404       case TX_4X4:
 405         x->fwd_txm4x4(src_diff, coeff, diff_stride);
 406         vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
 407                                p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
 408                                pd->dequant, eob,
 409                                scan_order->scan, scan_order->iscan);
 410         break;
 411       default:
 412         assert(0);
 413     }
 414     return;
 415   }
 416 #endif  // CONFIG_VP9_HIGHBITDEPTH
 417
 418   switch (tx_size) {
 419     case TX_32X32:
 420       fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
 421       vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
 422                             p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
 423                             pd->dequant, eob, scan_order->scan,
 424                             scan_order->iscan);
 425       break;
 426     case TX_16X16:
 427       vp9_fdct16x16(src_diff, coeff, diff_stride);
 428       vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
 429                       p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
 430                       pd->dequant, eob,
 431                       scan_order->scan, scan_order->iscan);
 432       break;
 433     case TX_8X8:
 434       vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64,
 435                         x->skip_block, p->zbin, p->round_fp,
 436                         p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
 437                         pd->dequant, eob,
 438                         scan_order->scan, scan_order->iscan);
 439       break;
 440     case TX_4X4:
 441       x->fwd_txm4x4(src_diff, coeff, diff_stride);
 442       vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
 443                       p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
 444                       pd->dequant, eob,
 445                       scan_order->scan, scan_order->iscan);
 446       break;
 447     default:
 448       assert(0);
 449       break;
 450   }
 451 }
 452
 453 void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
 454                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
 455   MACROBLOCKD *const xd = &x->e_mbd;
 456   const struct macroblock_plane *const p = &x->plane[plane];
 457   const struct macroblockd_plane *const pd = &xd->plane[plane];
 458   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
 459   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
 460   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 461   uint16_t *const eob = &p->eobs[block];
 462   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 463   int i, j;
 464   const int16_t *src_diff;
 465
 466   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
 467   src_diff = &p->src_diff[4 * (j * diff_stride + i)];
 468
 469 #if CONFIG_VP9_HIGHBITDEPTH
 470   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 471     switch (tx_size) {
 472       case TX_32X32:
 473         vp9_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
 474         vp9_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
 475                                      p->quant_fp[0], qcoeff, dqcoeff,
 476                                      pd->dequant[0], eob);
 477         break;
 478       case TX_16X16:
 479         vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
 480         vp9_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
 481                                p->quant_fp[0], qcoeff, dqcoeff,
 482                                pd->dequant[0], eob);
 483         break;
 484       case TX_8X8:
 485         vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
 486         vp9_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
 487                                p->quant_fp[0], qcoeff, dqcoeff,
 488                                pd->dequant[0], eob);
 489         break;
 490       case TX_4X4:
 491         x->fwd_txm4x4(src_diff, coeff, diff_stride);
 492         vp9_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
 493                                p->quant_fp[0], qcoeff, dqcoeff,
 494                                pd->dequant[0], eob);
 495         break;
 496       default:
 497         assert(0);
 498     }
 499     return;
 500   }
 501 #endif  // CONFIG_VP9_HIGHBITDEPTH
 502
 503   switch (tx_size) {
 504     case TX_32X32:
 505       vp9_fdct32x32_1(src_diff, coeff, diff_stride);
 506       vp9_quantize_dc_32x32(coeff, x->skip_block, p->round,
 507                             p->quant_fp[0], qcoeff, dqcoeff,
 508                             pd->dequant[0], eob);
 509       break;
 510     case TX_16X16:
 511       vp9_fdct16x16_1(src_diff, coeff, diff_stride);
 512       vp9_quantize_dc(coeff, 256, x->skip_block, p->round,
 513                      p->quant_fp[0], qcoeff, dqcoeff,
 514                      pd->dequant[0], eob);
 515       break;
 516     case TX_8X8:
 517       vp9_fdct8x8_1(src_diff, coeff, diff_stride);
 518       vp9_quantize_dc(coeff, 64, x->skip_block, p->round,
 519                       p->quant_fp[0], qcoeff, dqcoeff,
 520                       pd->dequant[0], eob);
 521       break;
 522     case TX_4X4:
 523       x->fwd_txm4x4(src_diff, coeff, diff_stride);
 524       vp9_quantize_dc(coeff, 16, x->skip_block, p->round,
 525                       p->quant_fp[0], qcoeff, dqcoeff,
 526                       pd->dequant[0], eob);
 527       break;
 528     default:
 529       assert(0);
 530       break;
 531   }
 532 }
 533
 534 void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
 535                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
 536   MACROBLOCKD *const xd = &x->e_mbd;
 537   const struct macroblock_plane *const p = &x->plane[plane];
 538   const struct macroblockd_plane *const pd = &xd->plane[plane];
 539   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
 540   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
 541   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
 542   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 543   uint16_t *const eob = &p->eobs[block];
 544   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 545   int i, j;
 546   const int16_t *src_diff;
 547   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
 548   src_diff = &p->src_diff[4 * (j * diff_stride + i)];
 549
 550 #if CONFIG_VP9_HIGHBITDEPTH
 551   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 552      switch (tx_size) {
 553       case TX_32X32:
 554         highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
 555         vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
 556                                     p->round, p->quant, p->quant_shift, qcoeff,
 557                                     dqcoeff, pd->dequant, eob,
 558                                     scan_order->scan, scan_order->iscan);
 559         break;
 560       case TX_16X16:
 561         vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
 562         vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
 563                               p->quant, p->quant_shift, qcoeff, dqcoeff,
 564                               pd->dequant, eob,
 565                               scan_order->scan, scan_order->iscan);
 566         break;
 567       case TX_8X8:
 568         vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
 569         vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
 570                               p->quant, p->quant_shift, qcoeff, dqcoeff,
 571                               pd->dequant, eob,
 572                               scan_order->scan, scan_order->iscan);
 573         break;
 574       case TX_4X4:
 575         x->fwd_txm4x4(src_diff, coeff, diff_stride);
 576         vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
 577                               p->quant, p->quant_shift, qcoeff, dqcoeff,
 578                               pd->dequant, eob,
 579                               scan_order->scan, scan_order->iscan);
 580         break;
 581       default:
 582         assert(0);
 583     }
 584     return;
 585   }
 586 #endif  // CONFIG_VP9_HIGHBITDEPTH
 587
 588   switch (tx_size) {
 589     case TX_32X32:
 590       fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
 591       vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
 592                            p->quant, p->quant_shift, qcoeff, dqcoeff,
 593                            pd->dequant, eob, scan_order->scan,
 594                            scan_order->iscan);
 595       break;
 596     case TX_16X16:
 597       vp9_fdct16x16(src_diff, coeff, diff_stride);
 598       vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
 599                      p->quant, p->quant_shift, qcoeff, dqcoeff,
 600                      pd->dequant, eob,
 601                      scan_order->scan, scan_order->iscan);
 602       break;
 603     case TX_8X8:
 604       vp9_fdct8x8(src_diff, coeff, diff_stride);
 605       vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
 606                      p->quant, p->quant_shift, qcoeff, dqcoeff,
 607                      pd->dequant, eob,
 608                      scan_order->scan, scan_order->iscan);
 609       break;
 610     case TX_4X4:
 611       x->fwd_txm4x4(src_diff, coeff, diff_stride);
 612       vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
 613                      p->quant, p->quant_shift, qcoeff, dqcoeff,
 614                      pd->dequant, eob,
 615                      scan_order->scan, scan_order->iscan);
 616       break;
 617     default:
 618       assert(0);
 619       break;
 620   }
 621 }
 622
 623 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
 624                          TX_SIZE tx_size, void *arg) {
 625   struct encode_b_args *const args = arg;
 626   MACROBLOCK *const x = args->x;
 627   MACROBLOCKD *const xd = &x->e_mbd;
 628   struct optimize_ctx *const ctx = args->ctx;
 629   struct macroblock_plane *const p = &x->plane[plane];
 630   struct macroblockd_plane *const pd = &xd->plane[plane];
 631   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 632   int i, j;
 633   uint8_t *dst;
 634   ENTROPY_CONTEXT *a, *l;
 635   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
 636   dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
 637   a = &ctx->ta[plane][i];
 638   l = &ctx->tl[plane][j];
 639
 640   // TODO(jingning): per transformed block zero forcing only enabled for
 641   // luma component. will integrate chroma components as well.
 642   if (x->zcoeff_blk[tx_size][block] && plane == 0) {
 643     p->eobs[block] = 0;
 644     *a = *l = 0;
 645     return;
 646   }
 647
 648   if (!x->skip_recode) {
 649     if (x->quant_fp) {
 650       // Encoding process for rtc mode
 651       if (x->skip_txfm[0] == 1 && plane == 0) {
 652         // skip forward transform
 653         p->eobs[block] = 0;
 654         *a = *l = 0;
 655         return;
 656       } else {
 657         vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
 658       }
 659     } else {
 660       if (max_txsize_lookup[plane_bsize] == tx_size) {
 661         int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
 662         if (x->skip_txfm[txfm_blk_index] == 0) {
 663           // full forward transform and quantization
 664           vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 665         } else if (x->skip_txfm[txfm_blk_index]== 2) {
 666           // fast path forward transform and quantization
 667           vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
 668         } else {
 669           // skip forward transform
 670           p->eobs[block] = 0;
 671           *a = *l = 0;
 672           return;
 673         }
 674       } else {
 675         vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 676       }
 677     }
 678   }
 679
 680   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
 681     const int ctx = combine_entropy_contexts(*a, *l);
 682     *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0;
 683   } else {
 684     *a = *l = p->eobs[block] > 0;
 685   }
 686
 687   if (p->eobs[block])
 688     *(args->skip) = 0;
 689
 690   if (x->skip_encode || p->eobs[block] == 0)
 691     return;
 692 #if CONFIG_VP9_HIGHBITDEPTH
 693   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 694     switch (tx_size) {
 695       case TX_32X32:
 696         vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride,
 697                                  p->eobs[block], xd->bd);
 698         break;
 699       case TX_16X16:
 700         vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride,
 701                                  p->eobs[block], xd->bd);
 702         break;
 703       case TX_8X8:
 704         vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride,
 705                                p->eobs[block], xd->bd);
 706         break;
 707       case TX_4X4:
 708         // this is like vp9_short_idct4x4 but has a special case around eob<=1
 709         // which is significant (not just an optimization) for the lossless
 710         // case.
 711         x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride,
 712                            p->eobs[block], xd->bd);
 713         break;
 714       default:
 715         assert(0 && "Invalid transform size");
 716     }
 717     return;
 718   }
 719 #endif  // CONFIG_VP9_HIGHBITDEPTH
 720
 721   switch (tx_size) {
 722     case TX_32X32:
 723       vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
 724       break;
 725     case TX_16X16:
 726       vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
 727       break;
 728     case TX_8X8:
 729       vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
 730       break;
 731     case TX_4X4:
 732       // this is like vp9_short_idct4x4 but has a special case around eob<=1
 733       // which is significant (not just an optimization) for the lossless
 734       // case.
 735       x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
 736       break;
 737     default:
 738       assert(0 && "Invalid transform size");
 739       break;
 740   }
 741 }
 742
 743 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
 744                                TX_SIZE tx_size, void *arg) {
 745   MACROBLOCK *const x = (MACROBLOCK *)arg;
 746   MACROBLOCKD *const xd = &x->e_mbd;
 747   struct macroblock_plane *const p = &x->plane[plane];
 748   struct macroblockd_plane *const pd = &xd->plane[plane];
 749   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 750   int i, j;
 751   uint8_t *dst;
 752   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
 753   dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
 754
 755   vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 756
 757   if (p->eobs[block] > 0) {
 758 #if CONFIG_VP9_HIGHBITDEPTH
 759     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 760        x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
 761        return;
 762     }
 763 #endif  // CONFIG_VP9_HIGHBITDEPTH
 764     x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
 765   }
 766 }
 767
 768 void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
 769   vp9_subtract_plane(x, bsize, 0);
 770   vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
 771                                          encode_block_pass1, x);
 772 }
 773
 774 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
 775   MACROBLOCKD *const xd = &x->e_mbd;
 776   struct optimize_ctx ctx;
 777   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
 778   struct encode_b_args arg = {x, &ctx, &mbmi->skip};
 779   int plane;
 780
 781   mbmi->skip = 1;
 782
 783   if (x->skip)
 784     return;
 785
 786   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
 787     if (!x->skip_recode)
 788       vp9_subtract_plane(x, bsize, plane);
 789
 790     if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
 791       const struct macroblockd_plane* const pd = &xd->plane[plane];
 792       const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
 793       vp9_get_entropy_contexts(bsize, tx_size, pd,
 794                                ctx.ta[plane], ctx.tl[plane]);
 795     }
 796
 797     vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
 798                                            &arg);
 799   }
 800 }
 801
 802 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
 803                                TX_SIZE tx_size, void *arg) {
 804   struct encode_b_args* const args = arg;
 805   MACROBLOCK *const x = args->x;
 806   MACROBLOCKD *const xd = &x->e_mbd;
 807   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
 808   struct macroblock_plane *const p = &x->plane[plane];
 809   struct macroblockd_plane *const pd = &xd->plane[plane];
 810   tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
 811   tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
 812   tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 813   const scan_order *scan_order;
 814   TX_TYPE tx_type;
 815   PREDICTION_MODE mode;
 816   const int bwl = b_width_log2_lookup[plane_bsize];
 817   const int diff_stride = 4 * (1 << bwl);
 818   uint8_t *src, *dst;
 819   int16_t *src_diff;
 820   uint16_t *eob = &p->eobs[block];
 821   const int src_stride = p->src.stride;
 822   const int dst_stride = pd->dst.stride;
 823   int i, j;
 824   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
 825   dst = &pd->dst.buf[4 * (j * dst_stride + i)];
 826   src = &p->src.buf[4 * (j * src_stride + i)];
 827   src_diff = &p->src_diff[4 * (j * diff_stride + i)];
 828
 829 #if CONFIG_VP9_HIGHBITDEPTH
 830   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
 831     switch (tx_size) {
 832       case TX_32X32:
 833         scan_order = &vp9_default_scan_orders[TX_32X32];
 834         mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
 835         vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
 836                                 x->skip_encode ? src : dst,
 837                                 x->skip_encode ? src_stride : dst_stride,
 838                                 dst, dst_stride, i, j, plane);
 839         if (!x->skip_recode) {
 840           vp9_highbd_subtract_block(32, 32, src_diff, diff_stride,
 841                                     src, src_stride, dst, dst_stride, xd->bd);
 842           highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
 843           vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
 844                                       p->round, p->quant, p->quant_shift,
 845                                       qcoeff, dqcoeff, pd->dequant, eob,
 846                                       scan_order->scan, scan_order->iscan);
 847         }
 848         if (!x->skip_encode && *eob) {
 849           vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
 850         }
 851         break;
 852       case TX_16X16:
 853         tx_type = get_tx_type(pd->plane_type, xd);
 854         scan_order = &vp9_scan_orders[TX_16X16][tx_type];
 855         mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
 856         vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
 857                                 x->skip_encode ? src : dst,
 858                                 x->skip_encode ? src_stride : dst_stride,
 859                                 dst, dst_stride, i, j, plane);
 860         if (!x->skip_recode) {
 861           vp9_highbd_subtract_block(16, 16, src_diff, diff_stride,
 862                                     src, src_stride, dst, dst_stride, xd->bd);
 863           vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
 864           vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
 865                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
 866                                 pd->dequant, eob,
 867                                 scan_order->scan, scan_order->iscan);
 868         }
 869         if (!x->skip_encode && *eob) {
 870           vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride,
 871                                   *eob, xd->bd);
 872         }
 873         break;
 874       case TX_8X8:
 875         tx_type = get_tx_type(pd->plane_type, xd);
 876         scan_order = &vp9_scan_orders[TX_8X8][tx_type];
 877         mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
 878         vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
 879                                 x->skip_encode ? src : dst,
 880                                 x->skip_encode ? src_stride : dst_stride,
 881                                 dst, dst_stride, i, j, plane);
 882         if (!x->skip_recode) {
 883           vp9_highbd_subtract_block(8, 8, src_diff, diff_stride,
 884                                     src, src_stride, dst, dst_stride, xd->bd);
 885           vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
 886           vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
 887                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
 888                                 pd->dequant, eob,
 889                                 scan_order->scan, scan_order->iscan);
 890         }
 891         if (!x->skip_encode && *eob) {
 892           vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
 893                                 xd->bd);
 894         }
 895         break;
 896       case TX_4X4:
 897         tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
 898         scan_order = &vp9_scan_orders[TX_4X4][tx_type];
 899         mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
 900         vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
 901                                 x->skip_encode ? src : dst,
 902                                 x->skip_encode ? src_stride : dst_stride,
 903                                 dst, dst_stride, i, j, plane);
 904
 905         if (!x->skip_recode) {
 906           vp9_highbd_subtract_block(4, 4, src_diff, diff_stride,
 907                                     src, src_stride, dst, dst_stride, xd->bd);
 908           if (tx_type != DCT_DCT)
 909             vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
 910           else
 911             x->fwd_txm4x4(src_diff, coeff, diff_stride);
 912           vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
 913                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
 914                                 pd->dequant, eob,
 915                                 scan_order->scan, scan_order->iscan);
 916         }
 917
 918         if (!x->skip_encode && *eob) {
 919           if (tx_type == DCT_DCT) {
 920             // this is like vp9_short_idct4x4 but has a special case around
 921             // eob<=1 which is significant (not just an optimization) for the
 922             // lossless case.
 923             x->highbd_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
 924           } else {
 925             vp9_highbd_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd);
 926           }
 927         }
 928         break;
 929       default:
 930         assert(0);
 931         return;
 932     }
 933     if (*eob)
 934       *(args->skip) = 0;
 935     return;
 936   }
 937 #endif  // CONFIG_VP9_HIGHBITDEPTH
 938
 939   switch (tx_size) {
 940     case TX_32X32:
 941       scan_order = &vp9_default_scan_orders[TX_32X32];
 942       mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
 943       vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
 944                               x->skip_encode ? src : dst,
 945                               x->skip_encode ? src_stride : dst_stride,
 946                               dst, dst_stride, i, j, plane);
 947       if (!x->skip_recode) {
 948         vp9_subtract_block(32, 32, src_diff, diff_stride,
 949                            src, src_stride, dst, dst_stride);
 950         fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
 951         vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
 952                              p->quant, p->quant_shift, qcoeff, dqcoeff,
 953                              pd->dequant, eob, scan_order->scan,
 954                              scan_order->iscan);
 955       }
 956       if (!x->skip_encode && *eob)
 957         vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
 958       break;
 959     case TX_16X16:
 960       tx_type = get_tx_type(pd->plane_type, xd);
 961       scan_order = &vp9_scan_orders[TX_16X16][tx_type];
 962       mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
 963       vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
 964                               x->skip_encode ? src : dst,
 965                               x->skip_encode ? src_stride : dst_stride,
 966                               dst, dst_stride, i, j, plane);
 967       if (!x->skip_recode) {
 968         vp9_subtract_block(16, 16, src_diff, diff_stride,
 969                            src, src_stride, dst, dst_stride);
 970         vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
 971         vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
 972                        p->quant, p->quant_shift, qcoeff, dqcoeff,
 973                        pd->dequant, eob, scan_order->scan,
 974                        scan_order->iscan);
 975       }
 976       if (!x->skip_encode && *eob)
 977         vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
 978       break;
 979     case TX_8X8:
 980       tx_type = get_tx_type(pd->plane_type, xd);
 981       scan_order = &vp9_scan_orders[TX_8X8][tx_type];
 982       mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
 983       vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
 984                               x->skip_encode ? src : dst,
 985                               x->skip_encode ? src_stride : dst_stride,
 986                               dst, dst_stride, i, j, plane);
 987       if (!x->skip_recode) {
 988         vp9_subtract_block(8, 8, src_diff, diff_stride,
 989                            src, src_stride, dst, dst_stride);
 990         vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
 991         vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
 992                        p->quant_shift, qcoeff, dqcoeff,
 993                        pd->dequant, eob, scan_order->scan,
 994                        scan_order->iscan);
 995       }
 996       if (!x->skip_encode && *eob)
 997         vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
 998       break;
 999     case TX_4X4:
1000       tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
1001       scan_order = &vp9_scan_orders[TX_4X4][tx_type];
1002       mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
1003       vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
1004                               x->skip_encode ? src : dst,
1005                               x->skip_encode ? src_stride : dst_stride,
1006                               dst, dst_stride, i, j, plane);
1007
1008       if (!x->skip_recode) {
1009         vp9_subtract_block(4, 4, src_diff, diff_stride,
1010                            src, src_stride, dst, dst_stride);
1011         if (tx_type != DCT_DCT)
1012           vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
1013         else
1014           x->fwd_txm4x4(src_diff, coeff, diff_stride);
1015         vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
1016                        p->quant_shift, qcoeff, dqcoeff,
1017                        pd->dequant, eob, scan_order->scan,
1018                        scan_order->iscan);
1019       }
1020
1021       if (!x->skip_encode && *eob) {
1022         if (tx_type == DCT_DCT)
1023           // this is like vp9_short_idct4x4 but has a special case around eob<=1
1024           // which is significant (not just an optimization) for the lossless
1025           // case.
1026           x->itxm_add(dqcoeff, dst, dst_stride, *eob);
1027         else
1028           vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
1029       }
1030       break;
1031     default:
1032       assert(0);
1033       break;
1034   }
1035   if (*eob)
1036     *(args->skip) = 0;
1037 }
1038
1039 void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
1040   const MACROBLOCKD *const xd = &x->e_mbd;
1041   struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip};
1042
1043   vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
1044                                          vp9_encode_block_intra, &arg);
1045 }