/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
12 #include "./vp9_rtcd.h"
13 #include "./vpx_config.h"
15 #include "vpx_mem/vpx_mem.h"
16 #include "vpx_ports/mem.h"
18 #include "vp9/common/vp9_idct.h"
19 #include "vp9/common/vp9_reconinter.h"
20 #include "vp9/common/vp9_reconintra.h"
21 #include "vp9/common/vp9_systemdependent.h"
23 #include "vp9/encoder/vp9_encodemb.h"
24 #include "vp9/encoder/vp9_quantize.h"
25 #include "vp9/encoder/vp9_rd.h"
26 #include "vp9/encoder/vp9_tokenize.h"
29 ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
30 ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
// Computes the pixel-wise residual diff = src - pred for a rows x cols
// block. Each buffer advances by its own stride after every row, so the
// three buffers may have independent layouts.
void vp9_subtract_block_c(int rows, int cols,
                          int16_t *diff, ptrdiff_t diff_stride,
                          const uint8_t *src, ptrdiff_t src_stride,
                          const uint8_t *pred, ptrdiff_t pred_stride) {
  int r, c;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++)
      diff[c] = src[c] - pred[c];

    diff += diff_stride;
    pred += pred_stride;
    src  += src_stride;
  }
}
#if CONFIG_VP9_HIGHBITDEPTH
// High-bit-depth variant of vp9_subtract_block_c(). src8/pred8 are aliased
// pointers to uint16_t sample buffers (CONVERT_TO_SHORTPTR recovers the
// real pointers). bd (bit depth) is unused: the residual of two <=12-bit
// samples always fits in int16_t.
void vp9_highbd_subtract_block_c(int rows, int cols,
                                 int16_t *diff, ptrdiff_t diff_stride,
                                 const uint8_t *src8, ptrdiff_t src_stride,
                                 const uint8_t *pred8, ptrdiff_t pred_stride,
                                 int bd) {
  int r, c;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  (void) bd;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++) {
      diff[c] = src[c] - pred[c];
    }

    diff += diff_stride;
    pred += pred_stride;
    src  += src_stride;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
72 void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
73 struct macroblock_plane *const p = &x->plane[plane];
74 const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
75 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
76 const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
77 const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
79 #if CONFIG_VP9_HIGHBITDEPTH
80 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
81 vp9_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
82 p->src.stride, pd->dst.buf, pd->dst.stride,
86 #endif // CONFIG_VP9_HIGHBITDEPTH
87 vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
88 pd->dst.buf, pd->dst.stride);
91 #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
93 typedef struct vp9_token_state {
101 // TODO(jimbankoski): experiment to find optimal RD numbers.
102 static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 };
104 #define UPDATE_RD_COST()\
106 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
107 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
108 if (rd_cost0 == rd_cost1) {\
109 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
110 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
114 // This function is a place holder for now but may ultimately need
115 // to scan previous tokens to work out the correct context.
116 static int trellis_get_coeff_context(const int16_t *scan,
119 uint8_t *token_cache) {
120 int bak = token_cache[scan[idx]], pt;
121 token_cache[scan[idx]] = vp9_pt_energy_class[token];
122 pt = get_coef_context(nb, token_cache, idx + 1);
123 token_cache[scan[idx]] = bak;
127 static int optimize_b(MACROBLOCK *mb, int plane, int block,
128 TX_SIZE tx_size, int ctx) {
129 MACROBLOCKD *const xd = &mb->e_mbd;
130 struct macroblock_plane *const p = &mb->plane[plane];
131 struct macroblockd_plane *const pd = &xd->plane[plane];
132 const int ref = is_inter_block(&xd->mi[0]->mbmi);
133 vp9_token_state tokens[1025][2];
134 unsigned best_index[1025][2];
135 uint8_t token_cache[1024];
136 const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
137 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
138 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
139 const int eob = p->eobs[block];
140 const PLANE_TYPE type = pd->plane_type;
141 const int default_eob = 16 << (tx_size << 1);
142 const int mul = 1 + (tx_size == TX_32X32);
143 const int16_t *dequant_ptr = pd->dequant;
144 const uint8_t *const band_translate = get_band_translate(tx_size);
145 const scan_order *const so = get_scan(xd, tx_size, type, block);
146 const int16_t *const scan = so->scan;
147 const int16_t *const nb = so->neighbors;
148 int next = eob, sz = 0;
149 int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv;
150 int64_t rd_cost0, rd_cost1;
151 int rate0, rate1, error0, error1;
154 int best, band, pt, i, final_eob;
155 #if CONFIG_VP9_HIGHBITDEPTH
156 const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
158 const int16_t *cat6_high_cost = vp9_get_high_cost_table(8);
161 assert((!type && !plane) || (type && plane));
162 assert(eob <= default_eob);
164 /* Now set up a Viterbi trellis to evaluate alternative roundings. */
166 rdmult = (rdmult * 9) >> 4;
168 /* Initialize the sentinel node of the trellis. */
169 tokens[eob][0].rate = 0;
170 tokens[eob][0].error = 0;
171 tokens[eob][0].next = default_eob;
172 tokens[eob][0].token = EOB_TOKEN;
173 tokens[eob][0].qc = 0;
174 tokens[eob][1] = tokens[eob][0];
176 for (i = 0; i < eob; i++)
177 token_cache[scan[i]] =
178 vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])];
180 for (i = eob; i-- > 0;) {
181 int base_bits, d2, dx;
182 const int rc = scan[i];
184 /* Only add a trellis state for non-zero coefficients. */
187 error0 = tokens[next][0].error;
188 error1 = tokens[next][1].error;
189 /* Evaluate the first possibility for this state. */
190 rate0 = tokens[next][0].rate;
191 rate1 = tokens[next][1].rate;
192 vp9_get_token_extra(x, &t0, &e0);
193 /* Consider both possible successor states. */
194 if (next < default_eob) {
195 band = band_translate[i + 1];
196 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
197 rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
198 [tokens[next][0].token];
199 rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
200 [tokens[next][1].token];
203 /* And pick the best. */
204 best = rd_cost1 < rd_cost0;
205 base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
206 dx = mul * (dqcoeff[rc] - coeff[rc]);
207 #if CONFIG_VP9_HIGHBITDEPTH
208 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
211 #endif // CONFIG_VP9_HIGHBITDEPTH
213 tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
214 tokens[i][0].error = d2 + (best ? error1 : error0);
215 tokens[i][0].next = next;
216 tokens[i][0].token = t0;
218 best_index[i][0] = best;
220 /* Evaluate the second possibility for this state. */
221 rate0 = tokens[next][0].rate;
222 rate1 = tokens[next][1].rate;
224 if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
225 (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
226 dequant_ptr[rc != 0]))
236 /* Consider both possible successor states. */
238 /* If we reduced this coefficient to zero, check to see if
239 * we need to move the EOB back here.
241 t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
242 t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
245 vp9_get_token_extra(x, &t0, &e0);
248 if (next < default_eob) {
249 band = band_translate[i + 1];
250 if (t0 != EOB_TOKEN) {
251 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
252 rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
253 [tokens[next][0].token];
255 if (t1 != EOB_TOKEN) {
256 pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
257 rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
258 [tokens[next][1].token];
263 /* And pick the best. */
264 best = rd_cost1 < rd_cost0;
265 base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
268 #if CONFIG_VP9_HIGHBITDEPTH
269 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
270 dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
272 dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
275 dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
276 #endif // CONFIG_VP9_HIGHBITDEPTH
279 tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
280 tokens[i][1].error = d2 + (best ? error1 : error0);
281 tokens[i][1].next = next;
282 tokens[i][1].token = best ? t1 : t0;
284 best_index[i][1] = best;
285 /* Finally, make this the new head of the trellis. */
288 /* There's no choice to make for a zero coefficient, so we don't
289 * add a new trellis node, but we do need to update the costs.
291 band = band_translate[i + 1];
292 t0 = tokens[next][0].token;
293 t1 = tokens[next][1].token;
294 /* Update the cost of each path if we're past the EOB token. */
295 if (t0 != EOB_TOKEN) {
296 tokens[next][0].rate +=
297 mb->token_costs[tx_size][type][ref][band][1][0][t0];
298 tokens[next][0].token = ZERO_TOKEN;
300 if (t1 != EOB_TOKEN) {
301 tokens[next][1].rate +=
302 mb->token_costs[tx_size][type][ref][band][1][0][t1];
303 tokens[next][1].token = ZERO_TOKEN;
305 best_index[i][0] = best_index[i][1] = 0;
306 /* Don't update next, because we didn't add a new node. */
310 /* Now pick the best path through the whole trellis. */
311 band = band_translate[i + 1];
312 rate0 = tokens[next][0].rate;
313 rate1 = tokens[next][1].rate;
314 error0 = tokens[next][0].error;
315 error1 = tokens[next][1].error;
316 t0 = tokens[next][0].token;
317 t1 = tokens[next][1].token;
318 rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
319 rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
321 best = rd_cost1 < rd_cost0;
323 memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
324 memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
325 for (i = next; i < eob; i = next) {
326 const int x = tokens[i][best].qc;
327 const int rc = scan[i];
333 dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
335 next = tokens[i][best].next;
336 best = best_index[i][best];
340 mb->plane[plane].eobs[block] = final_eob;
344 static INLINE void fdct32x32(int rd_transform,
345 const int16_t *src, tran_low_t *dst,
348 vp9_fdct32x32_rd(src, dst, src_stride);
350 vp9_fdct32x32(src, dst, src_stride);
#if CONFIG_VP9_HIGHBITDEPTH
// High-bit-depth counterpart of fdct32x32(): picks the reduced-precision
// RD transform or the full transform based on rd_transform.
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (rd_transform)
    vp9_highbd_fdct32x32_rd(src, dst, src_stride);
  else
    vp9_highbd_fdct32x32(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
363 void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
364 BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
365 MACROBLOCKD *const xd = &x->e_mbd;
366 const struct macroblock_plane *const p = &x->plane[plane];
367 const struct macroblockd_plane *const pd = &xd->plane[plane];
368 const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
369 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
370 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
371 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
372 uint16_t *const eob = &p->eobs[block];
373 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
375 const int16_t *src_diff;
376 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
377 src_diff = &p->src_diff[4 * (j * diff_stride + i)];
379 #if CONFIG_VP9_HIGHBITDEPTH
380 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
383 highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
384 vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
385 p->round_fp, p->quant_fp, p->quant_shift,
386 qcoeff, dqcoeff, pd->dequant,
387 eob, scan_order->scan,
391 vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
392 vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
393 p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
395 scan_order->scan, scan_order->iscan);
398 vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
399 vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
400 p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
402 scan_order->scan, scan_order->iscan);
405 x->fwd_txm4x4(src_diff, coeff, diff_stride);
406 vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
407 p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
409 scan_order->scan, scan_order->iscan);
416 #endif // CONFIG_VP9_HIGHBITDEPTH
420 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
421 vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
422 p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
423 pd->dequant, eob, scan_order->scan,
427 vp9_fdct16x16(src_diff, coeff, diff_stride);
428 vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
429 p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
431 scan_order->scan, scan_order->iscan);
434 vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64,
435 x->skip_block, p->zbin, p->round_fp,
436 p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
438 scan_order->scan, scan_order->iscan);
441 x->fwd_txm4x4(src_diff, coeff, diff_stride);
442 vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
443 p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
445 scan_order->scan, scan_order->iscan);
453 void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
454 BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
455 MACROBLOCKD *const xd = &x->e_mbd;
456 const struct macroblock_plane *const p = &x->plane[plane];
457 const struct macroblockd_plane *const pd = &xd->plane[plane];
458 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
459 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
460 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
461 uint16_t *const eob = &p->eobs[block];
462 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
464 const int16_t *src_diff;
466 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
467 src_diff = &p->src_diff[4 * (j * diff_stride + i)];
469 #if CONFIG_VP9_HIGHBITDEPTH
470 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
473 vp9_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
474 vp9_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
475 p->quant_fp[0], qcoeff, dqcoeff,
476 pd->dequant[0], eob);
479 vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
480 vp9_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
481 p->quant_fp[0], qcoeff, dqcoeff,
482 pd->dequant[0], eob);
485 vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
486 vp9_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
487 p->quant_fp[0], qcoeff, dqcoeff,
488 pd->dequant[0], eob);
491 x->fwd_txm4x4(src_diff, coeff, diff_stride);
492 vp9_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
493 p->quant_fp[0], qcoeff, dqcoeff,
494 pd->dequant[0], eob);
501 #endif // CONFIG_VP9_HIGHBITDEPTH
505 vp9_fdct32x32_1(src_diff, coeff, diff_stride);
506 vp9_quantize_dc_32x32(coeff, x->skip_block, p->round,
507 p->quant_fp[0], qcoeff, dqcoeff,
508 pd->dequant[0], eob);
511 vp9_fdct16x16_1(src_diff, coeff, diff_stride);
512 vp9_quantize_dc(coeff, 256, x->skip_block, p->round,
513 p->quant_fp[0], qcoeff, dqcoeff,
514 pd->dequant[0], eob);
517 vp9_fdct8x8_1(src_diff, coeff, diff_stride);
518 vp9_quantize_dc(coeff, 64, x->skip_block, p->round,
519 p->quant_fp[0], qcoeff, dqcoeff,
520 pd->dequant[0], eob);
523 x->fwd_txm4x4(src_diff, coeff, diff_stride);
524 vp9_quantize_dc(coeff, 16, x->skip_block, p->round,
525 p->quant_fp[0], qcoeff, dqcoeff,
526 pd->dequant[0], eob);
534 void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
535 BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
536 MACROBLOCKD *const xd = &x->e_mbd;
537 const struct macroblock_plane *const p = &x->plane[plane];
538 const struct macroblockd_plane *const pd = &xd->plane[plane];
539 const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
540 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
541 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
542 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
543 uint16_t *const eob = &p->eobs[block];
544 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
546 const int16_t *src_diff;
547 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
548 src_diff = &p->src_diff[4 * (j * diff_stride + i)];
550 #if CONFIG_VP9_HIGHBITDEPTH
551 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
554 highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
555 vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
556 p->round, p->quant, p->quant_shift, qcoeff,
557 dqcoeff, pd->dequant, eob,
558 scan_order->scan, scan_order->iscan);
561 vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
562 vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
563 p->quant, p->quant_shift, qcoeff, dqcoeff,
565 scan_order->scan, scan_order->iscan);
568 vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
569 vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
570 p->quant, p->quant_shift, qcoeff, dqcoeff,
572 scan_order->scan, scan_order->iscan);
575 x->fwd_txm4x4(src_diff, coeff, diff_stride);
576 vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
577 p->quant, p->quant_shift, qcoeff, dqcoeff,
579 scan_order->scan, scan_order->iscan);
586 #endif // CONFIG_VP9_HIGHBITDEPTH
590 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
591 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
592 p->quant, p->quant_shift, qcoeff, dqcoeff,
593 pd->dequant, eob, scan_order->scan,
597 vp9_fdct16x16(src_diff, coeff, diff_stride);
598 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
599 p->quant, p->quant_shift, qcoeff, dqcoeff,
601 scan_order->scan, scan_order->iscan);
604 vp9_fdct8x8(src_diff, coeff, diff_stride);
605 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
606 p->quant, p->quant_shift, qcoeff, dqcoeff,
608 scan_order->scan, scan_order->iscan);
611 x->fwd_txm4x4(src_diff, coeff, diff_stride);
612 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
613 p->quant, p->quant_shift, qcoeff, dqcoeff,
615 scan_order->scan, scan_order->iscan);
623 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
624 TX_SIZE tx_size, void *arg) {
625 struct encode_b_args *const args = arg;
626 MACROBLOCK *const x = args->x;
627 MACROBLOCKD *const xd = &x->e_mbd;
628 struct optimize_ctx *const ctx = args->ctx;
629 struct macroblock_plane *const p = &x->plane[plane];
630 struct macroblockd_plane *const pd = &xd->plane[plane];
631 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
634 ENTROPY_CONTEXT *a, *l;
635 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
636 dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
637 a = &ctx->ta[plane][i];
638 l = &ctx->tl[plane][j];
640 // TODO(jingning): per transformed block zero forcing only enabled for
641 // luma component. will integrate chroma components as well.
642 if (x->zcoeff_blk[tx_size][block] && plane == 0) {
648 if (!x->skip_recode) {
650 // Encoding process for rtc mode
651 if (x->skip_txfm[0] == 1 && plane == 0) {
652 // skip forward transform
657 vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
660 if (max_txsize_lookup[plane_bsize] == tx_size) {
661 int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
662 if (x->skip_txfm[txfm_blk_index] == 0) {
663 // full forward transform and quantization
664 vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
665 } else if (x->skip_txfm[txfm_blk_index]== 2) {
666 // fast path forward transform and quantization
667 vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
669 // skip forward transform
675 vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
680 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
681 const int ctx = combine_entropy_contexts(*a, *l);
682 *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0;
684 *a = *l = p->eobs[block] > 0;
690 if (x->skip_encode || p->eobs[block] == 0)
692 #if CONFIG_VP9_HIGHBITDEPTH
693 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
696 vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride,
697 p->eobs[block], xd->bd);
700 vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride,
701 p->eobs[block], xd->bd);
704 vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride,
705 p->eobs[block], xd->bd);
708 // this is like vp9_short_idct4x4 but has a special case around eob<=1
709 // which is significant (not just an optimization) for the lossless
711 x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride,
712 p->eobs[block], xd->bd);
715 assert(0 && "Invalid transform size");
719 #endif // CONFIG_VP9_HIGHBITDEPTH
723 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
726 vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
729 vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
732 // this is like vp9_short_idct4x4 but has a special case around eob<=1
733 // which is significant (not just an optimization) for the lossless
735 x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
738 assert(0 && "Invalid transform size");
743 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
744 TX_SIZE tx_size, void *arg) {
745 MACROBLOCK *const x = (MACROBLOCK *)arg;
746 MACROBLOCKD *const xd = &x->e_mbd;
747 struct macroblock_plane *const p = &x->plane[plane];
748 struct macroblockd_plane *const pd = &xd->plane[plane];
749 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
752 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
753 dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
755 vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
757 if (p->eobs[block] > 0) {
758 #if CONFIG_VP9_HIGHBITDEPTH
759 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
760 x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
763 #endif // CONFIG_VP9_HIGHBITDEPTH
764 x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
768 void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
769 vp9_subtract_plane(x, bsize, 0);
770 vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
771 encode_block_pass1, x);
774 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
775 MACROBLOCKD *const xd = &x->e_mbd;
776 struct optimize_ctx ctx;
777 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
778 struct encode_b_args arg = {x, &ctx, &mbmi->skip};
786 for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
788 vp9_subtract_plane(x, bsize, plane);
790 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
791 const struct macroblockd_plane* const pd = &xd->plane[plane];
792 const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
793 vp9_get_entropy_contexts(bsize, tx_size, pd,
794 ctx.ta[plane], ctx.tl[plane]);
797 vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
802 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
803 TX_SIZE tx_size, void *arg) {
804 struct encode_b_args* const args = arg;
805 MACROBLOCK *const x = args->x;
806 MACROBLOCKD *const xd = &x->e_mbd;
807 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
808 struct macroblock_plane *const p = &x->plane[plane];
809 struct macroblockd_plane *const pd = &xd->plane[plane];
810 tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
811 tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
812 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
813 const scan_order *scan_order;
815 PREDICTION_MODE mode;
816 const int bwl = b_width_log2_lookup[plane_bsize];
817 const int diff_stride = 4 * (1 << bwl);
820 uint16_t *eob = &p->eobs[block];
821 const int src_stride = p->src.stride;
822 const int dst_stride = pd->dst.stride;
824 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
825 dst = &pd->dst.buf[4 * (j * dst_stride + i)];
826 src = &p->src.buf[4 * (j * src_stride + i)];
827 src_diff = &p->src_diff[4 * (j * diff_stride + i)];
829 #if CONFIG_VP9_HIGHBITDEPTH
830 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
833 scan_order = &vp9_default_scan_orders[TX_32X32];
834 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
835 vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
836 x->skip_encode ? src : dst,
837 x->skip_encode ? src_stride : dst_stride,
838 dst, dst_stride, i, j, plane);
839 if (!x->skip_recode) {
840 vp9_highbd_subtract_block(32, 32, src_diff, diff_stride,
841 src, src_stride, dst, dst_stride, xd->bd);
842 highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
843 vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
844 p->round, p->quant, p->quant_shift,
845 qcoeff, dqcoeff, pd->dequant, eob,
846 scan_order->scan, scan_order->iscan);
848 if (!x->skip_encode && *eob) {
849 vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
853 tx_type = get_tx_type(pd->plane_type, xd);
854 scan_order = &vp9_scan_orders[TX_16X16][tx_type];
855 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
856 vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
857 x->skip_encode ? src : dst,
858 x->skip_encode ? src_stride : dst_stride,
859 dst, dst_stride, i, j, plane);
860 if (!x->skip_recode) {
861 vp9_highbd_subtract_block(16, 16, src_diff, diff_stride,
862 src, src_stride, dst, dst_stride, xd->bd);
863 vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
864 vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
865 p->quant, p->quant_shift, qcoeff, dqcoeff,
867 scan_order->scan, scan_order->iscan);
869 if (!x->skip_encode && *eob) {
870 vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride,
875 tx_type = get_tx_type(pd->plane_type, xd);
876 scan_order = &vp9_scan_orders[TX_8X8][tx_type];
877 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
878 vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
879 x->skip_encode ? src : dst,
880 x->skip_encode ? src_stride : dst_stride,
881 dst, dst_stride, i, j, plane);
882 if (!x->skip_recode) {
883 vp9_highbd_subtract_block(8, 8, src_diff, diff_stride,
884 src, src_stride, dst, dst_stride, xd->bd);
885 vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
886 vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
887 p->quant, p->quant_shift, qcoeff, dqcoeff,
889 scan_order->scan, scan_order->iscan);
891 if (!x->skip_encode && *eob) {
892 vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
897 tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
898 scan_order = &vp9_scan_orders[TX_4X4][tx_type];
899 mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
900 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
901 x->skip_encode ? src : dst,
902 x->skip_encode ? src_stride : dst_stride,
903 dst, dst_stride, i, j, plane);
905 if (!x->skip_recode) {
906 vp9_highbd_subtract_block(4, 4, src_diff, diff_stride,
907 src, src_stride, dst, dst_stride, xd->bd);
908 if (tx_type != DCT_DCT)
909 vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
911 x->fwd_txm4x4(src_diff, coeff, diff_stride);
912 vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
913 p->quant, p->quant_shift, qcoeff, dqcoeff,
915 scan_order->scan, scan_order->iscan);
918 if (!x->skip_encode && *eob) {
919 if (tx_type == DCT_DCT) {
920 // this is like vp9_short_idct4x4 but has a special case around
921 // eob<=1 which is significant (not just an optimization) for the
923 x->highbd_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
925 vp9_highbd_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd);
937 #endif // CONFIG_VP9_HIGHBITDEPTH
941 scan_order = &vp9_default_scan_orders[TX_32X32];
942 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
943 vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
944 x->skip_encode ? src : dst,
945 x->skip_encode ? src_stride : dst_stride,
946 dst, dst_stride, i, j, plane);
947 if (!x->skip_recode) {
948 vp9_subtract_block(32, 32, src_diff, diff_stride,
949 src, src_stride, dst, dst_stride);
950 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
951 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
952 p->quant, p->quant_shift, qcoeff, dqcoeff,
953 pd->dequant, eob, scan_order->scan,
956 if (!x->skip_encode && *eob)
957 vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
960 tx_type = get_tx_type(pd->plane_type, xd);
961 scan_order = &vp9_scan_orders[TX_16X16][tx_type];
962 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
963 vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
964 x->skip_encode ? src : dst,
965 x->skip_encode ? src_stride : dst_stride,
966 dst, dst_stride, i, j, plane);
967 if (!x->skip_recode) {
968 vp9_subtract_block(16, 16, src_diff, diff_stride,
969 src, src_stride, dst, dst_stride);
970 vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
971 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
972 p->quant, p->quant_shift, qcoeff, dqcoeff,
973 pd->dequant, eob, scan_order->scan,
976 if (!x->skip_encode && *eob)
977 vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
980 tx_type = get_tx_type(pd->plane_type, xd);
981 scan_order = &vp9_scan_orders[TX_8X8][tx_type];
982 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
983 vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
984 x->skip_encode ? src : dst,
985 x->skip_encode ? src_stride : dst_stride,
986 dst, dst_stride, i, j, plane);
987 if (!x->skip_recode) {
988 vp9_subtract_block(8, 8, src_diff, diff_stride,
989 src, src_stride, dst, dst_stride);
990 vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
991 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
992 p->quant_shift, qcoeff, dqcoeff,
993 pd->dequant, eob, scan_order->scan,
996 if (!x->skip_encode && *eob)
997 vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
1000 tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
1001 scan_order = &vp9_scan_orders[TX_4X4][tx_type];
1002 mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
1003 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
1004 x->skip_encode ? src : dst,
1005 x->skip_encode ? src_stride : dst_stride,
1006 dst, dst_stride, i, j, plane);
1008 if (!x->skip_recode) {
1009 vp9_subtract_block(4, 4, src_diff, diff_stride,
1010 src, src_stride, dst, dst_stride);
1011 if (tx_type != DCT_DCT)
1012 vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
1014 x->fwd_txm4x4(src_diff, coeff, diff_stride);
1015 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
1016 p->quant_shift, qcoeff, dqcoeff,
1017 pd->dequant, eob, scan_order->scan,
1021 if (!x->skip_encode && *eob) {
1022 if (tx_type == DCT_DCT)
1023 // this is like vp9_short_idct4x4 but has a special case around eob<=1
1024 // which is significant (not just an optimization) for the lossless
1026 x->itxm_add(dqcoeff, dst, dst_stride, *eob);
1028 vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
1039 void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
1040 const MACROBLOCKD *const xd = &x->e_mbd;
1041 struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip};
1043 vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
1044 vp9_encode_block_intra, &arg);