2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
11 #include "./vpx_config.h"
12 #include "./vp9_rtcd.h"
13 #include "vp9/encoder/vp9_encodeframe.h"
14 #include "vp9/encoder/vp9_encodemb.h"
15 #include "vp9/encoder/vp9_encodemv.h"
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/encoder/vp9_onyx_int.h"
18 #include "vp9/common/vp9_extend.h"
19 #include "vp9/common/vp9_entropy.h"
20 #include "vp9/common/vp9_entropymode.h"
21 #include "vp9/common/vp9_quant_common.h"
22 #include "vp9/encoder/vp9_segmentation.h"
23 #include "vp9/encoder/vp9_encodeintra.h"
24 #include "vp9/common/vp9_reconinter.h"
25 #include "vp9/encoder/vp9_rdopt.h"
26 #include "vp9/common/vp9_findnearmv.h"
27 #include "vp9/common/vp9_reconintra.h"
28 #include "vp9/common/vp9_seg_common.h"
29 #include "vp9/common/vp9_tile_common.h"
30 #include "vp9/encoder/vp9_tokenize.h"
31 #include "./vp9_rtcd.h"
35 #include "vpx_ports/vpx_timer.h"
36 #include "vp9/common/vp9_pred_common.h"
37 #include "vp9/common/vp9_mvref_common.h"
39 #define DBG_PRNT_SEGMAP 0
// Forward declarations for routines defined later in this file.
46 void vp9_select_interp_filter_type(VP9_COMP *cpi);
48 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
49 int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize);
51 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
53 /* activity_avg must be positive, or flat regions could get a zero weight
54 * (infinite lambda), which confounds analysis.
55 * This also avoids the need for divide by zero checks in
56 * vp9_activity_masking().
58 #define VP9_ACTIVITY_AVG_MIN (64)
60 /* This is used as a reference when computing the source variance for the
61 * purposes of activity masking.
62 * Eventually this should be replaced by custom no-reference routines,
63 * which will be faster.
// Flat mid-gray (128) 16x16 reference row used by tt_activity_measure().
65 static const uint8_t VP9_VAR_OFFS[16] = {128, 128, 128, 128, 128, 128, 128, 128,
66 128, 128, 128, 128, 128, 128, 128, 128};
68 // Original activity measure from Tim T's code.
// Computes the 16x16 variance of the source block against the flat
// VP9_VAR_OFFS reference and caps the result at 5 << 12 so very busy
// regions do not dominate the frame average.
69 static unsigned int tt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x) {
72 /* TODO: This could also be done over smaller areas (8x8), but that would
73 * require extensive changes elsewhere, as lambda is assumed to be fixed
74 * over an entire MB in most of the code.
75 * Another option is to compute four 8x8 variances, and pick a single
76 * lambda using a non-linear combination (e.g., the smallest, or second
79 act = vp9_variance16x16(x->plane[0].src.buf, x->plane[0].src.stride,
80 VP9_VAR_OFFS, 0, &sse);
83 /* If the region is flat, lower the activity some more. */
// Upper clamp: activity saturates at 5 << 12.
85 act = act < 5 << 12 ? act : 5 << 12;
90 // Stub for alternative experimental activity measures.
// Uses the intra-coding result from vp9_encode_intra() as the activity
// proxy; use_dc_pred selects DC prediction (set on frame edges by caller).
91 static unsigned int alt_activity_measure(VP9_COMP *cpi, MACROBLOCK *x,
93 return vp9_encode_intra(cpi, x, use_dc_pred);
// 64x64 all-zero block, used as a flat reference later in this file
// (e.g. by choose_partitioning on key frames).
95 DECLARE_ALIGNED(16, static const uint8_t, vp9_64x64_zeros[64*64]) = {0};
97 // Measure the activity of the current macroblock
98 // What we measure here is TBD so abstracted to this function
99 #define ALT_ACT_MEASURE 1
// Returns the activity of the MB at (mb_row, mb_col), floored at
// VP9_ACTIVITY_AVG_MIN so flat regions never get a zero weight.
100 static unsigned int mb_activity_measure(VP9_COMP *cpi, MACROBLOCK *x,
101 int mb_row, int mb_col) {
102 unsigned int mb_activity;
104 if (ALT_ACT_MEASURE) {
// DC prediction is used only on exactly one frame edge (top row XOR
// left column); the corner and interior MBs get non-DC prediction.
105 int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
107 // Or use and alternative.
108 mb_activity = alt_activity_measure(cpi, x, use_dc_pred);
110 // Original activity measure from Tim T's code.
111 mb_activity = tt_activity_measure(cpi, x);
// Lower clamp — see the comment above VP9_ACTIVITY_AVG_MIN.
114 if (mb_activity < VP9_ACTIVITY_AVG_MIN)
115 mb_activity = VP9_ACTIVITY_AVG_MIN;
120 // Calculate an "average" mb activity value for the frame
// Stores the result in cpi->activity_avg, floored at VP9_ACTIVITY_AVG_MIN.
// Depending on compile-time configuration (the #if arms are elided in this
// listing) the "average" is the median of the per-MB activity map, the
// simple mean of activity_sum, or an experimental fixed constant.
122 static void calc_av_activity(VP9_COMP *cpi, int64_t activity_sum) {
124 // Find median: Simple n^2 algorithm for experimentation
128 unsigned int *sortlist;
131 // Create a list to sort to
132 CHECK_MEM_ERROR(&cpi->common, sortlist, vpx_calloc(sizeof(unsigned int),
135 // Copy map to sort list
136 vpx_memcpy(sortlist, cpi->mb_activity_map,
137 sizeof(unsigned int) * cpi->common.MBs);
139 // Ripple each value down to its correct position
// Insertion sort — O(n^2), acceptable here per the comment above.
140 for (i = 1; i < cpi->common.MBs; i ++) {
141 for (j = i; j > 0; j --) {
142 if (sortlist[j] < sortlist[j - 1]) {
144 tmp = sortlist[j - 1];
145 sortlist[j - 1] = sortlist[j];
152 // Even number MBs so estimate median as mean of two either side.
153 median = (1 + sortlist[cpi->common.MBs >> 1] +
154 sortlist[(cpi->common.MBs >> 1) + 1]) >> 1;
156 cpi->activity_avg = median;
161 // Simple mean for now
162 cpi->activity_avg = (unsigned int) (activity_sum / cpi->common.MBs);
// Keep activity_avg positive to avoid divide-by-zero downstream.
165 if (cpi->activity_avg < VP9_ACTIVITY_AVG_MIN)
166 cpi->activity_avg = VP9_ACTIVITY_AVG_MIN;
168 // Experimental code: return fixed value normalized for several clips
170 cpi->activity_avg = 100000;
173 #define USE_ACT_INDEX 0
174 #define OUTPUT_NORM_ACT_STATS 0
177 // Calculate an activity index for each mb
// Walks the per-MB activity map and writes a small signed index per MB
// (positive when activity is above cpi->activity_avg, negative below),
// optionally appending the raw values to "norm_act.stt" when
// OUTPUT_NORM_ACT_STATS is enabled.
178 static void calc_activity_index(VP9_COMP *cpi, MACROBLOCK *x) {
179 VP9_COMMON *const cm = &cpi->common;
186 #if OUTPUT_NORM_ACT_STATS
187 FILE *f = fopen("norm_act.stt", "a");
188 fprintf(f, "\n%12d\n", cpi->activity_avg);
191 // Reset pointers to start of activity map
192 x->mb_activity_ptr = cpi->mb_activity_map;
194 // Calculate normalized mb activity number.
195 for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
196 // for each macroblock col in image
197 for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
198 // Read activity from the map
199 act = *(x->mb_activity_ptr);
201 // Calculate a normalized activity number
// Blend of act and the frame average; a/b are swapped in the two
// branches below so the index is symmetric about zero.
202 a = act + 4 * cpi->activity_avg;
203 b = 4 * act + cpi->activity_avg;
206 *(x->activity_ptr) = (int)((b + (a >> 1)) / a) - 1;
208 *(x->activity_ptr) = 1 - (int)((a + (b >> 1)) / b);
210 #if OUTPUT_NORM_ACT_STATS
211 fprintf(f, " %6d", *(x->mb_activity_ptr));
213 // Increment activity map pointers
214 x->mb_activity_ptr++;
217 #if OUTPUT_NORM_ACT_STATS
223 #if OUTPUT_NORM_ACT_STATS
230 // Loop through all MBs. Note activity of each, average activity and
231 // calculate a normalized activity for each
// Pass over the whole frame: measures per-MB activity into
// cpi->mb_activity_map, accumulates activity_sum, then derives the frame
// average (and, in some configurations, the per-MB index).
232 static void build_activity_map(VP9_COMP *cpi) {
233 MACROBLOCK * const x = &cpi->mb;
234 MACROBLOCKD *xd = &x->e_mbd;
235 VP9_COMMON * const cm = &cpi->common;
238 YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
240 int recon_y_stride = new_yv12->y_stride;
244 unsigned int mb_activity;
245 int64_t activity_sum = 0;
247 x->mb_activity_ptr = cpi->mb_activity_map;
249 // for each macroblock row in image
250 for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
252 // reset above block coeffs
253 xd->up_available = (mb_row != 0);
254 recon_yoffset = (mb_row * recon_y_stride * 16);
256 // for each macroblock col in image
257 for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
259 xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
260 xd->left_available = (mb_col != 0);
265 mb_activity = mb_activity_measure(cpi, x, mb_row, mb_col);
268 activity_sum += mb_activity;
270 // Store MB level activity details.
271 *x->mb_activity_ptr = mb_activity;
273 // Increment activity map pointer
274 x->mb_activity_ptr++;
276 // adjust to the next column of source macroblocks
277 x->plane[0].src.buf += 16;
280 // adjust to the next row of mbs
// Rewind past the row just scanned, then step down 16 source rows.
281 x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
284 // Calculate an "average" MB activity
285 calc_av_activity(cpi, activity_sum);
288 // Calculate an activity index number of each mb
289 calc_activity_index(cpi, x);
294 // Macroblock activity masking
// Scales the RD multiplier for the current MB by its activity relative to
// the frame average, recomputes errorperbit from the new rdmult, and
// applies the matching zbin adjustment. The two code paths (index-based
// vs. direct ratio) are selected by elided preprocessor conditionals.
295 void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
297 x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
298 x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
// Guarantee errorperbit is at least 1.
299 x->errorperbit += (x->errorperbit == 0);
303 int64_t act = *(x->mb_activity_ptr);
305 // Apply the masking to the RD multiplier.
306 a = act + (2 * cpi->activity_avg);
307 b = (2 * act) + cpi->activity_avg;
// Rounded scale by b/a; activity_avg >= VP9_ACTIVITY_AVG_MIN keeps a > 0.
309 x->rdmult = (unsigned int) (((int64_t) x->rdmult * b + (a >> 1)) / a);
310 x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
311 x->errorperbit += (x->errorperbit == 0);
314 // Activity based Zbin adjustment
315 adjust_act_zbin(cpi, x);
// Copies the mode decision stored in ctx (made during RD search) back into
// the frame-level mode-info array for every mi unit covered by the block,
// and, when output_enabled, accumulates the encoder statistics (mode
// counts, MV stats, prediction-type RD diffs) that drive probability
// adaptation.
318 static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
319 BLOCK_SIZE_TYPE bsize, int output_enabled) {
321 MACROBLOCK * const x = &cpi->mb;
322 MACROBLOCKD * const xd = &x->e_mbd;
323 MODE_INFO *mi = &ctx->mic;
324 MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi;
326 int mb_mode_index = ctx->best_mode_index;
327 const int mis = cpi->common.mode_info_stride;
328 const int bh = 1 << mi_height_log2(bsize), bw = 1 << mi_width_log2(bsize);
329 const MB_PREDICTION_MODE mb_mode = mi->mbmi.mode;
331 assert(mb_mode < MB_MODE_COUNT);
332 assert(mb_mode_index < MAX_MODES);
333 assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES);
334 assert(mi->mbmi.ref_frame[1] < MAX_REF_FRAMES);
335 assert(mi->mbmi.sb_type == bsize);
337 // Restore the coding context of the MB to that that was in place
338 // when the mode was picked for it
339 for (y = 0; y < bh; y++) {
340 for (x_idx = 0; x_idx < bw; x_idx++) {
// Only touch mi units that lie inside the visible frame.
341 if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + bw > x_idx
342 && (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + bh > y) {
343 MODE_INFO *mi_addr = xd->mode_info_context + x_idx + y * mis;
// Small blocks cannot use the larger transforms, so fold their RD
// diffs down from the next-smaller allowed transform size.
348 if (bsize < BLOCK_SIZE_SB32X32) {
349 if (bsize < BLOCK_SIZE_MB16X16)
350 ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8];
351 ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16];
354 if (mbmi->ref_frame[0] != INTRA_FRAME && mbmi->sb_type < BLOCK_SIZE_SB8X8) {
355 *x->partition_info = ctx->partition_info;
// For sub-8x8 inter blocks the block-level MV is taken from the last
// (bottom-right) sub-block.
356 mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int;
357 mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int;
364 if (!vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) {
365 for (i = 0; i < NB_TXFM_MODES; i++) {
366 cpi->rd_tx_select_diff[i] += ctx->txfm_rd_diff[i];
370 if (cpi->common.frame_type == KEY_FRAME) {
371 // Restore the coding modes to that held in the coding context
372 // if (mb_mode == I4X4_PRED)
373 // for (i = 0; i < 16; i++)
375 // xd->block[i].bmi.as_mode =
376 // xd->mode_info_context->bmi[i].as_mode;
377 // assert(xd->mode_info_context->bmi[i].as_mode < MB_MODE_COUNT);
379 #if CONFIG_INTERNAL_STATS
// Maps intra prediction modes to mode-threshold indices for stats.
380 static const int kf_mode_index[] = {
382 THR_V_PRED /*V_PRED*/,
383 THR_H_PRED /*H_PRED*/,
384 THR_D45_PRED /*D45_PRED*/,
385 THR_D135_PRED /*D135_PRED*/,
386 THR_D117_PRED /*D117_PRED*/,
387 THR_D153_PRED /*D153_PRED*/,
388 THR_D27_PRED /*D27_PRED*/,
389 THR_D63_PRED /*D63_PRED*/,
391 THR_B_PRED /*I4X4_PRED*/,
393 cpi->mode_chosen_counts[kf_mode_index[mb_mode]]++;
396 // Note how often each mode chosen as best
397 cpi->mode_chosen_counts[mb_mode_index]++;
398 if (mbmi->ref_frame[0] != INTRA_FRAME
399 && (mbmi->sb_type < BLOCK_SIZE_SB8X8 || mbmi->mode == NEWMV)) {
400 int_mv best_mv, best_second_mv;
401 const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0];
402 const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1];
403 best_mv.as_int = ctx->best_ref_mv.as_int;
404 best_second_mv.as_int = ctx->second_best_ref_mv.as_int;
// For NEWMV, count MV cost against the top-ranked reference MV.
405 if (mbmi->mode == NEWMV) {
406 best_mv.as_int = mbmi->ref_mvs[rf1][0].as_int;
407 best_second_mv.as_int = mbmi->ref_mvs[rf2][0].as_int;
409 mbmi->best_mv.as_int = best_mv.as_int;
410 mbmi->best_second_mv.as_int = best_second_mv.as_int;
411 vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv);
// Re-broadcast the updated mbmi (best_mv fields) to all covered,
// in-frame mi units.
414 if (bsize > BLOCK_SIZE_SB8X8 && mbmi->mode == NEWMV) {
416 for (j = 0; j < bh; ++j)
417 for (i = 0; i < bw; ++i)
418 if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + bw > i
419 && (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + bh > j)
420 xd->mode_info_context[mis * j + i].mbmi = *mbmi;
423 if (cpi->common.mcomp_filter_type == SWITCHABLE
424 && is_inter_mode(mbmi->mode)) {
425 ++cpi->common.fc.switchable_interp_count[vp9_get_pred_context(
426 &cpi->common, xd, PRED_SWITCHABLE_INTERP)][vp9_switchable_interp_map[mbmi
// Accumulate RD cost differences between prediction strategies.
430 cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff;
431 cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
432 cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;
// Points each of the MACROBLOCK's source-plane descriptors at the pixel
// data for the MB at (mb_row, mb_col) within src, honoring each plane's
// chroma subsampling.
436 void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
437 int mb_row, int mb_col) {
438 uint8_t *buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, src
440 int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, src
444 for (i = 0; i < MAX_MB_PLANE; i++) {
445 setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mb_row, mb_col,
446 NULL, x->e_mbd.plane[i].subsampling_x,
447 x->e_mbd.plane[i].subsampling_y);
// Prepares all per-block encoder state for the block of size bsize at
// (mi_row, mi_col): entropy/partition context pointers, activity map
// pointers, mode-info pointers, destination and source plane pointers, MV
// search limits, RD constants, and the segment id / quantizer.
451 static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
452 BLOCK_SIZE_TYPE bsize) {
453 MACROBLOCK * const x = &cpi->mb;
454 VP9_COMMON * const cm = &cpi->common;
455 MACROBLOCKD * const xd = &x->e_mbd;
457 const int dst_fb_idx = cm->new_fb_idx;
458 const int idx_str = xd->mode_info_stride * mi_row + mi_col;
459 const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
// Activity map is indexed in 16x16 macroblock units (2 mi units each).
460 const int mb_row = mi_row >> 1;
461 const int mb_col = mi_col >> 1;
462 const int idx_map = mb_row * cm->mb_cols + mb_col;
465 // entropy context structures
466 for (i = 0; i < MAX_MB_PLANE; i++) {
467 xd->plane[i].above_context = cm->above_context[i]
468 + (mi_col * 2 >> xd->plane[i].subsampling_x);
469 xd->plane[i].left_context = cm->left_context[i]
470 + (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y);
473 // partition contexts
474 set_partition_seg_context(cm, xd, mi_row, mi_col);
476 // Activity map pointer
477 x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
478 x->active_ptr = cpi->active_map + idx_map;
480 /* pointers to mode info contexts */
481 x->partition_info = x->pi + idx_str;
482 xd->mode_info_context = cm->mi + idx_str;
483 mbmi = &xd->mode_info_context->mbmi;
484 // Special case: if prev_mi is NULL, the previous mode info context
486 xd->prev_mode_info_context = cm->prev_mi ? cm->prev_mi + idx_str : NULL;
488 // Set up destination pointers
489 setup_dst_planes(xd, &cm->yv12_fb[dst_fb_idx], mi_row, mi_col);
491 /* Set up limit values for MV components to prevent them from
492 * extending beyond the UMV borders assuming 16x16 block size */
493 x->mv_row_min = -((mi_row * MI_SIZE)+ VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
494 x->mv_col_min = -((mi_col * MI_SIZE)+ VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
495 x->mv_row_max = ((cm->mi_rows - mi_row) * MI_SIZE
496 + (VP9BORDERINPIXELS - MI_SIZE * bh - VP9_INTERP_EXTEND));
497 x->mv_col_max = ((cm->mi_cols - mi_col) * MI_SIZE
498 + (VP9BORDERINPIXELS - MI_SIZE * bw - VP9_INTERP_EXTEND));
500 // Set up distance of MB to edge of frame in 1/8th pel units
// Blocks must be aligned to their own size within the mi grid.
501 assert(!(mi_col & (bw - 1)) && !(mi_row & (bh - 1)));
502 set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw);
504 /* set up source buffers */
505 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
508 x->rddiv = cpi->RDDIV;
509 x->rdmult = cpi->RDMULT;
512 if (xd->segmentation_enabled) {
// Read segment ids from the updated map if it is being rewritten this
// frame, otherwise from the map carried over from the last frame.
513 uint8_t *map = xd->update_mb_segmentation_map ? cpi->segmentation_map
514 : cm->last_frame_seg_map;
515 mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
517 vp9_mb_init_quantizer(cpi, x);
519 if (xd->segmentation_enabled && cpi->seg0_cnt > 0
520 && !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME)
521 && vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME)) {
// Fixed-point (<<16) fraction of segment-0 blocks processed so far.
522 cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
524 const int y = mb_row & ~3;
525 const int x = mb_col & ~3;
526 const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
527 const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
528 const int tile_progress = cm->cur_tile_mi_col_start * cm->mb_rows >> 1;
529 const int mb_cols = (cm->cur_tile_mi_col_end - cm->cur_tile_mi_col_start)
532 cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress)
// No segmentation: everything is segment 0.
536 mbmi->segment_id = 0;
// Runs the RD mode search for one block: sets up per-block state, applies
// activity masking when tuning for SSIM, then picks the best intra (key
// frame) or inter mode, returning its rate and distortion through
// totalrate/totaldist and recording the decision in ctx.
540 static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
541 TOKENEXTRA **tp, int *totalrate, int64_t *totaldist,
542 BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) {
543 VP9_COMMON * const cm = &cpi->common;
544 MACROBLOCK * const x = &cpi->mb;
545 MACROBLOCKD * const xd = &x->e_mbd;
// Sub-8x8 sizes are only evaluated once per 8x8 (ab_index == 0).
549 if (bsize < BLOCK_SIZE_SB8X8)
550 if (xd->ab_index != 0)
553 set_offsets(cpi, mi_row, mi_col, bsize);
554 xd->mode_info_context->mbmi.sb_type = bsize;
555 if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
556 vp9_activity_masking(cpi, x);
558 /* Find best coding mode & reconstruct the MB so it is available
559 * as a predictor for MBs that follow in the SB */
560 if (cm->frame_type == KEY_FRAME) {
561 vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx);
563 vp9_rd_pick_inter_mode_sb(cpi, x, mi_row, mi_col, totalrate, totaldist,
// Accumulates per-frame prediction statistics (intra/inter, compound vs.
// single reference, which reference was used, and LAST-frame zero-MV
// usage) for the current block. Key frames contribute nothing here.
568 static void update_stats(VP9_COMP *cpi, int mi_row, int mi_col) {
569 VP9_COMMON * const cm = &cpi->common;
570 MACROBLOCK * const x = &cpi->mb;
571 MACROBLOCKD * const xd = &x->e_mbd;
572 MODE_INFO *mi = xd->mode_info_context;
573 MB_MODE_INFO * const mbmi = &mi->mbmi;
575 if (cm->frame_type != KEY_FRAME) {
576 int segment_id, seg_ref_active;
578 segment_id = mbmi->segment_id;
579 seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
582 cpi->intra_inter_count[vp9_get_pred_context(cm, xd, PRED_INTRA_INTER)][mbmi
583 ->ref_frame[0] > INTRA_FRAME]++;
585 // If the segment reference feature is enabled we have only a single
586 // reference frame allowed for the segment so exclude it from
587 // the reference frame counts used to work out probabilities.
588 if ((mbmi->ref_frame[0] > INTRA_FRAME) && !seg_ref_active) {
589 if (cm->comp_pred_mode == HYBRID_PREDICTION)
590 cpi->comp_inter_count[vp9_get_pred_context(cm, xd,
591 PRED_COMP_INTER_INTER)][mbmi
592 ->ref_frame[1] > INTRA_FRAME]++;
// Compound prediction: count which reference pairing was used.
594 if (mbmi->ref_frame[1] > INTRA_FRAME) {
595 cpi->comp_ref_count[vp9_get_pred_context(cm, xd, PRED_COMP_REF_P)][mbmi
596 ->ref_frame[0] == GOLDEN_FRAME]++;
// Single prediction: two-level count (LAST vs. not, then GOLDEN vs. ALT).
598 cpi->single_ref_count[vp9_get_pred_context(cm, xd, PRED_SINGLE_REF_P1)][0][mbmi
599 ->ref_frame[0] != LAST_FRAME]++;
600 if (mbmi->ref_frame[0] != LAST_FRAME)
601 cpi->single_ref_count[vp9_get_pred_context(cm, xd, PRED_SINGLE_REF_P2)][1][mbmi
602 ->ref_frame[0] != GOLDEN_FRAME]++;
605 // Count of last ref frame 0,0 usage
606 if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame[0] == LAST_FRAME))
607 cpi->inter_zz_count++;
611 // TODO(jingning): the variables used here are little complicated. need further
612 // refactoring on organizing the the temporary buffers, when recursive
613 // partition down to 4x4 block size is enabled.
// Returns the PICK_MODE_CONTEXT scratch slot for the given block size,
// indexed by the current sb/mb/b position within the 64x64 superblock.
614 static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
615 BLOCK_SIZE_TYPE bsize) {
616 MACROBLOCKD * const xd = &x->e_mbd;
619 case BLOCK_SIZE_SB64X64:
620 return &x->sb64_context;
621 case BLOCK_SIZE_SB64X32:
622 return &x->sb64x32_context[xd->sb_index];
623 case BLOCK_SIZE_SB32X64:
624 return &x->sb32x64_context[xd->sb_index];
625 case BLOCK_SIZE_SB32X32:
626 return &x->sb32_context[xd->sb_index];
627 case BLOCK_SIZE_SB32X16:
628 return &x->sb32x16_context[xd->sb_index][xd->mb_index];
629 case BLOCK_SIZE_SB16X32:
630 return &x->sb16x32_context[xd->sb_index][xd->mb_index];
631 case BLOCK_SIZE_MB16X16:
632 return &x->mb_context[xd->sb_index][xd->mb_index];
633 case BLOCK_SIZE_SB16X8:
634 return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index];
635 case BLOCK_SIZE_SB8X16:
636 return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];
637 case BLOCK_SIZE_SB8X8:
638 return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];
639 case BLOCK_SIZE_SB8X4:
640 return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index];
641 case BLOCK_SIZE_SB4X8:
642 return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index];
643 case BLOCK_SIZE_AB4X4:
644 return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index];
// Returns a pointer to the stored best-partitioning decision for the
// square block size bsize at the current sb/mb/b position.
651 static BLOCK_SIZE_TYPE *get_sb_partitioning(MACROBLOCK *x,
652 BLOCK_SIZE_TYPE bsize) {
653 MACROBLOCKD *xd = &x->e_mbd;
655 case BLOCK_SIZE_SB64X64:
656 return &x->sb64_partitioning;
657 case BLOCK_SIZE_SB32X32:
658 return &x->sb_partitioning[xd->sb_index];
659 case BLOCK_SIZE_MB16X16:
660 return &x->mb_partitioning[xd->sb_index][xd->mb_index];
661 case BLOCK_SIZE_SB8X8:
662 return &x->b_partitioning[xd->sb_index][xd->mb_index][xd->b_index];
// Restores the above/left entropy contexts and partition contexts that
// save_context() captured, so a speculative partition search can be
// unwound. Counterpart of save_context() below.
669 static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
670 ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
671 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
672 PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
673 BLOCK_SIZE_TYPE bsize) {
674 VP9_COMMON * const cm = &cpi->common;
675 MACROBLOCK * const x = &cpi->mb;
676 MACROBLOCKD * const xd = &x->e_mbd;
678 int bwl = b_width_log2(bsize), bw = 1 << bwl;
679 int bhl = b_height_log2(bsize), bh = 1 << bhl;
680 int mwl = mi_width_log2(bsize), mw = 1 << mwl;
681 int mhl = mi_height_log2(bsize), mh = 1 << mhl;
// Copy back per-plane entropy contexts, scaled by chroma subsampling.
682 for (p = 0; p < MAX_MB_PLANE; p++) {
684 cm->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
685 a + bw * p, sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x);
688 + ((mi_row & MI_MASK)* 2 >> xd->plane[p].subsampling_y),l + bh * p,
689 sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y);
691 vpx_memcpy(cm->above_seg_context + mi_col, sa,
692 sizeof(PARTITION_CONTEXT) * mw);
693 vpx_memcpy(cm->left_seg_context + (mi_row & MI_MASK), sl,
694 sizeof(PARTITION_CONTEXT) * mh);
// Snapshots the above/left entropy contexts and partition contexts of the
// block into the caller-provided buffers (a, l, sa, sl) so that
// restore_context() can rewind after a trial encode.
696 static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
697 ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
698 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
699 PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
700 BLOCK_SIZE_TYPE bsize) {
701 VP9_COMMON * const cm = &cpi->common;
702 MACROBLOCK * const x = &cpi->mb;
703 MACROBLOCKD * const xd = &x->e_mbd;
705 int bwl = b_width_log2(bsize), bw = 1 << bwl;
706 int bhl = b_height_log2(bsize), bh = 1 << bhl;
707 int mwl = mi_width_log2(bsize), mw = 1 << mwl;
708 int mhl = mi_height_log2(bsize), mh = 1 << mhl;
710 // buffer the above/left context information of the block in search.
711 for (p = 0; p < MAX_MB_PLANE; ++p) {
714 cm->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
715 sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x);
719 + ((mi_row & MI_MASK)* 2 >> xd->plane[p].subsampling_y),sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y);
721 vpx_memcpy(sa, cm->above_seg_context + mi_col,
722 sizeof(PARTITION_CONTEXT) * mw);
723 vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK),
724 sizeof(PARTITION_CONTEXT) * mh)
// Encodes a single leaf block: records its sub-block index, refreshes the
// per-block state, writes back the picked mode, runs the superblock
// encode, and (when output is enabled) updates statistics and terminates
// the token stream for the block with EOSB_TOKEN.
727 static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
728 int output_enabled, BLOCK_SIZE_TYPE bsize, int sub_index) {
729 VP9_COMMON * const cm = &cpi->common;
730 MACROBLOCK * const x = &cpi->mb;
731 MACROBLOCKD * const xd = &x->e_mbd;
// Skip blocks that start outside the visible frame.
733 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
737 *(get_sb_index(xd, bsize)) = sub_index;
// Sub-8x8 sizes are encoded once for the whole 8x8 (ab_index == 0 only).
739 if (bsize < BLOCK_SIZE_SB8X8)
740 if (xd->ab_index > 0)
742 set_offsets(cpi, mi_row, mi_col, bsize);
743 update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
744 encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
746 if (output_enabled) {
747 update_stats(cpi, mi_row, mi_col);
749 (*tp)->token = EOSB_TOKEN;
// Recursively encodes the block at (mi_row, mi_col) following the stored
// partitioning: emits NONE/VERT/HORZ leaves via encode_b(), recurses on
// SPLIT, counts the chosen partition type when output is enabled, and
// finally updates the partition context.
754 static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col,
755 int output_enabled, BLOCK_SIZE_TYPE bsize) {
756 VP9_COMMON * const cm = &cpi->common;
757 MACROBLOCK * const x = &cpi->mb;
758 MACROBLOCKD * const xd = &x->e_mbd;
759 BLOCK_SIZE_TYPE c1 = BLOCK_SIZE_SB8X8;
// bs is the child-block offset in mi units (quarter of this block).
760 const int bsl = b_width_log2(bsize), bs = (1 << bsl) / 4;
762 int UNINITIALIZED_IS_SAFE(pl);
764 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
767 c1 = BLOCK_SIZE_AB4X4;
768 if (bsize >= BLOCK_SIZE_SB8X8) {
769 set_partition_seg_context(cm, xd, mi_row, mi_col);
770 pl = partition_plane_context(xd, bsize);
// c1 is the stored best sub-size for this block.
771 c1 = *(get_sb_partitioning(x, bsize));
774 bwl = b_width_log2(c1), bhl = b_height_log2(c1);
// Compare child and parent log-sizes to recover the partition type.
776 if (bsl == bwl && bsl == bhl) {
777 if (output_enabled && bsize >= BLOCK_SIZE_SB8X8)
778 cpi->partition_count[pl][PARTITION_NONE]++;
779 encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1);
780 } else if (bsl == bhl && bsl > bwl) {
782 cpi->partition_count[pl][PARTITION_VERT]++;
783 encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, 0);
784 encode_b(cpi, tp, mi_row, mi_col + bs, output_enabled, c1, 1);
785 } else if (bsl == bwl && bsl > bhl) {
787 cpi->partition_count[pl][PARTITION_HORZ]++;
788 encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, 0);
789 encode_b(cpi, tp, mi_row + bs, mi_col, output_enabled, c1, 1);
791 BLOCK_SIZE_TYPE subsize;
794 assert(bwl < bsl && bhl < bsl);
795 subsize = get_subsize(bsize, PARTITION_SPLIT);
798 cpi->partition_count[pl][PARTITION_SPLIT]++;
// Recurse into the four quadrants.
800 for (i = 0; i < 4; i++) {
801 const int x_idx = i & 1, y_idx = i >> 1;
803 *(get_sb_index(xd, subsize)) = i;
804 encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs,
805 output_enabled, subsize);
809 if (bsize >= BLOCK_SIZE_SB8X8
810 && (bsize == BLOCK_SIZE_SB8X8 || bsl == bwl || bsl == bhl)) {
811 set_partition_seg_context(cm, xd, mi_row, mi_col);
812 update_partition_context(xd, c1, bsize);
// Forces every mi unit of the 8x8-unit (64x64 pixel) region at m to the
// same block size bsize.
816 static void set_partitioning(VP9_COMP *cpi, MODE_INFO *m,
817 BLOCK_SIZE_TYPE bsize) {
818 VP9_COMMON *const cm = &cpi->common;
819 const int mis = cm->mode_info_stride;
820 int block_row, block_col;
821 for (block_row = 0; block_row < 8; ++block_row) {
822 for (block_col = 0; block_col < 8; ++block_col) {
823 m[block_row * mis + block_col].mbmi.sb_type = bsize;
// Copies the sb_type partitioning of the 8x8-unit region from p into m
// (e.g. reusing the previous frame's partitioning).
827 static void copy_partitioning(VP9_COMP *cpi, MODE_INFO *m, MODE_INFO *p) {
828 VP9_COMMON *const cm = &cpi->common;
829 const int mis = cm->mode_info_stride;
830 int block_row, block_col;
831 for (block_row = 0; block_row < 8; ++block_row) {
832 for (block_col = 0; block_col < 8; ++block_col) {
833 m[block_row * mis + block_col].mbmi.sb_type =
834 p[block_row * mis + block_col].mbmi.sb_type;
// Tags every in-frame mi unit covered by a block of size bsize at
// (mi_row, mi_col) with that sb_type. Rectangular sizes are handled by
// taking the larger of the two log-dimensions for the write extent.
839 static void set_block_size(VP9_COMMON * const cm, MODE_INFO *m,
840 BLOCK_SIZE_TYPE bsize, int mis, int mi_row,
843 int bwl = b_width_log2(bsize);
844 int bhl = b_height_log2(bsize);
845 int bsl = (bwl > bhl ? bwl : bhl);
847 int bs = (1 << bsl) / 2; //
848 MODE_INFO *m2 = m + mi_row * mis + mi_col;
849 for (row = 0; row < bs; row++) {
850 for (col = 0; col < bs; col++) {
// Do not write outside the visible mi grid.
851 if (mi_row + row >= cm->mi_rows || mi_col + col >= cm->mi_cols)
853 m2[row * mis + col].mbmi.sb_type = bsize;
// NOTE(review): the lines below are fragments of the variance-tree types:
// a `var` struct (sum_square_error / sum_error / count / variance), the
// `partition_variance` aggregate (none/horz/vert entries), the VT() macro
// that stamps out v8x8..v64x64 quad-tree node types, and the vt_node view
// used by tree_to_node(). Several member lines are elided in this listing.
859 int64_t sum_square_error;
869 } partition_variance;
871 #define VT(TYPE, BLOCKSIZE) \
873 partition_variance vt; \
874 BLOCKSIZE split[4]; } TYPE;
882 partition_variance *vt;
// Adapts a size-specific variance-tree node (v64x64..v8x8, passed as
// void*) into the generic vt_node view: points node->vt at the node's own
// partition_variance and node->split[] at its four children's variance
// entries (or, at the 8x8 leaf level, the children themselves).
892 static void tree_to_node(void *data, BLOCK_SIZE_TYPE block_size, vt_node *node) {
894 switch (block_size) {
895 case BLOCK_SIZE_SB64X64: {
896 v64x64 *vt = (v64x64 *) data;
898 for (i = 0; i < 4; i++)
899 node->split[i] = &vt->split[i].vt.none;
902 case BLOCK_SIZE_SB32X32: {
903 v32x32 *vt = (v32x32 *) data;
905 for (i = 0; i < 4; i++)
906 node->split[i] = &vt->split[i].vt.none;
909 case BLOCK_SIZE_MB16X16: {
910 v16x16 *vt = (v16x16 *) data;
912 for (i = 0; i < 4; i++)
913 node->split[i] = &vt->split[i].vt.none;
916 case BLOCK_SIZE_SB8X8: {
917 v8x8 *vt = (v8x8 *) data;
919 for (i = 0; i < 4; i++)
920 node->split[i] = &vt->split[i];
925 for (i = 0; i < 4; i++)
931 // Set variance values given sum square error, sum error, count.
// Populates a var node; the (elided) variance line computes the standard
// population variance from s2, s and c.
932 static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
933 v->sum_square_error = s2;
938 * (v->sum_square_error - v->sum_error * v->sum_error / v->count)
944 // Combine 2 variance structures by summing the sum_error, sum_square_error,
945 // and counts and then calculating the new variance.
946 void sum_2_variances(var *r, var *a, var*b) {
947 fill_variance(r, a->sum_square_error + b->sum_square_error,
948 a->sum_error + b->sum_error, a->count + b->count);
// Fills in one level of the variance tree: derives the two horizontal
// halves, two vertical halves, and the whole-block ("none") variance of a
// node from its four already-computed quadrant variances.
951 static void fill_variance_tree(void *data, BLOCK_SIZE_TYPE block_size) {
953 tree_to_node(data, block_size, &node);
954 sum_2_variances(&node.vt->horz[0], node.split[0], node.split[1]);
955 sum_2_variances(&node.vt->horz[1], node.split[2], node.split[3]);
956 sum_2_variances(&node.vt->vert[0], node.split[0], node.split[2]);
957 sum_2_variances(&node.vt->vert[1], node.split[1], node.split[3]);
958 sum_2_variances(&node.vt->none, &node.vt->vert[0], &node.vt->vert[1]);
961 #if PERFORM_RANDOM_PARTITIONING
// Experimental variant: like the normal set_vt_partitioning below, but
// gates each none/vert/horz decision on rand() as well as the variance
// threshold (q-squared scaled). Returns nonzero when a partitioning was
// chosen at this level, zero to let the caller recurse further.
962 static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO *m,
963 BLOCK_SIZE_TYPE block_size, int mi_row,
964 int mi_col, int mi_size) {
965 VP9_COMMON * const cm = &cpi->common;
967 const int mis = cm->mode_info_stride;
968 int64_t threshold = 4 * cpi->common.base_qindex * cpi->common.base_qindex;
970 tree_to_node(data, block_size, &vt);
972 // split none is available only if we have more than half a block size
973 // in width and height inside the visible image
974 if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows &&
976 set_block_size(cm, m, block_size, mis, mi_row, mi_col);
980 // vertical split is available on all but the bottom border
981 if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
982 && (rand() & 3) < 1) {
983 set_block_size(cm, m, get_subsize(block_size, PARTITION_VERT), mis, mi_row,
988 // horizontal split is available on all but the right border
989 if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
990 && (rand() & 3) < 1) {
991 set_block_size(cm, m, get_subsize(block_size, PARTITION_HORZ), mis, mi_row,
// Tries to stop the partition search at this level: if the whole block's
// variance (or both half-block variances for vert/horz) is under a
// q-scaled threshold and the block fits in the frame, stamps the chosen
// size into the mode-info grid and returns nonzero; returns zero to make
// the caller recurse into the four quadrants.
1001 static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO *m,
1002 BLOCK_SIZE_TYPE block_size, int mi_row,
1003 int mi_col, int mi_size) {
1004 VP9_COMMON * const cm = &cpi->common;
1006 const int mis = cm->mode_info_stride;
1007 int64_t threshold = 50 * cpi->common.base_qindex;
1009 tree_to_node(data, block_size, &vt);
1011 // split none is available only if we have more than half a block size
1012 // in width and height inside the visible image
1013 if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows
1014 && vt.vt->none.variance < threshold) {
1015 set_block_size(cm, m, block_size, mis, mi_row, mi_col);
1019 // vertical split is available on all but the bottom border
1020 if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
1021 && vt.vt->vert[1].variance < threshold) {
1022 set_block_size(cm, m, get_subsize(block_size, PARTITION_VERT), mis, mi_row,
1027 // horizontal split is available on all but the right border
1028 if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
1029 && vt.vt->horz[1].variance < threshold) {
1030 set_block_size(cm, m, get_subsize(block_size, PARTITION_HORZ), mis, mi_row,
// Chooses a partitioning for one 64x64 superblock based on variance.
// An 8x8 variance tree is built from the difference between the source and
// a reference (all-zeros for key frames, a LAST_FRAME prediction otherwise),
// then block sizes are assigned top-down: the largest block whose variance
// falls below a quantizer-scaled threshold wins (see set_vt_partitioning).
1039 static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
1041 VP9_COMMON * const cm = &cpi->common;
1042 MACROBLOCK *x = &cpi->mb;
1043 MACROBLOCKD *xd = &cpi->mb.e_mbd;
1044 const int mis = cm->mode_info_stride;
1045 // TODO(JBB): More experimentation or testing of this threshold;
1046 int64_t threshold = 4;
// d points at the reference pixels the variance is measured against.
1051 const unsigned char * d;
// Full SB is 64x64; trimmed below when the SB overhangs the frame edge.
1053 int pixels_wide = 64, pixels_high = 64;
1055 vpx_memset(&vt, 0, sizeof(vt));
1057 set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
// mb_to_*_edge are negative (in 1/8-pel units) when the SB extends past the
// frame border; >> 3 converts them back to whole pixels.
1059 if (xd->mb_to_right_edge < 0)
1060 pixels_wide += (xd->mb_to_right_edge >> 3);
1062 if (xd->mb_to_bottom_edge < 0)
1063 pixels_high += (xd->mb_to_bottom_edge >> 3);
1065 s = x->plane[0].src.buf;
1066 sp = x->plane[0].src.stride;
1068 // TODO(JBB): Clearly the higher the quantizer the fewer partitions we want
1069 // but this needs more experimentation.
1070 threshold = threshold * cpi->common.base_qindex * cpi->common.base_qindex;
// Default reference: a constant zero block (key frames have no predictor).
1072 d = vp9_64x64_zeros;
1074 if (cm->frame_type != KEY_FRAME) {
1075 int_mv nearest_mv, near_mv;
// NOTE(review): yv12_fb[0] is used directly rather than going through the
// ref_frame_map — presumably intentional for this heuristic; confirm.
1076 YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[0];
1077 YV12_BUFFER_CONFIG *second_ref_fb = NULL;
1079 setup_pre_planes(xd, ref_fb, second_ref_fb, mi_row, mi_col,
1080 xd->scale_factor, xd->scale_factor_uv);
1081 xd->mode_info_context->mbmi.ref_frame[0] = LAST_FRAME;
1082 xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB64X64;
1083 vp9_find_best_ref_mvs(xd, m->mbmi.ref_mvs[m->mbmi.ref_frame[0]],
1084 &nearest_mv, &near_mv);
// Predict the whole SB with the nearest candidate MV and measure the
// residual variance against that prediction.
1086 xd->mode_info_context->mbmi.mv[0] = nearest_mv;
1087 vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_SIZE_SB64X64);
1088 d = xd->plane[0].dst.buf;
1089 dp = xd->plane[0].dst.stride;
1093 // Fill in the entire tree of 8x8 variances for splits.
1094 for (i = 0; i < 4; i++) {
// i indexes the four 32x32 quadrants; pixel offsets via << 5.
1095 const int x32_idx = ((i & 1) << 5);
1096 const int y32_idx = ((i >> 1) << 5);
1097 for (j = 0; j < 4; j++) {
// j indexes the four 16x16 blocks within a 32x32.
1098 const int x16_idx = x32_idx + ((j & 1) << 4);
1099 const int y16_idx = y32_idx + ((j >> 1) << 4);
1100 v16x16 *vst = &vt.split[i].split[j];
1101 for (k = 0; k < 4; k++) {
// k indexes the four 8x8 blocks within a 16x16.
1102 int x_idx = x16_idx + ((k & 1) << 3);
1103 int y_idx = y16_idx + ((k >> 1) << 3);
1104 unsigned int sse = 0;
// Skip 8x8 blocks that lie entirely outside the visible frame.
1106 if (x_idx < pixels_wide && y_idx < pixels_high)
1107 vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
1108 d + y_idx * dp + x_idx, dp, &sse, &sum);
1109 fill_variance(&vst->split[k].vt.none, sse, sum, 64);
1113 // Fill the rest of the variance tree by summing the split partition
// values bottom-up: 16x16 from 8x8, 32x32 from 16x16, 64x64 from 32x32.
1115 for (i = 0; i < 4; i++) {
1116 for (j = 0; j < 4; j++) {
1117 fill_variance_tree(&vt.split[i].split[j], BLOCK_SIZE_MB16X16);
1119 fill_variance_tree(&vt.split[i], BLOCK_SIZE_SB32X32);
1121 fill_variance_tree(&vt, BLOCK_SIZE_SB64X64);
1122 // Now go through the entire structure, splitting every block size until
1123 // we get to one that's got a variance lower than our threshold, or we
// hit the smallest size (8x8), which is forced unconditionally below.
1125 if (!set_vt_partitioning(cpi, &vt, m, BLOCK_SIZE_SB64X64, mi_row, mi_col,
1127 for (i = 0; i < 4; ++i) {
// Offsets are now in mode-info (8-pel) units: << 2 == 4 MI == 32 pixels.
1128 const int x32_idx = ((i & 1) << 2);
1129 const int y32_idx = ((i >> 1) << 2);
1130 if (!set_vt_partitioning(cpi, &vt.split[i], m, BLOCK_SIZE_SB32X32,
1131 (mi_row + y32_idx), (mi_col + x32_idx), 2)) {
1132 for (j = 0; j < 4; ++j) {
1133 const int x16_idx = ((j & 1) << 1);
1134 const int y16_idx = ((j >> 1) << 1);
1135 if (!set_vt_partitioning(cpi, &vt.split[i].split[j], m,
1137 (mi_row + y32_idx + y16_idx),
1138 (mi_col + x32_idx + x16_idx), 1)) {
// No larger size was flat enough: fall back to 8x8 everywhere here.
1139 for (k = 0; k < 4; ++k) {
1140 const int x8_idx = (k & 1);
1141 const int y8_idx = (k >> 1);
1142 set_block_size(cm, m, BLOCK_SIZE_SB8X8, mis,
1143 (mi_row + y32_idx + y16_idx + y8_idx),
1144 (mi_col + x32_idx + x16_idx + x8_idx));
// Rate-distortion evaluates a superblock using a partitioning that is
// already stored in the mode info (e.g. copied from the previous frame or
// produced by choose_partitioning), recursing into sub-blocks for splits.
// Depending on speed features it also prices the PARTITION_NONE and
// all-split alternatives and keeps whichever has the lowest RD cost.
// Outputs the chosen rate/distortion through *rate / *dist and encodes the
// SB with the winning partitioning.
1152 static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
1153 int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize,
1154 int *rate, int64_t *dist) {
1155 VP9_COMMON * const cm = &cpi->common;
1156 MACROBLOCK * const x = &cpi->mb;
1157 MACROBLOCKD *xd = &cpi->mb.e_mbd;
1158 const int mis = cm->mode_info_stride;
// bwl/bhl describe the size already stored in the mode info; bsl describes
// the size we were asked to code. Comparing them reveals the stored
// partition type (see "parse the partition type" below).
1159 int bwl = b_width_log2(m->mbmi.sb_type);
1160 int bhl = b_height_log2(m->mbmi.sb_type);
1161 int bsl = b_width_log2(bsize);
1162 int bs = (1 << bsl);
1163 int bh = (1 << bhl);
// bss: mode-info step between the four sub-blocks of a split.
1166 int bss = (1 << bsl) / 4;
1168 PARTITION_TYPE partition;
1169 BLOCK_SIZE_TYPE subsize;
// Saved entropy/partition contexts so trial encodes can be rolled back.
1170 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
1171 PARTITION_CONTEXT sl[8], sa[8];
// RD tallies for each candidate; INT_MAX == "not evaluated / invalid".
1172 int last_part_rate = INT_MAX;
1173 int64_t last_part_dist = INT_MAX;
1174 int split_rate = INT_MAX;
1175 int64_t split_dist = INT_MAX;
1176 int none_rate = INT_MAX;
1177 int64_t none_dist = INT_MAX;
1178 int chosen_rate = INT_MAX;
1179 int64_t chosen_dist = INT_MAX;
1180 BLOCK_SIZE_TYPE sub_subsize = BLOCK_SIZE_AB4X4;
1181 int splits_below = 0;
// Remember the stored size so it can be restored after trial encodes.
1182 BLOCK_SIZE_TYPE bs_type = m->mbmi.sb_type;
// Nothing to do for blocks entirely outside the frame.
1184 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
1187 // parse the partition type
1188 if ((bwl == bsl) && (bhl == bsl))
1189 partition = PARTITION_NONE;
1190 else if ((bwl == bsl) && (bhl < bsl))
1191 partition = PARTITION_HORZ;
1192 else if ((bwl < bsl) && (bhl == bsl))
1193 partition = PARTITION_VERT;
1194 else if ((bwl < bsl) && (bhl < bsl))
1195 partition = PARTITION_SPLIT;
1199 subsize = get_subsize(bsize, partition);
1201 if (bsize < BLOCK_SIZE_SB8X8) {
// Sub-8x8 blocks share one partition decision; only index 0 proceeds.
1202 if (xd->ab_index != 0) {
1208 *(get_sb_partitioning(x, bsize)) = subsize;
1210 save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1212 if (cpi->sf.adjust_partitioning_from_last_frame) {
1213 // Check if any of the sub blocks are further split.
1214 if (partition == PARTITION_SPLIT && subsize > BLOCK_SIZE_SB8X8) {
1215 sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
1216 // NOTE(review): splits_below is presumably set within this loop when a
// sub-block is NOT further split — the assignment is not visible here.
1217 for (i = 0; i < 4; i++) {
1218 int jj = i >> 1, ii = i & 0x01;
1219 if (m[jj * bss * mis + ii * bss].mbmi.sb_type >= sub_subsize) {
1225 // If partition is not none try none unless each of the 4 splits are split
// (and the whole block fits inside the frame).
1227 if (partition != PARTITION_NONE && !splits_below &&
1228 mi_row + (ms >> 1) < cm->mi_rows &&
1229 mi_col + (ms >> 1) < cm->mi_cols) {
1230 *(get_sb_partitioning(x, bsize)) = bsize;
1231 pick_sb_modes(cpi, mi_row, mi_col, tp, &none_rate, &none_dist, bsize,
1232 get_block_context(x, bsize));
1234 set_partition_seg_context(cm, xd, mi_row, mi_col);
1235 pl = partition_plane_context(xd, bsize);
1236 none_rate += x->partition_cost[pl][PARTITION_NONE];
// Roll back the trial and restore the stored partitioning.
1238 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1239 m->mbmi.sb_type = bs_type;
1240 *(get_sb_partitioning(x, bsize)) = subsize;
// Evaluate the stored ("last") partitioning itself.
1244 switch (partition) {
1245 case PARTITION_NONE:
1246 pick_sb_modes(cpi, mi_row, mi_col, tp, &last_part_rate, &last_part_dist,
1247 bsize, get_block_context(x, bsize));
1248 set_partition_seg_context(cm, xd, mi_row, mi_col);
1249 pl = partition_plane_context(xd, bsize);
1250 last_part_rate += x->partition_cost[pl][PARTITION_NONE];
1252 case PARTITION_HORZ:
1253 *(get_sb_index(xd, subsize)) = 0;
1254 pick_sb_modes(cpi, mi_row, mi_col, tp, &last_part_rate, &last_part_dist,
1255 subsize, get_block_context(x, subsize));
// Code the second (bottom) half only if it is inside the frame.
1256 if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (mh >> 1) < cm->mi_rows) {
// Commit the first half so the second half sees correct context.
1259 update_state(cpi, get_block_context(x, subsize), subsize, 0);
1260 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
1261 *(get_sb_index(xd, subsize)) = 1;
1262 pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &rt, &dt, subsize,
1263 get_block_context(x, subsize));
1264 last_part_rate += rt;
1265 last_part_dist += dt;
1267 set_partition_seg_context(cm, xd, mi_row, mi_col);
1268 pl = partition_plane_context(xd, bsize);
1269 last_part_rate += x->partition_cost[pl][PARTITION_HORZ];
1271 case PARTITION_VERT:
1272 *(get_sb_index(xd, subsize)) = 0;
1273 pick_sb_modes(cpi, mi_row, mi_col, tp, &last_part_rate, &last_part_dist,
1274 subsize, get_block_context(x, subsize));
// Code the second (right) half only if it is inside the frame.
1275 if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
1278 update_state(cpi, get_block_context(x, subsize), subsize, 0);
1279 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
1280 *(get_sb_index(xd, subsize)) = 1;
1281 pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &rt, &dt, subsize,
1282 get_block_context(x, subsize));
1283 last_part_rate += rt;
1284 last_part_dist += dt;
1286 set_partition_seg_context(cm, xd, mi_row, mi_col);
1287 pl = partition_plane_context(xd, bsize);
1288 last_part_rate += x->partition_cost[pl][PARTITION_VERT];
1290 case PARTITION_SPLIT:
// Recurse into each quadrant, accumulating its rate/distortion.
1294 for (i = 0; i < 4; i++) {
1295 int x_idx = (i & 1) * (ms >> 1);
1296 int y_idx = (i >> 1) * (ms >> 1);
1297 int jj = i >> 1, ii = i & 0x01;
// Quadrants outside the frame are skipped (cost nothing).
1301 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
1304 *(get_sb_index(xd, subsize)) = i;
1306 rd_use_partition(cpi, m + jj * bss * mis + ii * bss, tp, mi_row + y_idx,
1307 mi_col + x_idx, subsize, &rt, &dt);
1308 last_part_rate += rt;
1309 last_part_dist += dt;
1311 set_partition_seg_context(cm, xd, mi_row, mi_col);
1312 pl = partition_plane_context(xd, bsize);
1313 last_part_rate += x->partition_cost[pl][PARTITION_SPLIT];
// Optionally also price a uniform four-way split of this block, when the
// stored partitioning was not already a split and the block is splittable.
1318 if (cpi->sf.adjust_partitioning_from_last_frame
1319 && partition != PARTITION_SPLIT && bsize > BLOCK_SIZE_SB8X8
1320 && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows)
1321 && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) {
1322 BLOCK_SIZE_TYPE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
1325 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1328 for (i = 0; i < 4; i++) {
1329 int x_idx = (i & 1) * (bs >> 2);
1330 int y_idx = (i >> 1) * (bs >> 2);
// Local context buffers deliberately shadow the outer ones: this trial
// saves/restores around each quadrant independently.
1333 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
1334 PARTITION_CONTEXT sl[8], sa[8];
1336 if ((mi_row + y_idx >= cm->mi_rows)
1337 || (mi_col + x_idx >= cm->mi_cols))
1340 *(get_sb_index(xd, split_subsize)) = i;
1341 *(get_sb_partitioning(x, bsize)) = split_subsize;
1342 *(get_sb_partitioning(x, split_subsize)) = split_subsize;
1344 save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1346 pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx, tp, &rt, &dt,
1347 split_subsize, get_block_context(x, split_subsize));
1349 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
// Encode the quadrant (non-final pass) so later quadrants see its context.
1351 if (rt < INT_MAX && dt < INT_MAX)
1352 encode_sb(cpi, tp, mi_row + y_idx, mi_col + x_idx, 0,
1357 set_partition_seg_context(cm, xd, mi_row + y_idx, mi_col + x_idx);
1358 pl = partition_plane_context(xd, bsize);
1359 split_rate += x->partition_cost[pl][PARTITION_NONE];
1361 set_partition_seg_context(cm, xd, mi_row, mi_col);
1362 pl = partition_plane_context(xd, bsize);
1363 split_rate += x->partition_cost[pl][PARTITION_SPLIT];
1365 chosen_rate = split_rate;
1366 chosen_dist = split_dist;
1369 // If last_part is better set the partitioning to that...
1370 if (RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist)
1371 < RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)) {
1372 m->mbmi.sb_type = bsize;
1373 if (bsize >= BLOCK_SIZE_SB8X8)
1374 *(get_sb_partitioning(x, bsize)) = subsize;
1375 chosen_rate = last_part_rate;
1376 chosen_dist = last_part_dist;
1378 // If none was better set the partitioning to that...
1379 if (RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)
1380 > RDCOST(x->rdmult, x->rddiv, none_rate, none_dist)) {
1381 if (bsize >= BLOCK_SIZE_SB8X8)
1382 *(get_sb_partitioning(x, bsize)) = bsize;
1383 chosen_rate = none_rate;
1384 chosen_dist = none_dist;
1387 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1389 // We must have chosen a partitioning and encoding or we'll fail later on.
1390 // No other opportunities for success.
1391 assert(chosen_rate < INT_MAX && chosen_dist < INT_MAX);
// Final encode with the winner; output tokens only at the SB64 root.
1393 encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize);
1394 *rate = chosen_rate;
1395 *dist = chosen_dist;
1399 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
1400 // unlikely to be selected depending on previously rate-distortion optimization
1401 // results, for encoding speed-up.
// Exhaustive recursive partition search: for the block at (mi_row, mi_col)
// of size bsize, evaluates SPLIT (by recursing), NONE, HORZ and VERT
// candidates (subject to speed-feature gating), keeps the best by RD cost
// in srate/sdist, and finally encodes the winner. *rate/*dist return the
// best cost to the caller.
1402 static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
1403 int mi_col, BLOCK_SIZE_TYPE bsize, int *rate,
1405 VP9_COMMON * const cm = &cpi->common;
1406 MACROBLOCK * const x = &cpi->mb;
1407 MACROBLOCKD * const xd = &x->e_mbd;
1408 int bsl = b_width_log2(bsize), bs = 1 << bsl;
// Saved contexts so each candidate is evaluated from identical state.
1410 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
1411 PARTITION_CONTEXT sl[8], sa[8];
// Token pointer snapshot; used by the final sanity asserts below.
1412 TOKENEXTRA *tp_orig = *tp;
1414 BLOCK_SIZE_TYPE subsize;
// Best candidate so far (INT_MAX == nothing valid yet).
1415 int srate = INT_MAX;
1416 int64_t sdist = INT_MAX;
// Sub-8x8 blocks share one decision; only index 0 proceeds.
1418 if (bsize < BLOCK_SIZE_SB8X8)
1419 if (xd->ab_index != 0) {
// This search assumes square blocks throughout.
1424 assert(mi_height_log2(bsize) == mi_width_log2(bsize));
1426 save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
// Candidate 1: PARTITION_SPLIT (recurse into four quadrants), unless the
// use_partitions_greater_than speed feature prunes it at this size.
1429 if (!cpi->sf.use_partitions_greater_than
1430 || (cpi->sf.use_partitions_greater_than
1431 && bsize > cpi->sf.greater_than_block_size)) {
1432 if (bsize >= BLOCK_SIZE_SB8X8) {
1435 subsize = get_subsize(bsize, PARTITION_SPLIT);
1436 *(get_sb_partitioning(x, bsize)) = subsize;
1438 for (i = 0; i < 4; ++i) {
1439 int x_idx = (i & 1) * (ms >> 1);
1440 int y_idx = (i >> 1) * (ms >> 1);
// Quadrants outside the frame are skipped.
1444 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
1447 *(get_sb_index(xd, subsize)) = i;
1448 rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize, &r,
1454 set_partition_seg_context(cm, xd, mi_row, mi_col);
1455 pl = partition_plane_context(xd, bsize);
// r4/d4 accumulate the four quadrants' costs plus the split signal cost.
1457 r4 += x->partition_cost[pl][PARTITION_SPLIT];
1462 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
// Candidates 2-4: NONE / HORZ / VERT, unless use_partitions_less_than
// prunes non-split choices at this size.
1465 if (!cpi->sf.use_partitions_less_than
1466 || (cpi->sf.use_partitions_less_than
1467 && bsize <= cpi->sf.less_than_block_size)) {
1468 int larger_is_better = 0;
// PARTITION_NONE requires the whole block to be inside the frame.
1470 if ((mi_row + (ms >> 1) < cm->mi_rows) &&
1471 (mi_col + (ms >> 1) < cm->mi_cols)) {
1474 pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
1475 get_block_context(x, bsize));
1476 if (bsize >= BLOCK_SIZE_SB8X8) {
1477 set_partition_seg_context(cm, xd, mi_row, mi_col);
1478 pl = partition_plane_context(xd, bsize);
1479 r += x->partition_cost[pl][PARTITION_NONE];
1482 if (RDCOST(x->rdmult, x->rddiv, r, d)
1483 < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
// NONE beat SPLIT: note it so rectangular checks can be skipped.
1486 larger_is_better = 1;
1487 if (bsize >= BLOCK_SIZE_SB8X8)
1488 *(get_sb_partitioning(x, bsize)) = bsize;
// Rectangular candidates, gated by the square-only / less-rectangular
// speed features.
1491 if (!cpi->sf.use_square_partition_only &&
1492 (!cpi->sf.less_rectangular_check ||!larger_is_better)) {
// PARTITION_HORZ: top half, then bottom half if inside the frame.
1494 if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
1497 subsize = get_subsize(bsize, PARTITION_HORZ);
1498 *(get_sb_index(xd, subsize)) = 0;
1499 pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
1500 get_block_context(x, subsize));
1502 if (mi_row + (ms >> 1) < cm->mi_rows) {
// Commit the first half so the second half sees correct context.
1503 update_state(cpi, get_block_context(x, subsize), subsize, 0);
1504 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
1506 *(get_sb_index(xd, subsize)) = 1;
1507 pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize,
1508 get_block_context(x, subsize));
1512 set_partition_seg_context(cm, xd, mi_row, mi_col);
1513 pl = partition_plane_context(xd, bsize);
1515 r2 += x->partition_cost[pl][PARTITION_HORZ];
1516 if (RDCOST(x->rdmult, x->rddiv, r2, d2)
1517 < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
1520 *(get_sb_partitioning(x, bsize)) = subsize;
1522 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
// PARTITION_VERT: left half, then right half if inside the frame.
1526 if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) {
1529 subsize = get_subsize(bsize, PARTITION_VERT);
1530 *(get_sb_index(xd, subsize)) = 0;
1531 pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
1532 get_block_context(x, subsize));
1533 if (mi_col + (ms >> 1) < cm->mi_cols) {
1536 update_state(cpi, get_block_context(x, subsize), subsize, 0);
1537 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
1539 *(get_sb_index(xd, subsize)) = 1;
1540 pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize,
1541 get_block_context(x, subsize));
1545 set_partition_seg_context(cm, xd, mi_row, mi_col);
1546 pl = partition_plane_context(xd, bsize);
1548 r2 += x->partition_cost[pl][PARTITION_VERT];
1549 if (RDCOST(x->rdmult, x->rddiv, r2, d2)
1550 < RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
1553 *(get_sb_partitioning(x, bsize)) = subsize;
1555 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1562 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
// Encode the winning partitioning (final token output only at SB64 root).
1564 if (srate < INT_MAX && sdist < INT_MAX)
1565 encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize);
// At the SB64 root a valid choice must exist and tokens must have been
// emitted; at inner levels the token pointer must be unchanged (trial
// encodes are rolled back).
1567 if (bsize == BLOCK_SIZE_SB64X64) {
1568 assert(tp_orig < *tp);
1569 assert(srate < INT_MAX);
1570 assert(sdist < INT_MAX);
1572 assert(tp_orig == *tp);
// Encodes one row of 64x64 superblocks within the current tile. For each
// SB, the partitioning strategy is chosen by speed features: a fixed size,
// variance-based, reuse of the previous frame's partitioning (periodically
// refreshed), or a full RD partition search.
1576 static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
1578 VP9_COMMON * const cm = &cpi->common;
1581 // Initialize the left context for the new SB row
1582 vpx_memset(&cm->left_context, 0, sizeof(cm->left_context));
1583 vpx_memset(cm->left_seg_context, 0, sizeof(cm->left_seg_context));
1585 // Code each SB in the row
1586 for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
1587 mi_col += 64 / MI_SIZE) {
1590 if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
1591 cpi->sf.use_one_partition_size_always ) {
1592 const int idx_str = cm->mode_info_stride * mi_row + mi_col;
1593 MODE_INFO *m = cm->mi + idx_str;
// p: same SB position in the previous frame's mode info (for reuse).
1594 MODE_INFO *p = cm->prev_mi + idx_str;
1596 if (cpi->sf.use_one_partition_size_always) {
// Force a single fixed block size everywhere.
1597 set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
1598 set_partitioning(cpi, m, cpi->sf.always_this_block_size);
1599 rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
1600 &dummy_rate, &dummy_dist);
1601 } else if (cpi->sf.partition_by_variance) {
1602 choose_partitioning(cpi, cm->mi, mi_row, mi_col);
1603 rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
1604 &dummy_rate, &dummy_dist);
// Otherwise reuse the last frame's partitioning, except on frames where
// that is unsafe or a periodic full search is due.
1606 if ((cpi->common.current_video_frame
1607 % cpi->sf.last_partitioning_redo_frequency) == 0
1609 || cpi->common.show_frame == 0
1610 || cpi->common.frame_type == KEY_FRAME
1611 || cpi->is_src_frame_alt_ref) {
1612 rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
1613 &dummy_rate, &dummy_dist);
1615 copy_partitioning(cpi, m, p);
1616 rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
1617 &dummy_rate, &dummy_dist);
// Default path: full RD partition search for this SB.
1621 rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
1622 &dummy_rate, &dummy_dist);
// Per-frame initialization of the macroblock/macroblockd encode state:
// wires up strides and source/reference/destination planes, resets mode
// probabilities on key frames, zeroes all per-frame counters, and clears
// the above (top-row) entropy and segmentation contexts.
1627 static void init_encode_frame_mb_context(VP9_COMP *cpi) {
1628 MACROBLOCK * const x = &cpi->mb;
1629 VP9_COMMON * const cm = &cpi->common;
1630 MACROBLOCKD * const xd = &x->e_mbd;
1632 x->act_zbin_adj = 0;
1635 xd->mode_info_stride = cm->mode_info_stride;
1636 xd->frame_type = cm->frame_type;
1638 xd->frames_since_golden = cm->frames_since_golden;
1639 xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
1641 // reset intra mode contexts
1642 if (cm->frame_type == KEY_FRAME)
1643 vp9_init_mbmode_probs(cm);
1645 // Copy data over into macro block data structures.
1646 vp9_setup_src_planes(x, cpi->Source, 0, 0);
1648 // TODO(jkoleszar): are these initializations required?
1649 setup_pre_planes(xd, &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]], NULL,
1651 setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], 0, 0);
1653 setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
1655 xd->mode_info_context->mbmi.mode = DC_PRED;
1656 xd->mode_info_context->mbmi.uv_mode = DC_PRED;
// Zero all per-frame statistics counters before encoding begins.
1658 vp9_zero(cpi->y_mode_count)
1659 vp9_zero(cpi->y_uv_mode_count)
1660 vp9_zero(cm->fc.inter_mode_counts)
1661 vp9_zero(cpi->partition_count);
1662 vp9_zero(cpi->intra_inter_count);
1663 vp9_zero(cpi->comp_inter_count);
1664 vp9_zero(cpi->single_ref_count);
1665 vp9_zero(cpi->comp_ref_count);
1666 vp9_zero(cm->fc.tx_count_32x32p);
1667 vp9_zero(cm->fc.tx_count_16x16p);
1668 vp9_zero(cm->fc.tx_count_8x8p);
1669 vp9_zero(cm->fc.mbskip_count);
1671 // Note: this memset assumes above_context[0], [1] and [2]
1672 // are allocated as part of the same buffer.
1674 cm->above_context[0], 0,
1675 sizeof(ENTROPY_CONTEXT) * 2 * MAX_MB_PLANE * mi_cols_aligned_to_sb(cm));
1676 vpx_memset(cm->above_seg_context, 0,
1677 sizeof(PARTITION_CONTEXT) * mi_cols_aligned_to_sb(cm));
// Switches the encoder's transform function pointers between lossless
// (Walsh-Hadamard) and lossy (DCT) variants. In lossless mode it also
// disables trellis optimization, loop filtering and zbin mode boost, and
// forces 4x4 transforms only.
1680 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
1682 // printf("Switching to lossless\n");
1683 cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
1684 cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
1685 cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_iwalsh4x4_1_add;
1686 cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_iwalsh4x4_add;
1687 cpi->mb.optimize = 0;
1688 cpi->common.filter_level = 0;
1689 cpi->zbin_mode_boost_enabled = 0;
1690 cpi->common.txfm_mode = ONLY_4X4;
// Lossy path: restore the regular DCT/IDCT function pointers.
1692 // printf("Not lossless\n");
1693 cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
1694 cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
1695 cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_idct4x4_1_add;
1696 cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_idct4x4_add;
// When the speed feature searches only the largest transform size, cap the
// frame transform mode at ALLOW_32X32 (i.e. demote TX_MODE_SELECT).
1700 static void switch_txfm_mode(VP9_COMP *cpi) {
1701 if (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
1702 cpi->common.txfm_mode >= ALLOW_32X32)
1703 cpi->common.txfm_mode = ALLOW_32X32;
// Core single-pass frame encode: resets per-frame counters and state,
// configures quantizer/RD/ME constants and lossless mode, builds the
// activity map when tuning for SSIM, then walks every tile and SB row
// producing tokens. Also derives the projected frame size from the
// accumulated rate.
1706 static void encode_frame_internal(VP9_COMP *cpi) {
1708 MACROBLOCK * const x = &cpi->mb;
1709 VP9_COMMON * const cm = &cpi->common;
1710 MACROBLOCKD * const xd = &x->e_mbd;
1713 // fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
1714 // cpi->common.current_video_frame, cpi->common.show_frame,
// Debug-only segment map dump (compiled in conditionally elsewhere).
1721 statsfile = fopen("segmap2.stt", "a");
1722 fprintf(statsfile, "\n");
1729 // Reset frame count of inter 0,0 motion vector usage.
1730 cpi->inter_zz_count = 0;
1732 vp9_zero(cm->fc.switchable_interp_count);
1733 vp9_zero(cpi->best_switchable_interp_count);
1734 vp9_zero(cpi->txfm_stepdown_count);
1736 xd->mode_info_context = cm->mi;
1737 xd->prev_mode_info_context = cm->prev_mi;
1739 vp9_zero(cpi->NMVcount);
1740 vp9_zero(cpi->coef_counts);
1741 vp9_zero(cm->fc.eob_branch_counts);
// Lossless only when the base quantizer and all delta-Qs are zero.
1743 cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
1744 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
1745 switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
1747 vp9_frame_init_quantizer(cpi);
1749 vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y_dc_delta_q);
1750 vp9_initialize_me_consts(cpi, cm->base_qindex);
1751 switch_txfm_mode(cpi);
1753 if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
1754 // Initialize encode frame context.
1755 init_encode_frame_mb_context(cpi);
1757 // Build a frame level activity map
1758 build_activity_map(cpi);
1761 // re-initencode frame context.
1762 init_encode_frame_mb_context(cpi);
1764 vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff));
1765 vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff));
1766 vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes));
// Time the mode/partition search loop below.
1771 struct vpx_usec_timer emr_timer;
1772 vpx_usec_timer_start(&emr_timer);
1775 // Take tiles into account and give start/end MB
1776 int tile_col, tile_row;
1777 TOKENEXTRA *tp = cpi->tok;
1779 for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
1780 vp9_get_tile_row_offsets(cm, tile_row);
1782 for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
1783 TOKENEXTRA *tp_old = tp;
1785 // For each row of SBs in the frame
1786 vp9_get_tile_col_offsets(cm, tile_col);
// Step of 8 MI units == one 64x64 superblock row.
1787 for (mi_row = cm->cur_tile_mi_row_start;
1788 mi_row < cm->cur_tile_mi_row_end; mi_row += 8)
1789 encode_sb_row(cpi, mi_row, &tp, &totalrate);
// Record this tile's token count and guard against overrunning the
// token buffer.
1791 cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
1792 assert(tp - cpi->tok <=
1793 get_token_alloc(cm->mb_rows, cm->mb_cols));
1798 vpx_usec_timer_mark(&emr_timer);
1799 cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
1802 // 256 rate units to the bit,
1803 // projected_frame_size in units of BYTES
1804 cpi->projected_frame_size = totalrate >> 8;
1807 // Keep record of the total distortion this time around for future use
1808 cpi->last_frame_distortion = cpi->frame_distortion;
// Returns nonzero when at least two of the three reference frames
// (LAST, GOLDEN, ALTREF) are enabled — i.e. compound prediction is
// possible. Segment-level reference-frame override is checked first.
1813 static int check_dual_ref_flags(VP9_COMP *cpi) {
1814 MACROBLOCKD *xd = &cpi->mb.e_mbd;
1815 int ref_flags = cpi->ref_frame_flags;
// NOTE(review): the early-out body for the segment-feature case is not
// visible here — presumably it restricts to that segment's fixed ref.
1817 if (vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME)) {
// !! normalizes each flag to 0/1 so the sum counts enabled references.
1820 return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG)
1821 + !!(ref_flags & VP9_ALT_FLAG)) >= 2;
// Scans an xmbs-by-ymbs region of mode info and reports whether every
// block in it has mb_skip_coeff set (i.e. the whole region is skipped).
// mis is the mode-info stride.
1825 static int get_skip_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs) {
1828 for (y = 0; y < ymbs; y++) {
1829 for (x = 0; x < xmbs; x++) {
// Any non-skipped block means the region as a whole is not skipped.
1830 if (!mi[y * mis + x].mbmi.mb_skip_coeff)
// Sets txfm_size on every mode-info entry in an xmbs-by-ymbs region.
// mis is the mode-info stride.
1838 static void set_txfm_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs,
1839 TX_SIZE txfm_size) {
1842 for (y = 0; y < ymbs; y++) {
1843 for (x = 0; x < xmbs; x++)
1844 mi[y * mis + x].mbmi.txfm_size = txfm_size;
// Clamps one block's transform size down to txfm_max if it exceeds it,
// first asserting the block may legally do so (it must be skipped, or in
// a SEG_LVL_SKIP segment). bw/bh give the block extent in mode-info units;
// the extent is clipped to the frame boundary before being rewritten.
1848 static void reset_skip_txfm_size_b(VP9_COMP *cpi, MODE_INFO *mi, int mis,
1849 TX_SIZE txfm_max, int bw, int bh, int mi_row,
1850 int mi_col, BLOCK_SIZE_TYPE bsize) {
1851 VP9_COMMON * const cm = &cpi->common;
1852 MB_MODE_INFO * const mbmi = &mi->mbmi;
// Off-frame blocks carry no coded data; nothing to clamp.
1854 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
1857 if (mbmi->txfm_size > txfm_max) {
1858 MACROBLOCK * const x = &cpi->mb;
1859 MACROBLOCKD * const xd = &x->e_mbd;
1860 const int segment_id = mbmi->segment_id;
// Clip the region to the visible frame.
1861 const int ymbs = MIN(bh, cm->mi_rows - mi_row);
1862 const int xmbs = MIN(bw, cm->mi_cols - mi_col);
1864 xd->mode_info_context = mi;
// Only skipped blocks may silently have their tx size rewritten.
1866 vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) || get_skip_flag(mi, mis, ymbs, xmbs));
1867 set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max);
// Recursive walker that applies reset_skip_txfm_size_b over a superblock:
// compares each block's stored size against bsize to recover the partition
// (none / horz / vert / split) and visits the one, two or four pieces
// accordingly, recursing on splits down to 8x8.
1871 static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi,
1872 TX_SIZE txfm_max, int mi_row, int mi_col,
1873 BLOCK_SIZE_TYPE bsize) {
1874 VP9_COMMON * const cm = &cpi->common;
1875 const int mis = cm->mode_info_stride;
// bs is half the block's extent in mode-info units (bsl - 1).
1877 const int bsl = mi_width_log2(bsize), bs = 1 << (bsl - 1);
1879 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
1882 bwl = mi_width_log2(mi->mbmi.sb_type);
1883 bhl = mi_height_log2(mi->mbmi.sb_type);
1885 if (bwl == bsl && bhl == bsl) {
// PARTITION_NONE: one full-size block.
1886 reset_skip_txfm_size_b(cpi, mi, mis, txfm_max, 1 << bsl, 1 << bsl, mi_row,
1888 } else if (bwl == bsl && bhl < bsl) {
// PARTITION_HORZ: top and bottom halves.
1889 reset_skip_txfm_size_b(cpi, mi, mis, txfm_max, 1 << bsl, bs, mi_row, mi_col,
1891 reset_skip_txfm_size_b(cpi, mi + bs * mis, mis, txfm_max, 1 << bsl, bs,
1892 mi_row + bs, mi_col, bsize);
1893 } else if (bwl < bsl && bhl == bsl) {
// PARTITION_VERT: left and right halves.
1894 reset_skip_txfm_size_b(cpi, mi, mis, txfm_max, bs, 1 << bsl, mi_row, mi_col,
1896 reset_skip_txfm_size_b(cpi, mi + bs, mis, txfm_max, bs, 1 << bsl, mi_row,
1897 mi_col + bs, bsize);
// PARTITION_SPLIT: recurse into the four quadrants at the next size down.
1899 BLOCK_SIZE_TYPE subsize;
1902 assert(bwl < bsl && bhl < bsl);
1903 if (bsize == BLOCK_SIZE_SB64X64) {
1904 subsize = BLOCK_SIZE_SB32X32;
1905 } else if (bsize == BLOCK_SIZE_SB32X32) {
1906 subsize = BLOCK_SIZE_MB16X16;
1908 assert(bsize == BLOCK_SIZE_MB16X16);
1909 subsize = BLOCK_SIZE_SB8X8;
1912 for (n = 0; n < 4; n++) {
1913 const int y_idx = n >> 1, x_idx = n & 0x01;
1915 reset_skip_txfm_size_sb(cpi, mi + y_idx * bs * mis + x_idx * bs, txfm_max,
1916 mi_row + y_idx * bs, mi_col + x_idx * bs,
// Walks every 64x64 superblock of the frame and clamps transform sizes to
// txfm_max via reset_skip_txfm_size_sb. Used after the frame-level
// transform mode is lowered so recorded sizes stay consistent with it.
1922 static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
1923 VP9_COMMON * const cm = &cpi->common;
1925 const int mis = cm->mode_info_stride;
1926 MODE_INFO *mi, *mi_ptr = cm->mi;
// 8 MI units per step == one 64x64 superblock.
1928 for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) {
1930 for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi += 8) {
1931 reset_skip_txfm_size_sb(cpi, mi, txfm_max, mi_row, mi_col,
1932 BLOCK_SIZE_SB64X64);
// Maps the current frame to a small integer category used to index the
// per-frame-type RD threshold tables (key / alt-ref overlay / golden or
// alt-ref update / ordinary inter — the returned constants are on lines
// not visible here; vp9_encode_frame below treats 3 as "intra only").
1937 static int get_frame_type(VP9_COMP *cpi) {
1939 if (cpi->common.frame_type == KEY_FRAME)
1941 else if (cpi->is_src_frame_alt_ref && cpi->refresh_golden_frame)
1943 else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
// Chooses the frame's transform mode before encoding: lossless forces
// 4x4; the first frame uses per-block selection; otherwise the choice
// follows the tx-size search method — full RD compares accumulated
// per-frame-type thresholds, "largest all" forces 32x32, and the stepdown
// heuristic picks ALLOW_32X32 when >90% of past searches kept the largest
// size.
1950 static void select_txfm_mode(VP9_COMP *cpi) {
1951 if (cpi->oxcf.lossless) {
1952 cpi->common.txfm_mode = ONLY_4X4;
1953 } else if (cpi->common.current_video_frame == 0) {
1954 cpi->common.txfm_mode = TX_MODE_SELECT;
1956 if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
1957 int frame_type = get_frame_type(cpi);
// Pick whichever of ALLOW_32X32 / TX_MODE_SELECT has scored better on
// past frames of this type.
1958 cpi->common.txfm_mode =
1959 cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32]
1960 > cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
1961 ALLOW_32X32 : TX_MODE_SELECT;
1962 } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
1963 cpi->common.txfm_mode = ALLOW_32X32;
1965 unsigned int total = 0;
1967 for (i = 0; i < TX_SIZE_MAX_SB; ++i)
1968 total += cpi->txfm_stepdown_count[i];
// stepdown_count[0] counts blocks that kept the largest transform.
1970 double fraction = (double)cpi->txfm_stepdown_count[0] / total;
1971 cpi->common.txfm_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT;
1972 // printf("fraction = %f\n", fraction);
1973 } // else keep unchanged
// Top-level frame encode entry point. Decides whether compound inter-inter
// prediction is allowed (requires a reference with opposite sign bias),
// picks the prediction type and transform mode from per-frame-type RD
// history, runs encode_frame_internal, then feeds the measured RD
// differences back into that history and demotes the prediction/transform
// modes when the counts show some options were never used.
1978 void vp9_encode_frame(VP9_COMP *cpi) {
1979 VP9_COMMON * const cm = &cpi->common;
1981 // In the longer term the encoder should be generalized to match the
1982 // decoder such that we allow compound where one of the 3 buffers has a
1983 // differnt sign bias and that buffer is then the fixed ref. However, this
1984 // requires further work in the rd loop. For now the only supported encoder
1985 // side behaviour is where the ALT ref buffer has oppositie sign bias to
1987 if ((cm->ref_frame_sign_bias[ALTREF_FRAME]
1988 == cm->ref_frame_sign_bias[GOLDEN_FRAME])
1989 || (cm->ref_frame_sign_bias[ALTREF_FRAME]
1990 == cm->ref_frame_sign_bias[LAST_FRAME])) {
1991 cm->allow_comp_inter_inter = 0;
1993 cm->allow_comp_inter_inter = 1;
1994 cm->comp_fixed_ref = ALTREF_FRAME;
1995 cm->comp_var_ref[0] = LAST_FRAME;
1996 cm->comp_var_ref[1] = GOLDEN_FRAME;
2002 * This code does a single RD pass over the whole frame assuming
2003 * either compound, single or hybrid prediction as per whatever has
2004 * worked best for that type of frame in the past.
2005 * It also predicts whether another coding mode would have worked
2006 * better that this coding mode. If that is the case, it remembers
2007 * that for subsequent frames.
2008 * It does the same analysis for transform size selection also.
2010 int frame_type = get_frame_type(cpi);
2012 /* prediction (compound, single or hybrid) mode selection */
// frame_type 3 (intra only) or no compound support => single prediction.
2013 if (frame_type == 3 || !cm->allow_comp_inter_inter)
2014 pred_type = SINGLE_PREDICTION_ONLY;
// Compound-only is worthwhile only for fully static content with both
// extra references available.
2015 else if (cpi->rd_prediction_type_threshes[frame_type][1]
2016 > cpi->rd_prediction_type_threshes[frame_type][0]
2017 && cpi->rd_prediction_type_threshes[frame_type][1]
2018 > cpi->rd_prediction_type_threshes[frame_type][2]
2019 && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
2020 pred_type = COMP_PREDICTION_ONLY;
2021 else if (cpi->rd_prediction_type_threshes[frame_type][0]
2022 > cpi->rd_prediction_type_threshes[frame_type][2])
2023 pred_type = SINGLE_PREDICTION_ONLY;
2025 pred_type = HYBRID_PREDICTION;
2027 /* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */
2029 cpi->mb.e_mbd.lossless = 0;
2030 if (cpi->oxcf.lossless) {
2031 cpi->mb.e_mbd.lossless = 1;
2034 select_txfm_mode(cpi);
2035 cpi->common.comp_pred_mode = pred_type;
2036 encode_frame_internal(cpi);
// Fold this frame's measured per-mode RD differences into the running
// per-frame-type thresholds (exponential moving average via >>= 1).
2038 for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
2039 const int diff = (int) (cpi->rd_comp_pred_diff[i] / cpi->common.MBs);
2040 cpi->rd_prediction_type_threshes[frame_type][i] += diff;
2041 cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
2044 for (i = 0; i < NB_TXFM_MODES; ++i) {
2045 int64_t pd = cpi->rd_tx_select_diff[i];
// Bias against TX_MODE_SELECT by subtracting its per-block signalling
// overhead before averaging.
2047 if (i == TX_MODE_SELECT)
2048 pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv,
2049 2048 * (TX_SIZE_MAX_SB - 1), 0);
2050 diff = (int) (pd / cpi->common.MBs);
2051 cpi->rd_tx_select_threshes[frame_type][i] += diff;
2052 cpi->rd_tx_select_threshes[frame_type][i] /= 2;
// If hybrid prediction never actually used one of the modes, demote to
// the mode that was used (and reset the counts).
2055 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
2056 int single_count_zero = 0;
2057 int comp_count_zero = 0;
2059 for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
2060 single_count_zero += cpi->comp_inter_count[i][0];
2061 comp_count_zero += cpi->comp_inter_count[i][1];
2064 if (comp_count_zero == 0) {
2065 cpi->common.comp_pred_mode = SINGLE_PREDICTION_ONLY;
2066 vp9_zero(cpi->comp_inter_count);
2067 } else if (single_count_zero == 0) {
2068 cpi->common.comp_pred_mode = COMP_PREDICTION_ONLY;
2069 vp9_zero(cpi->comp_inter_count);
// Similarly, if per-block tx selection never used some sizes, lower the
// frame transform mode and clamp recorded sizes to match.
2073 if (cpi->common.txfm_mode == TX_MODE_SELECT) {
2075 int count8x8_lp = 0, count8x8_8x8p = 0;
2076 int count16x16_16x16p = 0, count16x16_lp = 0;
// "_lp" totals count a size chosen inside a larger-partition context;
// "_NxNp" totals count it chosen at its own partition size.
2079 for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2080 count4x4 += cm->fc.tx_count_32x32p[i][TX_4X4];
2081 for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2082 count4x4 += cm->fc.tx_count_16x16p[i][TX_4X4];
2083 for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2084 count4x4 += cm->fc.tx_count_8x8p[i][TX_4X4];
2086 for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2087 count8x8_lp += cm->fc.tx_count_32x32p[i][TX_8X8];
2088 for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2089 count8x8_lp += cm->fc.tx_count_16x16p[i][TX_8X8];
2091 for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2092 count8x8_8x8p += cm->fc.tx_count_8x8p[i][TX_8X8];
2094 for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2095 count16x16_16x16p += cm->fc.tx_count_16x16p[i][TX_16X16];
2097 for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2098 count16x16_lp += cm->fc.tx_count_32x32p[i][TX_16X16];
2100 for (i = 0; i < TX_SIZE_CONTEXTS; i++)
2101 count32x32 += cm->fc.tx_count_32x32p[i][TX_32X32];
2103 if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0
2104 && count32x32 == 0) {
2105 cpi->common.txfm_mode = ALLOW_8X8;
2106 reset_skip_txfm_size(cpi, TX_8X8);
2107 } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0
2108 && count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
2109 cpi->common.txfm_mode = ONLY_4X4;
2110 reset_skip_txfm_size(cpi, TX_4X4);
2111 } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
2112 cpi->common.txfm_mode = ALLOW_32X32;
2113 } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
2114 cpi->common.txfm_mode = ALLOW_16X16;
2115 reset_skip_txfm_size(cpi, TX_16X16);
2119 // Update interpolation filter strategy for next frame.
2120 if ((cpi->common.frame_type != KEY_FRAME) && (cpi->sf.search_best_filter))
2121 vp9_select_interp_filter_type(cpi);
// Fallback encode path (conditional branch starts on lines not visible
// here — presumably when the RD-history pass above is not taken).
2123 encode_frame_internal(cpi);
2128 static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
2129 const MACROBLOCKD *xd = &x->e_mbd;
2130 const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
2131 const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
2133 ++cpi->y_uv_mode_count[m][uvm];
2134 if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
2135 const BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
2136 const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
2137 const int bsl = MIN(bwl, bhl);
2138 ++cpi->y_mode_count[MIN(bsl, 3)][m];
2141 int bw = 1 << b_width_log2(xd->mode_info_context->mbmi.sb_type);
2142 int bh = 1 << b_height_log2(xd->mode_info_context->mbmi.sb_type);
2143 for (idy = 0; idy < 2; idy += bh) {
2144 for (idx = 0; idx < 2; idx += bw) {
2145 int m = xd->mode_info_context->bmi[idy * 2 + idx].as_mode;
2146 ++cpi->y_mode_count[0][m];
2152 // Experimental stub function to create a per MB zbin adjustment based on
2153 // some previously calculated measure of MB activity.
2154 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) {
2156 x->act_zbin_adj = *(x->mb_activity_ptr);
2160 int64_t act = *(x->mb_activity_ptr);
2162 // Apply the masking to the RD multiplier.
2163 a = act + 4 * cpi->activity_avg;
2164 b = 4 * act + cpi->activity_avg;
2166 if (act > cpi->activity_avg)
2167 x->act_zbin_adj = (int) (((int64_t) b + (a >> 1)) / a) - 1;
2169 x->act_zbin_adj = 1 - (int) (((int64_t) a + (b >> 1)) / b);
// Encode one superblock: set up quantizer boosts, run intra or inter
// prediction + transform/quantize, tokenize the residual, and (when
// output_enabled) update per-frame statistics.  NOTE(review): the function's
// closing lines fall past this excerpt.
2173 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
2174 int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize) {
2175 VP9_COMMON * const cm = &cpi->common;
2176 MACROBLOCK * const x = &cpi->mb;
2177 MACROBLOCKD * const xd = &x->e_mbd;
2178 MODE_INFO *mi = xd->mode_info_context;
2179 MB_MODE_INFO *mbmi = &mi->mbmi;
2180 unsigned int segment_id = mbmi->segment_id;
2181 const int mis = cm->mode_info_stride;
// bw/bh: block dimensions in mode-info (8x8) units.
2182 const int bwl = mi_width_log2(bsize);
2183 const int bw = 1 << bwl, bh = 1 << mi_height_log2(bsize);
// Key frames: optionally tune the zbin for SSIM, no inter setup needed.
2186 if (cm->frame_type == KEY_FRAME) {
2187 if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
2188 adjust_act_zbin(cpi, x);
2189 vp9_update_zbin_extra(cpi, x);
// Inter frames: select subpel interpolation filters for this block's mode.
2192 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2194 if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
2195 // Adjust the zbin based on this MB rate.
2196 adjust_act_zbin(cpi, x);
2199 // Experimental code. Special case for gf and arf zeromv modes.
2200 // Increase zbin size to suppress noise
2201 cpi->zbin_mode_boost = 0;
2202 if (cpi->zbin_mode_boost_enabled) {
2203 if (mbmi->ref_frame[0] != INTRA_FRAME) {
2204 if (mbmi->mode == ZEROMV) {
// ZEROMV from golden/altref gets a larger boost than from last frame.
2205 if (mbmi->ref_frame[0] != LAST_FRAME)
2206 cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
2208 cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
2209 } else if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
2210 cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST;
2212 cpi->zbin_mode_boost = MV_ZBIN_BOOST;
2215 cpi->zbin_mode_boost = INTRA_ZBIN_BOOST;
// Push the chosen boost into the quantizer state.
2219 vp9_update_zbin_extra(cpi, x);
// Prediction: intra path encodes Y and UV planes directly; sub-8x8 sizes
// are promoted to 8x8 for the plane-level calls throughout this function.
2222 if (mbmi->ref_frame[0] == INTRA_FRAME) {
2223 vp9_encode_intra_block_y(
2224 cm, x, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2225 vp9_encode_intra_block_uv(
2226 cm, x, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2228 sum_intra_stats(cpi, x);
// Inter path: resolve the reference frame buffer(s) and build predictors.
2230 int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[0])];
2231 YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
2232 YV12_BUFFER_CONFIG *second_ref_fb = NULL;
2233 if (mbmi->ref_frame[1] > 0) {
2234 idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[1])];
2235 second_ref_fb = &cm->yv12_fb[idx];
// Inter blocks cannot occur on a key frame.
2238 assert(cm->frame_type != KEY_FRAME);
2240 setup_pre_planes(xd, ref_fb, second_ref_fb, mi_row, mi_col,
2241 xd->scale_factor, xd->scale_factor_uv);
2243 vp9_build_inter_predictors_sb(
2245 bsize < BLOCK_SIZE_SB8X8 ? BLOCK_SIZE_SB8X8 : bsize);
// Tokenization: intra blocks are already encoded above, so only tokenize;
// non-skipped inter blocks are transformed/quantized first.
2248 if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
2249 vp9_tokenize_sb(cpi, xd, t, !output_enabled,
2250 (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2251 } else if (!x->skip) {
2252 vp9_encode_sb(cm, x, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2253 vp9_tokenize_sb(cpi, xd, t, !output_enabled,
2254 (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
// Skipped inter block: derive the skip context from left/above neighbors,
// mark the block as skipped, count it, and clear the token contexts.
2256 int mb_skip_context = xd->left_available ? (mi - 1)->mbmi.mb_skip_coeff : 0;
2257 mb_skip_context += (mi - mis)->mbmi.mb_skip_coeff;
2259 mbmi->mb_skip_coeff = 1;
2261 cm->fc.mbskip_count[mb_skip_context][1]++;
2262 vp9_reset_sb_tokens_context(
2263 xd, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
2266 // copy skip flag on all mb_mode_info contexts in this SB
2267 // if this was a skip at this txfm size
2268 vp9_set_pred_flag(xd, bsize, PRED_MBSKIP, mi->mbmi.mb_skip_coeff);
2270 if (output_enabled) {
// Count the selected transform size, but only where the size is actually
// coded (not for skipped inter blocks or SEG_LVL_SKIP segments).
2271 if (cm->txfm_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_SIZE_SB8X8
2272 && !(mbmi->ref_frame[0] != INTRA_FRAME
2273 && (mbmi->mb_skip_coeff
2274 || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
2275 const int context = vp9_get_pred_context(cm, xd, PRED_TX_SIZE);
2276 if (bsize >= BLOCK_SIZE_SB32X32) {
2277 cm->fc.tx_count_32x32p[context][mbmi->txfm_size]++;
2278 } else if (bsize >= BLOCK_SIZE_MB16X16) {
2279 cm->fc.tx_count_16x16p[context][mbmi->txfm_size]++;
2281 cm->fc.tx_count_8x8p[context][mbmi->txfm_size]++;
// Derive the effective transform size to propagate: start from the largest
// allowed by txfm_mode and step it down until it fits this block size.
2285 TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_32X32 : cm->txfm_mode;
2286 // The new intra coding scheme requires no change of transform size
2287 if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
2288 if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32)
2290 if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16)
2292 if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8)
2294 } else if (bsize >= BLOCK_SIZE_SB8X8) {
2295 sz = mbmi->txfm_size;
// Stamp the derived size onto every in-bounds mode-info cell of this SB.
// NOTE(review): `x` is reused here as a loop index, shadowing the
// MACROBLOCK pointer above.
2300 for (y = 0; y < bh; y++) {
2301 for (x = 0; x < bw; x++) {
2302 if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) {
2303 mi[mis * y + x].mbmi.txfm_size = sz;