]> granicus.if.org Git - libvpx/commitdiff
Early termination in encoding partition search
authorYunqing Wang <yunqingwang@google.com>
Fri, 15 Aug 2014 00:25:21 +0000 (17:25 -0700)
committerYunqing Wang <yunqingwang@google.com>
Thu, 28 Aug 2014 18:27:28 +0000 (11:27 -0700)
In the partition search, the encoder checks all possible
partitionings in the superblock's partition search tree.
This patch proposed a set of criteria for partition search
early termination, which effectively decided whether or
not to terminate the search in current branch based on the
"skippable" result of the quantized transform coefficients.
The "skippable" information was gathered during the
partition mode search, and no overhead calculations were
introduced.

This patch gives significant encoding speed gains without
sacrificing the quality.

Borg test results:
1. At speed 1,
   stdhd set: psnr: +0.074%, ssim: +0.093%;
   derf set:  psnr: -0.024%, ssim: +0.011%;
2. At speed 2,
   stdhd set: psnr: +0.033%, ssim: +0.100%;
   derf set:  psnr: -0.062%, ssim: +0.003%;
3. At speed 3,
   stdhd set: psnr: +0.060%, ssim: +0.190%;
   derf set:  psnr: -0.064%, ssim: -0.002%;
4. At speed 4,
   stdhd set: psnr: +0.070%, ssim: +0.143%;
   derf set:  psnr: -0.104%, ssim: +0.039%;

The speedup ranges from several percent to 60+%.
                 speed1    speed2    speed3    speed4
(1080p, 100f):
old_town_cross:  48.2%     23.9%     20.8%     16.5%
park_joy:        11.4%     17.8%     29.4%     18.2%
pedestrian_area: 10.7%      4.0%      4.2%      2.4%
(720p, 200f):
mobcal:          68.1%     36.3%     34.4%     17.7%
parkrun:         15.8%     24.2%     37.1%     16.8%
shields:         45.1%     32.8%     30.1%      9.6%
(cif, 300f)
bus:              3.7%     10.4%     14.0%      7.9%
deadline:        13.6%     14.8%     12.6%     10.9%
mobile:           5.3%     11.5%     14.7%     10.7%

Change-Id: I246c38fb952ad762ce5e365711235b605f470a66

vp9/encoder/vp9_context_tree.h
vp9/encoder/vp9_encodeframe.c
vp9/encoder/vp9_rdopt.c
vp9/encoder/vp9_speed_features.c
vp9/encoder/vp9_speed_features.h

index d60e6c3eb5b7bcf6f04b5b3bfbb99d4e6a286ae8..0cbb24429b34386854c50447471d396a96a53383 100644 (file)
@@ -34,6 +34,9 @@ typedef struct {
   int num_4x4_blk;
   int skip;
   int skip_txfm[MAX_MB_PLANE];
+  // For current partition, only if all Y, U, and V transform blocks'
+  // coefficients are quantized to 0, skippable is set to 0.
+  int skippable;
   int best_mode_index;
   int hybrid_pred_diff;
   int comp_pred_diff;
index 950a6c8bbe7aaa9ba9787f3589b2b338e4ffd436..07134dc9d3aeb70b11ea43ea1ae28845551bdbd2 100644 (file)
@@ -727,6 +727,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
     p[i].eobs = ctx->eobs_pbuf[i][0];
   }
   ctx->is_coded = 0;
+  ctx->skippable = 0;
   x->skip_recode = 0;
 
   // Set to zero to make sure we do not use the previous encoded frame stats
@@ -2158,8 +2159,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
 
       if (sum_rd < best_rd) {
-        int64_t stop_thresh = 4096;
-        int64_t stop_thresh_rd;
+        int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
+        int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
 
         best_rate = this_rate;
         best_dist = this_dist;
@@ -2167,14 +2168,18 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
         if (bsize >= BLOCK_8X8)
           pc_tree->partitioning = PARTITION_NONE;
 
-        // Adjust threshold according to partition size.
-        stop_thresh >>= 8 - (b_width_log2(bsize) +
+        // Adjust dist breakout threshold according to the partition size.
+        dist_breakout_thr >>= 8 - (b_width_log2(bsize) +
             b_height_log2(bsize));
 
-        stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
-        // If obtained distortion is very small, choose current partition
-        // and stop splitting.
-        if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
+        // If all y, u, v transform blocks in this partition are skippable, and
+        // the dist & rate are within the thresholds, the partition search is
+        // terminated for current branch of the partition search tree.
+        // The dist & rate thresholds are set to 0 at speed 0 to disable the
+        // early termination at that speed.
+        if (!x->e_mbd.lossless &&
+            (ctx->skippable && best_dist < dist_breakout_thr &&
+            best_rate < rate_breakout_thr)) {
           do_split = 0;
           do_rect = 0;
         }
index 2efa3db524ebee9a3f097bec7c4046b7d3212487..f73006ef7021ed8cbdb70449b8dd66fffbe6c4cd 100644 (file)
@@ -1720,12 +1720,14 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                          int mode_index,
                          int64_t comp_pred_diff[REFERENCE_MODES],
                          const int64_t tx_size_diff[TX_MODES],
-                         int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
+                         int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
+                         int skippable) {
   MACROBLOCKD *const xd = &x->e_mbd;
 
   // Take a snapshot of the coding context so it can be
   // restored if we decide to encode this way
   ctx->skip = x->skip;
+  ctx->skippable = skippable;
   ctx->best_mode_index = mode_index;
   ctx->mic = *xd->mi[0];
   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
@@ -2556,6 +2558,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
   MB_MODE_INFO best_mbmode;
+  int best_mode_skippable = 0;
   int mode_index, best_mode_index = -1;
   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
   vp9_prob comp_mode_p;
@@ -2963,6 +2966,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         best_rd = this_rd;
         best_mbmode = *mbmi;
         best_skip2 = this_skip2;
+        best_mode_skippable = skippable;
+
         if (!x->select_tx_size)
           swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
@@ -3119,8 +3124,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-  store_coding_context(x, ctx, best_mode_index,
-                       best_pred_diff, best_tx_diff, best_filter_diff);
+  store_coding_context(x, ctx, best_mode_index, best_pred_diff,
+                       best_tx_diff, best_filter_diff, best_mode_skippable);
 
   return best_rd;
 }
@@ -3225,7 +3230,7 @@ int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
   if (!x->select_tx_size)
     swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
   store_coding_context(x, ctx, THR_ZEROMV,
-                       best_pred_diff, best_tx_diff, best_filter_diff);
+                       best_pred_diff, best_tx_diff, best_filter_diff, 0);
 
   return this_rd;
 }
@@ -3830,7 +3835,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
 
   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
   store_coding_context(x, ctx, best_ref_index,
-                       best_pred_diff, best_tx_diff, best_filter_diff);
+                       best_pred_diff, best_tx_diff, best_filter_diff, 0);
 
   return best_rd;
 }
index 0afcde5359e69466865ecff7ed397843b22bda6a..0bead48fe63f2efcce43afe3af55546fc499118c 100644 (file)
@@ -92,6 +92,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
 
     sf->tx_size_search_breakout = 1;
+
+    if (MIN(cm->width, cm->height) >= 720)
+      sf->partition_search_breakout_dist_thr = (1 << 23);
+    else
+      sf->partition_search_breakout_dist_thr = (1 << 21);
+    sf->partition_search_breakout_rate_thr = 500;
   }
 
   if (speed >= 2) {
@@ -120,6 +126,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX;
     sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
     sf->adjust_partitioning_from_last_frame = 1;
+
+    if (MIN(cm->width, cm->height) >= 720)
+      sf->partition_search_breakout_dist_thr = (1 << 24);
+    else
+      sf->partition_search_breakout_dist_thr = (1 << 22);
+    sf->partition_search_breakout_rate_thr = 700;
   }
 
   if (speed >= 3) {
@@ -144,6 +156,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
     sf->adaptive_interp_filter_search = 1;
+
+    if (MIN(cm->width, cm->height) >= 720)
+      sf->partition_search_breakout_dist_thr = (1 << 25);
+    else
+      sf->partition_search_breakout_dist_thr = (1 << 23);
+    sf->partition_search_breakout_rate_thr = 1000;
   }
 
   if (speed >= 4) {
@@ -158,6 +176,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->use_lp32x32fdct = 1;
     sf->use_fast_coef_updates = ONE_LOOP_REDUCED;
     sf->use_fast_coef_costing = 1;
+
+    if (MIN(cm->width, cm->height) >= 720)
+      sf->partition_search_breakout_dist_thr = (1 << 26);
+    else
+      sf->partition_search_breakout_dist_thr = (1 << 24);
+    sf->partition_search_breakout_rate_thr = 1500;
   }
 
   if (speed >= 5) {
@@ -411,6 +435,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->recode_tolerance = 25;
   sf->default_interp_filter = SWITCHABLE;
   sf->tx_size_search_breakout = 0;
+  sf->partition_search_breakout_dist_thr = 0;
+  sf->partition_search_breakout_rate_thr = 0;
 
   if (oxcf->mode == REALTIME)
     set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content);
index 46eedc1470c56961f18578b9cc5e2832e9de8ece..e31185e59ece4da243808340082696907ec1c146 100644 (file)
@@ -393,6 +393,10 @@ typedef struct SPEED_FEATURES {
 
   // mask for skip evaluation of certain interp_filter type.
   INTERP_FILTER_MASK interp_filter_search_mask;
+
+  // Partition search early breakout thresholds.
+  int64_t partition_search_breakout_dist_thr;
+  int partition_search_breakout_rate_thr;
 } SPEED_FEATURES;
 
 struct VP9_COMP;