]> granicus.if.org Git - libvpx/commitdiff
VP9: Add speed 9 for subpel search.
authorJerome Jiang <jianj@google.com>
Thu, 29 Mar 2018 21:59:58 +0000 (14:59 -0700)
committerMarco Paniconi <marpan@google.com>
Mon, 14 May 2018 15:47:18 +0000 (08:47 -0700)
Set subpel search stop to 2 when motion vector is non zero.

10% speedup on 1 and 2 threads on Samsung Galaxy S8+.

Change-Id: I7323bb913000229cf60a37495bf88bcc51d0ac96

test/cpu_speed_test.cc
test/encode_perf_test.cc
test/svc_datarate_test.cc
test/vp9_datarate_test.cc
test/vp9_ethread_test.cc
vp9/encoder/vp9_pickmode.c
vp9/encoder/vp9_speed_features.c
vp9/encoder/vp9_speed_features.h
vp9/vp9_cx_iface.c

index 404b5b44f4f2333d5a7dd988d5f7437d34e7fe3e..34e35b0653522cf5f2aefbb99e0772986cd2fb36 100644 (file)
@@ -152,5 +152,5 @@ VP9_INSTANTIATE_TEST_CASE(CpuSpeedTest,
                           ::testing::Values(::libvpx_test::kTwoPassGood,
                                             ::libvpx_test::kOnePassGood,
                                             ::libvpx_test::kRealTime),
-                          ::testing::Range(0, 9));
+                          ::testing::Range(0, 10));
 }  // namespace
index 0bb435502b39e8899e3b5f3b4ad95a08ae877015..142d9e2da8ef57727a9426049c6c55c5f793f5f6 100644 (file)
@@ -48,7 +48,7 @@ const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
   EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
 };
 
-const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 };
+const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8, 9 };
 const int kEncodePerfTestThreads[] = { 1, 2, 4 };
 
 #define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0]))
index 608f27ebfb3df28d9d0af0e9b256adc0b98ca5b3..5156db750c2820e73a770e12e960581d3b32b198 100644 (file)
@@ -1150,20 +1150,21 @@ TEST_P(DatarateOnePassCbrSvcSmallKF, OnePassCbrSvc2SL3TLSmallKf) {
 }
 
 VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSingleBR,
-                          ::testing::Range(5, 9));
+                          ::testing::Range(5, 10));
 
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcMultiBR, ::testing::Range(5, 9),
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcMultiBR, ::testing::Range(5, 10),
                           ::testing::Range(0, 3));
 
 VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcFrameDropMultiBR,
-                          ::testing::Range(5, 9), ::testing::Range(0, 2),
+                          ::testing::Range(5, 10), ::testing::Range(0, 2),
                           ::testing::Range(0, 3));
 
 #if CONFIG_VP9_TEMPORAL_DENOISING
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcDenoiser, ::testing::Range(5, 9),
-                          ::testing::Range(1, 3), ::testing::Range(0, 3));
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcDenoiser,
+                          ::testing::Range(5, 10), ::testing::Range(1, 3),
+                          ::testing::Range(0, 3));
 #endif
 
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSmallKF, ::testing::Range(5, 9),
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSmallKF, ::testing::Range(5, 10),
                           ::testing::Range(32, 36));
 }  // namespace
index c4dbcacbef22e290f1de7d55231c5c789a78ae39..cb3aad83945e3933cd4a56362449bfe80f8de1fc 100644 (file)
@@ -824,16 +824,17 @@ TEST_P(DatarateTestVP9LargeDenoiser, DenoiserOffOn) {
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
                           ::testing::Values(::libvpx_test::kOnePassGood,
                                             ::libvpx_test::kRealTime),
-                          ::testing::Range(2, 9), ::testing::Range(0, 4));
+                          ::testing::Range(2, 10), ::testing::Range(0, 4));
 
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeOneBR,
                           ::testing::Values(::libvpx_test::kOnePassGood,
                                             ::libvpx_test::kRealTime),
-                          ::testing::Range(2, 9));
+                          ::testing::Range(2, 10));
 
-VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 9));
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 10));
 
 #if CONFIG_VP9_TEMPORAL_DENOISING
-VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser, ::testing::Range(5, 9));
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser,
+                          ::testing::Range(5, 10));
 #endif
 }  // namespace
index 6b7e512116c186c5b77fcaf7a201601b7da8d50c..44659904f34972efa0c09ccf259522bbabbee18b 100644 (file)
@@ -409,7 +409,7 @@ INSTANTIATE_TEST_CASE_P(
         ::testing::Values(::libvpx_test::kTwoPassGood,
                           ::libvpx_test::kOnePassGood,
                           ::libvpx_test::kRealTime),
-        ::testing::Range(3, 9),    // cpu_used
+        ::testing::Range(3, 10),   // cpu_used
         ::testing::Range(0, 3),    // tile_columns
         ::testing::Range(2, 5)));  // threads
 
index f7d420bc7167aca7cd9d58941dbd68152d633f85..f86c9f09261b7b10a97c4ca1a24eff9ffd56c71a 100644 (file)
@@ -224,6 +224,14 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   if (rv && search_subpel) {
     int subpel_force_stop = cpi->sf.mv.subpel_force_stop;
     if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2;
+    if (cpi->sf.mv.enable_adaptive_subpel_force_stop) {
+      int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh;
+      if (abs(tmp_mv->as_mv.row) >= mv_thresh ||
+          abs(tmp_mv->as_mv.col) >= mv_thresh)
+        subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above;
+      else
+        subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_below;
+    }
     cpi->find_fractional_mv_step(
         x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
         x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
index 90da687268990b20634d3faae020d192c33380df..05ec3c61276c04c3dcd87b35d0b6baee021665e8 100644 (file)
@@ -661,6 +661,14 @@ static void set_rt_speed_feature_framesize_independent(
     sf->limit_newmv_early_exit = 0;
     sf->use_simple_block_yrd = 1;
   }
+
+  if (speed >= 9) {
+    sf->mv.enable_adaptive_subpel_force_stop = 1;
+    sf->mv.adapt_subpel_force_stop.mv_thresh = 2;
+    sf->mv.adapt_subpel_force_stop.force_stop_below = 1;
+    sf->mv.adapt_subpel_force_stop.force_stop_above = 2;
+  }
+
   if (sf->use_altref_onepass) {
     if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) {
       sf->partition_search_type = FIXED_PARTITION;
index 946bf05454a8c85e3df7121936b843ea62f9ca6f..8595e54ab66d8fe670396ef10b31a3f78dacd3fe 100644 (file)
@@ -161,6 +161,17 @@ typedef enum {
   ONE_LOOP_REDUCED = 1
 } FAST_COEFF_UPDATE;
 
+typedef struct ADAPT_SUBPEL_FORCE_STOP {
+  // Threshold for full pixel motion vector;
+  int mv_thresh;
+
+  // subpel_force_stop if full pixel MV is below the threshold.
+  int force_stop_below;
+
+  // subpel_force_stop if full pixel MV is equal to or above the threshold.
+  int force_stop_above;
+} ADAPT_SUBPEL_FORCE_STOP;
+
 typedef struct MV_SPEED_FEATURES {
   // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
   SEARCH_METHODS search_method;
@@ -189,6 +200,11 @@ typedef struct MV_SPEED_FEATURES {
   // 3: Stop at full pixel.
   int subpel_force_stop;
 
+  // If it's enabled, different subpel_force_stop will be used for different MV.
+  int enable_adaptive_subpel_force_stop;
+
+  ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop;
+
   // This variable sets the step_param used in full pel motion search.
   int fullpel_search_step_param;
 } MV_SPEED_FEATURES;
index 5eaa7a18a8c4108dfbea080fca845797f1182e2d..d6c6ece9168f5c6d498bff8260756f48dea14542 100644 (file)
@@ -248,7 +248,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
   RANGE_CHECK(extra_cfg, row_mt, 0, 1);
   RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2);
   RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
-  RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
+  RANGE_CHECK(extra_cfg, cpu_used, -9, 9);
   RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
   RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
   RANGE_CHECK(extra_cfg, tile_rows, 0, 2);