]> granicus.if.org Git - libvpx/blobdiff - vp9/encoder/vp9_speed_features.c
Merge "Make the row based multi-threaded encoder deterministic"
[libvpx] / vp9 / encoder / vp9_speed_features.c
index a3efc1e6aaae688254c3f86484cc52349e445f59..17b0a5644bc84c94fd0c19984d9590595c574f80 100644 (file)
@@ -20,19 +20,14 @@ static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = {
   { 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 }
 };
 
-#define MAX_MESH_SPEED 5  // Max speed setting for mesh motion method
+// Define 3 mesh density levels to control the number of searches.
+#define MESH_DENSITY_LEVELS 3
 static MESH_PATTERN
-    good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
-      { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
+    good_quality_mesh_patterns[MESH_DENSITY_LEVELS][MAX_MESH_STEP] = {
       { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
       { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
       { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
-      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
-      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
     };
-static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
-  50, 25, 15, 5, 1, 1
-};
 
 // Intra only frames, golden frames (except alt ref overlays) and
 // alt ref frames tend to be coded at a higher than ambient quality
@@ -163,6 +158,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
                                                          SPEED_FEATURES *sf,
                                                          int speed) {
   const int boosted = frame_is_boosted(cpi);
+  int i;
 
   sf->tx_size_search_breakout = 1;
   sf->adaptive_rd_thresh = 1;
@@ -171,6 +167,19 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
   sf->use_square_partition_only = !frame_is_boosted(cpi);
   sf->use_square_only_threshold = BLOCK_16X16;
 
+  if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+    sf->exhaustive_searches_thresh = (1 << 22);
+    for (i = 0; i < MAX_MESH_STEP; ++i) {
+      int mesh_density_level = 0;
+      sf->mesh_patterns[i].range =
+          good_quality_mesh_patterns[mesh_density_level][i].range;
+      sf->mesh_patterns[i].interval =
+          good_quality_mesh_patterns[mesh_density_level][i].interval;
+    }
+  } else {
+    sf->exhaustive_searches_thresh = INT_MAX;
+  }
+
   if (speed >= 1) {
     if (cpi->oxcf.pass == 2) {
       TWO_PASS *const twopass = &cpi->twopass;
@@ -208,6 +217,10 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
 
     sf->recode_tolerance_low = 15;
     sf->recode_tolerance_high = 30;
+
+    sf->exhaustive_searches_thresh =
+        (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
+                                                                : INT_MAX;
   }
 
   if (speed >= 2) {
@@ -229,6 +242,16 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
     sf->allow_partition_search_skip = 1;
     sf->recode_tolerance_low = 15;
     sf->recode_tolerance_high = 45;
+
+    if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+      for (i = 0; i < MAX_MESH_STEP; ++i) {
+        int mesh_density_level = 1;
+        sf->mesh_patterns[i].range =
+            good_quality_mesh_patterns[mesh_density_level][i].range;
+        sf->mesh_patterns[i].interval =
+            good_quality_mesh_patterns[mesh_density_level][i].interval;
+      }
+    }
   }
 
   if (speed >= 3) {
@@ -247,6 +270,16 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
     sf->adaptive_interp_filter_search = 1;
+
+    if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+      for (i = 0; i < MAX_MESH_STEP; ++i) {
+        int mesh_density_level = 2;
+        sf->mesh_patterns[i].range =
+            good_quality_mesh_patterns[mesh_density_level][i].range;
+        sf->mesh_patterns[i].interval =
+            good_quality_mesh_patterns[mesh_density_level][i].interval;
+      }
+    }
   }
 
   if (speed >= 4) {
@@ -325,7 +358,6 @@ static void set_rt_speed_feature_framesize_independent(
   sf->adaptive_rd_thresh = 1;
   sf->adaptive_rd_thresh_row_mt = 0;
   sf->use_fast_coef_costing = 1;
-  sf->allow_exhaustive_searches = 0;
   sf->exhaustive_searches_thresh = INT_MAX;
   sf->allow_acl = 0;
   sf->copy_partition_flag = 0;
@@ -498,7 +530,15 @@ static void set_rt_speed_feature_framesize_independent(
       // Enable short circuit for low temporal variance.
       sf->short_circuit_low_temp_var = 1;
     }
-    if (cpi->use_svc) sf->base_mv_aggressive = 1;
+    if (cpi->svc.temporal_layer_id > 0) {
+      sf->adaptive_rd_thresh = 4;
+      sf->limit_newmv_early_exit = 0;
+      sf->mv.subpel_force_stop = (cpi->svc.temporal_layer_id == 1) ? 1 : 2;
+      sf->base_mv_aggressive =
+          (cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1)
+              ? 1
+              : 0;
+    }
   }
 
   if (speed >= 7) {
@@ -512,12 +552,9 @@ static void set_rt_speed_feature_framesize_independent(
     }
     if (!cpi->external_resize) sf->use_source_sad = 1;
     if (sf->use_source_sad) {
-      // For SVC allocate for top layer.
-      if (cpi->content_state_sb == NULL &&
+      if (cpi->content_state_sb_fd == NULL &&
           (!cpi->use_svc ||
            cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
-        cpi->content_state_sb = (uint8_t *)vpx_calloc(
-            (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));
         cpi->content_state_sb_fd = (uint8_t *)vpx_calloc(
             (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));
       }
@@ -528,28 +565,9 @@ static void set_rt_speed_feature_framesize_independent(
     sf->adaptive_rd_thresh = 4;
     // Enable partition copy
     if (!cpi->use_svc && !cpi->resize_pending && cpi->resize_state == ORIG &&
-        !cpi->external_resize && cpi->oxcf.resize_mode == RESIZE_NONE)
+        !cpi->external_resize && cpi->oxcf.resize_mode == RESIZE_NONE) {
       sf->copy_partition_flag = 1;
-
-    if (sf->copy_partition_flag) {
       cpi->max_copied_frame = 4;
-      if (cpi->prev_partition == NULL) {
-        cpi->prev_partition = (BLOCK_SIZE *)vpx_calloc(
-            cm->mi_stride * cm->mi_rows, sizeof(BLOCK_SIZE));
-      }
-      if (cpi->prev_segment_id == NULL) {
-        cpi->prev_segment_id = (int8_t *)vpx_calloc(
-            (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(int8_t));
-      }
-      if (cpi->prev_variance_low == NULL) {
-        cpi->prev_variance_low = (uint8_t *)vpx_calloc(
-            (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25,
-            sizeof(uint8_t));
-      }
-      if (cpi->copied_frame_cnt == NULL) {
-        cpi->copied_frame_cnt = (uint8_t *)vpx_calloc(
-            (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));
-      }
     }
 
     if (cpi->row_mt && cpi->oxcf.max_threads > 1)
@@ -577,10 +595,13 @@ static void set_rt_speed_feature_framesize_independent(
       }
       // Since the short_circuit_low_temp_var is used, reduce the
       // adaptive_rd_thresh level.
-      sf->adaptive_rd_thresh = 2;
+      if (cm->width > 320 && cm->height > 240)
+        sf->adaptive_rd_thresh = 1;
+      else
+        sf->adaptive_rd_thresh = 2;
     }
     sf->limit_newmv_early_exit = 0;
-    if (cm->width > 640 && cm->height > 480) sf->use_simple_block_yrd = 1;
+    if (cm->width > 320 && cm->height > 240) sf->use_simple_block_yrd = 1;
   }
   // Turn off adaptive_rd_thresh if row_mt is on for speed 5, 6, 7.
   if (speed >= 5 && speed < 8 && cpi->row_mt && cpi->num_workers > 1) {
@@ -626,11 +647,13 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
   // With row based multi-threading, the following speed features
   // have to be disabled to guarantee that bitstreams encoded with single thread
   // and multiple threads match
-  if (cpi->oxcf.row_mt_bit_exact) {
-    sf->adaptive_rd_thresh = 0;
-    sf->allow_exhaustive_searches = 0;
-    sf->adaptive_pred_interp_filter = 0;
-  }
+  if (cpi->row_mt_bit_exact) sf->adaptive_rd_thresh = 0;
+
+  // This is only used in motion vector unit test.
+  if (cpi->oxcf.motion_vector_unit_test == 1)
+    cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
+  else if (cpi->oxcf.motion_vector_unit_test == 2)
+    cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
 }
 
 void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
@@ -724,6 +747,16 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
   sf->adaptive_rd_thresh = 1;
   sf->tx_size_search_breakout = 1;
 
+  sf->exhaustive_searches_thresh =
+      (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
+                                                              : INT_MAX;
+  if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+    for (i = 0; i < MAX_MESH_STEP; ++i) {
+      sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
+      sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
+    }
+  }
+
   if (oxcf->mode == REALTIME)
     set_rt_speed_feature_framesize_independent(cpi, sf, oxcf->speed,
                                                oxcf->content);
@@ -733,34 +766,6 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
   cpi->full_search_sad = vp9_full_search_sad;
   cpi->diamond_search_sad = vp9_diamond_search_sad;
 
-  sf->allow_exhaustive_searches = 1;
-  if (oxcf->mode == BEST) {
-    if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
-      sf->exhaustive_searches_thresh = (1 << 20);
-    else
-      sf->exhaustive_searches_thresh = (1 << 21);
-    sf->max_exaustive_pct = 100;
-    for (i = 0; i < MAX_MESH_STEP; ++i) {
-      sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
-      sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
-    }
-  } else {
-    int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
-    if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
-      sf->exhaustive_searches_thresh = (1 << 22);
-    else
-      sf->exhaustive_searches_thresh = (1 << 23);
-    sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
-    if (speed > 0)
-      sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
-
-    for (i = 0; i < MAX_MESH_STEP; ++i) {
-      sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
-      sf->mesh_patterns[i].interval =
-          good_quality_mesh_patterns[speed][i].interval;
-    }
-  }
-
   // Slow quant, dct and trellis not worthwhile for first pass
   // so make sure they are always turned off.
   if (oxcf->pass == 1) sf->optimize_coefficients = 0;
@@ -796,9 +801,11 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
   // With row based multi-threading, the following speed features
   // have to be disabled to guarantee that bitstreams encoded with single thread
   // and multiple threads match
-  if (cpi->oxcf.row_mt_bit_exact) {
-    sf->adaptive_rd_thresh = 0;
-    sf->allow_exhaustive_searches = 0;
-    sf->adaptive_pred_interp_filter = 0;
-  }
+  if (cpi->row_mt_bit_exact) sf->adaptive_rd_thresh = 0;
+
+  // This is only used in motion vector unit test.
+  if (cpi->oxcf.motion_vector_unit_test == 1)
+    cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
+  else if (cpi->oxcf.motion_vector_unit_test == 2)
+    cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
 }