Relocate tpl buffer allocation

author Jingning Han <jingning@google.com>

Tue, 18 Dec 2018 00:09:06 +0000 (16:09 -0800)

committer Jingning Han <jingning@google.com>

Tue, 18 Dec 2018 05:01:13 +0000 (21:01 -0800)
author Jingning Han <jingning@google.com>
Tue, 18 Dec 2018 00:09:06 +0000 (16:09 -0800)
committer Jingning Han <jingning@google.com>
Tue, 18 Dec 2018 05:01:13 +0000 (21:01 -0800)
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c

index 98343f0d2436912ec42451091aabb054707674f7..12bd776af8f7e967fab9796680fd32f034131665 100644 (file)
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -5734,7 +5734,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
      int64_t mc_dep_cost_base = 0;
      int row, col;
  
-    for (row = 0; row < cm->mi_rows; ++row) {
+    for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
        for (col = 0; col < cm->mi_cols; ++col) {
          TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
          intra_cost_base += this_stats->intra_cost;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c

index df4223a235dd18c835b59ba9f4416833bea0439c..f2d8504c599ea23a664768065dde6d2a69a2f85b 100644 (file)
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2110,7 +2110,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
  
  VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
                                  BufferPool *const pool) {
-  unsigned int i, frame;
+  unsigned int i;
    VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
    VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
  
@@ -2361,51 +2361,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
    vp9_set_speed_features_framesize_independent(cpi);
    vp9_set_speed_features_framesize_dependent(cpi);
  
-  if (cpi->sf.enable_tpl_model) {
-    const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
-    const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
-#if CONFIG_NON_GREEDY_MV
-    CHECK_MEM_ERROR(
-        cm, cpi->feature_score_loc_arr,
-        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr)));
-    CHECK_MEM_ERROR(
-        cm, cpi->feature_score_loc_sort,
-        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort)));
-    CHECK_MEM_ERROR(
-        cm, cpi->feature_score_loc_heap,
-        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap)));
-#endif
-    // TODO(jingning): Reduce the actual memory use for tpl model build up.
-    for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
  #if CONFIG_NON_GREEDY_MV
-      int sqr_bsize;
-      int rf_idx;
-      for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
-        for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
-          CHECK_MEM_ERROR(
-              cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],
-              vpx_calloc(mi_rows * mi_cols,
-                         sizeof(*cpi->tpl_stats[frame]
-                                     .pyramid_mv_arr[rf_idx][sqr_bsize])));
-        }
-      }
-#endif
-      CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
-                      vpx_calloc(mi_rows * mi_cols,
-                                 sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
-      cpi->tpl_stats[frame].is_valid = 0;
-      cpi->tpl_stats[frame].width = mi_cols;
-      cpi->tpl_stats[frame].height = mi_rows;
-      cpi->tpl_stats[frame].stride = mi_cols;
-      cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
-      cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
-    }
-
-    for (frame = 0; frame < REF_FRAMES; ++frame) {
-      cpi->enc_frame_buf[frame].mem_valid = 0;
-      cpi->enc_frame_buf[frame].released = 1;
-    }
-  }
+  cpi->feature_score_loc_alloc = 0;
+#endif  // CONFIG_NON_GREEDY_MV
+  for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
  
    // Allocate memory to store variances for a frame.
    CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
@@ -6434,6 +6393,71 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
  #endif  // DUMP_TPL_STATS
  #endif  // CONFIG_NON_GREEDY_MV
  
+// Allocates, or grows when too small, the buffers used by the tpl model.
+//
+// For each of the MAX_ARF_GOP_SIZE tpl_stats entries the existing buffer is
+// kept when it is already at least as large as the current frame size (as
+// rounded by mi_cols_aligned_to_sb()); otherwise the old buffers are released
+// and new ones are obtained with vpx_calloc. The mem_valid/released flags of
+// every enc_frame_buf entry are reset on each call. Allocation results are
+// checked through CHECK_MEM_ERROR.
+static void init_tpl_buffer(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  int frame;
+
+  // mi_cols_aligned_to_sb() is applied to the row count as well.
+  const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+  const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+#if CONFIG_NON_GREEDY_MV
+  int sqr_bsize;
+  int rf_idx;
+
+  // TODO(angiebird): This probably needs further modifications to support
+  // frame scaling later on.
+  // The feature score buffers are allocated only once, on the first call.
+  if (cpi->feature_score_loc_alloc == 0) {
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_arr,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr)));
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_sort,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort)));
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_heap,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap)));
+
+    cpi->feature_score_loc_alloc = 1;
+  }
+#endif
+
+  // TODO(jingning): Reduce the actual memory use for tpl model build up.
+  for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
+    // Reuse the existing buffer when it is allocated and large enough.
+    if (cpi->tpl_stats[frame].width >= mi_cols &&
+        cpi->tpl_stats[frame].height >= mi_rows &&
+        cpi->tpl_stats[frame].tpl_stats_ptr)
+      continue;
+
+#if CONFIG_NON_GREEDY_MV
+    // The heap buffers are the individual [rf_idx][sqr_bsize] entries, so
+    // each one is released right before it is reallocated; pyramid_mv_arr
+    // itself is never a vpx_calloc result and must not be freed.
+    // NOTE(review): assumes entries that were never allocated are NULL
+    // (i.e. cpi starts out zero-initialized) - confirm.
+    for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+      for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
+        vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]);
+        CHECK_MEM_ERROR(
+            cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],
+            vpx_calloc(
+                mi_rows * mi_cols,
+                sizeof(
+                    *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize])));
+      }
+    }
+#endif
+    // Drop the undersized buffer (if any) before allocating its replacement.
+    vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
+    CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
+                    vpx_calloc(mi_rows * mi_cols,
+                               sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
+    cpi->tpl_stats[frame].is_valid = 0;
+    cpi->tpl_stats[frame].width = mi_cols;
+    cpi->tpl_stats[frame].height = mi_rows;
+    cpi->tpl_stats[frame].stride = mi_cols;
+    cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
+    cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
+  }
+
+  // Mark every encoder frame buffer slot as not holding valid memory and as
+  // released.
+  for (frame = 0; frame < REF_FRAMES; ++frame) {
+    cpi->enc_frame_buf[frame].mem_valid = 0;
+    cpi->enc_frame_buf[frame].released = 1;
+  }
+}
+
  static void setup_tpl_stats(VP9_COMP *cpi) {
    GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
    const GF_GROUP *gf_group = &cpi->twopass.gf_group;
@@ -6671,6 +6695,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
    if (gf_group_index == 1 &&
        cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
        cpi->sf.enable_tpl_model) {
+    init_tpl_buffer(cpi);
      vp9_estimate_qp_gop(cpi);
      setup_tpl_stats(cpi);
    }
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h

index 5974750cf565256cca297da007156a24ca142425..9b79fcdce1c5440f8bc8c58684d8c61780ad7767 100644 (file)
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -569,6 +569,7 @@ typedef struct VP9_COMP {
    YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES];
    EncFrameBuf enc_frame_buf[REF_FRAMES];
  #if CONFIG_NON_GREEDY_MV
+  int feature_score_loc_alloc;
    FEATURE_SCORE_LOC *feature_score_loc_arr;
    FEATURE_SCORE_LOC **feature_score_loc_sort;
    FEATURE_SCORE_LOC **feature_score_loc_heap;
author	Jingning Han <jingning@google.com>
	Tue, 18 Dec 2018 00:09:06 +0000 (16:09 -0800)
committer	Jingning Han <jingning@google.com>
	Tue, 18 Dec 2018 05:01:13 +0000 (21:01 -0800)
vp9/encoder/vp9_encodeframe.c		patch \| blob \| history
vp9/encoder/vp9_encoder.c		patch \| blob \| history
vp9/encoder/vp9_encoder.h		patch \| blob \| history