Fix decoder resolution change with tiles

author Frank Galligan <fgalligan@google.com>

Mon, 7 Apr 2014 03:07:14 +0000 (20:07 -0700)

committer Frank Galligan <fgalligan@google.com>

Tue, 8 Apr 2014 22:16:11 +0000 (15:16 -0700)
author Frank Galligan <fgalligan@google.com>
Mon, 7 Apr 2014 03:07:14 +0000 (20:07 -0700)
committer Frank Galligan <fgalligan@google.com>
Tue, 8 Apr 2014 22:16:11 +0000 (15:16 -0700)
diff --git a/test/test-data.sha1 b/test/test-data.sha1

index b8f668a78f79dca8fafdb3d67b51059016dda3df..18f5b88598584d9b97bf5045a49ddcb19aaf2613 100644 (file)
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -591,3 +591,7 @@ c21e97e4ba486520118d78b01a5cb6e6dc33e190  vp90-2-12-droppable_3.ivf
  601abc9e4176c70f82ac0381365e9b151fdd24cd  vp90-2-12-droppable_3.ivf.md5
  61c640dad23cd4f7ad811b867e7b7e3521f4e3ba  vp90-2-13-largescaling.webm
  bca1b02eebdb088fa3f389fe0e7571e75a71f523  vp90-2-13-largescaling.webm.md5
+248e799b418cc4a20db149f0ca085583e772643c  vp90-2-14-resize-1280x720-848x480.webm
+80b82616efc12593d796b7b1d5cee06c48a0a11e  vp90-2-14-resize-1280x720-848x480.webm.md5
+df93c49c951c460dfebd06af1e81ca11de7e3f51  vp90-2-14-resize-848x480-1280x720.webm
+b1faac0b794a50efa811253d255940c4b1c3b885  vp90-2-14-resize-848x480-1280x720.webm.md5
diff --git a/test/test.mk b/test/test.mk

index 4d96bc69db3c2dc43a1a0bd2f3a8001662f135c9..0cf2b3655a91c96c6169b8fc5237c0bdf9f21e64 100644 (file)
--- a/test/test.mk
+++ b/test/test.mk
@@ -698,6 +698,10 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm
  LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm.md5
  LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
  LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-1280x720-848x480.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-1280x720-848x480.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-848x480-1280x720.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-848x480-1280x720.webm.md5
  
  ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
  # BBB VP9 streams
diff --git a/test/test_vectors.cc b/test/test_vectors.cc

index 8c789ffe7a4add966b13b1e63f1fa455c3e2381b..a718f3d4134a8767b0b9bf550dfec29024a3b7d3 100644 (file)
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -164,7 +164,9 @@ const char *const kVP9TestVectors[] = {
    "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
    "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
    "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
-  "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm"
+  "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm",
+  "vp90-2-14-resize-1280x720-848x480.webm",
+  "vp90-2-14-resize-848x480-1280x720.webm"
  };
  const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
  #endif  // CONFIG_VP9_DECODER
diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc

index a78cdea6b1284f7908513c281cd2cf9028d19c22..731c236e30ac6efe387baef7294be969fa402c2d 100644 (file)
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -153,6 +153,26 @@ TEST(VP9DecodeMTTest, MTDecode2) {
    }
  }
  
+// Test tile quantity changes within one file.
+TEST(VP9DecodeMTTest, MTDecode3) {
+  static const struct {
+    const char *name;
+    const char *expected_md5;
+  } files[] = {
+    { "vp90-2-14-resize-1280x720-848x480.webm",
+      "1fe742b0c5d9ed92314b7bc68bbea153" },
+    { "vp90-2-14-resize-848x480-1280x720.webm",
+      "0f163f491617f0ebcb80f1033eeb5d40" },
+  };
+
+  for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) {
+    for (int t = 2; t <= 8; ++t) {
+      EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str())
+          << "threads = " << t;
+    }
+  }
+}
+
  INSTANTIATE_TEST_CASE_P(Synchronous, VP9WorkerThreadTest, ::testing::Bool());
  
  }  // namespace
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c

index 5a2e6f8811afce4b2d91383943a1e493367d4a90..08b8522cc9239e02a21edd6071dd91727003a913 100644 (file)
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -882,12 +882,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
    assert(tile_rows == 1);
    (void)tile_rows;
  
-  if (num_workers > pbi->num_tile_workers) {
+  // TODO(jzen): See if we can remove the restriction of passing in max
+  // threads to the decoder.
+  if (pbi->num_tile_workers == 0) {
+    const int num_threads = pbi->oxcf.max_threads & ~1;
      int i;
+    // TODO(jzern): Allocate one less worker, as in the current code we only
+    // use num_threads - 1 workers.
      CHECK_MEM_ERROR(cm, pbi->tile_workers,
-                    vpx_realloc(pbi->tile_workers,
-                                num_workers * sizeof(*pbi->tile_workers)));
-    for (i = pbi->num_tile_workers; i < num_workers; ++i) {
+                    vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
+    for (i = 0; i < num_threads; ++i) {
        VP9Worker *const worker = &pbi->tile_workers[i];
        ++pbi->num_tile_workers;
  
@@ -895,7 +899,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
        CHECK_MEM_ERROR(cm, worker->data1,
                        vpx_memalign(32, sizeof(TileWorkerData)));
        CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
-      if (i < num_workers - 1 && !vp9_worker_reset(worker)) {
+      if (i < num_threads - 1 && !vp9_worker_reset(worker)) {
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile decoder thread creation failed");
        }
@@ -903,7 +907,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi,
    }
  
    // Reset tile decoding hook
-  for (n = 0; n < pbi->num_tile_workers; ++n) {
+  for (n = 0; n < num_workers; ++n) {
      pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook;
    }
  
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c

index 1639360217fbbe5ecceb038cca8e24187264e760..acbe878b33c85cd05ce807465940b64df5c3831e 100644 (file)
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -139,6 +139,8 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
                                int y_only, int partial_frame) {
    // Number of superblock rows and cols
    const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+  const int tile_cols = 1 << cm->log2_tile_cols;
+  const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols);
    int i;
  
    // Allocate memory used in thread synchronization.
@@ -168,7 +170,16 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
               sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);
  
    // Set up loopfilter thread data.
-  for (i = 0; i < pbi->num_tile_workers; ++i) {
+  // The decoder is using num_workers instead of pbi->num_tile_workers
+  // because it has been observed that using more threads on the
+  // loopfilter, than there are tile columns in the frame will hurt
+  // performance on Android. This is because the system will only
+  // schedule the tile decode workers on cores equal to the number
+  // of tile columns. Then if the decoder tries to use more threads for the
+  // loopfilter, it will hurt performance because of contention. If the
+  // multithreading code changes in the future then the number of workers
+  // used by the loopfilter should be revisited.
+  for (i = 0; i < num_workers; ++i) {
      VP9Worker *const worker = &pbi->tile_workers[i];
      TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
      LFWorkerData *const lf_data = &tile_data->lfdata;
@@ -184,10 +195,10 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
      lf_data->y_only = y_only;   // always do all planes in decoder
  
      lf_data->lf_sync = &pbi->lf_row_sync;
-    lf_data->num_lf_workers = pbi->num_tile_workers;
+    lf_data->num_lf_workers = num_workers;
  
      // Start loopfiltering
-    if (i == pbi->num_tile_workers - 1) {
+    if (i == num_workers - 1) {
        vp9_worker_execute(worker);
      } else {
        vp9_worker_launch(worker);
@@ -195,7 +206,7 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
    }
  
    // Wait till all rows are finished
-  for (i = 0; i < pbi->num_tile_workers; ++i) {
+  for (i = 0; i < num_workers; ++i) {
      vp9_worker_sync(&pbi->tile_workers[i]);
    }
  }
author	Frank Galligan <fgalligan@google.com>
	Mon, 7 Apr 2014 03:07:14 +0000 (20:07 -0700)
committer	Frank Galligan <fgalligan@google.com>
	Tue, 8 Apr 2014 22:16:11 +0000 (15:16 -0700)
test/test-data.sha1		patch \| blob \| history
test/test.mk		patch \| blob \| history
test/test_vectors.cc		patch \| blob \| history
test/vp9_thread_test.cc		patch \| blob \| history
vp9/decoder/vp9_decodeframe.c		patch \| blob \| history
vp9/decoder/vp9_dthread.c		patch \| blob \| history