vpx_dsp_common: add VPX prefix to MIN/MAX

[libvpx] / vp9 / common / vp9_thread_common.c
diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c

index 27a3212ca6da4e8af057b027d7ff2600045c450e..2e6285a42fe4943c1932ebeb3c3eaa22f62d36d3 100644 (file)
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -13,6 +13,7 @@
  #include "vp9/common/vp9_entropymode.h"
  #include "vp9/common/vp9_thread_common.h"
  #include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_loopfilter.h"
  
  #if CONFIG_MULTITHREAD
  static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
@@ -92,14 +93,21 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,
                               int start, int stop, int y_only,
                               VP9LfSync *const lf_sync) {
    const int num_planes = y_only ? 1 : MAX_MB_PLANE;
-  const int use_420 = y_only || (planes[1].subsampling_y == 1 &&
-                                 planes[1].subsampling_x == 1);
    const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
    int mi_row, mi_col;
+  enum lf_path path;
+  if (y_only)
+    path = LF_PATH_444;
+  else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
+    path = LF_PATH_420;
+  else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
+    path = LF_PATH_444;
+  else
+    path = LF_PATH_SLOW;
  
    for (mi_row = start; mi_row < stop;
         mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
-    MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride;
+    MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
  
      for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
        const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
@@ -112,16 +120,23 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,
        vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
  
        // TODO(JBB): Make setup_mask work for non 420.
-      if (use_420)
-        vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
-                       &lfm);
-
-      for (plane = 0; plane < num_planes; ++plane) {
-        if (use_420)
-          vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);
-        else
-          vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
-                                        mi_row, mi_col);
+      vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
+                     &lfm);
+
+      vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
+      for (plane = 1; plane < num_planes; ++plane) {
+        switch (path) {
+          case LF_PATH_420:
+            vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
+            break;
+          case LF_PATH_444:
+            vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
+            break;
+          case LF_PATH_SLOW:
+            vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
+                                          mi_row, mi_col);
+            break;
+        }
        }
  
        sync_write(lf_sync, r, c, sb_cols);
@@ -142,15 +157,15 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
                                  VP9_COMMON *cm,
                                  struct macroblockd_plane planes[MAX_MB_PLANE],
                                  int start, int stop, int y_only,
-                                VP9Worker *workers, int nworkers,
+                                VPxWorker *workers, int nworkers,
                                  VP9LfSync *lf_sync) {
-  const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
+  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
    // Number of superblock rows and cols
    const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
    // Decoder may allocate more threads than number of tiles based on user's
    // input.
    const int tile_cols = 1 << cm->log2_tile_cols;
-  const int num_workers = MIN(nworkers, tile_cols);
+  const int num_workers = VPXMIN(nworkers, tile_cols);
    int i;
  
    if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
@@ -160,7 +175,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
    }
  
    // Initialize cur_sb_col to -1 for all SB rows.
-  vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
+  memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
  
    // Set up loopfilter thread data.
    // The decoder is capping num_workers because it has been observed that using
@@ -171,10 +186,10 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
    // because of contention. If the multithreading code changes in the future
    // then the number of workers used by the loopfilter should be revisited.
    for (i = 0; i < num_workers; ++i) {
-    VP9Worker *const worker = &workers[i];
+    VPxWorker *const worker = &workers[i];
      LFWorkerData *const lf_data = &lf_sync->lfdata[i];
  
-    worker->hook = (VP9WorkerHook)loop_filter_row_worker;
+    worker->hook = (VPxWorkerHook)loop_filter_row_worker;
      worker->data1 = lf_sync;
      worker->data2 = lf_data;
  
@@ -203,7 +218,7 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
                                struct macroblockd_plane planes[MAX_MB_PLANE],
                                int frame_filter_level,
                                int y_only, int partial_frame,
-                              VP9Worker *workers, int num_workers,
+                              VPxWorker *workers, int num_workers,
                                VP9LfSync *lf_sync) {
    int start_mi_row, end_mi_row, mi_rows_to_filter;
  
@@ -214,7 +229,7 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
    if (partial_frame && cm->mi_rows > 8) {
      start_mi_row = cm->mi_rows >> 1;
      start_mi_row &= 0xfffffff8;
-    mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
+    mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
    }
    end_mi_row = start_mi_row + mi_rows_to_filter;
    vp9_loop_filter_frame_init(cm, frame_filter_level);
@@ -382,6 +397,9 @@ void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
        cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j];
    }
  
+  for (i = 0; i < TX_SIZES; i++)
+    cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i];
+
    for (i = 0; i < SKIP_CONTEXTS; i++)
      for (j = 0; j < 2; j++)
        cm->counts.skip[i][j] += counts->skip[i][j];