]> granicus.if.org Git - libvpx/commitdiff
Rewrite filter_selectively_horiz for parallel loopfiltering
authorYunqing Wang <yunqingwang@google.com>
Sat, 9 Nov 2013 00:12:04 +0000 (16:12 -0800)
committerYunqing Wang <yunqingwang@google.com>
Tue, 12 Nov 2013 01:06:01 +0000 (17:06 -0800)
Added loop filter mask checking, and made the caller function
ready for implementation of parallel loopfiltering in horizontal
direction.

Next, we need to go through the loopfilter functions (both c and
optimized versions), and provide 16-byte wide loopfiltering for
each filter type.

Change-Id: Ifef47e7ef9086ebc2fd6ca7ede8f27c9bbf79e66

vp9/common/vp9_loopfilter.c

index 408fb552eaa950a1f9394c206bad4d7834597dcf..62a890e8c225c8f5d5a4299ade819206abc1d08e 100644 (file)
@@ -383,7 +383,6 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
                                      unsigned int mask_8x8,
                                      unsigned int mask_4x4,
                                      unsigned int mask_4x4_int,
-                                     int only_4x4_1,
                                      const loop_filter_info_n *lfi_n,
                                      const uint8_t *lfl) {
   unsigned int mask;
@@ -392,37 +391,101 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
        mask; mask >>= count) {
     const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+
     count = 1;
     if (mask & 1) {
-      if (!only_4x4_1) {
-        if (mask_16x16 & 1) {
-          if ((mask_16x16 & 3) == 3) {
-            vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
-                                         lfi->hev_thr, 2);
-            count = 2;
+      if (mask_16x16 & 1) {
+        if ((mask_16x16 & 3) == 3) {
+          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
+                                       lfi->hev_thr, 2);
+          count = 2;
+        } else {
+          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
+                                       lfi->hev_thr, 1);
+        }
+        assert(!(mask_8x8 & 1));
+        assert(!(mask_4x4 & 1));
+        assert(!(mask_4x4_int & 1));
+      } else if (mask_8x8 & 1) {
+        if ((mask_8x8 & 3) == 3) {
+          // Next block's thresholds
+          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+          // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
+          vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
+                                            lfi->hev_thr, 1);
+          vp9_mbloop_filter_horizontal_edge(s + 8, pitch, lfin->mblim,
+                                            lfin->lim, lfin->hev_thr, 1);
+
+          if ((mask_4x4_int & 3) == 3) {
+            // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
+            vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                            lfi->lim, lfi->hev_thr, 1);
+            vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
+                                            lfin->mblim, lfin->lim,
+                                            lfin->hev_thr, 1);
           } else {
-            vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
-                                         lfi->hev_thr, 1);
+            if (mask_4x4_int & 1)
+              vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                              lfi->lim, lfi->hev_thr, 1);
+            else if (mask_4x4_int & 2)
+              vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
+                                              lfin->mblim, lfin->lim,
+                                              lfin->hev_thr, 1);
           }
-          assert(!(mask_8x8 & 1));
-          assert(!(mask_4x4 & 1));
-          assert(!(mask_4x4_int & 1));
-        } else if (mask_8x8 & 1) {
+          count = 2;
+        } else {
           vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
                                             lfi->hev_thr, 1);
-          assert(!(mask_16x16 & 1));
-          assert(!(mask_4x4 & 1));
-        } else if (mask_4x4 & 1) {
-          vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
-                                          lfi->hev_thr, 1);
-          assert(!(mask_16x16 & 1));
-          assert(!(mask_8x8 & 1));
+
+          if (mask_4x4_int & 1)
+            vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                            lfi->lim, lfi->hev_thr, 1);
         }
-      }
+        assert(!(mask_16x16 & 1));
+        assert(!(mask_4x4 & 1));
+      } else if (mask_4x4 & 1) {
+        if ((mask_4x4 & 3) == 3) {
+          // Next block's thresholds
+          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
 
-      if (mask_4x4_int & 1)
+          // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
+          vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
+                                            lfi->hev_thr, 1);
+          vp9_loop_filter_horizontal_edge(s + 8, pitch, lfin->mblim, lfin->lim,
+                                            lfin->hev_thr, 1);
+
+          if ((mask_4x4_int & 3) == 3) {
+            // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering.
+            vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                            lfi->lim, lfi->hev_thr, 1);
+            vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
+                                            lfin->mblim, lfin->lim,
+                                            lfin->hev_thr, 1);
+          } else {
+            if (mask_4x4_int & 1)
+              vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                              lfi->lim, lfi->hev_thr, 1);
+            else if (mask_4x4_int & 2)
+              vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
+                                              lfin->mblim, lfin->lim,
+                                              lfin->hev_thr, 1);
+          }
+          count = 2;
+        } else {
+        vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
+                                        lfi->hev_thr, 1);
+
+        if (mask_4x4_int & 1)
+          vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
+                                          lfi->lim, lfi->hev_thr, 1);
+        }
+        assert(!(mask_16x16 & 1));
+        assert(!(mask_8x8 & 1));
+      } else if (mask_4x4_int & 1) {
         vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
                                         lfi->lim, lfi->hev_thr, 1);
+      }
     }
     s += 8 * count;
     lfl += count;
@@ -913,11 +976,25 @@ static void filter_block_plane_non420(VP9_COMMON *cm,
     const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
     const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
 
+    unsigned int mask_16x16_r;
+    unsigned int mask_8x8_r;
+    unsigned int mask_4x4_r;
+
+    if (mi_row + r == 0) {
+      mask_16x16_r = 0;
+      mask_8x8_r = 0;
+      mask_4x4_r = 0;
+    } else {
+      mask_16x16_r = mask_16x16[r];
+      mask_8x8_r = mask_8x8[r];
+      mask_4x4_r = mask_4x4[r];
+    }
+
     filter_selectively_horiz(dst->buf, dst->stride,
-                             mask_16x16[r],
-                             mask_8x8[r],
-                             mask_4x4[r],
-                             mask_4x4_int_r, mi_row + r == 0,
+                             mask_16x16_r,
+                             mask_8x8_r,
+                             mask_4x4_r,
+                             mask_4x4_int_r,
                              &cm->lf_info, lfl[r]);
     dst->buf += 8 * dst->stride;
   }
@@ -969,12 +1046,25 @@ static void filter_block_plane(VP9_COMMON *const cm,
     mask_4x4 = lfm->above_y[TX_4X4];
 
     for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) {
+      unsigned int mask_16x16_r;
+      unsigned int mask_8x8_r;
+      unsigned int mask_4x4_r;
+
+      if (mi_row + r == 0) {
+        mask_16x16_r = 0;
+        mask_8x8_r = 0;
+        mask_4x4_r = 0;
+      } else {
+        mask_16x16_r = mask_16x16 & 0xff;
+        mask_8x8_r = mask_8x8 & 0xff;
+        mask_4x4_r = mask_4x4 & 0xff;
+      }
+
       filter_selectively_horiz(dst->buf, dst->stride,
-                               mask_16x16 & 0xff,
-                               mask_8x8 & 0xff,
-                               mask_4x4 & 0xff,
+                               mask_16x16_r,
+                               mask_8x8_r,
+                               mask_4x4_r,
                                mask_4x4_int_row[r],
-                               mi_row + r == 0,
                                &cm->lf_info, lfm->lfl_y[r]);
 
       dst->buf += 8 * dst->stride;
@@ -1021,13 +1111,25 @@ static void filter_block_plane(VP9_COMMON *const cm,
       const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
       const unsigned int mask_4x4_int_r = skip_border_4x4_r ?
           0 : (mask_4x4_int_row[r]);
+      unsigned int mask_16x16_r;
+      unsigned int mask_8x8_r;
+      unsigned int mask_4x4_r;
+
+      if (mi_row + r == 0) {
+        mask_16x16_r = 0;
+        mask_8x8_r = 0;
+        mask_4x4_r = 0;
+      } else {
+        mask_16x16_r = mask_16x16 & 0xf;
+        mask_8x8_r = mask_8x8 & 0xf;
+        mask_4x4_r = mask_4x4 & 0xf;
+      }
 
       filter_selectively_horiz(dst->buf, dst->stride,
-                               mask_16x16 & 0xf,
-                               mask_8x8 & 0xf,
-                               mask_4x4 & 0xf,
+                               mask_16x16_r,
+                               mask_8x8_r,
+                               mask_4x4_r,
                                mask_4x4_int_r,
-                               mi_row + r == 0,
                                &cm->lf_info, lfm->lfl_uv[r]);
 
       dst->buf += 8 * dst->stride;