granicus.if.org Git - libvpx/commitdiff
Initial sse2 version of the wide loopfilters
author Scott LaVarnway <slavarnway@google.com>
Fri, 11 Jan 2013 22:54:14 +0000 (14:54 -0800)
committer Scott LaVarnway <slavarnway@google.com>
Fri, 11 Jan 2013 22:54:14 +0000 (14:54 -0800)
Updated vp9_rtcd_defs.sh to add sse2 specializations for the wide
loopfilters, which use the existing sse2 uv version of the loopfilter
for the u and v planes.  The performance improved by ~8% for the test clip used.

Change-Id: I5a0bca3b6674198d40ca4a77b8cc722ddde79c36

vp9/common/vp9_loopfilter.h
vp9/common/vp9_loopfilter_filters.c
vp9/common/vp9_rtcd_defs.sh
vp9/common/x86/vp9_loopfilter_x86.c

index 8b752aa5ccd7306bb97d1830e94a0f7b421ae82a..53ec336a454a4307bb95ac97b03de4afb1811ffd 100644 (file)
@@ -92,10 +92,15 @@ void vp9_loop_filter_partial_frame(struct VP9Common *cm,
 void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi,
                                       int sharpness_lvl);
 
-extern void vp9_lpf_mbv_w(unsigned char *y_ptr, unsigned char *u_ptr,
-                          unsigned char *v_ptr, int y_stride, int uv_stride,
-                          struct loop_filter_info *lfi);
-extern void vp9_lpf_mbh_w(unsigned char *y_ptr, unsigned char *u_ptr,
-                           unsigned char *v_ptr, int y_stride, int uv_stride,
-                           struct loop_filter_info *lfi);
+void vp9_mb_lpf_horizontal_edge_w(unsigned char *s, int p,
+                                  const unsigned char *blimit,
+                                  const unsigned char *limit,
+                                  const unsigned char *thresh,
+                                  int count);
+
+void vp9_mb_lpf_vertical_edge_w(unsigned char *s, int p,
+                                const unsigned char *blimit,
+                                const unsigned char *limit,
+                                const unsigned char *thresh,
+                                int count);
 #endif  // VP9_COMMON_VP9_LOOPFILTER_H_
index 18f0fea139391b6d370d0b9e1cc10e25e9d50071..ed7b422df23aa5093aa6a87a0bfb6c0156201161 100644 (file)
@@ -596,7 +596,7 @@ static __inline void wide_mbfilter(int8_t mask, uint8_t hev,
   }
 }
 
-static void vp9_mb_lpf_horizontal_edge_w
+void vp9_mb_lpf_horizontal_edge_w
 (
   unsigned char *s,
   int p,
index 33ed5a4906fa89a987c38c39843fd4e78f8b63f8..d8517bbfa9638ac52e5ef9550771d08848466fdf 100644 (file)
@@ -233,11 +233,14 @@ vp9_loop_filter_simple_bh_c=vp9_loop_filter_bhs_c
 vp9_loop_filter_simple_bh_mmx=vp9_loop_filter_bhs_mmx
 vp9_loop_filter_simple_bh_sse2=vp9_loop_filter_bhs_sse2
 
+if [ "$CONFIG_WIDERLPF" = "yes" ]; then
 prototype void vp9_lpf_mbh_w "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp9_lpf_mbh_w
+specialize vp9_lpf_mbh_w sse2
 
 prototype void vp9_lpf_mbv_w "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi"
-specialize vp9_lpf_mbv_w
+specialize vp9_lpf_mbv_w sse2
+fi
+
 #
 # post proc
 #
index 61b1c77da1ec8fd27f2c16c6f559dccac2e68586..9f46eec4c63cc27a54a1bab72f2a1dc578f6e0a0 100644 (file)
@@ -604,6 +604,20 @@ void vp9_loop_filter_mbh_sse2(unsigned char *y_ptr,
                                               lfi->lim, lfi->hev_thr, v_ptr);
 }
 
+#if CONFIG_WIDERLPF
+void vp9_lpf_mbh_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
+                           unsigned char *v_ptr, int y_stride, int uv_stride,
+                           struct loop_filter_info *lfi) {
+  vp9_mb_lpf_horizontal_edge_w(y_ptr, y_stride,
+                                      lfi->mblim, lfi->lim, lfi->hev_thr, 2);
+
+  /* u,v */
+  if (u_ptr)
+    vp9_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim,
+                                              lfi->lim, lfi->hev_thr, v_ptr);
+}
+#endif
+
 void vp9_loop_filter_bh8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
                              unsigned char *v_ptr, int y_stride, int uv_stride,
                              struct loop_filter_info *lfi) {
@@ -624,6 +638,20 @@ void vp9_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
                                             lfi->lim, lfi->hev_thr, v_ptr);
 }
 
+#if CONFIG_WIDERLPF
+void vp9_lpf_mbv_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
+                   unsigned char *v_ptr, int y_stride, int uv_stride,
+                   struct loop_filter_info *lfi) {
+  vp9_mb_lpf_vertical_edge_w(y_ptr, y_stride,
+                                    lfi->mblim, lfi->lim, lfi->hev_thr, 2);
+
+  /* u,v */
+  if (u_ptr)
+    vp9_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim,
+                                            lfi->lim, lfi->hev_thr, v_ptr);
+}
+#endif
+
 void vp9_loop_filter_bv8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
                              unsigned char *v_ptr, int y_stride, int uv_stride,
                              struct loop_filter_info *lfi) {