granicus.if.org Git - libvpx/commitdiff
Some cosmetic improvements following the HBD variance 4x4 optimization
author: Yi Luo <luoyi@google.com>
Thu, 7 Apr 2016 22:06:43 +0000 (15:06 -0700)
committer: Yi Luo <luoyi@google.com>
Fri, 8 Apr 2016 17:32:13 +0000 (10:32 -0700)
Change-Id: I414c1fabd2e3a9b1d9daa8a90f85a0bace8bd3cd

vp10/encoder/mcomp.c
vpx_dsp/variance.c
vpx_dsp/variance.h
vpx_dsp/x86/highbd_variance_sse4.c

index 0c8ec43eb5f323cbb9f3a2adf99dde09c167179d..4327d974c88110337beee8c16897b0be5873ed37 100644 (file)
@@ -367,8 +367,8 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd,
   if (second_pred != NULL) {
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
       DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
-      vpx_highbd_comp_avg_pred_c(comp_pred16, second_pred, w, h, y + offset,
-                                 y_stride);
+      vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
+                               y_stride);
       besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride,
                         sse1);
     } else {
index 90c8bed52771e98082a9c0a3ca3bba477289fab8..e6be1dd7352115326730a5298d0affd580d2ce15 100644 (file)
@@ -433,7 +433,7 @@ uint32_t vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, \
   return *sse; \
 }
 
-void highbd_var_filter_block2d_bil_first_pass(
+void vpx_highbd_var_filter_block2d_bil_first_pass(
     const uint8_t *src_ptr8,
     uint16_t *output_ptr,
     unsigned int src_pixels_per_line,
@@ -459,7 +459,7 @@ void highbd_var_filter_block2d_bil_first_pass(
   }
 }
 
-void highbd_var_filter_block2d_bil_second_pass(
+void vpx_highbd_var_filter_block2d_bil_second_pass(
     const uint16_t *src_ptr,
     uint16_t *output_ptr,
     unsigned int src_pixels_per_line,
@@ -492,13 +492,14 @@ uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c( \
   uint16_t fdata3[(H + 1) * W]; \
   uint16_t temp2[H * W]; \
 \
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters_2t[xoffset]); \
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters_2t[yoffset]); \
+  vpx_highbd_var_filter_block2d_bil_first_pass( \
+      src, fdata3, src_stride, 1, H + 1, \
+      W, bilinear_filters_2t[xoffset]); \
+  vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                                bilinear_filters_2t[yoffset]); \
 \
   return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
-                                          dst_stride, sse); \
+                                            dst_stride, sse); \
 } \
 \
 uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \
@@ -509,10 +510,11 @@ uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \
   uint16_t fdata3[(H + 1) * W]; \
   uint16_t temp2[H * W]; \
 \
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters_2t[xoffset]); \
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters_2t[yoffset]); \
+  vpx_highbd_var_filter_block2d_bil_first_pass( \
+      src, fdata3, src_stride, 1, H + 1, \
+      W, bilinear_filters_2t[xoffset]); \
+  vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                                bilinear_filters_2t[yoffset]); \
 \
   return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                              W, dst, dst_stride, sse); \
@@ -526,10 +528,11 @@ uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c( \
   uint16_t fdata3[(H + 1) * W]; \
   uint16_t temp2[H * W]; \
 \
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters_2t[xoffset]); \
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters_2t[yoffset]); \
+  vpx_highbd_var_filter_block2d_bil_first_pass( \
+      src, fdata3, src_stride, 1, H + 1, \
+      W, bilinear_filters_2t[xoffset]); \
+  vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                                bilinear_filters_2t[yoffset]); \
 \
   return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                              W, dst, dst_stride, sse); \
@@ -546,16 +549,17 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters_2t[xoffset]); \
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters_2t[yoffset]); \
+  vpx_highbd_var_filter_block2d_bil_first_pass( \
+      src, fdata3, src_stride, 1, H + 1, \
+      W, bilinear_filters_2t[xoffset]); \
+  vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                                bilinear_filters_2t[yoffset]); \
 \
   vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \
                              CONVERT_TO_BYTEPTR(temp2), W); \
 \
   return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
-                                          dst_stride, sse); \
+                                            dst_stride, sse);           \
 } \
 \
 uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
@@ -568,10 +572,11 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters_2t[xoffset]); \
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters_2t[yoffset]); \
+  vpx_highbd_var_filter_block2d_bil_first_pass( \
+      src, fdata3, src_stride, 1, H + 1, \
+      W, bilinear_filters_2t[xoffset]); \
+  vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                                bilinear_filters_2t[yoffset]); \
 \
   vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \
                              CONVERT_TO_BYTEPTR(temp2), W); \
@@ -590,10 +595,11 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, bilinear_filters_2t[xoffset]); \
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters_2t[yoffset]); \
+  vpx_highbd_var_filter_block2d_bil_first_pass( \
+      src, fdata3, src_stride, 1, H + 1, \
+      W, bilinear_filters_2t[xoffset]); \
+  vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                                bilinear_filters_2t[yoffset]); \
 \
   vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \
                              CONVERT_TO_BYTEPTR(temp2), W); \
@@ -914,11 +920,11 @@ unsigned int vpx_highbd_masked_sub_pixel_variance##W##x##H##_c( \
   uint16_t fdata3[(H + 1) * W]; \
   uint16_t temp2[H * W]; \
 \
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
-                                           H + 1, W, \
-                                           bilinear_filters_2t[xoffset]); \
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters_2t[yoffset]); \
+  vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
+                                               H + 1, W, \
+                                               bilinear_filters_2t[xoffset]); \
+  vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                                bilinear_filters_2t[yoffset]); \
 \
   return vpx_highbd_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                  W, dst, dst_stride, \
@@ -934,11 +940,11 @@ unsigned int vpx_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \
   uint16_t fdata3[(H + 1) * W]; \
   uint16_t temp2[H * W]; \
 \
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
-                                           H + 1, W, \
-                                           bilinear_filters_2t[xoffset]); \
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters_2t[yoffset]); \
+  vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
+                                               H + 1, W, \
+                                               bilinear_filters_2t[xoffset]); \
+  vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                                bilinear_filters_2t[yoffset]); \
 \
   return vpx_highbd_10_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                     W, dst, dst_stride, \
@@ -954,11 +960,11 @@ unsigned int vpx_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \
   uint16_t fdata3[(H + 1) * W]; \
   uint16_t temp2[H * W]; \
 \
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
-                                           H + 1, W, \
-                                           bilinear_filters_2t[xoffset]); \
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            bilinear_filters_2t[yoffset]); \
+  vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
+                                               H + 1, W, \
+                                               bilinear_filters_2t[xoffset]); \
+  vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                                bilinear_filters_2t[yoffset]); \
 \
   return vpx_highbd_12_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                     W, dst, dst_stride, \
index 4ad23f8ae9c7b74e86d3d15ce47e69cf9464f89e..175985468a6b2e46a54603558af70e692851fc4b 100644 (file)
@@ -130,7 +130,7 @@ typedef struct vp10_variance_vtable {
 } vp10_variance_fn_ptr_t;
 #endif  // CONFIG_VP10
 
-void highbd_var_filter_block2d_bil_first_pass(
+void vpx_highbd_var_filter_block2d_bil_first_pass(
     const uint8_t *src_ptr8,
     uint16_t *output_ptr,
     unsigned int src_pixels_per_line,
@@ -139,7 +139,7 @@ void highbd_var_filter_block2d_bil_first_pass(
     unsigned int output_width,
     const uint8_t *filter);
 
-void highbd_var_filter_block2d_bil_second_pass(
+void vpx_highbd_var_filter_block2d_bil_second_pass(
     const uint16_t *src_ptr,
     uint16_t *output_ptr,
     unsigned int src_pixels_per_line,
index 18ecc7efd2599fe5227b2000f23307afa72fb990..5c1dfe4dc4130887beb3136e10715dcf80da7c96 100644 (file)
@@ -119,10 +119,12 @@ uint32_t vpx_highbd_8_sub_pixel_variance4x4_sse4_1(
   uint16_t fdata3[(4 + 1) * 4];
   uint16_t temp2[4 * 4];
 
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
-                                           4, bilinear_filters_2t[xoffset]);
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
-                                            bilinear_filters_2t[yoffset]);
+  vpx_highbd_var_filter_block2d_bil_first_pass(
+      src, fdata3, src_stride, 1, 4 + 1,
+      4, bilinear_filters_2t[xoffset]);
+  vpx_highbd_var_filter_block2d_bil_second_pass(
+      fdata3, temp2, 4, 4, 4, 4,
+      bilinear_filters_2t[yoffset]);
 
   return vpx_highbd_8_variance4x4(CONVERT_TO_BYTEPTR(temp2),
                                   4, dst, dst_stride, sse);
@@ -137,10 +139,12 @@ uint32_t vpx_highbd_10_sub_pixel_variance4x4_sse4_1(
   uint16_t fdata3[(4 + 1) * 4];
   uint16_t temp2[4 * 4];
 
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
-                                           4, bilinear_filters_2t[xoffset]);
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
-                                            bilinear_filters_2t[yoffset]);
+  vpx_highbd_var_filter_block2d_bil_first_pass(
+      src, fdata3, src_stride, 1, 4 + 1,
+      4, bilinear_filters_2t[xoffset]);
+  vpx_highbd_var_filter_block2d_bil_second_pass(
+      fdata3, temp2, 4, 4, 4, 4,
+      bilinear_filters_2t[yoffset]);
 
   return vpx_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(temp2),
                                    4, dst, dst_stride, sse);
@@ -155,10 +159,12 @@ uint32_t vpx_highbd_12_sub_pixel_variance4x4_sse4_1(
   uint16_t fdata3[(4 + 1) * 4];
   uint16_t temp2[4 * 4];
 
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
-                                           4, bilinear_filters_2t[xoffset]);
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
-                                            bilinear_filters_2t[yoffset]);
+  vpx_highbd_var_filter_block2d_bil_first_pass(
+      src, fdata3, src_stride, 1, 4 + 1,
+      4, bilinear_filters_2t[xoffset]);
+  vpx_highbd_var_filter_block2d_bil_second_pass(
+      fdata3, temp2, 4, 4, 4, 4,
+      bilinear_filters_2t[yoffset]);
 
   return vpx_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(temp2),
                                    4, dst, dst_stride, sse);
@@ -177,13 +183,15 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4_sse4_1(
   uint16_t temp2[4 * 4];
   DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]);
 
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
-                                           4, bilinear_filters_2t[xoffset]);
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
-                                            bilinear_filters_2t[yoffset]);
+  vpx_highbd_var_filter_block2d_bil_first_pass(
+      src, fdata3, src_stride, 1, 4 + 1,
+      4, bilinear_filters_2t[xoffset]);
+  vpx_highbd_var_filter_block2d_bil_second_pass(
+      fdata3, temp2, 4, 4, 4, 4,
+      bilinear_filters_2t[yoffset]);
 
-  vpx_highbd_comp_avg_pred_c(temp3, second_pred, 4, 4,
-                             CONVERT_TO_BYTEPTR(temp2), 4);
+  vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4,
+                           CONVERT_TO_BYTEPTR(temp2), 4);
 
   return vpx_highbd_8_variance4x4(CONVERT_TO_BYTEPTR(temp3),
                                   4, dst, dst_stride, sse);
@@ -200,13 +208,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4_sse4_1(
   uint16_t temp2[4 * 4];
   DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]);
 
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
-                                           4, bilinear_filters_2t[xoffset]);
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
-                                            bilinear_filters_2t[yoffset]);
+  vpx_highbd_var_filter_block2d_bil_first_pass(
+      src, fdata3, src_stride, 1, 4 + 1,
+      4, bilinear_filters_2t[xoffset]);
+  vpx_highbd_var_filter_block2d_bil_second_pass(
+      fdata3, temp2, 4, 4, 4, 4,
+      bilinear_filters_2t[yoffset]);
 
-  vpx_highbd_comp_avg_pred_c(temp3, second_pred, 4, 4,
-                             CONVERT_TO_BYTEPTR(temp2), 4);
+  vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4,
+                           CONVERT_TO_BYTEPTR(temp2), 4);
 
   return vpx_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(temp3),
                                    4, dst, dst_stride, sse);
@@ -223,13 +233,15 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4_sse4_1(
   uint16_t temp2[4 * 4];
   DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]);
 
-  highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
-                                           4, bilinear_filters_2t[xoffset]);
-  highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
-                                            bilinear_filters_2t[yoffset]);
+  vpx_highbd_var_filter_block2d_bil_first_pass(
+      src, fdata3, src_stride, 1, 4 + 1,
+      4, bilinear_filters_2t[xoffset]);
+  vpx_highbd_var_filter_block2d_bil_second_pass(
+      fdata3, temp2, 4, 4, 4, 4,
+      bilinear_filters_2t[yoffset]);
 
-  vpx_highbd_comp_avg_pred_c(temp3, second_pred, 4, 4,
-                             CONVERT_TO_BYTEPTR(temp2), 4);
+  vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4,
+                           CONVERT_TO_BYTEPTR(temp2), 4);
 
   return vpx_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(temp3),
                                    4, dst, dst_stride, sse);