granicus.if.org Git - libvpx/commitdiff
vpxdsp: [x86] add highbd_dc_128_predictor functions
author Scott LaVarnway <slavarnway@google.com>
Tue, 5 Sep 2017 14:52:36 +0000 (07:52 -0700)
committer Scott LaVarnway <slavarnway@google.com>
Tue, 5 Sep 2017 14:57:42 +0000 (07:57 -0700)
C vs SSE2 speed gains:
_4x4 : ~7.64x
_8x8 : ~16.60x
_16x16 : ~8.15x
_32x32 : ~5.05x

BUG=webm:1411

Change-Id: If165d419711cfda901bd428a05ca1560a009e62e

test/test_intra_pred_speed.cc
test/vp9_intrapred_test.cc
vpx_dsp/vpx_dsp_rtcd_defs.pl
vpx_dsp/x86/highbd_intrapred_intrin_sse2.c

index 8a50c75465347e3c286be628557ee529cedf2f34..cbc1a8c43b7761facb9a60cb406a32a3af75c1bd 100644 (file)
@@ -483,7 +483,8 @@ HIGHBD_INTRA_PRED_TEST(
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred4,
                        vpx_highbd_dc_predictor_4x4_sse2,
                        vpx_highbd_dc_left_predictor_4x4_sse2,
-                       vpx_highbd_dc_top_predictor_4x4_sse2, NULL,
+                       vpx_highbd_dc_top_predictor_4x4_sse2,
+                       vpx_highbd_dc_128_predictor_4x4_sse2,
                        vpx_highbd_v_predictor_4x4_sse2,
                        vpx_highbd_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL,
                        NULL, NULL, vpx_highbd_tm_predictor_4x4_c)
@@ -491,7 +492,8 @@ HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred4,
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred8,
                        vpx_highbd_dc_predictor_8x8_sse2,
                        vpx_highbd_dc_left_predictor_8x8_sse2,
-                       vpx_highbd_dc_top_predictor_8x8_sse2, NULL,
+                       vpx_highbd_dc_top_predictor_8x8_sse2,
+                       vpx_highbd_dc_128_predictor_8x8_sse2,
                        vpx_highbd_v_predictor_8x8_sse2,
                        vpx_highbd_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL,
                        NULL, NULL, vpx_highbd_tm_predictor_8x8_sse2)
@@ -499,7 +501,8 @@ HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred8,
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred16,
                        vpx_highbd_dc_predictor_16x16_sse2,
                        vpx_highbd_dc_left_predictor_16x16_sse2,
-                       vpx_highbd_dc_top_predictor_16x16_sse2, NULL,
+                       vpx_highbd_dc_top_predictor_16x16_sse2,
+                       vpx_highbd_dc_128_predictor_16x16_sse2,
                        vpx_highbd_v_predictor_16x16_sse2,
                        vpx_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL,
                        NULL, NULL, NULL, vpx_highbd_tm_predictor_16x16_sse2)
@@ -507,7 +510,8 @@ HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred16,
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred32,
                        vpx_highbd_dc_predictor_32x32_sse2,
                        vpx_highbd_dc_left_predictor_32x32_sse2,
-                       vpx_highbd_dc_top_predictor_32x32_sse2, NULL,
+                       vpx_highbd_dc_top_predictor_32x32_sse2,
+                       vpx_highbd_dc_128_predictor_32x32_sse2,
                        vpx_highbd_v_predictor_32x32_sse2,
                        vpx_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL,
                        NULL, NULL, NULL, vpx_highbd_tm_predictor_32x32_sse2)
index d87215b43ed41052652ea6162da90a441896f0ce..96985bd14c620e137933b4f0228b48f1045f9d97 100644 (file)
@@ -471,6 +471,14 @@ TEST_P(VP9HighbdIntraPredTest, HighbdIntraPredTests) {
 INSTANTIATE_TEST_CASE_P(
     SSE2_TO_C_8, VP9HighbdIntraPredTest,
     ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
+                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
+                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
+                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
+                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 8),
         HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
                              &vpx_highbd_dc_left_predictor_4x4_c, 4, 8),
         HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
@@ -523,6 +531,14 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
     SSE2_TO_C_10, VP9HighbdIntraPredTest,
     ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
+                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
+                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
+                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
+                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 10),
         HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
                              &vpx_highbd_dc_left_predictor_4x4_c, 4, 10),
         HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
@@ -575,6 +591,14 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
     SSE2_TO_C_12, VP9HighbdIntraPredTest,
     ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
+                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
+                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
+                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
+                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 12),
         HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
                              &vpx_highbd_dc_left_predictor_4x4_c, 4, 12),
         HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
index 3826b1301e97cdcb6667a0aebafae379b5f5731e..1b0a5c27e217028da80735b00ad86a0242f21053 100644 (file)
@@ -220,7 +220,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vpx_highbd_dc_left_predictor_4x4 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
-  specialize qw/vpx_highbd_dc_128_predictor_4x4 neon/;
+  specialize qw/vpx_highbd_dc_128_predictor_4x4 neon sse2/;
 
   add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
 
@@ -255,7 +255,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vpx_highbd_dc_left_predictor_8x8 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
-  specialize qw/vpx_highbd_dc_128_predictor_8x8 neon/;
+  specialize qw/vpx_highbd_dc_128_predictor_8x8 neon sse2/;
 
   add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
 
@@ -290,7 +290,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vpx_highbd_dc_left_predictor_16x16 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
-  specialize qw/vpx_highbd_dc_128_predictor_16x16 neon/;
+  specialize qw/vpx_highbd_dc_128_predictor_16x16 neon sse2/;
 
   add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
 
@@ -325,7 +325,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vpx_highbd_dc_left_predictor_32x32 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
-  specialize qw/vpx_highbd_dc_128_predictor_32x32 neon/;
+  specialize qw/vpx_highbd_dc_128_predictor_32x32 neon sse2/;
 }  # CONFIG_VP9_HIGHBITDEPTH
 
 #
index 668178fdd0bd9d0a13284e7ef3d2cb5caf34420f..83113a293505f079208760a1109d0a4fb2cdeee8 100644 (file)
@@ -199,6 +199,16 @@ void vpx_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
   dc_store_4x4(dst, stride, &dc);
 }
 
+void vpx_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
+                                          const uint16_t *above,
+                                          const uint16_t *left, int bd) {
+  const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));  // 128 when bd == 8
+  const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);  // fill low 4 words
+  (void)above;  // unused: the value depends only on bd
+  (void)left;   // unused: the value depends only on bd
+  dc_store_4x4(dst, stride, &dc_dup);  // helper defined outside this hunk
+}
+
 //------------------------------------------------------------------------------
 // DC 8x8
 
@@ -243,6 +253,16 @@ void vpx_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
   dc_store_8x8(dst, stride, &dc);
 }
 
+void vpx_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
+                                          const uint16_t *above,
+                                          const uint16_t *left, int bd) {
+  const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));  // 128 when bd == 8
+  const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);  // low 4 lanes only
+  (void)above;  // unused: the value depends only on bd
+  (void)left;   // unused: the value depends only on bd
+  dc_store_8x8(dst, stride, &dc_dup);  // TODO confirm helper widens
+}
+
 //------------------------------------------------------------------------------
 // DC 16x16
 
@@ -285,6 +305,16 @@ void vpx_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
   dc_store_16x16(dst, stride, &dc);
 }
 
+void vpx_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
+                                            const uint16_t *above,
+                                            const uint16_t *left, int bd) {
+  const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));  // 128 when bd == 8
+  const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);  // low 4 lanes only
+  (void)above;  // unused: the value depends only on bd
+  (void)left;   // unused: the value depends only on bd
+  dc_store_16x16(dst, stride, &dc_dup);  // TODO confirm helper widens
+}
+
 //------------------------------------------------------------------------------
 // DC 32x32
 
@@ -331,3 +361,13 @@ void vpx_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
   (void)bd;
   dc_store_32x32(dst, stride, &dc);
 }
+
+void vpx_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
+                                            const uint16_t *above,
+                                            const uint16_t *left, int bd) {
+  const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));  // 128 when bd == 8
+  const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);  // low 4 lanes only
+  (void)above;  // unused: the value depends only on bd
+  (void)left;   // unused: the value depends only on bd
+  dc_store_32x32(dst, stride, &dc_dup);  // TODO confirm helper widens
+}