vp9_reconintra_neon: add d45 16x16

author James Zern <jzern@google.com>

Tue, 23 Jun 2015 03:57:14 +0000 (20:57 -0700)

committer James Zern <jzern@google.com>

Tue, 23 Jun 2015 04:00:07 +0000 (21:00 -0700)
author James Zern <jzern@google.com>
Tue, 23 Jun 2015 03:57:14 +0000 (20:57 -0700)
committer James Zern <jzern@google.com>
Tue, 23 Jun 2015 04:00:07 +0000 (21:00 -0700)
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc

index 7b4c4350e90720d798872f2fd8bdfee0fb567d89..46d4a2582ddde20073f038c79fdb1369c7ce3203 100644 (file)
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -316,8 +316,8 @@ INTRA_PRED_TEST(NEON, TestIntraPred16, vp9_dc_predictor_16x16_neon,
                  vp9_dc_left_predictor_16x16_neon,
                  vp9_dc_top_predictor_16x16_neon,
                  vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon,
-                vp9_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL,
-                vp9_tm_predictor_16x16_neon)
+                vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL,
+                NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon)
  #endif  // HAVE_NEON
  
  #if HAVE_MSA
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.c b/vp9/common/arm/neon/vp9_reconintra_neon.c

index cfd5905ef542244d7088e2f00e86ed37d8b277e2..92706bf2c6926d023f2deaf08cd9fb296edd9f0a 100644 (file)
--- a/vp9/common/arm/neon/vp9_reconintra_neon.c
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.c
@@ -358,6 +358,23 @@ void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
    vst1_u8(dst + i * stride, row);
  }
  
+void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
+                                  const uint8_t *above, const uint8_t *left) {  // 16x16 D45 intra predictor; writes 16 rows of 16 bytes to dst, reading only above[0..15]
+  const uint8x16_t A0 = vld1q_u8(above);  // above[0..15]
+  const uint8x16_t above_right = vld1q_dup_u8(above + 15);  // above[15] replicated into all 16 lanes
+  const uint8x16_t A1 = vextq_u8(A0, above_right, 1);  // above[1..15] followed by above[15]
+  const uint8x16_t A2 = vextq_u8(A0, above_right, 2);  // above[2..15] followed by above[15] twice
+  const uint8x16_t avg1 = vhaddq_u8(A0, A2);  // truncating halving add: (A0 + A2) >> 1
+  uint8x16_t row = vrhaddq_u8(avg1, A1);  // rounding halving add; together equals (A0 + 2 * A1 + A2 + 2) >> 2 per lane
+  int i;
+  (void)left;  // left column is not used by this predictor
+  for (i = 0; i < 15; ++i) {  // rows 0..14
+    vst1q_u8(dst + i * stride, row);
+    row = vextq_u8(row, above_right, 1);  // next row: shift one byte toward lane 0, fill the last lane with above[15]
+  }
+  vst1q_u8(dst + i * stride, row);  // i == 15 here: store the final row without computing another shift
+}
+
  // -----------------------------------------------------------------------------
  
  void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl

index 52c6ff1f36f435873a75bba7c8669ab90c2168c7..fb8cadc5c0db10ef87af41f35358f149937a7e02 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -138,7 +138,7 @@ add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride,
  specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
  
  add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
+specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc";
  
  add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
author	James Zern <jzern@google.com>
	Tue, 23 Jun 2015 03:57:14 +0000 (20:57 -0700)
committer	James Zern <jzern@google.com>
	Tue, 23 Jun 2015 04:00:07 +0000 (21:00 -0700)
test/test_intra_pred_speed.cc		patch \| blob \| history
vp9/common/arm/neon/vp9_reconintra_neon.c		patch \| blob \| history
vp9/common/vp9_rtcd_defs.pl		patch \| blob \| history