From 9db1f24c47e543cc14eff3e8dc375cca1476b4d1 Mon Sep 17 00:00:00 2001 From: James Zern Date: Mon, 22 Jun 2015 20:57:14 -0700 Subject: [PATCH] vp9_reconintra_neon: add d45 16x16 ~90% faster over 20M pixels Change-Id: I92d80f66e91e0a870a672cfb5dd29bf1a17cb11a --- test/test_intra_pred_speed.cc | 4 ++-- vp9/common/arm/neon/vp9_reconintra_neon.c | 17 +++++++++++++++++ vp9/common/vp9_rtcd_defs.pl | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc index 7b4c4350e..46d4a2582 100644 --- a/test/test_intra_pred_speed.cc +++ b/test/test_intra_pred_speed.cc @@ -316,8 +316,8 @@ INTRA_PRED_TEST(NEON, TestIntraPred16, vp9_dc_predictor_16x16_neon, vp9_dc_left_predictor_16x16_neon, vp9_dc_top_predictor_16x16_neon, vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon, - vp9_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL, - vp9_tm_predictor_16x16_neon) + vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL, + NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon) #endif // HAVE_NEON #if HAVE_MSA diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.c b/vp9/common/arm/neon/vp9_reconintra_neon.c index cfd5905ef..92706bf2c 100644 --- a/vp9/common/arm/neon/vp9_reconintra_neon.c +++ b/vp9/common/arm/neon/vp9_reconintra_neon.c @@ -358,6 +358,23 @@ void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, vst1_u8(dst + i * stride, row); } +void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint8x16_t A0 = vld1q_u8(above); // top row + const uint8x16_t above_right = vld1q_dup_u8(above + 15); + const uint8x16_t A1 = vextq_u8(A0, above_right, 1); + const uint8x16_t A2 = vextq_u8(A0, above_right, 2); + const uint8x16_t avg1 = vhaddq_u8(A0, A2); + uint8x16_t row = vrhaddq_u8(avg1, A1); + int i; + (void)left; + for (i = 0; i < 15; ++i) { + vst1q_u8(dst + i * stride, row); + row = vextq_u8(row, above_right, 1); + } + vst1q_u8(dst + i * stride, row); +} + // ----------------------------------------------------------------------------- void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 52c6ff1f3..fb8cadc5c 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -138,7 +138,7 @@ add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc"; add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc"; +specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc"; add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc"; -- 2.40.0