From: Luca Barbato
Date: Tue, 18 Apr 2017 22:55:53 +0000 (+0000)
Subject: ppc: h predictor 8x8
X-Git-Tag: v1.7.0~533
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=914b160fb594b7c6d183aba6b40258b66d0ce054;p=libvpx

ppc: h predictor 8x8

Slightly faster with the current compiler.

Change-Id: Iae225fac08395eb430c97a2abec69c60f5cf5c47
---

diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index 539f7d4fb..23fce335a 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -318,8 +318,9 @@ INTRA_PRED_TEST(VSX, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
                 vpx_tm_predictor_4x4_vsx)
 
 INTRA_PRED_TEST(VSX, TestIntraPred8, vpx_dc_predictor_8x8_vsx, NULL, NULL, NULL,
-                NULL, NULL, vpx_d45_predictor_8x8_vsx, NULL, NULL, NULL, NULL,
-                vpx_d63_predictor_8x8_vsx, vpx_tm_predictor_8x8_vsx)
+                NULL, vpx_h_predictor_8x8_vsx, vpx_d45_predictor_8x8_vsx, NULL,
+                NULL, NULL, NULL, vpx_d63_predictor_8x8_vsx,
+                vpx_tm_predictor_8x8_vsx)
 
 INTRA_PRED_TEST(VSX, TestIntraPred16, vpx_dc_predictor_16x16_vsx,
                 vpx_dc_left_predictor_16x16_vsx, vpx_dc_top_predictor_16x16_vsx,
diff --git a/vpx_dsp/ppc/intrapred_vsx.c b/vpx_dsp/ppc/intrapred_vsx.c
index 0ff0f935d..6273460f1 100644
--- a/vpx_dsp/ppc/intrapred_vsx.c
+++ b/vpx_dsp/ppc/intrapred_vsx.c
@@ -56,6 +56,38 @@ void vpx_h_predictor_4x4_vsx(uint8_t *dst, ptrdiff_t stride,
   vec_vsx_st(vec_sel(v3, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst);
 }
 
+void vpx_h_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride,
+                             const uint8_t *above, const uint8_t *left) {
+  const uint8x16_t d = vec_vsx_ld(0, left);
+  const uint8x16_t v0 = vec_splat(d, 0);
+  const uint8x16_t v1 = vec_splat(d, 1);
+  const uint8x16_t v2 = vec_splat(d, 2);
+  const uint8x16_t v3 = vec_splat(d, 3);
+
+  const uint8x16_t v4 = vec_splat(d, 4);
+  const uint8x16_t v5 = vec_splat(d, 5);
+  const uint8x16_t v6 = vec_splat(d, 6);
+  const uint8x16_t v7 = vec_splat(d, 7);
+
+  (void)above;
+
+  vec_vsx_st(xxpermdi(v0, vec_vsx_ld(0, dst), 1), 0, dst);
+  dst += stride;
+  vec_vsx_st(xxpermdi(v1, vec_vsx_ld(0, dst), 1), 0, dst);
+  dst += stride;
+  vec_vsx_st(xxpermdi(v2, vec_vsx_ld(0, dst), 1), 0, dst);
+  dst += stride;
+  vec_vsx_st(xxpermdi(v3, vec_vsx_ld(0, dst), 1), 0, dst);
+  dst += stride;
+  vec_vsx_st(xxpermdi(v4, vec_vsx_ld(0, dst), 1), 0, dst);
+  dst += stride;
+  vec_vsx_st(xxpermdi(v5, vec_vsx_ld(0, dst), 1), 0, dst);
+  dst += stride;
+  vec_vsx_st(xxpermdi(v6, vec_vsx_ld(0, dst), 1), 0, dst);
+  dst += stride;
+  vec_vsx_st(xxpermdi(v7, vec_vsx_ld(0, dst), 1), 0, dst);
+}
+
 void vpx_h_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
                                const uint8_t *above, const uint8_t *left) {
   const uint8x16_t d = vec_vsx_ld(0, left);
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index ef4ce24e9..ed0339cbe 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -81,7 +81,7 @@ add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, con
 specialize qw/vpx_d63_predictor_8x8 ssse3 vsx/;
 
 add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2/;
+specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2 vsx/;
 
 add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 