From: Luca Barbato Date: Fri, 7 Apr 2017 14:49:00 +0000 (+0000) Subject: ppc: horizontal predictor 16x16 X-Git-Tag: v1.7.0~549^2~3 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a7a2d1653bbb6d42d292a0a10acd4d4e28c43d6b;p=libvpx ppc: horizontal predictor 16x16 About 10x faster. Change-Id: Ie81077fa32ad214cdb46bdcb0be4e9e2c7df47c2 --- diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc index 9ae05b02f..75abc169b 100644 --- a/test/test_intra_pred_speed.cc +++ b/test/test_intra_pred_speed.cc @@ -314,8 +314,8 @@ INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa, #if HAVE_VSX INTRA_PRED_TEST(VSX, TestIntraPred16, NULL, NULL, NULL, NULL, - vpx_v_predictor_16x16_vsx, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL) + vpx_v_predictor_16x16_vsx, vpx_h_predictor_16x16_vsx, NULL, + NULL, NULL, NULL, NULL, NULL, NULL) INTRA_PRED_TEST(VSX, TestIntraPred32, NULL, NULL, NULL, NULL, vpx_v_predictor_32x32_vsx, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/vpx_dsp/ppc/intrapred_vsx.c b/vpx_dsp/ppc/intrapred_vsx.c index 16e08eec4..d803d3876 100644 --- a/vpx_dsp/ppc/intrapred_vsx.c +++ b/vpx_dsp/ppc/intrapred_vsx.c @@ -34,3 +34,61 @@ void vpx_v_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, vec_vsx_st(d1, 16, dst); } } + +void vpx_h_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint8x16_t d = vec_vsx_ld(0, left); + const uint8x16_t v0 = vec_splat(d, 0); + const uint8x16_t v1 = vec_splat(d, 1); + const uint8x16_t v2 = vec_splat(d, 2); + const uint8x16_t v3 = vec_splat(d, 3); + + const uint8x16_t v4 = vec_splat(d, 4); + const uint8x16_t v5 = vec_splat(d, 5); + const uint8x16_t v6 = vec_splat(d, 6); + const uint8x16_t v7 = vec_splat(d, 7); + + const uint8x16_t v8 = vec_splat(d, 8); + const uint8x16_t v9 = vec_splat(d, 9); + const uint8x16_t v10 = vec_splat(d, 10); + const uint8x16_t v11 = vec_splat(d, 11); + + const uint8x16_t v12 = vec_splat(d, 12); + const uint8x16_t v13 = vec_splat(d, 13); + const uint8x16_t v14 = vec_splat(d, 14); + const uint8x16_t v15 = vec_splat(d, 15); + + (void)above; + + vec_vsx_st(v0, 0, dst); + dst += stride; + vec_vsx_st(v1, 0, dst); + dst += stride; + vec_vsx_st(v2, 0, dst); + dst += stride; + vec_vsx_st(v3, 0, dst); + dst += stride; + vec_vsx_st(v4, 0, dst); + dst += stride; + vec_vsx_st(v5, 0, dst); + dst += stride; + vec_vsx_st(v6, 0, dst); + dst += stride; + vec_vsx_st(v7, 0, dst); + dst += stride; + vec_vsx_st(v8, 0, dst); + dst += stride; + vec_vsx_st(v9, 0, dst); + dst += stride; + vec_vsx_st(v10, 0, dst); + dst += stride; + vec_vsx_st(v11, 0, dst); + dst += stride; + vec_vsx_st(v12, 0, dst); + dst += stride; + vec_vsx_st(v13, 0, dst); + dst += stride; + vec_vsx_st(v14, 0, dst); + dst += stride; + vec_vsx_st(v15, 0, dst); +} diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index f0609f591..adbb72974 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -119,7 +119,7 @@ add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, c specialize qw/vpx_d63_predictor_16x16 ssse3/; add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_16x16 neon dspr2 msa sse2/; +specialize qw/vpx_h_predictor_16x16 neon dspr2 msa sse2 vsx/; add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";