From: Luca Barbato
Date: Sat, 8 Apr 2017 02:55:33 +0000 (+0000)
Subject: ppc: dc top and left predictor 16x16
X-Git-Tag: v1.7.0~545
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ad9dea1f6d1cea9c704c4f936742288ac5c0c2c6;p=libvpx

ppc: dc top and left predictor 16x16

13x faster.

Change-Id: I1771ac39fda599153f933cb3f0506c9f97a6cbe6
---

diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index 4ba018227..533ca7198 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -316,10 +316,10 @@ INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa,
 INTRA_PRED_TEST(VSX, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                 NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_vsx)
 
-INTRA_PRED_TEST(VSX, TestIntraPred16, NULL, NULL, NULL,
-                vpx_dc_128_predictor_16x16_vsx, vpx_v_predictor_16x16_vsx,
-                vpx_h_predictor_16x16_vsx, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_16x16_vsx)
+INTRA_PRED_TEST(VSX, TestIntraPred16, NULL, vpx_dc_left_predictor_16x16_vsx,
+                vpx_dc_top_predictor_16x16_vsx, vpx_dc_128_predictor_16x16_vsx,
+                vpx_v_predictor_16x16_vsx, vpx_h_predictor_16x16_vsx, NULL,
+                NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_vsx)
 
 INTRA_PRED_TEST(VSX, TestIntraPred32, NULL, NULL, NULL,
                 vpx_dc_128_predictor_32x32_vsx, vpx_v_predictor_32x32_vsx,
diff --git a/vpx_dsp/ppc/intrapred_vsx.c b/vpx_dsp/ppc/intrapred_vsx.c
index 9350170f3..71c35e3e3 100644
--- a/vpx_dsp/ppc/intrapred_vsx.c
+++ b/vpx_dsp/ppc/intrapred_vsx.c
@@ -398,3 +398,28 @@ void vpx_dc_128_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
 
   dc_fill_predictor_32x32(dst, stride, v128);
 }
+
+static uint8x16_t avg16(const uint8_t *values) {
+  const int32x4_t sum4s =
+      (int32x4_t)vec_sum4s(vec_vsx_ld(0, values), vec_splat_u32(0));
+  const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, vec_splat_s32(8));
+  const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(4));
+
+  return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)),
+                   3);
+}
+
+void vpx_dc_left_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
+                                     const uint8_t *above,
+                                     const uint8_t *left) {
+  (void)above;
+
+  dc_fill_predictor_16x16(dst, stride, avg16(left));
+}
+
+void vpx_dc_top_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
+                                    const uint8_t *above, const uint8_t *left) {
+  (void)left;
+
+  dc_fill_predictor_16x16(dst, stride, avg16(above));
+}
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index f347290c3..c6e8ae577 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -139,10 +139,10 @@ add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, co
 specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa sse2/;
 
 add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_top_predictor_16x16 neon msa sse2/;
+specialize qw/vpx_dc_top_predictor_16x16 neon msa sse2 vsx/;
 
 add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_left_predictor_16x16 neon msa sse2/;
+specialize qw/vpx_dc_left_predictor_16x16 neon msa sse2 vsx/;
 
 add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_128_predictor_16x16 neon msa sse2 vsx/;