ppc: horizontal predictor 32x32

author Luca Barbato <lu_zero@gentoo.org>

Fri, 7 Apr 2017 14:49:00 +0000 (14:49 +0000)

committer Luca Barbato <lu_zero@gentoo.org>

Tue, 18 Apr 2017 23:48:09 +0000 (01:48 +0200)
author Luca Barbato <lu_zero@gentoo.org>
Fri, 7 Apr 2017 14:49:00 +0000 (14:49 +0000)
committer Luca Barbato <lu_zero@gentoo.org>
Tue, 18 Apr 2017 23:48:09 +0000 (01:48 +0200)
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc

index 75abc169b430e096702b0cb16d994f66f224cb82..7dec13ecc2bb4b22694f6d8af197bb676de41d80 100644 (file)
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -318,8 +318,8 @@ INTRA_PRED_TEST(VSX, TestIntraPred16, NULL, NULL, NULL, NULL,
                  NULL, NULL, NULL, NULL, NULL, NULL)
  
  INTRA_PRED_TEST(VSX, TestIntraPred32, NULL, NULL, NULL, NULL,
-                vpx_v_predictor_32x32_vsx, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL)
+                vpx_v_predictor_32x32_vsx, vpx_h_predictor_32x32_vsx, NULL,
+                NULL, NULL, NULL, NULL, NULL, NULL)
  #endif  // HAVE_VSX
  
  // -----------------------------------------------------------------------------
diff --git a/vpx_dsp/ppc/intrapred_vsx.c b/vpx_dsp/ppc/intrapred_vsx.c

index d803d3876122041d01f6fcaedcbdbdee7a64741c..cce1ff96cf76bb3a4e827e257278b5b2379d36ec 100644 (file)
--- a/vpx_dsp/ppc/intrapred_vsx.c
+++ b/vpx_dsp/ppc/intrapred_vsx.c
@@ -92,3 +92,90 @@ void vpx_h_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
    dst += stride;
    vec_vsx_st(v15, 0, dst);
  }
+
+#define H_PREDICTOR_32(v) /* write v across one 32-byte row at dst */ \
+  vec_vsx_st(v, 0, dst);  /* 16-byte store at byte offset 0 */        \
+  vec_vsx_st(v, 16, dst); /* 16-byte store at byte offset 16 */       \
+  dst += stride /* next row; NOTE: expands to 3 bare statements, keep out of unbraced if/for */
+
+void vpx_h_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,  // writes 32 rows of 32 bytes, one fill value per row
+                               const uint8_t *above, const uint8_t *left) {
+  const uint8x16_t d0 = vec_vsx_ld(0, left);   // 16 bytes loaded at left + 0
+  const uint8x16_t d1 = vec_vsx_ld(16, left);  // 16 bytes loaded at left + 16
+
+  const uint8x16_t v0_0 = vec_splat(d0, 0);  // vN_0: element N of d0 replicated into every lane
+  const uint8x16_t v1_0 = vec_splat(d0, 1);
+  const uint8x16_t v2_0 = vec_splat(d0, 2);
+  const uint8x16_t v3_0 = vec_splat(d0, 3);
+  const uint8x16_t v4_0 = vec_splat(d0, 4);
+  const uint8x16_t v5_0 = vec_splat(d0, 5);
+  const uint8x16_t v6_0 = vec_splat(d0, 6);
+  const uint8x16_t v7_0 = vec_splat(d0, 7);
+  const uint8x16_t v8_0 = vec_splat(d0, 8);
+  const uint8x16_t v9_0 = vec_splat(d0, 9);
+  const uint8x16_t v10_0 = vec_splat(d0, 10);
+  const uint8x16_t v11_0 = vec_splat(d0, 11);
+  const uint8x16_t v12_0 = vec_splat(d0, 12);
+  const uint8x16_t v13_0 = vec_splat(d0, 13);
+  const uint8x16_t v14_0 = vec_splat(d0, 14);
+  const uint8x16_t v15_0 = vec_splat(d0, 15);
+
+  const uint8x16_t v0_1 = vec_splat(d1, 0);  // vN_1: element N of d1 replicated into every lane
+  const uint8x16_t v1_1 = vec_splat(d1, 1);
+  const uint8x16_t v2_1 = vec_splat(d1, 2);
+  const uint8x16_t v3_1 = vec_splat(d1, 3);
+  const uint8x16_t v4_1 = vec_splat(d1, 4);
+  const uint8x16_t v5_1 = vec_splat(d1, 5);
+  const uint8x16_t v6_1 = vec_splat(d1, 6);
+  const uint8x16_t v7_1 = vec_splat(d1, 7);
+  const uint8x16_t v8_1 = vec_splat(d1, 8);
+  const uint8x16_t v9_1 = vec_splat(d1, 9);
+  const uint8x16_t v10_1 = vec_splat(d1, 10);
+  const uint8x16_t v11_1 = vec_splat(d1, 11);
+  const uint8x16_t v12_1 = vec_splat(d1, 12);
+  const uint8x16_t v13_1 = vec_splat(d1, 13);
+  const uint8x16_t v14_1 = vec_splat(d1, 14);
+  const uint8x16_t v15_1 = vec_splat(d1, 15);
+
+  (void)above;  // above is never read here; the cast marks the parameter as intentionally unused
+
+  H_PREDICTOR_32(v0_0);  // rows 0..15: fill values come from d0; each call also advances dst by stride
+  H_PREDICTOR_32(v1_0);
+  H_PREDICTOR_32(v2_0);
+  H_PREDICTOR_32(v3_0);
+
+  H_PREDICTOR_32(v4_0);
+  H_PREDICTOR_32(v5_0);
+  H_PREDICTOR_32(v6_0);
+  H_PREDICTOR_32(v7_0);
+
+  H_PREDICTOR_32(v8_0);
+  H_PREDICTOR_32(v9_0);
+  H_PREDICTOR_32(v10_0);
+  H_PREDICTOR_32(v11_0);
+
+  H_PREDICTOR_32(v12_0);
+  H_PREDICTOR_32(v13_0);
+  H_PREDICTOR_32(v14_0);
+  H_PREDICTOR_32(v15_0);
+
+  H_PREDICTOR_32(v0_1);  // rows 16..31: fill values come from d1
+  H_PREDICTOR_32(v1_1);
+  H_PREDICTOR_32(v2_1);
+  H_PREDICTOR_32(v3_1);
+
+  H_PREDICTOR_32(v4_1);
+  H_PREDICTOR_32(v5_1);
+  H_PREDICTOR_32(v6_1);
+  H_PREDICTOR_32(v7_1);
+
+  H_PREDICTOR_32(v8_1);
+  H_PREDICTOR_32(v9_1);
+  H_PREDICTOR_32(v10_1);
+  H_PREDICTOR_32(v11_1);
+
+  H_PREDICTOR_32(v12_1);
+  H_PREDICTOR_32(v13_1);
+  H_PREDICTOR_32(v14_1);
+  H_PREDICTOR_32(v15_1);
+}
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl

index adbb72974c2484780b00dfffd28e3872cc6cc88a..a7bc5bf78d90654862123ae130f7fa57c314091d 100644 (file)
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -157,7 +157,7 @@ add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, c
  specialize qw/vpx_d63_predictor_32x32 ssse3/;
  
  add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_32x32 neon msa sse2/;
+specialize qw/vpx_h_predictor_32x32 neon msa sse2 vsx/;
  
  add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
author	Luca Barbato <lu_zero@gentoo.org>
	Fri, 7 Apr 2017 14:49:00 +0000 (14:49 +0000)
committer	Luca Barbato <lu_zero@gentoo.org>
	Tue, 18 Apr 2017 23:48:09 +0000 (01:48 +0200)
test/test_intra_pred_speed.cc		patch \| blob \| history
vpx_dsp/ppc/intrapred_vsx.c		patch \| blob \| history
vpx_dsp/vpx_dsp_rtcd_defs.pl		patch \| blob \| history