// vec_mladd results in overflow.
// Dequantizes one vector of 32x32-block coefficients: computes
// (qcoeff * dequant) / 2 per 16-bit lane, rounding towards zero to match the
// C reference implementation, which uses integer division.
static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
                                            int16x8_t dequant) {
  // Full 32-bit products of the even and odd 16-bit lanes (a single
  // vec_mladd would overflow 16 bits — see comment above).
  int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
  int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
  // Add 1 if negative to round towards zero because the C uses division.
  dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
  dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
  dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
  // Single permute interleaves the even/odd 32-bit results back into 16-bit
  // lane order, replacing the previous vec_pack + vec_perm pair.
  return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack);
}
void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
#include "vpx_dsp/ppc/txfm_common_vsx.h"
#include "vpx_dsp/ppc/transpose_vsx.h"
-static const uint8x16_t vec_perm_pack = { 0x00, 0x01, 0x10, 0x11, 0x04, 0x05,
- 0x14, 0x15, 0x08, 0x09, 0x18, 0x19,
- 0x0C, 0x0D, 0x1C, 0x1D };
-
// Returns ((a +/- b) * cospi16 + (2 << 13)) >> 14.
static INLINE void single_butterfly(int16x8_t a, int16x8_t b, int16x8_t *add,
int16x8_t *sub) {
const int32x4_t sdiff_e = vec_sra(rdiff_e, vec_dct_const_bits);
// There's no pack operation for even and odd, so we need to permute.
- *add = (int16x8_t)vec_perm(ssum_e, ssum_o, vec_perm_pack);
- *sub = (int16x8_t)vec_perm(sdiff_e, sdiff_o, vec_perm_pack);
+ *add = (int16x8_t)vec_perm(ssum_e, ssum_o, vec_perm_odd_even_pack);
+ *sub = (int16x8_t)vec_perm(sdiff_e, sdiff_o, vec_perm_odd_even_pack);
}
// Returns (a * c1 +/- b * c2 + (2 << 13)) >> 14
const int32x4_t sdiff_e = vec_sra(rdiff_e, vec_dct_const_bits);
// There's no pack operation for even and odd, so we need to permute.
- *add = (int16x8_t)vec_perm(ssum_e, ssum_o, vec_perm_pack);
- *sub = (int16x8_t)vec_perm(sdiff_e, sdiff_o, vec_perm_pack);
+ *add = (int16x8_t)vec_perm(ssum_e, ssum_o, vec_perm_odd_even_pack);
+ *sub = (int16x8_t)vec_perm(sdiff_e, sdiff_o, vec_perm_odd_even_pack);
}
// While other architectures combine the load and the stage 1 operations, Power9
// vec_mladd results in overflow.
// Dequantizes one vector of 32x32-block coefficients: computes
// (qcoeff * dequant) / 2 per 16-bit lane, rounding towards zero to match the
// C reference implementation, which uses integer division.
static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
                                            int16x8_t dequant) {
  // Full 32-bit products of the even and odd 16-bit lanes (a single
  // vec_mladd would overflow 16 bits — see comment above).
  int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
  int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
  // Add 1 if negative to round towards zero because the C uses division.
  dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
  dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
  dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
  // Single permute interleaves the even/odd 32-bit results back into 16-bit
  // lane order, replacing the previous vec_pack + vec_perm pair.
  return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack);
}
static INLINE int16x8_t nonzero_scanindex(int16x8_t qcoeff, bool16x8_t mask,
0x08, 0x09, 0x0A, 0x0B, 0x0E, 0x0D,
0x0E, 0x0F, 0x00, 0x01 };
-static const uint8x16_t vec_perm_merge = { 0x00, 0x01, 0x08, 0x09, 0x02, 0x03,
- 0x0A, 0x0B, 0x04, 0x05, 0x0C, 0x0D,
- 0x06, 0x07, 0x0E, 0x0F };
+static const uint8x16_t vec_perm_odd_even_pack = { 0x00, 0x01, 0x10, 0x11,
+ 0x04, 0x05, 0x14, 0x15,
+ 0x08, 0x09, 0x18, 0x19,
+ 0x0C, 0x0D, 0x1C, 0x1D };
#endif // VPX_DSP_PPC_TYPES_VSX_H_