granicus.if.org Git - libx264/commitdiff
ppc: AltiVec add8x8_idct_dc
author Alexandra Hájková <alexandra@khirnov.net>
Mon, 14 Nov 2016 14:06:05 +0000 (15:06 +0100)
committer Henrik Gramner <henrik@gramner.com>
Thu, 1 Dec 2016 15:10:37 +0000 (16:10 +0100)
common/dct.c
common/ppc/dct.c
common/ppc/dct.h

index 7dfeea27bb2ede570299c5166d5548bbc65aca11..d59c2db2ef20b5ba50dc3f5c932bcf52a606e64d 100644 (file)
@@ -720,6 +720,8 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
         dctf->sub8x8_dct    = x264_sub8x8_dct_altivec;
         dctf->sub16x16_dct  = x264_sub16x16_dct_altivec;
 
+        dctf->add8x8_idct_dc = x264_add8x8_idct_dc_altivec;
+
         dctf->add4x4_idct   = x264_add4x4_idct_altivec;
         dctf->add8x8_idct   = x264_add8x8_idct_altivec;
         dctf->add16x16_idct = x264_add16x16_idct_altivec;
index ddf62ec50bb9b8bc9cbde8bc8acfc3294e0c73b2..d0fdfed6927215f782074fd8785b2227f7fb3189 100644 (file)
@@ -229,6 +229,35 @@ void x264_sub16x16_dct8_altivec( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix
  * IDCT transform:
  ****************************************************************************/
 
+/* Add the signed 16-bit vector dcv to pixels loaded from dest and store the
+ * result back to dest.
+ *
+ * The vector at dest is loaded with vec_vsx_ld.  One half of it
+ * (vec_u8_to_s16_h) is summed with dcv using the saturating vec_adds;
+ * vec_packsu then narrows that sum to unsigned bytes with saturation,
+ * together with the other half (vec_u8_to_s16_l), to which dcv is NOT added.
+ * The whole vector is written back with vec_vsx_st, so the memory covered by
+ * the half that receives no dcv is read and written back as well.
+ *
+ * NOTE(review): vec_u8_to_s16_h/_l are defined outside this hunk; the caller
+ * in this file runs LOAD_ZERO before expanding the macro, presumably because
+ * those helpers use the zero vector it declares -- confirm against the PPC
+ * common header.
+ *
+ * dest is expanded twice (load and store), so pass a side-effect-free
+ * expression.  The locals dstv, dcvsum and dcvsum8 are declared inside the
+ * macro's braces; arguments must not be spelled with those names. */
+#define ALTIVEC_STORE8_DC_SUM_CLIP(dest, dcv)                         \
+{                                                                     \
+    /* unaligned load */                                              \
+    vec_u8_t dstv   = vec_vsx_ld( 0, dest );                          \
+    vec_s16_t dcvsum = vec_adds( dcv, vec_u8_to_s16_h( dstv ) );      \
+    vec_u8_t dcvsum8 = vec_packsu( dcvsum, vec_u8_to_s16_l( dstv ) ); \
+    /* unaligned store */                                             \
+    vec_vsx_st( dcvsum8, 0, dest );                                   \
+}
+
+/* Add a pair of DC terms to four rows of pixels starting at dst.
+ *
+ * dc1 and dc2 are each rounded with (dc + 32) >> 6 and laid out as four
+ * lanes of the first value followed by four lanes of the second.  That
+ * vector is summed into rows 0..3, FDEC_STRIDE bytes apart, through
+ * ALTIVEC_STORE8_DC_SUM_CLIP.
+ * NOTE(review): presumably dc1 lands on the left 4 pixels and dc2 on the
+ * right 4 of each 8-pixel row -- confirm against vec_u8_to_s16_h's lane
+ * order. */
+static void idct8_dc_altivec( uint8_t *dst, int16_t dc1, int16_t dc2 )
+{
+    const int16_t first_dc  = (dc1 + 32) >> 6;
+    const int16_t second_dc = (dc2 + 32) >> 6;
+    vec_s16_t dc_lanes = { first_dc,  first_dc,  first_dc,  first_dc,
+                           second_dc, second_dc, second_dc, second_dc };
+
+    /* Must precede the store macro; presumably declares the zero vector its
+     * widening helpers rely on (defined outside this hunk). */
+    LOAD_ZERO;
+    for( int row = 0; row < 4; row++ )
+        ALTIVEC_STORE8_DC_SUM_CLIP( &dst[row*FDEC_STRIDE], dc_lanes );
+}
+
+/* AltiVec add8x8_idct_dc: dct[0] and dct[1] are applied to the four rows
+ * beginning at p_dst, dct[2] and dct[3] to the four rows beginning
+ * 4*FDEC_STRIDE bytes further on. */
+void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] )
+{
+    uint8_t *upper_rows = p_dst;
+    uint8_t *lower_rows = p_dst + 4*FDEC_STRIDE;
+
+    idct8_dc_altivec( upper_rows, dct[0], dct[1] );
+    idct8_dc_altivec( lower_rows, dct[2], dct[3] );
+}
+
 #define IDCT_1D_ALTIVEC(s0, s1, s2, s3,  d0, d1, d2, d3) \
 {                                                        \
     /*        a0  = SRC(0) + SRC(2); */                  \
index 332f3ccc28880ed8ec57530a049c687cad419cfc..4011b8f92c9e28fa900f79f9d7a35e654c7f0cd6 100644 (file)
@@ -31,6 +31,8 @@ void x264_sub4x4_dct_altivec( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 );
 void x264_sub8x8_dct_altivec( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 );
 void x264_sub16x16_dct_altivec( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
 
+void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] );
+
 void x264_add4x4_idct_altivec( uint8_t *p_dst, int16_t dct[16] );
 void x264_add8x8_idct_altivec( uint8_t *p_dst, int16_t dct[4][16] );
 void x264_add16x16_idct_altivec( uint8_t *p_dst, int16_t dct[16][16] );