From: Manuel Rommel Date: Thu, 20 Mar 2008 19:21:16 +0000 (-0600) Subject: update altivec zigzags X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=72869f7648732a0d39398068e6288730ad009135;p=libx264 update altivec zigzags --- diff --git a/common/ppc/dct.c b/common/ppc/dct.c index 2be98937..024a157a 100644 --- a/common/ppc/dct.c +++ b/common/ppc/dct.c @@ -456,11 +456,10 @@ void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][8][8] ) x264_add8x8_idct8_altivec( &dst[8*FDEC_STRIDE+8], dct[3] ); } -void x264_zigzag_scan_4x4_frame_altivec( int level[16], int16_t dct[4][4] ) +void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[4][4] ) { vec_s16_t dct0v, dct1v; vec_s16_t tmp0v, tmp1v; - vec_s32_t level0v, level1v, level2v, level3v; dct0v = vec_ld(0x00, (int16_t*)dct); dct1v = vec_ld(0x10, (int16_t*)dct); @@ -471,22 +470,14 @@ void x264_zigzag_scan_4x4_frame_altivec( int level[16], int16_t dct[4][4] ) tmp0v = vec_perm( dct0v, dct1v, sel0 ); tmp1v = vec_perm( dct0v, dct1v, sel1 ); - level0v = vec_unpackh( tmp0v ); - level1v = vec_unpackl( tmp0v ); - level2v = vec_unpackh( tmp1v ); - level3v = vec_unpackl( tmp1v ); - - vec_st( level0v, 0x00, level ); - vec_st( level1v, 0x10, level ); - vec_st( level2v, 0x20, level ); - vec_st( level3v, 0x30, level ); + vec_st( tmp0v, 0x00, level ); + vec_st( tmp1v, 0x10, level ); } -void x264_zigzag_scan_4x4_field_altivec( int level[16], int16_t dct[4][4] ) +void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] ) { vec_s16_t dct0v, dct1v; vec_s16_t tmp0v, tmp1v; - vec_s32_t level0v, level1v, level2v, level3v; dct0v = vec_ld(0x00, (int16_t*)dct); dct1v = vec_ld(0x10, (int16_t*)dct); @@ -496,22 +487,14 @@ void x264_zigzag_scan_4x4_field_altivec( int level[16], int16_t dct[4][4] ) tmp0v = vec_perm( dct0v, dct1v, sel0 ); tmp1v = dct1v; - level0v = vec_unpackh( tmp0v ); - level1v = vec_unpackl( tmp0v ); - level2v = vec_unpackh( tmp1v ); - level3v = vec_unpackl( tmp1v ); - - vec_st( level0v, 0x00, level ); - vec_st( level1v, 0x10, level ); - vec_st( level2v, 0x20, level ); - vec_st( level3v, 0x30, level ); + vec_st( tmp0v, 0x00, level ); + vec_st( tmp1v, 0x10, level ); } -void x264_zigzag_scan_4x4ac_frame_altivec( int level[15], int16_t dct[4][4] ) +void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] ) { vec_s16_t dct0v, dct1v; vec_s16_t tmp0v, tmp1v; - vec_s32_t level0v, level1v, level2v, level3v; dct0v = vec_ld(0x00, (int16_t*)dct); dct1v = vec_ld(0x10, (int16_t*)dct); @@ -522,22 +505,14 @@ void x264_zigzag_scan_4x4ac_frame_altivec( int level[15], int16_t dct[4][4] ) tmp0v = vec_perm( dct0v, dct1v, sel0 ); tmp1v = vec_perm( dct0v, dct1v, sel1 ); - level0v = vec_unpackh( tmp0v ); - level1v = vec_unpackl( tmp0v ); - level2v = vec_unpackh( tmp1v ); - level3v = vec_unpackl( tmp1v ); - - vec_st( level0v, 0x00, level ); - vec_st( level1v, 0x10, level ); - vec_st( level2v, 0x20, level ); - vec_st( level3v, 0x30, level ); // FIXME?: write level[15] + vec_st( tmp0v, 0x00, level ); + vec_st( tmp1v, 0x10, level ); } -void x264_zigzag_scan_4x4ac_field_altivec( int level[15], int16_t dct[4][4] ) +void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] ) { vec_s16_t dct0v, dct1v; vec_s16_t tmp0v, tmp1v; - vec_s32_t level0v, level1v, level2v, level3v; dct0v = vec_ld(0x00, (int16_t*)dct); dct1v = vec_ld(0x10, (int16_t*)dct); @@ -548,13 +523,6 @@ void x264_zigzag_scan_4x4ac_field_altivec( int level[15], int16_t dct[4][4] ) tmp0v = vec_perm( dct0v, dct1v, sel0 ); tmp1v = vec_perm( dct0v, dct1v, sel1 ); - level0v = vec_unpackh( tmp0v ); - level1v = vec_unpackl( tmp0v ); - level2v = vec_unpackh( tmp1v ); - level3v = vec_unpackl( tmp1v ); - - vec_st( level0v, 0x00, level ); - vec_st( level1v, 0x10, level ); - vec_st( level2v, 0x20, level ); - vec_st( level3v, 0x30, level ); // FIXME?: write level[15] + vec_st( tmp0v, 0x00, level ); + vec_st( tmp1v, 0x10, level ); } diff --git a/common/ppc/dct.h b/common/ppc/dct.h index 7bcde437..fa3023b1 100644 --- a/common/ppc/dct.h +++ b/common/ppc/dct.h @@ -44,10 +44,10 @@ void x264_sub16x16_dct8_altivec( int16_t dct[4][8][8], void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] ); void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][8][8] ); -void x264_zigzag_scan_4x4_frame_altivec( int level[16], int16_t dct[4][4] ); -void x264_zigzag_scan_4x4ac_frame_altivec( int level[15], int16_t dct[4][4] ); +void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[4][4] ); +void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] ); -void x264_zigzag_scan_4x4_field_altivec( int level[16], int16_t dct[4][4] ); -void x264_zigzag_scan_4x4ac_field_altivec( int level[15], int16_t dct[4][4] ); +void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] ); +void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] ); #endif