permHighv = (vec_u8_t) CV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17);
VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
- vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, dct);
- vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, dct);
+ vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, (int16_t*)dct);
+ vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, (int16_t*)dct);
}
void x264_sub8x8_dct_altivec( int16_t dct[4][4][4],
VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
VEC_DCT( dct4v, dct5v, dct6v, dct7v, tmp4v, tmp5v, tmp6v, tmp7v );
- vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, dct);
- vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, dct);
- vec_st(vec_perm(tmp4v, tmp5v, permHighv), 32, dct);
- vec_st(vec_perm(tmp6v, tmp7v, permHighv), 48, dct);
- vec_st(vec_perm(tmp0v, tmp1v, permLowv), 64, dct);
- vec_st(vec_perm(tmp2v, tmp3v, permLowv), 80, dct);
- vec_st(vec_perm(tmp4v, tmp5v, permLowv), 96, dct);
- vec_st(vec_perm(tmp6v, tmp7v, permLowv), 112, dct);
+ vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, (int16_t*)dct);
+ vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, (int16_t*)dct);
+ vec_st(vec_perm(tmp4v, tmp5v, permHighv), 32, (int16_t*)dct);
+ vec_st(vec_perm(tmp6v, tmp7v, permHighv), 48, (int16_t*)dct);
+ vec_st(vec_perm(tmp0v, tmp1v, permLowv), 64, (int16_t*)dct);
+ vec_st(vec_perm(tmp2v, tmp3v, permLowv), 80, (int16_t*)dct);
+ vec_st(vec_perm(tmp4v, tmp5v, permLowv), 96, (int16_t*)dct);
+ vec_st(vec_perm(tmp6v, tmp7v, permLowv), 112, (int16_t*)dct);
}
void x264_sub16x16_dct_altivec( int16_t dct[16][4][4],
void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] )
{
- vec_s16_t onev = vec_splat_s16(1);
- vec_s16_t twov = vec_splat_s16(2);
+ vec_u16_t onev = vec_splat_s16(1);
+ vec_u16_t twov = vec_splat_s16(2);
dct[0][0] += 32; // rounding for the >>6 at the end
vec_u8_t perm_ldv = vec_lvsl(0, dst);
vec_u8_t perm_stv = vec_lvsr(8, dst);
- vec_s16_t sixv = vec_splat_s16(6);
+ vec_u16_t sixv = vec_splat_s16(6);
const vec_u8_t sel = (vec_u8_t) CV(0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1);
LOAD_ZERO;
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
-#ifdef HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-
#include "common/common.h"
#include "ppccommon.h"
#include "quant.h"
temp1v = vec_xor(temp1v, mskA); \
temp2v = vec_xor(temp2v, mskB); \
temp1v = vec_adds(temp1v, vec_and(mskA, one)); \
-vec_st(temp1v, (dct0), dct); \
+vec_st(temp1v, (dct0), (int16_t*)dct); \
temp2v = vec_adds(temp2v, vec_and(mskB, one)); \
-vec_st(temp2v, (dct1), dct);
+vec_st(temp2v, (dct1), (int16_t*)dct);
void x264_quant_4x4_altivec( int16_t dct[4][4], int quant_mf[4][4], int const i_qbits, int const f ) {
vector bool short mskA;
- vec_s32_t i_qbitsv;
+ vec_u32_t i_qbitsv;
vec_u16_t coefvA;
vec_u32_t multEvenvA, multOddvA;
- vec_u32_t mfvA;
+ vec_u16_t mfvA;
vec_s16_t zerov, one;
- vec_s32_t fV;
+ vec_u32_t fV;
vector bool short mskB;
vec_u16_t coefvB;
vec_u32_t multEvenvB, multOddvB;
- vec_u32_t mfvB;
+ vec_u16_t mfvB;
vec_s16_t temp1v, temp2v;
- vect_sint_u qbits_u;
+ vect_int_u qbits_u;
qbits_u.s[0]=i_qbits;
i_qbitsv = vec_splat(qbits_u.v, 0);
- vect_sint_u f_u;
+ vect_int_u f_u;
f_u.s[0]=f;
fV = vec_splat(f_u.v, 0);
temp1v = vec_xor(temp1v, mskA); \
temp2v = vec_xor(temp2v, mskB); \
temp1v = vec_add(temp1v, vec_and(mskA, one)); \
-vec_st(temp1v, (dct0), dct); \
+vec_st(temp1v, (dct0), (int16_t*)dct); \
temp2v = vec_add(temp2v, vec_and(mskB, one)); \
-vec_st(temp2v, (dct1), dct);
+vec_st(temp2v, (dct1), (int16_t*)dct);
void x264_quant_4x4_dc_altivec( int16_t dct[4][4], int i_quant_mf, int const i_qbits, int const f ) {
vector bool short mskA;
- vec_s32_t i_qbitsv;
+ vec_u32_t i_qbitsv;
vec_u16_t coefvA;
vec_u32_t multEvenvA, multOddvA;
vec_s16_t zerov, one;
- vec_s32_t fV;
+ vec_u32_t fV;
vector bool short mskB;
vec_u16_t coefvB;
vec_s16_t temp1v, temp2v;
- vec_u32_t mfv;
- vect_int_u mf_u;
+ vec_u16_t mfv;
+ vect_ushort_u mf_u;
mf_u.s[0]=i_quant_mf;
mfv = vec_splat( mf_u.v, 0 );
- mfv = vec_packs( mfv, mfv);
- vect_sint_u qbits_u;
+ vect_int_u qbits_u;
qbits_u.s[0]=i_qbits;
i_qbitsv = vec_splat(qbits_u.v, 0);
- vect_sint_u f_u;
+ vect_int_u f_u;
f_u.s[0]=f;
fV = vec_splat(f_u.v, 0);
void x264_quant_8x8_altivec( int16_t dct[8][8], int quant_mf[8][8], int const i_qbits, int const f ) {
vector bool short mskA;
- vec_s32_t i_qbitsv;
+ vec_u32_t i_qbitsv;
vec_u16_t coefvA;
- vec_s32_t multEvenvA, multOddvA, mfvA;
+ vec_u32_t multEvenvA, multOddvA;
+ vec_u16_t mfvA;
vec_s16_t zerov, one;
- vec_s32_t fV;
+ vec_u32_t fV;
vector bool short mskB;
vec_u16_t coefvB;
- vec_u32_t multEvenvB, multOddvB, mfvB;
+ vec_u32_t multEvenvB, multOddvB;
+ vec_u16_t mfvB;
vec_s16_t temp1v, temp2v;
qbits_u.s[0]=i_qbits;
i_qbitsv = vec_splat(qbits_u.v, 0);
- vect_sint_u f_u;
+ vect_int_u f_u;
f_u.s[0]=f;
fV = vec_splat(f_u.v, 0);