vec_u8_t lv = vec_ld( 7, dest ); \
vec_u8_t dstv = vec_perm( hv, lv, (vec_u8_t)perm_ldv ); \
vec_s16_t idct_sh6 = vec_sra(idctv, sixv); \
- vec_u16_t dst16 = vec_mergeh(zero_u8v, dstv); \
+ vec_u16_t dst16 = (vec_u16_t)vec_mergeh(zero_u8v, dstv); \
vec_s16_t idstsum = vec_adds(idct_sh6, (vec_s16_t)dst16); \
vec_u8_t idstsum8 = vec_packsu(zero_s16v, idstsum); \
/* unaligned store */ \
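The (vec_u16_t) cast added above is the whole fix for this hunk: vec_mergeh() on two vector unsigned char operands returns vector unsigned char, so the zero-extended pixels must be cast explicitly before they can be added to the 16-bit IDCT results. A minimal compile-only sketch of that zero-extension idiom, not part of the patch, assuming <altivec.h>, a big-endian AltiVec target, and vec_u8_t/vec_u16_t typedefs like the ones used here:

#include <altivec.h>

typedef vector unsigned char  vec_u8_t;
typedef vector unsigned short vec_u16_t;

static vec_u16_t widen_high_u8_to_u16( vec_u8_t pix )
{
    vec_u8_t zero = vec_splat_u8( 0 );
    /* Interleaving zero bytes with the 8 high pixel bytes gives
     * { 0,p0, 0,p1, ... }, i.e. the pixels zero-extended to 16 bits.
     * vec_mergeh() still returns vector unsigned char, hence the cast. */
    return (vec_u16_t)vec_mergeh( zero, pix );
}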
void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] )
{
- vec_u16_t onev = vec_splat_s16(1);
- vec_u16_t twov = vec_splat_s16(2);
+ vec_u16_t onev = vec_splat_u16(1);
+ vec_u16_t twov = vec_splat_u16(2);
dct[0][0] += 32; // rounding for the >>6 at the end
vec_u8_t perm_ldv = vec_lvsl(0, dst);
vec_u8_t perm_stv = vec_lvsr(8, dst);
- vec_u16_t sixv = vec_splat_s16(6);
+ vec_u16_t sixv = vec_splat_u16(6);
const vec_u8_t sel = (vec_u8_t) CV(0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1);
LOAD_ZERO;
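The three splat changes in this hunk are the same kind of strict-typing fix: vec_splat_s16() returns vector signed short, which cannot be assigned to a vec_u16_t without a conversion, while vec_splat_u16() produces vector unsigned short directly. A compile-only sketch, not part of the patch (splat_constants is an illustrative name):

#include <altivec.h>

typedef vector unsigned short vec_u16_t;

static void splat_constants( void )
{
    vec_u16_t six_direct = vec_splat_u16( 6 );            /* what the patch uses */
    vec_u16_t six_cast   = (vec_u16_t)vec_splat_s16( 6 ); /* explicit-cast alternative */
    (void)six_direct;
    (void)six_cast;
}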
#define PREP_DIFF_8BYTEALIGNED \
LOAD_ZERO; \
vec_s16_t pix1v, pix2v; \
+vec_u8_t pix1v8, pix2v8; \
vec_u8_t permPix1, permPix2; \
permPix1 = vec_lvsl(0, pix1); \
permPix2 = vec_lvsl(0, pix2); \
#define VEC_DIFF_H_8BYTE_ALIGNED(p1,i1,p2,i2,n,d) \
-pix1v = vec_perm(vec_ld(0,p1), zero_u8v, permPix1); \
-pix2v = vec_perm(vec_ld(0, p2), zero_u8v, permPix2); \
-pix1v = vec_u8_to_s16( pix1v ); \
-pix2v = vec_u8_to_s16( pix2v ); \
-d = vec_sub( pix1v, pix2v); \
-p1 += i1; \
+pix1v8 = vec_perm(vec_ld(0,p1), zero_u8v, permPix1); \
+pix2v8 = vec_perm(vec_ld(0, p2), zero_u8v, permPix2); \
+pix1v = vec_u8_to_s16( pix1v8 ); \
+pix2v = vec_u8_to_s16( pix2v8 ); \
+d = vec_sub( pix1v, pix2v); \
+p1 += i1; \
p2 += i2;
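The VEC_DIFF_H_8BYTE_ALIGNED change keeps each stage in its own type: the permuted bytes now land in the new vec_u8_t temporaries pix1v8/pix2v8 and are only widened to vec_s16_t via vec_u8_to_s16() before the subtraction, instead of round-tripping through the signed 16-bit pix1v/pix2v. A standalone sketch of that pattern, not part of the patch, assuming 8-byte-aligned sources, a big-endian target, and approximating vec_u8_to_s16() with a zero merge (an assumption about that helper, not its actual definition):

#include <altivec.h>

typedef vector unsigned char vec_u8_t;
typedef vector signed short  vec_s16_t;

/* Difference of 8 pixels; p1/p2 must be at least 8-byte aligned so the
 * source bytes never straddle the single 16-byte vec_ld block. */
static vec_s16_t pixel_diff_8( const unsigned char *p1, const unsigned char *p2 )
{
    vec_u8_t zero  = vec_splat_u8( 0 );
    vec_u8_t perm1 = vec_lvsl( 0, p1 );
    vec_u8_t perm2 = vec_lvsl( 0, p2 );

    /* keep the permuted bytes in u8 temporaries, as the patch does */
    vec_u8_t pix1v8 = vec_perm( vec_ld( 0, p1 ), zero, perm1 );
    vec_u8_t pix2v8 = vec_perm( vec_ld( 0, p2 ), zero, perm2 );

    /* widen to signed 16-bit (big-endian zero-extend, standing in for
     * vec_u8_to_s16) and take the difference */
    vec_s16_t pix1v = (vec_s16_t)vec_mergeh( zero, pix1v8 );
    vec_s16_t pix2v = (vec_s16_t)vec_mergeh( zero, pix2v8 );
    return vec_sub( pix1v, pix2v );
}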