typedef struct
{
/* context */
- DECLARE_ALIGNED( uint8_t, state[460], 16 );
+ DECLARE_ALIGNED_16( uint8_t state[460] );
int f8_bits_encoded; // only if using x264_cabac_size_decision()
/* Current MB DCT coeffs */
struct
{
- DECLARE_ALIGNED( int16_t, luma16x16_dc[16], 16 );
- DECLARE_ALIGNED( int16_t, chroma_dc[2][4], 16 );
+ DECLARE_ALIGNED_16( int16_t luma16x16_dc[16] );
+ DECLARE_ALIGNED_16( int16_t chroma_dc[2][4] );
// FIXME share memory?
- DECLARE_ALIGNED( int16_t, luma8x8[4][64], 16 );
- DECLARE_ALIGNED( int16_t, luma4x4[16+8][16], 16 );
+ DECLARE_ALIGNED_16( int16_t luma8x8[4][64] );
+ DECLARE_ALIGNED_16( int16_t luma4x4[16+8][16] );
} dct;
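The array dimensions in this block mirror the per-macroblock H.264 block counts; the enum below is an illustrative breakdown only and is not part of the patch.

/* Illustration only: where the dct[] dimensions above come from
 * (assumed H.264 4:2:0 block counts, not code from this patch). */
enum
{
    LUMA_DC_COEFFS   = 16,      /* luma16x16_dc[16]: one DC per 4x4 luma block    */
    CHROMA_DC_COEFFS = 4,       /* chroma_dc[2][4]: 2x2 DC block per chroma plane */
    LUMA_8x8_BLOCKS  = 4,       /* luma8x8[4][64]                                 */
    LUMA_4x4_BLOCKS  = 16,      /* luma4x4[16+8][16]: 16 luma 4x4 blocks ...      */
    CHROMA_AC_BLOCKS = 8        /* ... plus 2 planes x 4 chroma AC blocks         */
};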
/* MB table and cache for current frame/mb */
/* space for p_fenc and p_fdec */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
- DECLARE_ALIGNED( uint8_t, fenc_buf[24*FENC_STRIDE], 16 );
- DECLARE_ALIGNED( uint8_t, fdec_buf[27*FDEC_STRIDE], 16 );
+ DECLARE_ALIGNED_16( uint8_t fenc_buf[24*FENC_STRIDE] );
+ DECLARE_ALIGNED_16( uint8_t fdec_buf[27*FDEC_STRIDE] );
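The buffer sizes follow from packing all three planes of one macroblock into a single aligned allocation. A plausible layout sketch under that assumption (the offsets are illustrative, not code from this patch):

/* Sketch only: one way p_fenc[] can be pointed into fenc_buf.
 * FENC_STRIDE is 16, so the 16x16 luma block fills rows 0..15 and the two
 * 8x8 chroma blocks sit side by side in rows 16..23 -> 24*FENC_STRIDE bytes.
 * The wider FDEC_STRIDE (32) and the 27 rows of fdec_buf presumably leave
 * room for the top/left neighbours needed by intra prediction. */
static void split_fenc_planes( uint8_t *fenc_buf, uint8_t *p_fenc[3] )
{
    p_fenc[0] = fenc_buf;                       /* Y: 16 rows of 16 bytes */
    p_fenc[1] = fenc_buf + 16*FENC_STRIDE;      /* U: 8 rows, left half   */
    p_fenc[2] = fenc_buf + 16*FENC_STRIDE + 8;  /* V: 8 rows, right half  */
}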
/* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
- DECLARE_ALIGNED( uint8_t, i4x4_fdec_buf[16*16], 16 );
- DECLARE_ALIGNED( uint8_t, i8x8_fdec_buf[16*16], 16 );
- DECLARE_ALIGNED( int16_t, i8x8_dct_buf[3][64], 16 );
- DECLARE_ALIGNED( int16_t, i4x4_dct_buf[15][16], 16 );
+ DECLARE_ALIGNED_16( uint8_t i4x4_fdec_buf[16*16] );
+ DECLARE_ALIGNED_16( uint8_t i8x8_fdec_buf[16*16] );
+ DECLARE_ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
+ DECLARE_ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
/* pointers to the current MB of the frame to be compressed */
uint8_t *p_fenc[3];
uint8_t non_zero_count[X264_SCAN8_SIZE];
/* -1 if unused, -2 if unavailable */
- DECLARE_ALIGNED( int8_t, ref[2][X264_SCAN8_SIZE], 4 );
+ DECLARE_ALIGNED_4( int8_t ref[2][X264_SCAN8_SIZE] );
/* 0 if not available */
- DECLARE_ALIGNED( int16_t, mv[2][X264_SCAN8_SIZE][2], 16 );
- DECLARE_ALIGNED( int16_t, mvd[2][X264_SCAN8_SIZE][2], 4 );
+ DECLARE_ALIGNED_16( int16_t mv[2][X264_SCAN8_SIZE][2] );
+ DECLARE_ALIGNED_4( int16_t mvd[2][X264_SCAN8_SIZE][2] );
/* 1 if SKIP or DIRECT. Set only for B-frames + CABAC */
- DECLARE_ALIGNED( int8_t, skip[X264_SCAN8_SIZE], 4 );
+ DECLARE_ALIGNED_4( int8_t skip[X264_SCAN8_SIZE] );
- DECLARE_ALIGNED( int16_t, direct_mv[2][X264_SCAN8_SIZE][2], 16 ) ;
+ DECLARE_ALIGNED_16( int16_t direct_mv[2][X264_SCAN8_SIZE][2] );
int8_t direct_ref[2][X264_SCAN8_SIZE];
int pskip_mv[2];
const int i_ref1 = h->mb.cache.ref[1][i8];
const int mvx1 = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
int mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
- DECLARE_ALIGNED( uint8_t, tmp[16*16], 16 );
+ DECLARE_ALIGNED_16( uint8_t tmp[16*16] );
int i_mode = x264_size2pixel[height][width];
x264_mb_mc_0xywh( h, x, y, width, height );
#endif
#ifdef _MSC_VER
-#define DECLARE_ALIGNED( type, var, n ) __declspec(align(n)) type var
+#define DECLARE_ALIGNED( var, n ) __declspec(align(n)) var
#else
-#define DECLARE_ALIGNED( type, var, n ) type var __attribute__((aligned(n)))
+#define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
#endif
+#define DECLARE_ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
+#define DECLARE_ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 )
+#define DECLARE_ALIGNED_4( var ) DECLARE_ALIGNED( var, 4 )
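For reference, an illustrative expansion of the new one-argument form; the declaration is taken from elsewhere in this patch, but the expanded lines are not part of it.

DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );

/* expands on GCC and compatible compilers to: */
int16_t dct4x4[4][4] __attribute__((aligned(16)));

/* and on MSVC to: */
__declspec(align(16)) int16_t dct4x4[4][4];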
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
#define UNUSED __attribute__((unused))
static inline void write16x4(uint8_t *dst, int dst_stride,
register vec_u8_t r0, register vec_u8_t r1,
register vec_u8_t r2, register vec_u8_t r3) {
- DECLARE_ALIGNED(unsigned char, result[64], 16);
+ DECLARE_ALIGNED_16(unsigned char result[64]);
uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
int int_dst_stride = dst_stride/4;
}
#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \
- DECLARE_ALIGNED(unsigned char, temp[16], 16); \
+ DECLARE_ALIGNED_16(unsigned char temp[16]); \
register vec_u8_t alphavec; \
register vec_u8_t betavec; \
register vec_u8_t mask; \
int d8x = mvx & 0x07;
int d8y = mvy & 0x07;
- DECLARE_ALIGNED( uint16_t, coeff[4], 16 );
+ DECLARE_ALIGNED_16( uint16_t coeff[4] );
coeff[0] = (8-d8x)*(8-d8y);
coeff[1] = d8x *(8-d8y);
coeff[2] = (8-d8x)*d8y;
int d8x = mvx & 0x07;
int d8y = mvy & 0x07;
- DECLARE_ALIGNED( uint16_t, coeff[4], 16 );
+ DECLARE_ALIGNED_16( uint16_t coeff[4] );
coeff[0] = (8-d8x)*(8-d8y);
coeff[1] = d8x *(8-d8y);
coeff[2] = (8-d8x)*d8y;
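These are the standard eighth-pel bilinear weights for chroma motion compensation; a self-contained worked sketch follows. The rounding form (sum + 32) >> 6 is the usual one and is assumed here, since this hunk does not show it.

#include <stdint.h>

/* Illustration only: how the four weights combine.  For mvx&7 == 3 and
 * mvy&7 == 2 the weights are 30, 18, 10 and 6, and they always sum to 64. */
static uint8_t chroma_bilinear_sample( uint8_t A, uint8_t B, uint8_t C, uint8_t D,
                                       int mvx, int mvy )
{
    int d8x = mvx & 0x07;
    int d8y = mvy & 0x07;
    int w0 = (8-d8x)*(8-d8y);   /* coeff[0]: top-left sample A     */
    int w1 =    d8x *(8-d8y);   /* coeff[1]: top-right sample B    */
    int w2 = (8-d8x)*   d8y;    /* coeff[2]: bottom-left sample C  */
    int w3 =    d8x *   d8y;    /* fourth weight: bottom-right D   */
    return (uint8_t)(( w0*A + w1*B + w2*C + w3*D + 32 ) >> 6);
}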
uint8_t *pix2, int i_pix2 ) \
{ \
int y; \
- DECLARE_ALIGNED( int, sum, 16 ); \
+ DECLARE_ALIGNED_16( int sum ); \
\
LOAD_ZERO; \
PREP_LOAD; \
static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED( int, i_satd, 16 );
+ DECLARE_ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v;
static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED( int, i_satd, 16 );
+ DECLARE_ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v;
static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED( int, i_satd, 16 );
+ DECLARE_ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v,
static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED( int, i_satd, 16 );
+ DECLARE_ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v,
static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED( int, i_satd, 16 );
+ DECLARE_ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v,
static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED( int, i_satd, 16 );
+ DECLARE_ALIGNED_16( int i_satd );
LOAD_ZERO;
PREP_LOAD;
static int pixel_satd_16x16_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
- DECLARE_ALIGNED( int, i_satd, 16 );
+ DECLARE_ALIGNED_16( int i_satd );
LOAD_ZERO;
PREP_LOAD;
static void pixel_sad_x4_16x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
{
- DECLARE_ALIGNED( int, sum0, 16 );
- DECLARE_ALIGNED( int, sum1, 16 );
- DECLARE_ALIGNED( int, sum2, 16 );
- DECLARE_ALIGNED( int, sum3, 16 );
+ DECLARE_ALIGNED_16( int sum0 );
+ DECLARE_ALIGNED_16( int sum1 );
+ DECLARE_ALIGNED_16( int sum2 );
+ DECLARE_ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
static void pixel_sad_x3_16x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
{
- DECLARE_ALIGNED( int, sum0, 16 );
- DECLARE_ALIGNED( int, sum1, 16 );
- DECLARE_ALIGNED( int, sum2, 16 );
+ DECLARE_ALIGNED_16( int sum0 );
+ DECLARE_ALIGNED_16( int sum1 );
+ DECLARE_ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
static void pixel_sad_x4_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
{
- DECLARE_ALIGNED( int, sum0, 16 );
- DECLARE_ALIGNED( int, sum1, 16 );
- DECLARE_ALIGNED( int, sum2, 16 );
- DECLARE_ALIGNED( int, sum3, 16 );
+ DECLARE_ALIGNED_16( int sum0 );
+ DECLARE_ALIGNED_16( int sum1 );
+ DECLARE_ALIGNED_16( int sum2 );
+ DECLARE_ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
{
- DECLARE_ALIGNED( int, sum0, 16 );
- DECLARE_ALIGNED( int, sum1, 16 );
- DECLARE_ALIGNED( int, sum2, 16 );
+ DECLARE_ALIGNED_16( int sum0 );
+ DECLARE_ALIGNED_16( int sum1 );
+ DECLARE_ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
static void pixel_sad_x4_8x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
{
- DECLARE_ALIGNED( int, sum0, 16 );
- DECLARE_ALIGNED( int, sum1, 16 );
- DECLARE_ALIGNED( int, sum2, 16 );
- DECLARE_ALIGNED( int, sum3, 16 );
+ DECLARE_ALIGNED_16( int sum0 );
+ DECLARE_ALIGNED_16( int sum1 );
+ DECLARE_ALIGNED_16( int sum2 );
+ DECLARE_ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
{
- DECLARE_ALIGNED( int, sum0, 16 );
- DECLARE_ALIGNED( int, sum1, 16 );
- DECLARE_ALIGNED( int, sum2, 16 );
+ DECLARE_ALIGNED_16( int sum0 );
+ DECLARE_ALIGNED_16( int sum1 );
+ DECLARE_ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
static void pixel_sad_x4_8x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
{
- DECLARE_ALIGNED( int, sum0, 16 );
- DECLARE_ALIGNED( int, sum1, 16 );
- DECLARE_ALIGNED( int, sum2, 16 );
- DECLARE_ALIGNED( int, sum3, 16 );
+ DECLARE_ALIGNED_16( int sum0 );
+ DECLARE_ALIGNED_16( int sum1 );
+ DECLARE_ALIGNED_16( int sum2 );
+ DECLARE_ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
{
- DECLARE_ALIGNED( int, sum0, 16 );
- DECLARE_ALIGNED( int, sum1, 16 );
- DECLARE_ALIGNED( int, sum2, 16 );
+ DECLARE_ALIGNED_16( int sum0 );
+ DECLARE_ALIGNED_16( int sum1 );
+ DECLARE_ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
static int pixel_ssd_16x16_altivec ( uint8_t *pix1, int i_stride_pix1,
uint8_t *pix2, int i_stride_pix2)
{
- DECLARE_ALIGNED( int, sum, 16 );
+ DECLARE_ALIGNED_16( int sum );
int y;
LOAD_ZERO;
static int pixel_ssd_8x8_altivec ( uint8_t *pix1, int i_stride_pix1,
uint8_t *pix2, int i_stride_pix2)
{
- DECLARE_ALIGNED( int, sum, 16 );
+ DECLARE_ALIGNED_16( int sum );
int y;
LOAD_ZERO;
const uint8_t *pix2, int stride2,
int sums[2][4] )
{
- DECLARE_ALIGNED( int, temp[4], 16 );
+ DECLARE_ALIGNED_16( int temp[4] );
int y;
vec_u8_t pix1v, pix2v;
PREDICT_8x8_LOAD_TOP\
PREDICT_8x8_LOAD_LEFT\
int t;\
- DECLARE_ALIGNED( int16_t, sa8d_1d[2][8], 16 );\
+ DECLARE_ALIGNED_16( int16_t sa8d_1d[2][8] );\
SUMSUB(l0,l4,l1,l5,l2,l6,l3,l7);\
SUMSUB(l0,l2,l1,l3,l4,l6,l5,l7);\
SUMSUB(l0,l1,l2,l3,l4,l5,l6,l7);\
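The three SUMSUB lines form the butterfly stages of an 8-point Hadamard transform. A scalar sketch of the same pairing pattern (illustrative only; SUMSUB's actual definition is not shown in this hunk):

#include <stdint.h>

/* Scalar sketch of the 8-point Hadamard butterflies with pair distances
 * 4, 2, 1 -- the same pairing as the three SUMSUB calls above. */
static void hadamard8_1d( int16_t d[8] )
{
    int dist, i;
    for( dist = 4; dist > 0; dist >>= 1 )
        for( i = 0; i < 8; i++ )
            if( !(i & dist) )
            {
                int16_t a = d[i], b = d[i+dist];
                d[i]      = a + b;
                d[i+dist] = a - b;
            }
}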
/* 8x8 */
int i_cost8x8;
/* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
- DECLARE_ALIGNED( int, mvc[32][5][2], 8 );
+ DECLARE_ALIGNED_8( int mvc[32][5][2] );
x264_me_t me8x8[4];
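As the comment notes, mvc caches one whole-MB candidate plus four partition candidates per reference. A hedged caller-side sketch of how such a list might be handed to the motion search; h, m and i_ref are assumed to be a valid encoder context, an initialised x264_me_t and a reference index, and passing NULL for the threshold is also an assumption.

/* Sketch only, not code from this patch: the five cached candidates for
 * reference i_ref are passed as a flat list to the search routine whose
 * declaration appears later in this patch. */
x264_me_search_ref( h, &m, mvc[i_ref], 1 + 4 /* 16x16 mv + four 8x8 mvs */, NULL );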
/* Sub 4x4 */
/* 8x8 prediction selection */
if( flags & X264_ANALYSE_I8x8 )
{
- DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+ DECLARE_ALIGNED_16( uint8_t edge[33] );
x264_pixel_cmp_t sa8d = (*h->pixf.mbcmp == *h->pixf.sad) ? h->pixf.sad[PIXEL_8x8] : h->pixf.sa8d[PIXEL_8x8];
int i_satd_thresh = a->b_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
int i_cost = 0;
}
else if( h->mb.i_type == I_8x8 )
{
- DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+ DECLARE_ALIGNED_16( uint8_t edge[33] );
for( idx = 0; idx < 4; idx++ )
{
uint64_t pels_h = 0;
{
x264_me_t m;
uint8_t **p_fenc = h->mb.pic.p_fenc;
- DECLARE_ALIGNED( int, mvc[3][2], 8 );
+ DECLARE_ALIGNED_8( int mvc[3][2] );
int i, j;
/* XXX Needed for x264_mb_predict_mv */
{
x264_me_t m;
uint8_t **p_fenc = h->mb.pic.p_fenc;
- DECLARE_ALIGNED( int, mvc[3][2], 8 );
+ DECLARE_ALIGNED_8( int mvc[3][2] );
int i, j;
/* XXX Needed for x264_mb_predict_mv */
static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
{
- DECLARE_ALIGNED( uint8_t, pix1[16*8], 8 );
+ DECLARE_ALIGNED_8( uint8_t pix1[16*8] );
uint8_t *pix2 = pix1+8;
const int i_stride = h->mb.pic.i_stride[1];
const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
{
- DECLARE_ALIGNED( uint8_t, pix1[16*16], 16 );
- DECLARE_ALIGNED( uint8_t, pix2[16*16], 16 );
+ DECLARE_ALIGNED_16( uint8_t pix1[16*16] );
+ DECLARE_ALIGNED_16( uint8_t pix2[16*16] );
uint8_t *src2;
int stride2 = 16;
int weight;
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
- DECLARE_ALIGNED( uint8_t, pix[2][8*8], 8 );
+ DECLARE_ALIGNED_8( uint8_t pix[2][8*8] );
int i, l;
/* XXX Needed for x264_mb_predict_mv */
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
- DECLARE_ALIGNED( uint8_t, pix[2][16*8], 16 );
- DECLARE_ALIGNED( int, mvc[2][2], 8 );
+ DECLARE_ALIGNED_16( uint8_t pix[2][16*8] );
+ DECLARE_ALIGNED_8( int mvc[2][2] );
int i, l;
h->mb.i_partition = D_16x8;
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
- DECLARE_ALIGNED( uint8_t, pix[2][8*16], 8 );
- DECLARE_ALIGNED( int, mvc[2][2], 8 );
+ DECLARE_ALIGNED_8( uint8_t pix[2][8*16] );
+ DECLARE_ALIGNED_8( int mvc[2][2] );
int i, l;
h->mb.i_partition = D_8x16;
int y = 4 * block_idx_y[idx];
uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
- DECLARE_ALIGNED( int16_t, dct4x4[4][4], 16 );
+ DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
if( h->mb.b_lossless )
{
int y = 8 * (idx>>1);
uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
- DECLARE_ALIGNED( int16_t, dct8x8[8][8], 16 );
+ DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
uint8_t *p_src = h->mb.pic.p_fenc[0];
uint8_t *p_dst = h->mb.pic.p_fdec[0];
- DECLARE_ALIGNED( int16_t, dct4x4[16+1][4][4], 16 );
+ DECLARE_ALIGNED_16( int16_t dct4x4[16+1][4][4] );
int i;
uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
int i_decimate_score = 0;
- DECLARE_ALIGNED( int16_t, dct2x2[2][2] , 16 );
- DECLARE_ALIGNED( int16_t, dct4x4[4][4][4], 16 );
+ DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
+ DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
if( h->mb.b_lossless )
{
}
else if( h->mb.i_type == I_8x8 )
{
- DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+ DECLARE_ALIGNED_16( uint8_t edge[33] );
h->mb.b_transform_8x8 = 1;
/* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
if( h->mb.i_skip_intra )
}
else if( h->mb.b_transform_8x8 )
{
- DECLARE_ALIGNED( int16_t, dct8x8[4][8][8], 16 );
+ DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
int nnz8x8[4] = {1,1,1,1};
b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
}
else
{
- DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
+ DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
int nnz8x8[4] = {1,1,1,1};
h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
*****************************************************************************/
int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
{
- DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
- DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
- DECLARE_ALIGNED( int16_t, dctscan[16], 16 );
+ DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
+ DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
+ DECLARE_ALIGNED_16( int16_t dctscan[16] );
int i_qp = h->mb.i_qp;
int mvp[2];
if( h->mb.b_transform_8x8 )
{
- DECLARE_ALIGNED( int16_t, dct8x8[8][8], 16 );
+ DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] );
h->zigzagf.scan_8x8( h->dct.luma8x8[i8], dct8x8 );
else
{
int i4;
- DECLARE_ALIGNED( int16_t, dct4x4[4][4][4], 16 );
+ DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
h->quantf.quant_4x4( dct4x4[0], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
h->quantf.quant_4x4( dct4x4[1], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
for( ch = 0; ch < 2; ch++ )
{
- DECLARE_ALIGNED( int16_t, dct4x4[4][4], 16 );
+ DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
p_fenc = h->mb.pic.p_fenc[1+ch] + (i8&1)*4 + (i8>>1)*4*FENC_STRIDE;
p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
int omx, omy, pmx, pmy;
uint8_t *p_fref = m->p_fref[0];
- DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
+ DECLARE_ALIGNED_16( uint8_t pix[16*16] );
int i, j;
int dir;
* because sum(abs(diff)) >= abs(diff(sum)). */
const int stride = m->i_stride[0];
uint16_t *sums_base = m->integral;
- DECLARE_ALIGNED( static uint8_t, zero[16*16], 16 ) = {0,};
- DECLARE_ALIGNED( int, enc_dc[4], 16 );
+ DECLARE_ALIGNED_16( static uint8_t zero[16*16] );
+ DECLARE_ALIGNED_16( int enc_dc[4] );
int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
int delta = x264_pixel_size[sad_size].w;
int16_t xs_buf[64];
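The inequality quoted in the comment above is the triangle inequality applied blockwise: the absolute difference of block sums is a lower bound on the block SAD, so candidates can be rejected from their sums alone before any full SAD is computed. A minimal scalar sketch of that rejection test (names are illustrative, not the patch's implementation):

/* Sketch only: since sum|a_i - b_i| >= |sum a_i - sum b_i|, a candidate
 * whose sum-difference already exceeds the best SAD found so far cannot
 * beat it, so the full SAD computation can be skipped. */
static int ads_reject( int enc_sum, int ref_sum, int best_sad )
{
    int bound = enc_sum - ref_sum;
    if( bound < 0 )
        bound = -bound;
    return bound > best_sad;   /* 1 -> skip this candidate */
}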
const int i_pixel = m->i_pixel;
const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
- DECLARE_ALIGNED( uint8_t, pix[2][32*18], 16 ); // really 17x17, but round up for alignment
+ DECLARE_ALIGNED_16( uint8_t pix[2][32*18] ); // really 17x17, but round up for alignment
int omx, omy;
int i;
const int16_t *p_cost_m0y = m0->p_cost_mv - x264_clip3( m0->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
const int16_t *p_cost_m1x = m1->p_cost_mv - x264_clip3( m1->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
const int16_t *p_cost_m1y = m1->p_cost_mv - x264_clip3( m1->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
- DECLARE_ALIGNED( uint8_t, pix0[9][16*16], 16 );
- DECLARE_ALIGNED( uint8_t, pix1[9][16*16], 16 );
- DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
+ DECLARE_ALIGNED_16( uint8_t pix0[9][16*16] );
+ DECLARE_ALIGNED_16( uint8_t pix1[9][16*16] );
+ DECLARE_ALIGNED_16( uint8_t pix[16*16] );
int bm0x = m0->mv[0], om0x = bm0x;
int bm0y = m0->mv[1], om0y = bm0y;
int bm1x = m1->mv[0], om1x = bm1x;
const int bh = x264_pixel_size[m->i_pixel].h>>2;
const int i_pixel = m->i_pixel;
- DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
+ DECLARE_ALIGNED_16( uint8_t pix[16*16] );
int bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX;
int bmx = m->mv[0];
int bmy = m->mv[1];
/* output */
int cost_mv; /* lambda * nbits for the chosen mv */
int cost; /* satd + lambda * nbits */
- DECLARE_ALIGNED( int, mv[2], 8 );
+ DECLARE_ALIGNED_8( int mv[2] );
} x264_me_t;
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh );
const int i_stride = fenc->i_stride_lowres;
const int i_pel_offset = 8 * ( i_mb_x + i_mb_y * i_stride );
- DECLARE_ALIGNED( uint8_t, pix1[9*FDEC_STRIDE], 8 );
+ DECLARE_ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] );
uint8_t *pix2 = pix1+8;
x264_me_t m[2];
int i_bcost = COST_MAX;
if( i_icost < i_bcost * 2 )
{
- DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+ DECLARE_ALIGNED_16( uint8_t edge[33] );
x264_predict_8x8_filter( pix, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
for( i=3; i<9; i++ )
{
#include <gtk/gtk.h>
-#define DECLARE_ALIGNED( type, var, n ) type var __attribute__((aligned(n)))
-
#include "../x264.h"
#include "../common/common.h"
x264_predict_t predict_8x8c[4+3];
x264_predict_t predict_4x4[9+3];
x264_predict8x8_t predict_8x8[9+3];
- DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+ DECLARE_ALIGNED_16( uint8_t edge[33] );
uint16_t cost_mv[32];
int ret = 0, ok, used_asm;
int i, j;
for( i=0; i<100 && ok; i++ )
if( pixel_asm.ads[i&3] != pixel_ref.ads[i&3] )
{
- DECLARE_ALIGNED( uint16_t, sums[72], 16 );
- DECLARE_ALIGNED( int, dc[4], 16 );
+ DECLARE_ALIGNED_16( uint16_t sums[72] );
+ DECLARE_ALIGNED_16( int dc[4] );
int16_t mvs_a[32], mvs_c[32];
int mvn_a, mvn_c;
int thresh = rand() & 0x3fff;
x264_dct_function_t dct_asm;
x264_quant_function_t qf;
int ret = 0, ok, used_asm, i;
- int16_t dct1[16][4][4] __attribute__((aligned(16)));
- int16_t dct2[16][4][4] __attribute__((aligned(16)));
- int16_t dct4[16][4][4] __attribute__((aligned(16)));
- int16_t dct8[4][8][8] __attribute__((aligned(16)));
+ DECLARE_ALIGNED_16( int16_t dct1[16][4][4] );
+ DECLARE_ALIGNED_16( int16_t dct2[16][4][4] );
+ DECLARE_ALIGNED_16( int16_t dct4[16][4][4] );
+ DECLARE_ALIGNED_16( int16_t dct8[4][8][8] );
x264_t h_buf;
x264_t *h = &h_buf;
ok = 1; used_asm = 0;
if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )
{
- int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
- int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
+ DECLARE_ALIGNED_16( int16_t dct1[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
+ DECLARE_ALIGNED_16( int16_t dct2[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
used_asm = 1;
call_c( dct_c.dct4x4dc, dct1 );
call_a( dct_asm.dct4x4dc, dct2 );
}
if( dct_asm.idct4x4dc != dct_ref.idct4x4dc )
{
- int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
- int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
+ DECLARE_ALIGNED_16( int16_t dct1[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
+ DECLARE_ALIGNED_16( int16_t dct2[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
used_asm = 1;
call_c( dct_c.idct4x4dc, dct1 );
call_a( dct_asm.idct4x4dc, dct2 );
ok = 1; used_asm = 0;
if( dct_asm.dct2x2dc != dct_ref.dct2x2dc )
{
- int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
- int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
+ DECLARE_ALIGNED_16( int16_t dct1[2][2] ) = {{-12, 42},{2, 90}};
+ DECLARE_ALIGNED_16( int16_t dct2[2][2] ) = {{-12, 42},{2, 90}};
used_asm = 1;
call_c( dct_c.dct2x2dc, dct1 );
call_a( dct_asm.dct2x2dc, dct2 );
}
if( dct_asm.idct2x2dc != dct_ref.idct2x2dc )
{
- int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
- int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
+ DECLARE_ALIGNED_16( int16_t dct1[2][2] ) = {{-12, 42},{2, 90}};
+ DECLARE_ALIGNED_16( int16_t dct2[2][2] ) = {{-12, 42},{2, 90}};
used_asm = 1;
call_c( dct_c.idct2x2dc, dct1 );
call_a( dct_asm.idct2x2dc, dct2 );
x264_zigzag_function_t zigzag_ref;
x264_zigzag_function_t zigzag_asm;
- int16_t level1[64] __attribute__((aligned(16)));
- int16_t level2[64] __attribute__((aligned(16)));
+ DECLARE_ALIGNED_16( int16_t level1[64] );
+ DECLARE_ALIGNED_16( int16_t level2[64] );
#define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
if( zigzag_asm.name != zigzag_ref.name ) \
x264_quant_function_t qf_c;
x264_quant_function_t qf_ref;
x264_quant_function_t qf_a;
- int16_t dct1[64] __attribute__((__aligned__(16)));
- int16_t dct2[64] __attribute__((__aligned__(16)));
- uint8_t cqm_buf[64] __attribute__((__aligned__(16)));
+ DECLARE_ALIGNED_16( int16_t dct1[64] );
+ DECLARE_ALIGNED_16( int16_t dct2[64] );
+ DECLARE_ALIGNED_16( uint8_t cqm_buf[64] );
int ret = 0, ok, used_asm;
int oks[2] = {1,1}, used_asms[2] = {0,0};
int i, i_cqm, qp;
{
int ret = 0, ok = 1, used_asm = 0;
int i;
- DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+ DECLARE_ALIGNED_16( uint8_t edge[33] );
struct
{
x264_predict_t predict_16x16[4+3];