From: Loren Merritt
Date: Sat, 22 Mar 2008 09:25:03 +0000 (-0600)
Subject: cosmetics in DECLARE_ALIGNED
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=542027fac9212ca1f6d24a39ebea779bfec91123;p=libx264

cosmetics in DECLARE_ALIGNED
---
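Note for readers (an editorial aside, not part of the patch): the change is purely mechanical. The common/osdep.h hunk below rewrites DECLARE_ALIGNED to take the whole declarator as a single macro argument and adds fixed-alignment wrappers; every other hunk converts a call site. A minimal sketch of the two styles, using the GCC branch of the macros; the name example_buf is hypothetical:

    #include <stdint.h>

    /* new macros, as defined in the common/osdep.h hunk (GCC variant) */
    #define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
    #define DECLARE_ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )

    /* old style: type, name and alignment as three separate macro arguments */
    /* DECLARE_ALIGNED( uint8_t, example_buf[16*16], 16 ); */

    /* new style: one declarator argument, alignment encoded in the macro name */
    DECLARE_ALIGNED_16( uint8_t example_buf[16*16] );

The new form still accepts storage-class specifiers and initializers inside the argument, which the me.c and tools/checkasm.c hunks rely on, e.g. DECLARE_ALIGNED_16( static uint8_t zero[16*16] ); that me.c hunk also drops the old = {0,} initializer, which is redundant on static storage.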
diff --git a/common/cabac.h b/common/cabac.h
index f829162d..2a862704 100644
--- a/common/cabac.h
+++ b/common/cabac.h
@@ -27,7 +27,7 @@
 typedef struct
 {
     /* context */
-    DECLARE_ALIGNED( uint8_t, state[460], 16 );
+    DECLARE_ALIGNED_16( uint8_t state[460] );
 
     int f8_bits_encoded; // only if using x264_cabac_size_decision()
 
diff --git a/common/common.h b/common/common.h
index 4a18cfcf..b9f4869f 100644
--- a/common/common.h
+++ b/common/common.h
@@ -336,11 +336,11 @@ struct x264_t
     /* Current MB DCT coeffs */
     struct
     {
-        DECLARE_ALIGNED( int16_t, luma16x16_dc[16], 16 );
-        DECLARE_ALIGNED( int16_t, chroma_dc[2][4], 16 );
+        DECLARE_ALIGNED_16( int16_t luma16x16_dc[16] );
+        DECLARE_ALIGNED_16( int16_t chroma_dc[2][4] );
         // FIXME share memory?
-        DECLARE_ALIGNED( int16_t, luma8x8[4][64], 16 );
-        DECLARE_ALIGNED( int16_t, luma4x4[16+8][16], 16 );
+        DECLARE_ALIGNED_16( int16_t luma8x8[4][64] );
+        DECLARE_ALIGNED_16( int16_t luma4x4[16+8][16] );
     } dct;
 
     /* MB table and cache for current frame/mb */
@@ -430,14 +430,14 @@ struct x264_t
         /* space for p_fenc and p_fdec */
 #define FENC_STRIDE 16
 #define FDEC_STRIDE 32
-        DECLARE_ALIGNED( uint8_t, fenc_buf[24*FENC_STRIDE], 16 );
-        DECLARE_ALIGNED( uint8_t, fdec_buf[27*FDEC_STRIDE], 16 );
+        DECLARE_ALIGNED_16( uint8_t fenc_buf[24*FENC_STRIDE] );
+        DECLARE_ALIGNED_16( uint8_t fdec_buf[27*FDEC_STRIDE] );
 
         /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
-        DECLARE_ALIGNED( uint8_t, i4x4_fdec_buf[16*16], 16 );
-        DECLARE_ALIGNED( uint8_t, i8x8_fdec_buf[16*16], 16 );
-        DECLARE_ALIGNED( int16_t, i8x8_dct_buf[3][64], 16 );
-        DECLARE_ALIGNED( int16_t, i4x4_dct_buf[15][16], 16 );
+        DECLARE_ALIGNED_16( uint8_t i4x4_fdec_buf[16*16] );
+        DECLARE_ALIGNED_16( uint8_t i8x8_fdec_buf[16*16] );
+        DECLARE_ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
+        DECLARE_ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
 
         /* pointer over mb of the frame to be compressed */
         uint8_t *p_fenc[3];
@@ -464,16 +464,16 @@ struct x264_t
         uint8_t non_zero_count[X264_SCAN8_SIZE];
 
         /* -1 if unused, -2 if unavailable */
-        DECLARE_ALIGNED( int8_t, ref[2][X264_SCAN8_SIZE], 4 );
+        DECLARE_ALIGNED_4( int8_t ref[2][X264_SCAN8_SIZE] );
 
         /* 0 if not available */
-        DECLARE_ALIGNED( int16_t, mv[2][X264_SCAN8_SIZE][2], 16 );
-        DECLARE_ALIGNED( int16_t, mvd[2][X264_SCAN8_SIZE][2], 4 );
+        DECLARE_ALIGNED_16( int16_t mv[2][X264_SCAN8_SIZE][2] );
+        DECLARE_ALIGNED_4( int16_t mvd[2][X264_SCAN8_SIZE][2] );
 
         /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
-        DECLARE_ALIGNED( int8_t, skip[X264_SCAN8_SIZE], 4 );
+        DECLARE_ALIGNED_4( int8_t skip[X264_SCAN8_SIZE] );
 
-        DECLARE_ALIGNED( int16_t, direct_mv[2][X264_SCAN8_SIZE][2], 16 ) ;
+        DECLARE_ALIGNED_16( int16_t direct_mv[2][X264_SCAN8_SIZE][2] );
         int8_t direct_ref[2][X264_SCAN8_SIZE];
         int pskip_mv[2];
diff --git a/common/macroblock.c b/common/macroblock.c
index ead6a7c5..7b05c32c 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -660,7 +660,7 @@ static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int he
     const int i_ref1 = h->mb.cache.ref[1][i8];
     const int mvx1 = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
     int mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
-    DECLARE_ALIGNED( uint8_t, tmp[16*16], 16 );
+    DECLARE_ALIGNED_16( uint8_t tmp[16*16] );
     int i_mode = x264_size2pixel[height][width];
 
     x264_mb_mc_0xywh( h, x, y, width, height );
diff --git a/common/osdep.h b/common/osdep.h
index eac513ae..7606d667 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -58,10 +58,13 @@
 #endif
 
 #ifdef _MSC_VER
-#define DECLARE_ALIGNED( type, var, n ) __declspec(align(n)) type var
+#define DECLARE_ALIGNED( var, n ) __declspec(align(n)) var
 #else
-#define DECLARE_ALIGNED( type, var, n ) type var __attribute__((aligned(n)))
+#define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
 #endif
+#define DECLARE_ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
+#define DECLARE_ALIGNED_8( var )  DECLARE_ALIGNED( var, 8 )
+#define DECLARE_ALIGNED_4( var )  DECLARE_ALIGNED( var, 4 )
 
 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
 #define UNUSED __attribute__((unused))
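Background on why these buffers carry alignment at all (again an aside, not part of the patch): they are handed to AltiVec and SSE routines that use aligned vector loads and stores (vec_ld, movdqa), which fault or silently mask the address bits when given unaligned pointers. A self-contained sketch, assuming GCC on an SSE2-capable x86 target; the function name demo is illustrative only:

    #include <stdint.h>
    #include <emmintrin.h> /* SSE2 intrinsics */

    #define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
    #define DECLARE_ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )

    int16_t demo( void )
    {
        /* the attribute guarantees 16-byte alignment, even for stack storage */
        DECLARE_ALIGNED_16( int16_t buf[8] ) = { 1, 2, 3, 4, 5, 6, 7, 8 };
        __m128i v = _mm_load_si128( (const __m128i*)buf ); /* aligned load; faults if buf were unaligned */
        v = _mm_add_epi16( v, v );                         /* double each of the 8 lanes */
        _mm_store_si128( (__m128i*)buf, v );               /* aligned store back */
        return buf[0];                                     /* 2 */
    }

The 8- and 4-byte variants serve a different purpose: arrays like mv[2] and the scan8 caches are not vector operands, but aligning a pair of values lets the code touch it as one 32- or 64-bit access.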
diff --git a/common/ppc/deblock.c b/common/ppc/deblock.c
index 127a0eef..33b653c0 100644
--- a/common/ppc/deblock.c
+++ b/common/ppc/deblock.c
@@ -48,7 +48,7 @@ static inline void write16x4(uint8_t *dst, int dst_stride,
                              register vec_u8_t r0, register vec_u8_t r1,
                              register vec_u8_t r2, register vec_u8_t r3)
 {
-    DECLARE_ALIGNED(unsigned char, result[64], 16);
+    DECLARE_ALIGNED_16(unsigned char result[64]);
     uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
     int int_dst_stride = dst_stride/4;
 
@@ -229,7 +229,7 @@ static inline vec_u8_t h264_deblock_q1(register vec_u8_t p0,
 }
 
 #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \
-    DECLARE_ALIGNED(unsigned char, temp[16], 16);                                 \
+    DECLARE_ALIGNED_16(unsigned char temp[16]);                                   \
     register vec_u8_t alphavec;                                                   \
     register vec_u8_t betavec;                                                    \
     register vec_u8_t mask;                                                       \
diff --git a/common/ppc/mc.c b/common/ppc/mc.c
index eff4360e..3c69b261 100644
--- a/common/ppc/mc.c
+++ b/common/ppc/mc.c
@@ -262,7 +262,7 @@ static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride,
     int d8x = mvx & 0x07;
     int d8y = mvy & 0x07;
 
-    DECLARE_ALIGNED( uint16_t, coeff[4], 16 );
+    DECLARE_ALIGNED_16( uint16_t coeff[4] );
     coeff[0] = (8-d8x)*(8-d8y);
     coeff[1] = d8x    *(8-d8y);
     coeff[2] = (8-d8x)*d8y;
@@ -328,7 +328,7 @@ static void mc_chroma_altivec_8xh( uint8_t *dst, int i_dst_stride,
     int d8x = mvx & 0x07;
     int d8y = mvy & 0x07;
 
-    DECLARE_ALIGNED( uint16_t, coeff[4], 16 );
+    DECLARE_ALIGNED_16( uint16_t coeff[4] );
     coeff[0] = (8-d8x)*(8-d8y);
     coeff[1] = d8x    *(8-d8y);
     coeff[2] = (8-d8x)*d8y;
diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
index 4bfa1cbb..e964cbe0 100644
--- a/common/ppc/pixel.c
+++ b/common/ppc/pixel.c
@@ -38,7 +38,7 @@ static int name( uint8_t *pix1, int i_pix1,  \
                  uint8_t *pix2, int i_pix2 )  \
 {                                             \
     int y;                                    \
-    DECLARE_ALIGNED( int, sum, 16 );          \
+    DECLARE_ALIGNED_16( int sum );            \
                                               \
     LOAD_ZERO;                                \
     PREP_LOAD;                                \
@@ -121,7 +121,7 @@ PIXEL_SAD_ALTIVEC( pixel_sad_8x8_altivec, 8, 8, 2s, 1 )
 static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
                                    uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED( int, i_satd, 16 );
+    DECLARE_ALIGNED_16( int i_satd );
 
     PREP_DIFF;
     vec_s16_t diff0v, diff1v, diff2v, diff3v;
@@ -161,7 +161,7 @@ static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
                                    uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED( int, i_satd, 16 );
+    DECLARE_ALIGNED_16( int i_satd );
 
     PREP_DIFF;
     vec_s16_t diff0v, diff1v, diff2v, diff3v;
@@ -211,7 +211,7 @@ static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
                                    uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED( int, i_satd, 16 );
+    DECLARE_ALIGNED_16( int i_satd );
 
     PREP_DIFF;
     vec_s16_t diff0v, diff1v, diff2v, diff3v,
@@ -260,7 +260,7 @@ static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
                                    uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED( int, i_satd, 16 );
+    DECLARE_ALIGNED_16( int i_satd );
 
     PREP_DIFF;
     vec_s16_t diff0v, diff1v, diff2v, diff3v,
@@ -315,7 +315,7 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
                                     uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED( int, i_satd, 16 );
+    DECLARE_ALIGNED_16( int i_satd );
 
     PREP_DIFF;
     vec_s16_t diff0v, diff1v, diff2v, diff3v,
@@ -395,7 +395,7 @@ static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
                                     uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED( int, i_satd, 16 );
+    DECLARE_ALIGNED_16( int i_satd );
 
     LOAD_ZERO;
     PREP_LOAD;
@@ -478,7 +478,7 @@ static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
 static int pixel_satd_16x16_altivec( uint8_t *pix1, int i_pix1,
                                      uint8_t *pix2, int i_pix2 )
 {
-    DECLARE_ALIGNED( int, i_satd, 16 );
+    DECLARE_ALIGNED_16( int i_satd );
 
     LOAD_ZERO;
     PREP_LOAD;
@@ -604,10 +604,10 @@ static int pixel_satd_16x16_altivec( uint8_t *pix1, int i_pix1,
 static void pixel_sad_x4_16x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
 {
-    DECLARE_ALIGNED( int, sum0, 16 );
-    DECLARE_ALIGNED( int, sum1, 16 );
-    DECLARE_ALIGNED( int, sum2, 16 );
-    DECLARE_ALIGNED( int, sum3, 16 );
+    DECLARE_ALIGNED_16( int sum0 );
+    DECLARE_ALIGNED_16( int sum1 );
+    DECLARE_ALIGNED_16( int sum2 );
+    DECLARE_ALIGNED_16( int sum3 );
     int y;
 
     LOAD_ZERO;
@@ -730,9 +730,9 @@ static void pixel_sad_x4_16x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *p
 static void pixel_sad_x3_16x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
 {
-    DECLARE_ALIGNED( int, sum0, 16 );
-    DECLARE_ALIGNED( int, sum1, 16 );
-    DECLARE_ALIGNED( int, sum2, 16 );
+    DECLARE_ALIGNED_16( int sum0 );
+    DECLARE_ALIGNED_16( int sum1 );
+    DECLARE_ALIGNED_16( int sum2 );
     int y;
 
     LOAD_ZERO;
@@ -832,10 +832,10 @@ static void pixel_sad_x3_16x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *p
 static void pixel_sad_x4_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
 {
-    DECLARE_ALIGNED( int, sum0, 16 );
-    DECLARE_ALIGNED( int, sum1, 16 );
-    DECLARE_ALIGNED( int, sum2, 16 );
-    DECLARE_ALIGNED( int, sum3, 16 );
+    DECLARE_ALIGNED_16( int sum0 );
+    DECLARE_ALIGNED_16( int sum1 );
+    DECLARE_ALIGNED_16( int sum2 );
+    DECLARE_ALIGNED_16( int sum3 );
     int y;
 
     LOAD_ZERO;
@@ -958,9 +958,9 @@ static void pixel_sad_x4_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pi
 static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
 {
-    DECLARE_ALIGNED( int, sum0, 16 );
-    DECLARE_ALIGNED( int, sum1, 16 );
-    DECLARE_ALIGNED( int, sum2, 16 );
+    DECLARE_ALIGNED_16( int sum0 );
+    DECLARE_ALIGNED_16( int sum1 );
+    DECLARE_ALIGNED_16( int sum2 );
     int y;
 
     LOAD_ZERO;
@@ -1061,10 +1061,10 @@ static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pi
 static void pixel_sad_x4_8x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
 {
-    DECLARE_ALIGNED( int, sum0, 16 );
-    DECLARE_ALIGNED( int, sum1, 16 );
-    DECLARE_ALIGNED( int, sum2, 16 );
-    DECLARE_ALIGNED( int, sum3, 16 );
+    DECLARE_ALIGNED_16( int sum0 );
+    DECLARE_ALIGNED_16( int sum1 );
+    DECLARE_ALIGNED_16( int sum2 );
+    DECLARE_ALIGNED_16( int sum3 );
     int y;
 
     LOAD_ZERO;
@@ -1184,9 +1184,9 @@ static void pixel_sad_x4_8x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pi
 static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
 {
-    DECLARE_ALIGNED( int, sum0, 16 );
-    DECLARE_ALIGNED( int, sum1, 16 );
-    DECLARE_ALIGNED( int, sum2, 16 );
+    DECLARE_ALIGNED_16( int sum0 );
+    DECLARE_ALIGNED_16( int sum1 );
+    DECLARE_ALIGNED_16( int sum2 );
     int y;
 
     LOAD_ZERO;
@@ -1289,10 +1289,10 @@ static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pi
 static void pixel_sad_x4_8x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
 {
-    DECLARE_ALIGNED( int, sum0, 16 );
-    DECLARE_ALIGNED( int, sum1, 16 );
-    DECLARE_ALIGNED( int, sum2, 16 );
-    DECLARE_ALIGNED( int, sum3, 16 );
+    DECLARE_ALIGNED_16( int sum0 );
+    DECLARE_ALIGNED_16( int sum1 );
+    DECLARE_ALIGNED_16( int sum2 );
+    DECLARE_ALIGNED_16( int sum3 );
     int y;
 
     LOAD_ZERO;
@@ -1414,9 +1414,9 @@ static void pixel_sad_x4_8x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix
 static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
 {
-    DECLARE_ALIGNED( int, sum0, 16 );
-    DECLARE_ALIGNED( int, sum1, 16 );
-    DECLARE_ALIGNED( int, sum2, 16 );
+    DECLARE_ALIGNED_16( int sum0 );
+    DECLARE_ALIGNED_16( int sum1 );
+    DECLARE_ALIGNED_16( int sum2 );
     int y;
 
     LOAD_ZERO;
@@ -1523,7 +1523,7 @@ static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix
 static int pixel_ssd_16x16_altivec ( uint8_t *pix1, int i_stride_pix1,
                                      uint8_t *pix2, int i_stride_pix2)
 {
-    DECLARE_ALIGNED( int, sum, 16 );
+    DECLARE_ALIGNED_16( int sum );
 
     int y;
     LOAD_ZERO;
@@ -1607,7 +1607,7 @@ static int pixel_ssd_16x16_altivec ( uint8_t *pix1, int i_stride_pix1,
 static int pixel_ssd_8x8_altivec ( uint8_t *pix1, int i_stride_pix1,
                                    uint8_t *pix2, int i_stride_pix2)
 {
-    DECLARE_ALIGNED( int, sum, 16 );
+    DECLARE_ALIGNED_16( int sum );
 
     int y;
     LOAD_ZERO;
@@ -1801,7 +1801,7 @@ static void ssim_4x4x2_core_altivec( const uint8_t *pix1, int stride1,
                                      const uint8_t *pix2, int stride2,
                                      int sums[2][4] )
 {
-    DECLARE_ALIGNED( int, temp[4], 16 );
+    DECLARE_ALIGNED_16( int temp[4] );
 
     int y;
     vec_u8_t pix1v, pix2v;
diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
index 1c40cfc4..3982a0d9 100644
--- a/common/x86/predict-c.c
+++ b/common/x86/predict-c.c
@@ -455,7 +455,7 @@ void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )\
     PREDICT_8x8_LOAD_TOP\
     PREDICT_8x8_LOAD_LEFT\
     int t;\
-    DECLARE_ALIGNED( int16_t, sa8d_1d[2][8], 16 );\
+    DECLARE_ALIGNED_16( int16_t sa8d_1d[2][8] );\
     SUMSUB(l0,l4,l1,l5,l2,l6,l3,l7);\
     SUMSUB(l0,l2,l1,l3,l4,l6,l5,l7);\
     SUMSUB(l0,l1,l2,l3,l4,l5,l6,l7);\
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 13152a52..bde51e82 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -45,7 +45,7 @@ typedef struct
     /* 8x8 */
     int i_cost8x8;
     /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
-    DECLARE_ALIGNED( int, mvc[32][5][2], 8 );
+    DECLARE_ALIGNED_8( int mvc[32][5][2] );
     x264_me_t me8x8[4];
 
     /* Sub 4x4 */
@@ -586,7 +586,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
     /* 8x8 prediction selection */
     if( flags & X264_ANALYSE_I8x8 )
     {
-        DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+        DECLARE_ALIGNED_16( uint8_t edge[33] );
         x264_pixel_cmp_t sa8d = (*h->pixf.mbcmp == *h->pixf.sad) ? h->pixf.sad[PIXEL_8x8] : h->pixf.sa8d[PIXEL_8x8];
         int i_satd_thresh = a->b_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
         int i_cost = 0;
@@ -857,7 +857,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
     }
     else if( h->mb.i_type == I_8x8 )
     {
-        DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+        DECLARE_ALIGNED_16( uint8_t edge[33] );
         for( idx = 0; idx < 4; idx++ )
         {
             uint64_t pels_h = 0;
@@ -1166,7 +1166,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
 {
     x264_me_t m;
     uint8_t **p_fenc = h->mb.pic.p_fenc;
-    DECLARE_ALIGNED( int, mvc[3][2], 8 );
+    DECLARE_ALIGNED_8( int mvc[3][2] );
     int i, j;
 
     /* XXX Needed for x264_mb_predict_mv */
@@ -1216,7 +1216,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
 {
     x264_me_t m;
     uint8_t **p_fenc = h->mb.pic.p_fenc;
-    DECLARE_ALIGNED( int, mvc[3][2], 8 );
+    DECLARE_ALIGNED_8( int mvc[3][2] );
     int i, j;
 
     /* XXX Needed for x264_mb_predict_mv */
@@ -1263,7 +1263,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
 static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
 {
-    DECLARE_ALIGNED( uint8_t, pix1[16*8], 8 );
+    DECLARE_ALIGNED_8( uint8_t pix1[16*8] );
     uint8_t *pix2 = pix1+8;
     const int i_stride = h->mb.pic.i_stride[1];
     const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
@@ -1443,8 +1443,8 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
 
 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
 {
-    DECLARE_ALIGNED( uint8_t, pix1[16*16], 16 );
-    DECLARE_ALIGNED( uint8_t, pix2[16*16], 16 );
+    DECLARE_ALIGNED_16( uint8_t pix1[16*16] );
+    DECLARE_ALIGNED_16( uint8_t pix2[16*16] );
     uint8_t *src2;
     int stride2 = 16;
     int weight;
@@ -1655,7 +1655,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
     uint8_t **p_fref[2] =
         { h->mb.pic.p_fref[0][a->l0.i_ref],
           h->mb.pic.p_fref[1][a->l1.i_ref] };
-    DECLARE_ALIGNED( uint8_t, pix[2][8*8], 8 );
+    DECLARE_ALIGNED_8( uint8_t pix[2][8*8] );
     int i, l;
 
     /* XXX Needed for x264_mb_predict_mv */
@@ -1719,8 +1719,8 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
     uint8_t **p_fref[2] =
         { h->mb.pic.p_fref[0][a->l0.i_ref],
           h->mb.pic.p_fref[1][a->l1.i_ref] };
-    DECLARE_ALIGNED( uint8_t, pix[2][16*8], 16 );
-    DECLARE_ALIGNED( int, mvc[2][2], 8 );
+    DECLARE_ALIGNED_16( uint8_t pix[2][16*8] );
+    DECLARE_ALIGNED_8( int mvc[2][2] );
     int i, l;
 
     h->mb.i_partition = D_16x8;
@@ -1788,8 +1788,8 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
     uint8_t **p_fref[2] =
         { h->mb.pic.p_fref[0][a->l0.i_ref],
           h->mb.pic.p_fref[1][a->l1.i_ref] };
-    DECLARE_ALIGNED( uint8_t, pix[2][8*16], 8 );
-    DECLARE_ALIGNED( int, mvc[2][2], 8 );
+    DECLARE_ALIGNED_8( uint8_t pix[2][8*16] );
+    DECLARE_ALIGNED_8( int mvc[2][2] );
     int i, l;
 
     h->mb.i_partition = D_8x16;
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index eb221b7a..37422bda 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -85,7 +85,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
     int y = 4 * block_idx_y[idx];
     uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
     uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
-    DECLARE_ALIGNED( int16_t, dct4x4[4][4], 16 );
+    DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
 
     if( h->mb.b_lossless )
     {
@@ -113,7 +113,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
     int y = 8 * (idx>>1);
     uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
     uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
-    DECLARE_ALIGNED( int16_t, dct8x8[8][8], 16 );
+    DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
 
     h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
 
@@ -132,7 +132,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
     uint8_t *p_src = h->mb.pic.p_fenc[0];
     uint8_t *p_dst = h->mb.pic.p_fdec[0];
 
-    DECLARE_ALIGNED( int16_t, dct4x4[16+1][4][4], 16 );
+    DECLARE_ALIGNED_16( int16_t dct4x4[16+1][4][4] );
 
     int i;
@@ -195,8 +195,8 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
         uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
         int i_decimate_score = 0;
 
-        DECLARE_ALIGNED( int16_t, dct2x2[2][2] , 16 );
-        DECLARE_ALIGNED( int16_t, dct4x4[4][4][4], 16 );
+        DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
+        DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
 
         if( h->mb.b_lossless )
         {
@@ -366,7 +366,7 @@ void x264_macroblock_encode( x264_t *h )
     }
     else if( h->mb.i_type == I_8x8 )
     {
-        DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+        DECLARE_ALIGNED_16( uint8_t edge[33] );
         h->mb.b_transform_8x8 = 1;
         /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
         if( h->mb.i_skip_intra )
@@ -431,7 +431,7 @@ void x264_macroblock_encode( x264_t *h )
     }
     else if( h->mb.b_transform_8x8 )
     {
-        DECLARE_ALIGNED( int16_t, dct8x8[4][8][8], 16 );
+        DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
         int nnz8x8[4] = {1,1,1,1};
         b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
         h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
@@ -476,7 +476,7 @@ void x264_macroblock_encode( x264_t *h )
     }
     else
     {
-        DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
+        DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
         int nnz8x8[4] = {1,1,1,1};
         h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
 
@@ -616,9 +616,9 @@ void x264_macroblock_encode( x264_t *h )
  *****************************************************************************/
 int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
 {
-    DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
-    DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
-    DECLARE_ALIGNED( int16_t, dctscan[16], 16 );
+    DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
+    DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
+    DECLARE_ALIGNED_16( int16_t dctscan[16] );
 
     int i_qp = h->mb.i_qp;
     int mvp[2];
@@ -786,7 +786,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
 
         if( h->mb.b_transform_8x8 )
         {
-            DECLARE_ALIGNED( int16_t, dct8x8[8][8], 16 );
+            DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
             h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
             h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] );
             h->zigzagf.scan_8x8( h->dct.luma8x8[i8], dct8x8 );
@@ -805,7 +805,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
         else
         {
             int i4;
-            DECLARE_ALIGNED( int16_t, dct4x4[4][4][4], 16 );
+            DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
             h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
             h->quantf.quant_4x4( dct4x4[0], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
             h->quantf.quant_4x4( dct4x4[1], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
@@ -836,7 +836,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
 
         for( ch = 0; ch < 2; ch++ )
         {
-            DECLARE_ALIGNED( int16_t, dct4x4[4][4], 16 );
+            DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
             p_fenc = h->mb.pic.p_fenc[1+ch] + (i8&1)*4 + (i8>>1)*4*FENC_STRIDE;
             p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
 
diff --git a/encoder/me.c b/encoder/me.c
index 32d60448..cd5a9fcc 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -161,7 +161,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int
     int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
     int omx, omy, pmx, pmy;
     uint8_t *p_fref = m->p_fref[0];
-    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
+    DECLARE_ALIGNED_16( uint8_t pix[16*16] );
     int i, j;
     int dir;
 
@@ -462,8 +462,8 @@ me_hex2:
          * because sum(abs(diff)) >= abs(diff(sum)). */
         const int stride = m->i_stride[0];
         uint16_t *sums_base = m->integral;
-        DECLARE_ALIGNED( static uint8_t, zero[16*16], 16 ) = {0,};
-        DECLARE_ALIGNED( int, enc_dc[4], 16 );
+        DECLARE_ALIGNED_16( static uint8_t zero[16*16] );
+        DECLARE_ALIGNED_16( int enc_dc[4] );
         int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
         int delta = x264_pixel_size[sad_size].w;
         int16_t xs_buf[64];
@@ -671,7 +671,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
     const int i_pixel = m->i_pixel;
     const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
 
-    DECLARE_ALIGNED( uint8_t, pix[2][32*18], 16 ); // really 17x17, but round up for alignment
+    DECLARE_ALIGNED_16( uint8_t pix[2][32*18] ); // really 17x17, but round up for alignment
     int omx, omy;
     int i;
 
@@ -822,9 +822,9 @@ int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight
     const int16_t *p_cost_m0y = m0->p_cost_mv - x264_clip3( m0->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
     const int16_t *p_cost_m1x = m1->p_cost_mv - x264_clip3( m1->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
     const int16_t *p_cost_m1y = m1->p_cost_mv - x264_clip3( m1->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
-    DECLARE_ALIGNED( uint8_t, pix0[9][16*16], 16 );
-    DECLARE_ALIGNED( uint8_t, pix1[9][16*16], 16 );
-    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
+    DECLARE_ALIGNED_16( uint8_t pix0[9][16*16] );
+    DECLARE_ALIGNED_16( uint8_t pix1[9][16*16] );
+    DECLARE_ALIGNED_16( uint8_t pix[16*16] );
     int bm0x = m0->mv[0], om0x = bm0x;
     int bm0y = m0->mv[1], om0y = bm0y;
     int bm1x = m1->mv[0], om1x = bm1x;
@@ -912,7 +912,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 )
     const int bh = x264_pixel_size[m->i_pixel].h>>2;
     const int i_pixel = m->i_pixel;
 
-    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
+    DECLARE_ALIGNED_16( uint8_t pix[16*16] );
     int bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX;
     int bmx = m->mv[0];
     int bmy = m->mv[1];
diff --git a/encoder/me.h b/encoder/me.h
index a1cc0afb..e9760930 100644
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -44,7 +44,7 @@ typedef struct
     /* output */
     int cost_mv; /* lambda * nbits for the chosen mv */
     int cost;    /* satd + lambda * nbits */
-    DECLARE_ALIGNED( int, mv[2], 8 );
+    DECLARE_ALIGNED_8( int mv[2] );
 } x264_me_t;
 
 void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh );
diff --git a/encoder/slicetype.c b/encoder/slicetype.c
index b0e6359a..722aa223 100644
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -54,7 +54,7 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
     const int i_stride = fenc->i_stride_lowres;
     const int i_pel_offset = 8 * ( i_mb_x + i_mb_y * i_stride );
 
-    DECLARE_ALIGNED( uint8_t, pix1[9*FDEC_STRIDE], 8 );
+    DECLARE_ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] );
     uint8_t *pix2 = pix1+8;
     x264_me_t m[2];
     int i_bcost = COST_MAX;
@@ -218,7 +218,7 @@ lowres_intra_mb:
 
     if( i_icost < i_bcost * 2 )
     {
-        DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+        DECLARE_ALIGNED_16( uint8_t edge[33] );
         x264_predict_8x8_filter( pix, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
         for( i=3; i<9; i++ )
         {
diff --git a/gtk/test.c b/gtk/test.c
index a4c5609f..81b7e64c 100644
--- a/gtk/test.c
+++ b/gtk/test.c
@@ -3,8 +3,6 @@
 
 #include 
 
-#define DECLARE_ALIGNED( type, var, n ) type var __attribute__((aligned(n)))
-
 #include "../x264.h"
 #include "../common/common.h"
 
diff --git a/tools/checkasm.c b/tools/checkasm.c
index 9e71e612..3686ef98 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -34,7 +34,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
     x264_predict_t predict_8x8c[4+3];
     x264_predict_t predict_4x4[9+3];
     x264_predict8x8_t predict_8x8[9+3];
-    DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+    DECLARE_ALIGNED_16( uint8_t edge[33] );
     uint16_t cost_mv[32];
     int ret = 0, ok, used_asm;
     int i, j;
@@ -160,8 +160,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
     for( i=0; i<100 && ok; i++ )
         if( pixel_asm.ads[i&3] != pixel_ref.ads[i&3] )
         {
-            DECLARE_ALIGNED( uint16_t, sums[72], 16 );
-            DECLARE_ALIGNED( int, dc[4], 16 );
+            DECLARE_ALIGNED_16( uint16_t sums[72] );
+            DECLARE_ALIGNED_16( int dc[4] );
             int16_t mvs_a[32], mvs_c[32];
             int mvn_a, mvn_c;
             int thresh = rand() & 0x3fff;
@@ -196,10 +196,10 @@ static int check_dct( int cpu_ref, int cpu_new )
     x264_dct_function_t dct_asm;
     x264_quant_function_t qf;
     int ret = 0, ok, used_asm, i;
-    int16_t dct1[16][4][4] __attribute__((aligned(16)));
-    int16_t dct2[16][4][4] __attribute__((aligned(16)));
-    int16_t dct4[16][4][4] __attribute__((aligned(16)));
-    int16_t dct8[4][8][8] __attribute__((aligned(16)));
+    DECLARE_ALIGNED_16( int16_t dct1[16][4][4] );
+    DECLARE_ALIGNED_16( int16_t dct2[16][4][4] );
+    DECLARE_ALIGNED_16( int16_t dct4[16][4][4] );
+    DECLARE_ALIGNED_16( int16_t dct8[4][8][8] );
     x264_t h_buf;
     x264_t *h = &h_buf;
@@ -288,8 +288,8 @@ static int check_dct( int cpu_ref, int cpu_new )
     ok = 1; used_asm = 0;
     if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )
     {
-        int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
-        int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
+        DECLARE_ALIGNED_16( int16_t dct1[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
+        DECLARE_ALIGNED_16( int16_t dct2[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
         used_asm = 1;
         call_c( dct_c.dct4x4dc, dct1 );
         call_a( dct_asm.dct4x4dc, dct2 );
@@ -301,8 +301,8 @@ static int check_dct( int cpu_ref, int cpu_new )
     }
     if( dct_asm.idct4x4dc != dct_ref.idct4x4dc )
     {
-        int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
-        int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
+        DECLARE_ALIGNED_16( int16_t dct1[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
+        DECLARE_ALIGNED_16( int16_t dct2[4][4] ) = {{-12, 42, 23, 67},{2, 90, 89,56},{67,43,-76,91},{56,-78,-54,1}};
         used_asm = 1;
         call_c( dct_c.idct4x4dc, dct1 );
         call_a( dct_asm.idct4x4dc, dct2 );
@@ -317,8 +317,8 @@ static int check_dct( int cpu_ref, int cpu_new )
     ok = 1; used_asm = 0;
     if( dct_asm.dct2x2dc != dct_ref.dct2x2dc )
     {
-        int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
-        int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
+        DECLARE_ALIGNED_16( int16_t dct1[2][2] ) = {{-12, 42},{2, 90}};
+        DECLARE_ALIGNED_16( int16_t dct2[2][2] ) = {{-12, 42},{2, 90}};
         used_asm = 1;
         call_c( dct_c.dct2x2dc, dct1 );
         call_a( dct_asm.dct2x2dc, dct2 );
@@ -330,8 +330,8 @@ static int check_dct( int cpu_ref, int cpu_new )
     }
     if( dct_asm.idct2x2dc != dct_ref.idct2x2dc )
     {
-        int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
-        int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
+        DECLARE_ALIGNED_16( int16_t dct1[2][2] ) = {{-12, 42},{2, 90}};
+        DECLARE_ALIGNED_16( int16_t dct2[2][2] ) = {{-12, 42},{2, 90}};
         used_asm = 1;
         call_c( dct_c.idct2x2dc, dct1 );
         call_a( dct_asm.idct2x2dc, dct2 );
@@ -347,8 +347,8 @@ static int check_dct( int cpu_ref, int cpu_new )
     x264_zigzag_function_t zigzag_ref;
     x264_zigzag_function_t zigzag_asm;
 
-    int16_t level1[64] __attribute__((aligned(16)));
-    int16_t level2[64] __attribute__((aligned(16)));
+    DECLARE_ALIGNED_16( int16_t level1[64] );
+    DECLARE_ALIGNED_16( int16_t level2[64] );
 
 #define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
     if( zigzag_asm.name != zigzag_ref.name ) \
@@ -627,9 +627,9 @@ static int check_quant( int cpu_ref, int cpu_new )
     x264_quant_function_t qf_c;
     x264_quant_function_t qf_ref;
     x264_quant_function_t qf_a;
-    int16_t dct1[64] __attribute__((__aligned__(16)));
-    int16_t dct2[64] __attribute__((__aligned__(16)));
-    uint8_t cqm_buf[64] __attribute__((__aligned__(16)));
+    DECLARE_ALIGNED_16( int16_t dct1[64] );
+    DECLARE_ALIGNED_16( int16_t dct2[64] );
+    DECLARE_ALIGNED_16( uint8_t cqm_buf[64] );
     int ret = 0, ok, used_asm;
     int oks[2] = {1,1}, used_asms[2] = {0,0};
     int i, i_cqm, qp;
@@ -782,7 +782,7 @@ static int check_intra( int cpu_ref, int cpu_new )
 {
     int ret = 0, ok = 1, used_asm = 0;
     int i;
-    DECLARE_ALIGNED( uint8_t, edge[33], 16 );
+    DECLARE_ALIGNED_16( uint8_t edge[33] );
     struct
     {
         x264_predict_t predict_16x16[4+3];