Place the ALWAYS_INLINE/inline specifier immediately after "static".
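A minimal standalone sketch of the ordering change (illustrative function names, not from the patch): both orderings compile, but putting the function specifier after the return type is an obsolescent style that compilers such as GCC flag (e.g. via -Wold-style-declaration). In x264, ALWAYS_INLINE expands to an inline specifier plus a compiler attribute on GCC-style compilers, so the same rule applies to it.

/* Illustrative only, not part of the patch. */
static int inline sum_old( int a, int b )   /* obsolescent: specifier after the type */
{
    return a + b;
}

static inline int sum_new( int a, int b )   /* preferred: specifier before the type */
{
    return a + b;
}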
// included at the end because it needs x264_t
#include "macroblock.h"
-static int ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
+static ALWAYS_INLINE int x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
{
int cnt = 0;
for( int i = 0; i < i_mvc; i++ )
return cnt;
}
-static int ALWAYS_INLINE x264_predictor_clip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
+static ALWAYS_INLINE int x264_predictor_clip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
{
int cnt = 0;
int qpel_limit[4] = {mv_limit[0][0] << 2, mv_limit[0][1] << 2, mv_limit[1][0] << 2, mv_limit[1][1] << 2};
add8x8_idct8( &dst[8*FDEC_STRIDE+8], dct[3] );
}
-static void inline add4x4_idct_dc( pixel *p_dst, dctcoef dc )
+static inline void add4x4_idct_dc( pixel *p_dst, dctcoef dc )
{
dc = (dc + 32) >> 6;
for( int i = 0; i < 4; i++, p_dst += FDEC_STRIDE )
return 0;
}
-static void ALWAYS_INLINE pixel_memset( pixel *dst, pixel *src, int len, int size )
+static ALWAYS_INLINE void pixel_memset( pixel *dst, pixel *src, int len, int size )
{
uint8_t *dstp = (uint8_t*)dst;
uint32_t v1 = *src;
}
}
-static void ALWAYS_INLINE plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma )
+static ALWAYS_INLINE void plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma )
{
#define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
for( int y = 0; y < i_height; y++ )
dst[i*FDEC_STRIDE] = src[i*FDEC_STRIDE];
}
-static void ALWAYS_INLINE macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
+static ALWAYS_INLINE void macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
{
int mb_interlaced = b_mbaff && MB_INTERLACED;
int height = b_chroma ? 16 >> CHROMA_V_SHIFT : 16;
{{ 4, 5, 6, 3}, { 3, 7, 11, 15}, {16+1, 16+5, 32+1, 32+5}, {0, 1, 2, 3}, {0, 0, 1, 1}}
};
-static void ALWAYS_INLINE macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y, int b_interlaced )
+static ALWAYS_INLINE void macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y, int b_interlaced )
{
const int mb_interlaced = b_interlaced && MB_INTERLACED;
int top_y = mb_y - (1 << mb_interlaced);
# define LBOT 0
#endif
-static void ALWAYS_INLINE macroblock_cache_load( x264_t *h, int mb_x, int mb_y, int b_mbaff )
+static ALWAYS_INLINE void macroblock_cache_load( x264_t *h, int mb_x, int mb_y, int b_mbaff )
{
macroblock_cache_load_neighbours( h, mb_x, mb_y, b_mbaff );
macroblock_deblock_strength_mbaff( h, bs );
}
-static void ALWAYS_INLINE macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
+static ALWAYS_INLINE void macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
{
int height = b_chroma ? 16>>CHROMA_V_SHIFT : 16;
int i_stride = h->fdec->i_stride[i];
h->mc.copy[PIXEL_16x16]( &h->fdec->plane[i][i_pix_offset], i_stride2, h->mb.pic.p_fdec[i], FDEC_STRIDE, 16 );
}
-static void ALWAYS_INLINE macroblock_backup_intra( x264_t *h, int mb_x, int mb_y, int b_mbaff )
+static ALWAYS_INLINE void macroblock_backup_intra( x264_t *h, int mb_x, int mb_y, int b_mbaff )
{
/* In MBAFF we store the last two rows in intra_border_backup[0] and [1].
* For progressive mbs this is the bottom two rows, and for interlaced the
#endif
/* For values with 4 bits or less. */
-static int ALWAYS_INLINE x264_ctz_4bit( uint32_t x )
+static ALWAYS_INLINE int x264_ctz_4bit( uint32_t x )
{
static uint8_t lut[16] = {4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
return lut[x];
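For context, a tiny standalone sketch of the 4-bit LUT idea used above (names are illustrative; the masking is added here for safety and is not in the original, which assumes x < 16):

#include <stdint.h>
/* 16-entry table: index of the lowest set bit; entry 0 encodes "no bit set". */
static const uint8_t ctz4_lut[16] = {4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
static inline int ctz_4bit_sketch( uint32_t x )
{
    return ctz4_lut[x & 0xf];   /* valid whenever only the low 4 bits matter */
}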
#define x264_clz(x) __builtin_clz(x)
#define x264_ctz(x) __builtin_ctz(x)
#else
-static int ALWAYS_INLINE x264_clz( uint32_t x )
+static ALWAYS_INLINE int x264_clz( uint32_t x )
{
static uint8_t lut[16] = {4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0};
int y, z = (((x >> 16) - 1) >> 27) & 16;
return z + lut[x];
}
-static int ALWAYS_INLINE x264_ctz( uint32_t x )
+static ALWAYS_INLINE int x264_ctz( uint32_t x )
{
static uint8_t lut[16] = {4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
int y, z = (((x & 0xffff) - 1) >> 27) & 16;
* chroma: for the complete mb: if score < 7 -> null
*/
-static int ALWAYS_INLINE decimate_score_internal( dctcoef *dct, int i_max )
+static ALWAYS_INLINE int decimate_score_internal( dctcoef *dct, int i_max )
{
const uint8_t *ds_table = (i_max == 64) ? x264_decimate_table8 : x264_decimate_table4;
int i_score = 0;
}
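The scoring idea behind the comment above, as a hedged standalone sketch (the run-cost values here are illustrative; the real tables are x264_decimate_table4/x264_decimate_table8): any coefficient with |level| > 1 disqualifies decimation outright, a ±1 level costs less the longer the run of zeros preceding it, and a total below the threshold (e.g. < 7 for the whole-mb chroma case noted above) means the block can be zeroed.

#include <stdint.h>
#include <stdlib.h>

static int decimate_score_sketch( const int16_t *coef, int n )
{
    static const uint8_t run_cost[16] = {3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0};
    int score = 0;
    int idx = n - 1;
    while( idx >= 0 && coef[idx] == 0 )
        idx--;                          /* skip trailing zeros */
    while( idx >= 0 )
    {
        if( abs( coef[idx--] ) > 1 )
            return 9;                   /* any |level| > 1: never decimate */
        int run = 0;
        while( idx >= 0 && coef[idx] == 0 )
        {
            idx--;
            run++;                      /* zeros preceding this ±1 level */
        }
        score += run_cost[run < 15 ? run : 15];
    }
    return score;                       /* caller compares against its threshold */
}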
#define x264_predictor_clip x264_predictor_clip_mmx2
-static int ALWAYS_INLINE x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
+static ALWAYS_INLINE int x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
{
static const uint32_t pd_32 = 0x20;
intptr_t tmp = (intptr_t)mv_limit, mvc_max = i_mvc, i = 0;
/* Same as the above, except we do (mv + 2) >> 2 on the input. */
#define x264_predictor_roundclip x264_predictor_roundclip_mmx2
-static int ALWAYS_INLINE x264_predictor_roundclip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
+static ALWAYS_INLINE int x264_predictor_roundclip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
{
static const uint64_t pw_2 = 0x0002000200020002ULL;
static const uint32_t pd_32 = 0x20;
}
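A hedged sketch of the rounding referred to above (assumed semantics, illustrative names): each quarter-pel candidate component is rounded to full-pel with (mv + 2) >> 2 and then clamped to the full-pel search range before duplicate elimination.

#include <stdint.h>

static inline int16_t roundclip_component_sketch( int16_t mv_qpel, int mv_min, int mv_max )
{
    int fullpel = ( mv_qpel + 2 ) >> 2;        /* round quarter-pel to nearest full-pel
                                                  (arithmetic shift assumed for negatives) */
    if( fullpel < mv_min ) fullpel = mv_min;   /* clamp to the allowed range */
    if( fullpel > mv_max ) fullpel = mv_max;
    return (int16_t)fullpel;
}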
/* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
-static void inline psy_trellis_init( x264_t *h, int do_both_dct )
+static inline void psy_trellis_init( x264_t *h, int do_both_dct )
{
if( do_both_dct || h->mb.b_transform_8x8 )
h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], (pixel*)x264_zero );
cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_B, chroma );
}
-static int ALWAYS_INLINE cabac_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra, int b_dc )
+static ALWAYS_INLINE int cabac_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra, int b_dc )
{
static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020};
cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0 );
}
-static void ALWAYS_INLINE cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+static ALWAYS_INLINE void cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
#if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb );
/* Faster RDO by merging sigmap and level coding. Note that for 8x8dct and chroma 4:2:2 dc this is
* slightly incorrect because the sigmap is not reversible (contexts are repeated). However, there
* is nearly no quality penalty for this (~0.001db) and the speed boost (~30%) is worth it. */
-static void ALWAYS_INLINE cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc )
+static ALWAYS_INLINE void cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc )
{
const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
#define x264_iter_kludge x264_template(iter_kludge)
int x264_iter_kludge = 0;
-static void ALWAYS_INLINE me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2, int rd )
+static ALWAYS_INLINE void me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2, int rd )
{
int x = i8&1;
int y = i8>>1;