Left shifts of negative values are undefined behavior in C, so convert them to multiplications by the matching power of two. Compilers are good at optimizing a multiplication by a power of two back into a shift, so the generated code is unchanged.
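
For illustration (not part of the patch), the pattern applied throughout: a left shift whose left operand may be negative becomes a multiplication. The shift is undefined behavior for negative values; the multiplication is well-defined whenever the result is representable.

/* Illustrative sketch, not from the patch: */
static inline int scale_by_pow2( int v, int bits )
{
    /* return v << bits;       UB when v is negative                  */
    return v * (1 << bits); /* defined; typically compiles to a shift */
}
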
{
uint32_t v = val + (1<<exp_bits);
int k = 31 - x264_clz( v );
- uint32_t x = (bypass_lut[k-exp_bits]<<exp_bits) + v;
+ uint32_t x = ((uint32_t)bypass_lut[k-exp_bits]<<exp_bits) + v;
k = 2*k+1-exp_bits;
int i = ((k-1)&7)+1;
do {
tc++;
}
- delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+ delta = x264_clip3( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc );
pix[-1*xstride] = x264_clip_pixel( p0 + delta ); /* p0' */
pix[ 0*xstride] = x264_clip_pixel( q0 - delta ); /* q0' */
}
if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
{
- int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+ int delta = x264_clip3( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc );
pix[-1*xstride] = x264_clip_pixel( p0 + delta ); /* p0' */
pix[ 0*xstride] = x264_clip_pixel( q0 - delta ); /* q0' */
}
if( !M32(bS) || !alpha || !beta )
return;
- tc[0] = (tc0_table(index_a)[bS[0]] << (BIT_DEPTH-8)) + b_chroma;
- tc[1] = (tc0_table(index_a)[bS[1]] << (BIT_DEPTH-8)) + b_chroma;
- tc[2] = (tc0_table(index_a)[bS[2]] << (BIT_DEPTH-8)) + b_chroma;
- tc[3] = (tc0_table(index_a)[bS[3]] << (BIT_DEPTH-8)) + b_chroma;
+ tc[0] = (tc0_table(index_a)[bS[0]] * (1 << (BIT_DEPTH-8))) + b_chroma;
+ tc[1] = (tc0_table(index_a)[bS[1]] * (1 << (BIT_DEPTH-8))) + b_chroma;
+ tc[2] = (tc0_table(index_a)[bS[2]] * (1 << (BIT_DEPTH-8))) + b_chroma;
+ tc[3] = (tc0_table(index_a)[bS[3]] * (1 << (BIT_DEPTH-8))) + b_chroma;
pf_inter( pix, i_stride, alpha, beta, tc );
}
if( h->mb.cache.varref[l][index] >= 0 )\
{\
h->mb.cache.varref[l][index] >>= 1;\
- h->mb.cache.varmv[l][index][1] <<= 1;\
+ h->mb.cache.varmv[l][index][1] *= 2;\
h->mb.cache.mvd[l][index][1] <<= 1;\
}
MAP_MVS
static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
{
#if WORDS_BIGENDIAN
- return (b&0xFFFF) + (a<<16);
+ return (b&0xFFFF) + ((uint32_t)a<<16);
#else
- return (a&0xFFFF) + (b<<16);
+ return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
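
The casts in pack16to32_mask address a related case: with 32-bit int, a<<16 overflows (undefined behavior) whenever bit 15 of a is set, while the unsigned shift simply wraps. A standalone illustration with hypothetical values:

/* Illustration, hypothetical values, not from the patch: */
uint32_t lo = 0x1234;
int hi = -1;                                            /* low 16 bits are 0xFFFF */
uint32_t packed = (lo & 0xFFFF) + ((uint32_t)hi << 16); /* 0xFFFF1234, defined    */
/* (hi << 16) would be UB: left shift of a negative value */
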
static ALWAYS_INLINE uint64_t pack32to64( uint32_t a, uint32_t b )
static void mc_weight( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
const x264_weight_t *weight, int i_width, int i_height )
{
- int offset = weight->i_offset << (BIT_DEPTH-8);
+ int offset = weight->i_offset * (1 << (BIT_DEPTH-8));
int scale = weight->i_scale;
int denom = weight->i_denom;
if( denom >= 1 )
int i_part_8x8 = i_mb_8x8 + x8 + (ypart>>1) * h->mb.i_b8_stride;
int i_ref1_ref = h->fref[1][0]->ref[0][i_part_8x8];
- int i_ref = (map_col_to_list0(i_ref1_ref>>preshift) << postshift) + (offset&i_ref1_ref&MB_INTERLACED);
+ int i_ref = (map_col_to_list0(i_ref1_ref>>preshift) * (1 << postshift)) + (offset&i_ref1_ref&MB_INTERLACED);
if( i_ref >= 0 )
{
int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
int16_t *mv_col = h->fref[1][0]->mv[0][i_mb_4x4 + 3*x8 + ypart * h->mb.i_b4_stride];
- int16_t mv_y = (mv_col[1]<<yshift)/2;
+ int16_t mv_y = (mv_col[1] * (1 << yshift)) / 2;
int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
int l0y = ( dist_scale_factor * mv_y + 128 ) >> 8;
if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_y > h->mb.mv_max_spel[1]) )
int shift = 1 + MB_INTERLACED - h->mb.field[xy]; \
int16_t *mvp = h->mb.mvr[i_list][i_ref<<1>>shift][xy]; \
mvc[i][0] = mvp[0]; \
- mvc[i][1] = mvp[1]<<1>>shift; \
+ mvc[i][1] = mvp[1]*2>>shift; \
i++; \
}
}
#define DEQUANT_SHL( x ) \
- dct[x] = ( dct[x] * dequant_mf[i_mf][x] ) << i_qbits
+ dct[x] = ( dct[x] * dequant_mf[i_mf][x] ) * (1 << i_qbits)
#define DEQUANT_SHR( x ) \
dct[x] = ( dct[x] * dequant_mf[i_mf][x] + f ) >> (-i_qbits)
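
i_qbits can have either sign: the SHL macro scales up by 2^i_qbits and the SHR macro scales down with rounding. Since dct[x] can be negative, the SHL path now multiplies. A hypothetical worked example:

/* Hypothetical worked example, not from the patch: */
int coef = -3, mf = 16, i_qbits = 2;
int up = (coef * mf) * (1 << i_qbits); /* -48 * 4 = -192, well-defined */
/* (coef * mf) << i_qbits would be UB: -48 is negative */
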
else
w->weightfn = h->mc.offsetadd;
for( int i = 0; i < 8; i++ )
- w->cachea[i] = abs(w->i_offset<<(BIT_DEPTH-8));
+ w->cachea[i] = abs(w->i_offset * (1 << (BIT_DEPTH-8)));
return;
}
w->weightfn = h->mc.weight;
int den1 = 1<<w->i_denom;
int den2 = w->i_scale<<1;
- int den3 = 1+(w->i_offset<<(BIT_DEPTH-8+1));
+ int den3 = 1+(w->i_offset * (1 << (BIT_DEPTH-8+1)));
for( int i = 0; i < 8; i++ )
{
w->cachea[i] = den1;
return;
}
w->weightfn = h->mc.weight;
- den1 = w->i_offset << w->i_denom | (w->i_denom ? 1 << (w->i_denom - 1) : 0);
+ den1 = (w->i_offset * (1<<w->i_denom)) | (w->i_denom ? 1 << (w->i_denom - 1) : 0);
for( i = 0; i < 8; i++ )
{
w->cachea[i] = w->i_scale;
sh->i_disable_deblocking_filter_idc = param->b_sliced_threads ? 2 : 0;
else
sh->i_disable_deblocking_filter_idc = 1;
- sh->i_alpha_c0_offset = param->i_deblocking_filter_alphac0 << 1;
- sh->i_beta_offset = param->i_deblocking_filter_beta << 1;
+ sh->i_alpha_c0_offset = param->i_deblocking_filter_alphac0 * 2;
+ sh->i_beta_offset = param->i_deblocking_filter_beta * 2;
}
static void slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal_ref_idc )
if( M32( mvp ) )
h->mc.mc_chroma( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2], FDEC_STRIDE,
h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
- mvp[0], mvp[1]<<chroma422, 8, chroma422?16:8 );
+ mvp[0], mvp[1] * (1<<chroma422), 8, chroma422?16:8 );
else
h->mc.load_deinterleave_chroma_fdec( h->mb.pic.p_fdec[1], h->mb.pic.p_fref[0][0][4],
h->mb.pic.i_stride[1], chroma422?16:8 );
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel );
#define BITS_MVD( mx, my )\
- (p_cost_mvx[(mx)<<2] + p_cost_mvy[(my)<<2])
+ (p_cost_mvx[(mx)*4] + p_cost_mvy[(my)*4])
#define COST_MV( mx, my )\
do\
p_fref_w + (m1x) + (m1y)*stride,\
p_fref_w + (m2x) + (m2y)*stride,\
stride, costs );\
- costs[0] += p_cost_mvx[(m0x)<<2]; /* no cost_mvy */\
- costs[1] += p_cost_mvx[(m1x)<<2];\
- costs[2] += p_cost_mvx[(m2x)<<2];\
+ costs[0] += p_cost_mvx[(m0x)*4]; /* no cost_mvy */\
+ costs[1] += p_cost_mvx[(m1x)*4];\
+ costs[2] += p_cost_mvx[(m2x)*4];\
COPY3_IF_LT( bcost, costs[0], bmx, m0x, bmy, m0y );\
COPY3_IF_LT( bcost, costs[1], bmx, m1x, bmy, m1y );\
COPY3_IF_LT( bcost, costs[2], bmx, m2x, bmy, m2y );\
}
#define FPEL(mv) (((mv)+2)>>2) /* Convert subpel MV to fullpel with rounding... */
-#define SPEL(mv) ((mv)<<2) /* ... and the reverse. */
+#define SPEL(mv) ((mv)*4) /* ... and the reverse. */
#define SPELx2(mv) (SPEL(mv)&0xFFFCFFFC) /* for two packed MVs */
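
Motion vectors are kept in quarter-pel units, so the fullpel/subpel conversions are a rounded divide and a multiply by four. A worked example with hypothetical values:

/* Worked example, hypothetical values: */
int mv_qpel = 7;             /* 1.75 pixels, in quarter-pel units      */
int mv_fpel = FPEL(mv_qpel); /* (7+2)>>2 = 2, nearest fullpel position */
int back    = SPEL(mv_fpel); /* 2*4 = 8, back to quarter-pel units     */
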
void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
int mv_x_max = h->mb.mv_limit_fpel[1][0];
int mv_y_max = h->mb.mv_limit_fpel[1][1];
/* Special version of pack to allow shortcuts in CHECK_MVRANGE */
-#define pack16to32_mask2(mx,my) ((mx<<16)|(my&0x7FFF))
+#define pack16to32_mask2(mx,my) (((uint32_t)(mx)<<16)|((uint32_t)(my)&0x7FFF))
uint32_t mv_min = pack16to32_mask2( -mv_x_min, -mv_y_min );
uint32_t mv_max = pack16to32_mask2( mv_x_max, mv_y_max )|0x8000;
uint32_t pmv, bpred_mv = 0;
COPY1_IF_LT( bcost, (costs[3]<<4)+12 );
if( !(bcost&15) )
break;
- bmx -= (bcost<<28)>>30;
- bmy -= (bcost<<30)>>30;
+ bmx -= (int32_t)((uint32_t)bcost<<28)>>30;
+ bmy -= (int32_t)((uint32_t)bcost<<30)>>30;
bcost &= ~15;
} while( --i && CHECK_MVRANGE(bmx, bmy) );
bcost >>= 4;
if( dir )
{
bmx = omx + i*(dir>>4);
- bmy = omy + i*((dir<<28)>>28);
+ bmy = omy + i*((int32_t)((uint32_t)dir<<28)>>28);
}
}
} while( ++i <= i_me_range>>2 );
for( int my = min_y; my <= max_y; my++ )
{
int i;
- int ycost = p_cost_mvy[my<<2];
+ int ycost = p_cost_mvy[my*4];
if( bsad <= ycost )
continue;
bsad -= ycost;
for( int my = min_y; my <= max_y; my++ )
{
int i;
- int ycost = p_cost_mvy[my<<2];
+ int ycost = p_cost_mvy[my*4];
if( bcost <= ycost )
continue;
bcost -= ycost;
uint32_t bmv_spel = SPELx2(bmv);
if( h->mb.i_subpel_refine < 3 )
{
- m->cost_mv = p_cost_mvx[bmx<<2] + p_cost_mvy[bmy<<2];
+ m->cost_mv = p_cost_mvx[bmx*4] + p_cost_mvy[bmy*4];
m->cost = bcost;
/* compute the real cost */
if( bmv == pmv ) m->cost += m->cost_mv;
COPY1_IF_LT( bcost, (costs[3]<<6)+48 );
if( !(bcost&63) )
break;
- bmx -= (bcost<<26)>>29;
- bmy -= (bcost<<29)>>29;
+ bmx -= (int32_t)((uint32_t)bcost<<26)>>29;
+ bmy -= (int32_t)((uint32_t)bcost<<29)>>29;
bcost &= ~63;
}
bcost >>= 6;
COPY1_IF_LT( bcost, (costs[1]<<4)+3 );
COPY1_IF_LT( bcost, (costs[2]<<4)+4 );
COPY1_IF_LT( bcost, (costs[3]<<4)+12 );
- bmx -= (bcost<<28)>>30;
- bmy -= (bcost<<30)>>30;
+ bmx -= (int32_t)((uint32_t)bcost<<28)>>30;
+ bmy -= (int32_t)((uint32_t)bcost<<30)>>30;
bcost >>= 4;
}
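
The (bcost<<28)>>30 idiom recovers a signed 2-bit step that was packed into the low bits of the cost: shift the field up to the sign bit, then arithmetic-shift back down to sign-extend it. The added casts make the left shift defined (unsigned) and the sign extension explicit; arithmetic right shift of negative values is implementation-defined in C, though universal on the targets x264 supports. A sketch with hypothetical values:

/* Sketch, hypothetical values: extract the signed 2-bit field in
 * bits [3:2] of bcost (the bmx step from COPY1_IF_LT above). */
uint32_t bcost = (100u << 4) | 0xC; /* cost 100, low nibble 0b1100 */
int step = (int32_t)((uint32_t)bcost << 28) >> 30;
/* bits [3:2] = 0b11 sign-extends to -1, so step == -1 */
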