}
static ALWAYS_INLINE void x264_macroblock_cache_rect1( void *dst, int width, int height, uint8_t val )
{
- int dy;
if( width == 4 )
{
uint32_t val2 = val * 0x01010101;
- for( dy = 0; dy < height; dy++ )
- ((uint32_t*)dst)[2*dy] = val2;
+ ((uint32_t*)dst)[0] = val2;
+ if( height >= 2 ) ((uint32_t*)dst)[2] = val2;
+ if( height == 4 ) ((uint32_t*)dst)[4] = val2;
+ if( height == 4 ) ((uint32_t*)dst)[6] = val2;
}
else // 2
{
uint32_t val2 = val * 0x0101;
- for( dy = 0; dy < height; dy++ )
- ((uint16_t*)dst)[4*dy] = val2;
+ ((uint16_t*)dst)[ 0] = val2;
+ if( height >= 2 ) ((uint16_t*)dst)[ 4] = val2;
+ if( height == 4 ) ((uint16_t*)dst)[ 8] = val2;
+ if( height == 4 ) ((uint16_t*)dst)[12] = val2;
}
}
static ALWAYS_INLINE void x264_macroblock_cache_rect4( void *dst, int width, int height, uint32_t val )
{
- int dy, dx;
+ int dy;
if( width == 1 || WORD_SIZE < 8 )
{
for( dy = 0; dy < height; dy++ )
- for( dx = 0; dx < width; dx++ )
- ((uint32_t*)dst)[dx+8*dy] = val;
+ {
+ ((uint32_t*)dst)[8*dy+0] = val;
+ if( width >= 2 ) ((uint32_t*)dst)[8*dy+1] = val;
+ if( width == 4 ) ((uint32_t*)dst)[8*dy+2] = val;
+ if( width == 4 ) ((uint32_t*)dst)[8*dy+3] = val;
+ }
}
else
{
uint64_t val64 = val + ((uint64_t)val<<32);
for( dy = 0; dy < height; dy++ )
- for( dx = 0; dx < width/2; dx++ )
- ((uint64_t*)dst)[dx+4*dy] = val64;
+ {
+ ((uint64_t*)dst)[4*dy+0] = val64;
+ if( width == 4 ) ((uint64_t*)dst)[4*dy+1] = val64;
+ }
}
}
#define x264_macroblock_cache_mv_ptr(a,x,y,w,h,l,mv) x264_macroblock_cache_mv(a,x,y,w,h,l,*(uint32_t*)mv)