Less verbose: rename the X264_HIGH_BIT_DEPTH macro to HIGH_BIT_DEPTH throughout (C sources, x86/ARM/PPC assembly, and the build config check).
X86SRC0 = const-a.asm cabac-a.asm dct-a.asm deblock-a.asm mc-a.asm \
mc-a2.asm pixel-a.asm predict-a.asm quant-a.asm \
cpu-a.asm dct-32.asm bitstream-a.asm
-ifneq ($(findstring X264_HIGH_BIT_DEPTH, $(CONFIG)),)
+ifneq ($(findstring HIGH_BIT_DEPTH, $(CONFIG)),)
X86SRC0 += sad16-a.asm
else
X86SRC0 += sad-a.asm
void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
{
if( w->i_scale == 1<<w->i_denom )
src += stride;
}
}
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
{
if( !(cpu&X264_CPU_ARMV6) )
return;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf->prefetch_fenc = x264_prefetch_fenc_arm;
pf->prefetch_ref = x264_prefetch_ref_arm;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
if( !(cpu&X264_CPU_NEON) )
return;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf->copy_16x16_unaligned = x264_mc_copy_w16_neon;
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_neon;
pf->copy[PIXEL_8x8] = x264_mc_copy_w8_neon;
pf->get_ref = get_ref_neon;
pf->hpel_filter = hpel_filter_neon;
pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
#ifndef SYS_MACOSX
if (!(cpu&X264_CPU_ARMV6))
return;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf[I_PRED_4x4_H] = x264_predict_4x4_h_armv6;
pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_armv6;
pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_armv6;
return;
pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_neon;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
if (!(cpu&X264_CPU_NEON))
return;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_neon;
pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_neon;
pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left_neon;
pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_neon;
pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_neon;
pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_neon;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
if (!(cpu&X264_CPU_NEON))
return;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_neon;
pf[I_PRED_8x8_H] = x264_predict_8x8_h_neon;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
if (!(cpu&X264_CPU_NEON))
return;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf[I_PRED_16x16_DC ] = x264_predict_16x16_dc_neon;
pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_neon;
pf[I_PRED_16x16_DC_LEFT]= x264_predict_16x16_dc_left_neon;
pf[I_PRED_16x16_H ] = x264_predict_16x16_h_neon;
pf[I_PRED_16x16_V ] = x264_predict_16x16_v_neon;
pf[I_PRED_16x16_P ] = x264_predict_16x16_p_neon;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
#define CP64(dst,src) M64(dst) = M64(src)
#define CP128(dst,src) M128(dst) = M128(src)
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
typedef uint16_t pixel;
typedef uint64_t pixel4;
typedef int32_t dctcoef;
dctf->dct4x4dc = dct4x4dc;
dctf->idct4x4dc = idct4x4dc;
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
{
dctf->sub16x16_dct = x264_sub16x16_dct_mmx;
}
#endif // HAVE_MMX
-#else // !X264_HIGH_BIT_DEPTH
+#else // !HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
{
dctf->add16x16_idct8= x264_add16x16_idct8_neon;
}
#endif
-#endif // X264_HIGH_BIT_DEPTH
+#endif // HIGH_BIT_DEPTH
}
void x264_dct_init_weights( void )
pf->sub_8x8 = zigzag_sub_8x8_field;
pf->sub_4x4 = zigzag_sub_4x4_field;
pf->sub_4x4ac = zigzag_sub_4x4ac_field;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMXEXT )
{
if( cpu&X264_CPU_ALTIVEC )
pf->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
#endif
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
else
{
pf->sub_8x8 = zigzag_sub_8x8_frame;
pf->sub_4x4 = zigzag_sub_4x4_frame;
pf->sub_4x4ac = zigzag_sub_4x4ac_frame;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
pf->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
if( cpu&X264_CPU_NEON )
pf->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
#endif
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
if( cpu&X264_CPU_SHUFFLE_IS_FAST )
pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
#endif
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
void x264_deblock_v_chroma_intra_mmxext( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mmxext( pixel *pix, int stride, int alpha, int beta );
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
void x264_deblock_v_luma_mmxext( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_luma_intra_mmxext( pixel *pix, int stride, int alpha, int beta );
#else
x264_deblock_v8_luma_intra_mmxext( pix, stride, alpha, beta );
x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
}
-#endif // X264_HIGH_BIT_DEPTH
+#endif // HIGH_BIT_DEPTH
#endif
#endif
}
#endif
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
{
// pf->deblock_chroma[0] = x264_deblock_h_chroma_neon;
}
#endif
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
return -1;
}
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
if( !(src->img.i_csp & X264_CSP_HIGH_DEPTH) )
{
x264_log( h, X264_LOG_ERROR, "This build of x264 requires high depth input. Rebuild to support 8-bit input.\n" );
#endif
}
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
# define pack_pixel_1to2 pack16to32
# define pack_pixel_2to4 pack32to64
#else
SAD_X( 4x8 )
SAD_X( 4x4 )
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#if ARCH_UltraSparc
SAD_X( 16x16_vis )
SAD_X( 16x8_vis )
SAD_X( 8x16_vis )
SAD_X( 8x8_vis )
#endif
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
/****************************************************************************
* pixel_satd_x4
SATD_X_DECL7()
#if HAVE_MMX
SATD_X_DECL7( _mmxext )
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
SATD_X_DECL6( _sse2 )
SATD_X_DECL7( _ssse3 )
SATD_X_DECL7( _sse4 )
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
#endif
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#if HAVE_ARMV6
SATD_X_DECL7( _neon )
#endif
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
#define INTRA_MBCMP_8x8( mbcmp, cpu )\
void x264_intra_##mbcmp##_x3_8x8##cpu( pixel *fenc, pixel edge[33], int res[3] )\
INTRA_MBCMP_8x8( sad, )
INTRA_MBCMP_8x8(sa8d, )
-#if X264_HIGH_BIT_DEPTH && HAVE_MMX
+#if HIGH_BIT_DEPTH && HAVE_MMX
INTRA_MBCMP_8x8( sad, _mmxext)
INTRA_MBCMP_8x8( sad, _sse2 )
INTRA_MBCMP_8x8( sad, _ssse3 )
INTRA_MBCMP( sad, 16, v, h, dc, , )
INTRA_MBCMP(satd, 16, v, h, dc, , )
-#if X264_HIGH_BIT_DEPTH && HAVE_MMX
+#if HIGH_BIT_DEPTH && HAVE_MMX
INTRA_MBCMP( sad, 4, v, h, dc, , _mmxext)
INTRA_MBCMP(satd, 4, v, h, dc, , _mmxext)
INTRA_MBCMP( sad, 8, dc, h, v, c, _mmxext)
pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16;
pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16;
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMXEXT )
{
pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_ssse3;
}
#endif // HAVE_MMX
-#else // !X264_HIGH_BIT_DEPTH
+#else // !HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
{
}
}
#endif
-#endif // X264_HIGH_BIT_DEPTH
+#endif // HIGH_BIT_DEPTH
#if HAVE_ALTIVEC
if( cpu&X264_CPU_ALTIVEC )
{
x264_pixel_altivec_init( pixf );
}
#endif
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#if ARCH_UltraSparc
INIT4( sad, _vis );
INIT4( sad_x3, _vis );
INIT4( sad_x4, _vis );
#endif
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
pixf->ads[PIXEL_8x16] =
pixf->ads[PIXEL_8x4] =
#include "common/common.h"
#include "ppccommon.h"
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#define VEC_DCT(a0,a1,a2,a3,b0,b1,b2,b3) \
b1 = vec_add( a0, a3 ); \
b3 = vec_add( a1, a2 ); \
vec_st( tmp0v, 0x00, level );
vec_st( tmp1v, 0x10, level );
}
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
#include "common/common.h"
#include "ppccommon.h"
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#define transpose4x16(r0, r1, r2, r3) \
{ \
register vec_u8_t r4; \
transpose4x16(line1, line2, line3, line4);
write16x4(pix-2, stride, line1, line2, line3, line4);
}
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
#include "mc.h"
#include "ppccommon.h"
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
typedef void (*pf_mc_t)( uint8_t *src, int i_src,
uint8_t *dst, int i_dst, int i_height );
dstc += dst_stride;
}
}
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
void x264_mc_altivec_init( x264_mc_functions_t *pf )
{
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf->mc_luma = mc_luma_altivec;
pf->get_ref = get_ref_altivec;
pf->mc_chroma = mc_chroma_altivec;
pf->hpel_filter = x264_hpel_filter_altivec;
pf->frame_init_lowres_core = frame_init_lowres_core_altivec;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
#include "common/common.h"
#include "ppccommon.h"
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
/***********************************************************************
* SAD routines
**********************************************************************/
sums[0][3] = temp[0];
sums[1][3] = temp[1];
}
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
/****************************************************************************
* x264_pixel_init:
****************************************************************************/
void x264_pixel_altivec_init( x264_pixel_function_t *pixf )
{
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pixf->sad[PIXEL_16x16] = pixel_sad_16x16_altivec;
pixf->sad[PIXEL_8x16] = pixel_sad_8x16_altivec;
pixf->sad[PIXEL_16x8] = pixel_sad_16x8_altivec;
pixf->hadamard_ac[PIXEL_8x8] = x264_pixel_hadamard_ac_8x8_altivec;
pixf->ssim_4x4x2_core = ssim_4x4x2_core_altivec;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
#include "pixel.h"
#include "ppccommon.h"
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
static void predict_8x8c_p_altivec( uint8_t *src )
{
int H = 0, V = 0;
src += FDEC_STRIDE;
}
}
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
/****************************************************************************
****************************************************************************/
void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
{
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf[I_PRED_16x16_V ] = predict_16x16_v_altivec;
pf[I_PRED_16x16_H ] = predict_16x16_h_altivec;
pf[I_PRED_16x16_DC] = predict_16x16_dc_altivec;
pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left_altivec;
pf[I_PRED_16x16_DC_TOP ] = predict_16x16_dc_top_altivec;
pf[I_PRED_16x16_DC_128 ] = predict_16x16_dc_128_altivec;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
void x264_predict_8x8c_init_altivec( x264_predict_t pf[7] )
{
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf[I_PRED_CHROMA_P] = predict_8x8c_p_altivec;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
#include "ppccommon.h"
#include "quant.h"
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
// quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
#define QUANT_16_U( idx0, idx1 ) \
{ \
DEQUANT_SHR();
}
}
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15;
pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16;
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
{
pf->quant_8x8 = x264_quant_8x8_sse4;
}
#endif // HAVE_MMX
-#else // !X264_HIGH_BIT_DEPTH
+#else // !HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
{
pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon;
}
#endif
-#endif // X264_HIGH_BIT_DEPTH
+#endif // HIGH_BIT_DEPTH
pf->coeff_last[ DCT_LUMA_DC] = pf->coeff_last[DCT_LUMA_4x4];
pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[ DCT_LUMA_AC];
pf->coeff_level_run[ DCT_LUMA_DC] = pf->coeff_level_run[DCT_LUMA_4x4];
movq [r0+24], m3
RET
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void sub4x4_dct( int32_t dct[4][4], uint16_t *pix1, uint16_t *pix2 )
;-----------------------------------------------------------------------------
STORE_DIFF m2, m4, m5, [r0+32], [r0+40]
STORE_DIFF m3, m4, m5, [r0+48], [r0+56]
RET
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
%macro SUB_DCT4 1
;-----------------------------------------------------------------------------
; void sub4x4_dct( int16_t dct[4][4], uint8_t *pix1, uint8_t *pix2 )
SUB_DCT4 mmx
SUB_DCT4 ssse3
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void add4x4_idct( uint8_t *p_dst, int16_t dct[4][4] )
;-----------------------------------------------------------------------------
movd [r0+FDEC_STRIDE*2], m0
pextrd [r0+FDEC_STRIDE*3], m0, 1
RET
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
INIT_MMX
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
%macro SUB_NxN_DCT 6
cglobal %1, 3,3,11*(mmsize/16)
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
%if mmsize == 8
pxor m7, m7
%else
add r2, 4*FDEC_STRIDE
mova m7, [hsub_mul]
%endif
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
.skip_prologue:
%ifdef WIN64
sub rsp, 8
%endif
%endmacro
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
INIT_MMX
SUB_NxN_DCT sub8x8_dct_mmx, sub4x4_dct_mmx.skip_prologue, 64, 8, 0, 0
SUB_NxN_DCT sub16x16_dct_mmx, sub8x8_dct_mmx.skip_prologue, 64, 16, 8, 8
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
%ifndef ARCH_X86_64
SUB_NxN_DCT sub8x8_dct_mmx, sub4x4_dct_mmx.skip_prologue, 32, 4, 0, 0
ADD_NxN_IDCT add8x8_idct_mmx, add4x4_idct_mmx.skip_prologue, 32, 4, 0, 0
cextern sub8x8_dct8_ssse3.skip_prologue
SUB_NxN_DCT sub16x16_dct8_ssse3, sub8x8_dct8_ssse3.skip_prologue, 128, 8, 0, 0
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
cextern pw_00ff
cextern pw_pixel_max
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
; out: %4 = |%1-%2|-%3
; clobbers: %5
%macro ABS_SUB 5
DEBLOCK_LUMA sse2
DEBLOCK_LUMA_INTRA sse2
%endif
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
; expands to [base],...,[base+7*stride]
%define PASS8ROWS(base, base3, stride, stride3) \
[base], [base+stride], [base+stride*2], [base3], \
INIT_MMX
DEBLOCK_LUMA_INTRA mmxext, v8
%endif
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
; out: %1=p0', %2=q0'
%macro CHROMA_DEBLOCK_P0_Q0_INTRA 7
%endif
INIT_XMM
DEBLOCK_CHROMA sse2
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
%macro CHROMA_V_START 0
dec r2d ; alpha-1
dec r3d ; beta-1
INIT_MMX
DEBLOCK_CHROMA_INTRA mmxext
%endif
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
; P frame explicit weighted prediction
;=============================================================================
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%macro WEIGHT_START 1 ; (width)
movd m2, [r4+32] ; denom
movd m3, [r4+36] ; scale
%endrep
%endmacro
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
%macro WEIGHT_START 1
mova m3, [r4]
%endrep
%endmacro
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
;void mc_weight_wX( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, weight_t *weight, int h )
%endif
%assign XMMREGS 7
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%assign NUMREGS NUMREGS+1
%assign XMMREGS 8
%endif
WEIGHTER 8, sse2
WEIGHTER 16, sse2
WEIGHTER 20, sse2
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
WEIGHTER 12, sse2
%else
%define WEIGHT WEIGHT_SSSE3
; pixel avg2
;=============================================================================
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void pixel_avg2_wN( uint16_t *dst, int dst_stride,
; uint16_t *src1, int src_stride,
lea r0, [r0+r1*2]
jg .height_loop
REP_RET
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void pixel_avg2_w4( uint8_t *dst, int dst_stride,
; uint8_t *src1, int src_stride,
%assign j j+1
%assign k k+1
%endrep
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
;=============================================================================
; pixel copy
%1 [r0+%3], m3
%endmacro
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%macro COPY_ONE 6
COPY4 %1, %2, %3, %4
%endmacro
MC_COPY ONE, 8, movu, sse2, 0
MC_COPY TWO, 16, movu, sse2, 8
MC_COPY TWO, 16, mova, aligned_sse2, 8
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
INIT_MMX
;-----------------------------------------------------------------------------
; void mc_copy_w4( uint8_t *dst, int i_dst_stride,
; but with SSE3 the overhead is zero, so there's no reason not to include it.
COPY_W16_SSE2 mc_copy_w16_sse3, lddqu
COPY_W16_SSE2 mc_copy_w16_aligned_sse2, movdqa
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
add r3, t0 ; src += (dx>>3) + (dy>>3) * src_stride
%endmacro
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%macro UNPACK_UNALIGNED 4
movu %1, [%4+0]
movu %2, [%4+4]
shufps %2, %3, 11011101b
%endif
%endmacro
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
%macro UNPACK_UNALIGNED_MEM 3
punpcklwd %1, %3
%endmacro
movh %2, %3
punpcklwd %1, %2
%endmacro
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void mc_chroma( uint8_t *dstu, uint8_t *dstv, int dst_stride,
pshufd m5, m5, 0x55
jg .width8
%endif
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
add r2, r2
UNPACK_UNALIGNED m0, m1, m2, r3
%else
mova m1, m0
pand m0, [pw_00ff]
psrlw m1, 8
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
pmaddwd m0, m7
pmaddwd m1, m7
packssdw m0, m1
SWAP m3, m0
ALIGN 4
.loop2:
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
UNPACK_UNALIGNED m0, m1, m2, r3+r4
pmullw m3, m6
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
movu m0, [r3+r4]
UNPACK_UNALIGNED m0, m1, [r3+r4+2]
pmullw m3, m6
mova m1, m0
pand m0, [pw_00ff]
psrlw m1, 8
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
pmaddwd m0, m7
pmaddwd m1, m7
mova m2, [pw_32]
pmullw m0, m5
paddw m0, m2
psrlw m0, 6
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
movh [r0], m0
%if mmsize == 8
psrlq m0, 32
%else
movhps [r1], m0
%endif
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
packuswb m0, m0
movd [r0], m0
%if mmsize==8
psrldq m0, 4
%endif
movd [r1], m0
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
add r3, r4
add r0, r2
add r1, r2
%endif
FIX_STRIDES r2
.loopx:
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
UNPACK_UNALIGNED m0, m2, m4, r3
UNPACK_UNALIGNED m1, m3, m5, r3+mmsize
%else
add r3, r4
ALIGN 4
.loop4:
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
UNPACK_UNALIGNED m0, m1, m2, r3
pmaddwd m0, m7
pmaddwd m1, m7
pmaddwd m1, m7
pmaddwd m2, m7
packssdw m1, m2
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
movu m0, [r3]
movu m1, [r3+mmsize/2]
UNPACK_UNALIGNED m0, m2, [r3+2]
pmaddwd m3, m7
packssdw m0, m2
packssdw m1, m3
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
pmullw m4, m6
pmullw m5, m6
mova m2, [pw_32]
paddw m1, m3
psrlw m0, 6
psrlw m1, 6
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
movh [r0], m0
movh [r0+mmsize/2], m1
%if mmsize==8
movhps [r1], m0
movhps [r1+mmsize/2], m1
%endif
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
packuswb m0, m1
%if mmsize==8
pshufw m1, m0, 0x8
movq [r0], m0
movhps [r1], m0
%endif
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
add r3, r4
add r0, r2
add r1, r2
movd m5, r5d
mov r6d, 2*SIZEOF_PIXEL
.mc1d:
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%if mmsize == 16
WIN64_SPILL_XMM 8
%endif
shr r5d, 1
%endif
.loop1d_w4:
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%if mmsize == 8
movq m0, [r3+0]
movq m2, [r3+8]
SBUTTERFLY wd, 0, 2, 6
SBUTTERFLY wd, 1, 3, 7
%endif
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
movq m0, [r3]
movq m1, [r3+r6]
%if mmsize!=8
pand m1, [pw_00ff]
psrlw m2, 8
psrlw m3, 8
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
pmullw m0, m4
pmullw m1, m5
pmullw m2, m4
paddw m2, m3
psrlw m0, 3
psrlw m2, 3
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%if mmsize == 8
xchg r4, r11
xchg r2, r10
movhps [r0], m0
movhps [r1], m2
%endif
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
packuswb m0, m2
%if mmsize==8
xchg r4, r11
movd [r0], m0
movd [r1], m1
%endif
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
add r3, r4
add r0, r2
add r1, r2
REP_RET
%endmacro
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
INIT_MMX
MC_CHROMA mmxext
INIT_XMM
MC_CHROMA sse2
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
INIT_MMX
%define UNPACK_UNALIGNED UNPACK_UNALIGNED_MEM
MC_CHROMA mmxext
MC_CHROMA sse2
MC_CHROMA_SSSE3
MC_CHROMA_SSSE3 _cache64
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
;%define movntps movaps
;%define sfence
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void hpel_filter_v( uint16_t *dst, uint16_t *src, int16_t *buf, int stride, int width );
;-----------------------------------------------------------------------------
HPEL_FILTER mmxext
INIT_XMM
HPEL_FILTER sse2
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
INIT_MMX
%macro HPEL_V 1-2 0
%undef movntq
%undef movntps
%undef sfence
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void plane_copy_core( uint8_t *dst, int i_dst,
x264_pixel_avg2_w20_##name5,\
};
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
/* we can replace w12/w20 with w10/w18 as only 9/17 pixels in fact are important */
#define x264_pixel_avg2_w12_mmxext x264_pixel_avg2_w10_mmxext
#define x264_pixel_avg2_w20_mmxext x264_pixel_avg2_w18_mmxext
#define x264_pixel_avg2_w12_cache64_sse2 x264_pixel_avg2_w16_cache64_sse2
#define x264_pixel_avg2_w12_sse3 x264_pixel_avg2_w16_sse3
#define x264_pixel_avg2_w12_sse2 x264_pixel_avg2_w16_sse2
-#endif // X264_HIGH_BIT_DEPTH
+#endif // HIGH_BIT_DEPTH
PIXEL_AVG_WTAB(mmxext, mmxext, mmxext, mmxext, mmxext, mmxext)
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
PIXEL_AVG_WTAB(sse2, mmxext, sse2, sse2, sse2, sse2)
-#else // !X264_HIGH_BIT_DEPTH
+#else // !HIGH_BIT_DEPTH
#if ARCH_X86
PIXEL_AVG_WTAB(cache32_mmxext, mmxext, cache32_mmxext, cache32_mmxext, cache32_mmxext, cache32_mmxext)
PIXEL_AVG_WTAB(cache64_mmxext, mmxext, cache64_mmxext, cache64_mmxext, cache64_mmxext, cache64_mmxext)
PIXEL_AVG_WTAB(sse2_misalign, mmxext, mmxext, sse2, sse2, sse2_misalign)
PIXEL_AVG_WTAB(cache64_sse2, mmxext, cache64_mmxext, cache64_sse2, cache64_sse2, cache64_sse2)
PIXEL_AVG_WTAB(cache64_ssse3, mmxext, cache64_mmxext, cache64_ssse3, cache64_ssse3, cache64_sse2)
-#endif // X264_HIGH_BIT_DEPTH
+#endif // HIGH_BIT_DEPTH
#define MC_COPY_WTAB(instr, name1, name2, name3)\
static void (* const x264_mc_copy_wtab_##instr[5])( pixel *, int, pixel *, int, int ) =\
x264_mc_##function##_w20_##instr,\
};
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
MC_WEIGHT_WTAB(weight,mmxext,mmxext,mmxext,12)
MC_WEIGHT_WTAB(weight,sse2,mmxext,sse2,12)
#else
w->cacheb[i] = w->i_offset;
}
}
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
MC_LUMA(mmxext,mmxext,mmx)
MC_LUMA(sse2,sse2,sse2)
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#if ARCH_X86
MC_LUMA(cache32_mmxext,cache32_mmxext,mmx)
MC_LUMA(cache64_mmxext,cache64_mmxext,mmx)
#endif
MC_LUMA(cache64_sse2,cache64_sse2,sse2)
MC_LUMA(cache64_ssse3,cache64_ssse3,sse2)
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
#define GET_REF(name)\
static pixel *get_ref_##name( pixel *dst, int *i_dst_stride,\
GET_REF(mmxext)
GET_REF(sse2)
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#if ARCH_X86
GET_REF(cache32_mmxext)
GET_REF(cache64_mmxext)
GET_REF(sse2_misalign)
GET_REF(cache64_sse2)
GET_REF(cache64_ssse3)
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
#define HPEL(align, cpu, cpuv, cpuc, cpuh)\
void x264_hpel_filter_v_##cpuv( pixel *dst, pixel *src, int16_t *buf, int stride, int width);\
}
HPEL(8, mmxext, mmxext, mmxext, mmxext)
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
HPEL(16, sse2, sse2, sse2, sse2 )
-#else // !X264_HIGH_BIT_DEPTH
+#else // !HIGH_BIT_DEPTH
HPEL(16, sse2_amd, mmxext, mmxext, sse2)
#if ARCH_X86_64
void x264_hpel_filter_sse2( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, int stride, int width, int height, int16_t *buf );
PLANE_INTERLEAVE(mmxext)
PLANE_INTERLEAVE(sse2)
-#endif // X264_HIGH_BIT_DEPTH
+#endif // HIGH_BIT_DEPTH
void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
{
pf->hpel_filter = x264_hpel_filter_mmxext;
pf->weight = x264_mc_weight_wtab_mmxext;
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
if( !(cpu&X264_CPU_SSE2) )
return;
if( (cpu&X264_CPU_SHUFFLE_IS_FAST) && !(cpu&X264_CPU_SLOW_ATOM) )
pf->integral_init4v = x264_integral_init4v_ssse3;
-#else // !X264_HIGH_BIT_DEPTH
+#else // !HIGH_BIT_DEPTH
pf->offsetadd = x264_mc_offsetadd_wtab_mmxext;
pf->offsetsub = x264_mc_offsetsub_wtab_mmxext;
pf->weight_cache = x264_weight_cache_mmxext;
pf->integral_init4h = x264_integral_init4h_sse4;
pf->integral_init8h = x264_integral_init8h_sse4;
-#endif // X264_HIGH_BIT_DEPTH
+#endif // HIGH_BIT_DEPTH
}
; SSD
;=============================================================================
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; int pixel_ssd_MxN( uint16_t *, int, uint16_t *, int )
;-----------------------------------------------------------------------------
SSD_ONE 8, 16, sse2
SSD_ONE 16, 8, sse2
SSD_ONE 16, 16, sse2
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
%macro SSD_LOAD_FULL 5
mova m1, [t0+%1]
mova m2, [t2+%2]
SSD 4, 4, ssse3
SSD 4, 8, ssse3
%assign function_align 16
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void pixel_ssd_nv12_core( uint16_t *pixuv1, int stride1, uint16_t *pixuv2, int stride2,
; For 10-bit MMX this means width >= 16416 and for XMM >= 32832. At sane
; distortion levels it will take much more than that though.
;-----------------------------------------------------------------------------
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%macro SSD_NV12 1-2 0
cglobal pixel_ssd_nv12_core_%1, 6,7,7*(mmsize/16)
shl r4d, 2
movq [r4], m5
RET
%endmacro ; SSD_NV12
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void pixel_ssd_nv12_core( uint8_t *pixuv1, int stride1, uint8_t *pixuv2, int stride2,
; int width, int height, uint64_t *ssd_u, uint64_t *ssd_v )
%macro VAR_START 1
pxor m5, m5 ; sum
pxor m6, m6 ; sum squared
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
%if %1
mova m7, [pw_00ff]
%else
pxor m7, m7 ; zero
%endif
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
%endmacro
%macro VAR_END 2
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%if mmsize == 8 && %1*%2 == 256
HADDUW m5, m2
%else
HADDW m5, m2
%endif
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
HADDW m5, m2
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
movd eax, m5
HADDD m6, m1
movd edx, m6
%macro VAR_2ROW 2
mov r2d, %2
.loop:
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
mova m0, [r0]
mova m1, [r0+mmsize]
mova m3, [r0+%1]
mova m4, [r0+%1+mmsize]
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
mova m0, [r0]
mova m1, m0
mova m3, [r0+%1]
mova m4, m3
punpcklbw m0, m7
punpckhbw m1, m7
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
%ifidn %1, r1
lea r0, [r0+%1*2]
%else
add r0, r1
%endif
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
punpcklbw m3, m7
punpckhbw m4, m7
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
dec r2d
VAR_CORE
jg .loop
VAR_END 8, 8
INIT_XMM
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
cglobal pixel_var_16x16_sse2, 2,3,8
FIX_STRIDES r1
VAR_START 0
mova m4, [r0+r2*2]
VAR_CORE
VAR_END 8, 8
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
cglobal pixel_var_16x16_sse2, 2,3,8
VAR_START 1
mov r2d, 8
dec r2d
jg .loop
VAR_END 8, 8
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
%macro VAR2_END 0
HADDW m5, m7
VAR_START 0
mov r5d, 8
.loop:
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
mova m0, [r0]
mova m1, [r0+mmsize]
psubw m0, [r2]
psubw m1, [r2+mmsize]
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
movq m0, [r0]
movq m1, m0
movq m2, [r2]
punpckhbw m3, m7
psubw m0, m2
psubw m1, m3
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
paddw m5, m0
paddw m5, m1
pmaddwd m0, m0
VAR_START 1
mov r5d, 4
.loop:
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
mova m0, [r0]
mova m1, [r0+r1*2]
mova m2, [r2]
mova m3, [r2+r3*2]
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
movq m1, [r0]
movhps m1, [r0+r1]
movq m3, [r2]
movhps m3, [r2+r3]
DEINTB 0, 1, 2, 3, 7
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
psubw m0, m2
psubw m1, m3
paddw m5, m0
VAR2_END
RET
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
cglobal pixel_var2_8x8_ssse3, 5,6,8
pxor m5, m5 ; sum
pxor m6, m6 ; sum squared
jg .loop
VAR2_END
RET
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
;=============================================================================
; SATD
%endmacro
%macro SATD_END_MMX 0
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
HADDUW m0, m1
movd eax, m0
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
pshufw m1, m0, 01001110b
paddw m0, m1
pshufw m1, m0, 10110001b
paddw m0, m1
movd eax, m0
and eax, 0xffff
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
RET
%endmacro
paddw m0, m1
ret
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%macro SATD_MxN_MMX 3
cglobal pixel_satd_%1x%2_mmxext, 4,7
SATD_START_MMX
SATD_MxN_MMX 16, 16, 4
SATD_MxN_MMX 16, 8, 4
SATD_MxN_MMX 8, 16, 8
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
cglobal pixel_satd_16x16_mmxext, 4,6
SATD_START_MMX
pxor m0, m0
lea r2, [r2+4*r3]
call pixel_satd_8x8_internal_mmxext
SATD_END_MMX
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
cglobal pixel_satd_8x8_mmxext, 4,6
SATD_START_MMX
%define lh m0
%define rh [esp+48]
%endif
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
HADDUW m0, m1
paddd lh, rh
%else
paddusw lh, rh
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
%endmacro
%macro SA8D 1
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%define vertical 1
%elifidn %1, sse2 ; sse2 doesn't seem to like the horizontal way of doing things
%define vertical 1
mova m7, [hmul_8p]
%endif
call pixel_sa8d_8x8_internal_%1
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
HADDUW m0, m1
%else
HADDW m0, m1
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
movd eax, m0
add eax, 1
shr eax, 1
call pixel_sa8d_8x8_internal_%1 ; pix[0]
add r2, 8*SIZEOF_PIXEL
add r0, 8*SIZEOF_PIXEL
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
HADDUW m0, m1
%endif
mova m10, m0
call pixel_sa8d_8x8_internal_%1 ; pix[8*stride]
SA8D_INTER
SWAP m0, m10
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
HADDUW m0, m1
%endif
movd eax, m0
lea r4, [3*r1]
lea r5, [3*r3]
call pixel_sa8d_8x8_internal_%1
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
HADDUW m0, m1
%else
HADDW m0, m1
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
movd eax, m0
add eax, 1
shr eax, 1
lea r0, [r0+4*r1]
lea r2, [r2+4*r3]
%endif
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
HADDUW m0, m1
%endif
mova [esp+48], m0
%endif
mova [esp+64-mmsize], m0
call pixel_sa8d_8x8_internal_%1
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
SA8D_INTER
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
paddusw m0, [esp+64-mmsize]
%if mmsize == 16
HADDUW m0, m1
paddd m0, m2
HADDD m0, m1
%endif
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
movd eax, m0
add eax, 1
shr eax, 1
; in: r0=pix, r1=stride, r2=stride*3, r3=tmp, m6=mask_ac4, m7=0
; out: [tmp]=hadamard4, m0=satd
cglobal hadamard_ac_4x4_mmxext
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
mova m0, [r0]
mova m1, [r0+r1]
mova m2, [r0+r1*2]
mova m3, [r0+r2]
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
movh m0, [r0]
movh m1, [r0+r1]
movh m2, [r0+r1*2]
punpcklbw m1, m7
punpcklbw m2, m7
punpcklbw m3, m7
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
HADAMARD4_2D 0, 1, 2, 3, 4
mova [r3], m0
mova [r3+8], m1
ABS4 m0, m2, m1, m3, m4, m5
HADAMARD 0, max, 0, 2, 4, 5
HADAMARD 0, max, 1, 3, 4, 5
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
pmaddwd m0, m7
pmaddwd m1, m7
paddd m6, m0
paddd m6, m1
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
paddw m7, m0
paddw m7, m1
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
SAVE_MM_PERMUTATION hadamard_ac_2x2max_mmxext
ret
%macro AC_PREP 2
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
pmaddwd %1, %2
%endif
%endmacro
%macro AC_PADD 3
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
AC_PREP %2, %3
paddd %1, %2
%else
paddw %1, %2
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
%endmacro
cglobal hadamard_ac_8x8_mmxext
mova m6, [mask_ac4]
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
mova m7, [pw_1]
%else
pxor m7, m7
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
call hadamard_ac_4x4_mmxext
add r0, 4*SIZEOF_PIXEL
add r3, 32
AC_PADD m5, m0, m7
sub r3, 40
mova [rsp+gprsize+8], m5 ; save satd
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
pxor m6, m6
%endif
%rep 3
HADAMARD 0, sumsub, 0, 2, 4, 5
ABS4 m1, m3, m0, m2, m4, m5
HADAMARD 0, max, 1, 3, 4, 5
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
pand m0, [mask_ac4]
pmaddwd m1, m7
pmaddwd m0, m7
paddd m6, m6
paddd m0, m6
SWAP m0, m6
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
pand m6, m0
paddw m7, m1
paddw m6, m2
paddw m7, m7
paddw m6, m7
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
mova [rsp+gprsize], m6 ; save sa8d
SWAP m0, m6
SAVE_MM_PERMUTATION hadamard_ac_8x8_mmxext
%macro HADAMARD_AC_WXH_SUM_MMXEXT 2
mova m1, [rsp+1*mmsize]
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%if %1*%2 >= 128
paddd m0, [rsp+2*mmsize]
paddd m1, [rsp+3*mmsize]
HADDD m0, m2
psrld m1, 1
HADDD m1, m3
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
%if %1*%2 >= 128
paddusw m0, [rsp+2*mmsize]
paddusw m1, [rsp+3*mmsize]
%endif
psrlw m1, 1
HADDW m1, m3
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
%endmacro
%macro HADAMARD_AC_WXH_MMX 2
HADAMARD_AC_WXH_MMX 8, 8
%macro LOAD_INC_8x4W_SSE2 5
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
movu m%1, [r0]
movu m%2, [r0+r1]
movu m%3, [r0+r1*2]
%ifidn %1, 0
lea r0, [r0+r1*4]
%endif
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
movh m%1, [r0]
movh m%2, [r0+r1]
movh m%3, [r0+r1*2]
punpcklbw m%2, m%5
punpcklbw m%3, m%5
punpcklbw m%4, m%5
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
%endmacro
%macro LOAD_INC_8x4W_SSSE3 5
%define spill1 [rsp+gprsize+16]
%define spill2 [rsp+gprsize+32]
%endif
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%define vertical 1
%elifidn %1, sse2
%define vertical 1
AC_PREP m2, [pw_1]
AC_PADD m2, m3, [pw_1]
AC_PADD m2, m1, [pw_1]
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
paddd m2, m2
%else
paddw m2, m2
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
ABS1 m4, m7
pand m0, [mask_ac8]
ABS1 m0, m7
%macro HADAMARD_AC_WXH_SUM_SSE2 2
mova m1, [rsp+2*mmsize]
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%if %1*%2 >= 128
paddd m0, [rsp+3*mmsize]
paddd m1, [rsp+4*mmsize]
%endif
HADDD m0, m2
HADDD m1, m3
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
%if %1*%2 >= 128
paddusw m0, [rsp+3*mmsize]
paddusw m1, [rsp+4*mmsize]
%endif
HADDUW m0, m2
HADDW m1, m3
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
%endmacro
; struct { int satd, int sa8d; } pixel_hadamard_ac_16x16( uint8_t *pix, int stride )
SA8D sse2
SATDS_SSE2 sse2
INTRA_SA8D_SSE2 sse2
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
INTRA_SATDS_MMX mmxext
%endif
HADAMARD_AC_SSE2 sse2
%define DIFFOP DIFF_SUMSUB_SSSE3
%define JDUP JDUP_CONROE
%define LOAD_DUP_4x8P LOAD_DUP_4x8P_CONROE
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
%define LOAD_INC_8x4W LOAD_INC_8x4W_SSSE3
%define LOAD_SUMSUB_8x4P LOAD_SUMSUB_8x4P_SSSE3
%define LOAD_SUMSUB_16P LOAD_SUMSUB_16P_SSSE3
void x264_predict_16x16_v_sse2( uint8_t *src );
void x264_predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c );
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
#else
INTRA_SA8D_X3(mmxext)
#endif
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
/****************************************************************************
* Exported functions:
{
if( !(cpu&X264_CPU_MMX) )
return;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf[I_PRED_16x16_V] = x264_predict_16x16_v_mmx;
if( !(cpu&X264_CPU_MMXEXT) )
return;
#ifdef __GNUC__
pf[I_PRED_16x16_P] = x264_predict_16x16_p_ssse3;
#endif
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
{
if( !(cpu&X264_CPU_MMX) )
return;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
#if ARCH_X86_64
pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
#endif
#ifdef __GNUC__
pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_ssse3;
#endif
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
{
if( !(cpu&X264_CPU_MMXEXT) )
return;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf[I_PRED_8x8_V] = x264_predict_8x8_v_mmxext;
pf[I_PRED_8x8_H] = x264_predict_8x8_h_mmxext;
pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_mmxext;
pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_ssse3;
pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_ssse3;
*predict_8x8_filter = x264_predict_8x8_filter_ssse3;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
{
if( !(cpu&X264_CPU_MMXEXT) )
return;
-#if !X264_HIGH_BIT_DEPTH
+#if !HIGH_BIT_DEPTH
pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_mmxext;
pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
-#endif // !X264_HIGH_BIT_DEPTH
+#endif // !HIGH_BIT_DEPTH
}
%macro QUANT_DC_START_MMX 0
movd m6, r1m ; mf
movd m7, r2m ; bias
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
SPLATD m6, m6
SPLATD m7, m7
%else
SPLATW m6, m6
SPLATW m7, m7
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
%endmacro
%macro QUANT_DC_START_SSSE3 0
setne al
%endmacro
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%macro QUANT_ONE_DC_MMX 4
mova m0, [%1]
PABSD m1, m0
%undef QUANT_TWO_AC
%undef QUANT_ONE_DC
%undef QUANT_TWO_DC
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
-%ifndef X264_HIGH_BIT_DEPTH
+%ifndef HIGH_BIT_DEPTH
%macro QUANT_ONE 4
;;; %1 (m64) dct[y][x]
;;; %2 (m64/mmx) mf[y][x] or mf[0][0] (as uint16_t)
QUANT_DC quant_4x4_dc_sse4, 2, 8
QUANT_AC quant_4x4_sse4, 2
QUANT_AC quant_8x8_sse4, 8
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
INIT_XMM
DEQUANT_DC sse2
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void denoise_dct( int32_t *dct, uint32_t *sum, uint32_t *offset, int size )
;-----------------------------------------------------------------------------
%define PSIGND PSIGND_SSSE3
DENOISE_DCT ssse3, 8
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
%define PSIGNW PSIGNW_SSSE3
DENOISE_DCT ssse3, 7
-%endif ; !X264_HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; int decimate_score( dctcoef *dct )
;-----------------------------------------------------------------------------
%macro DECIMATE_MASK_SSE2 7
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
movdqa xmm0, [%3+ 0]
movdqa xmm1, [%3+32]
packssdw xmm0, [%3+16]
%endmacro
%macro DECIMATE_MASK_MMX 7
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
movq mm0, [%3+ 0]
movq mm1, [%3+16]
movq mm2, [%3+32]
xor %1, %3
%endmacro
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%macro LAST_MASK4_MMX 2-3
movq mm0, [%2]
packssdw mm0, [%2+8]
shl %3, 8
or %1, %3
%endmacro
-%else ; !X264_HIGH_BIT_DEPTH
+%else ; !HIGH_BIT_DEPTH
%macro LAST_MASK4_MMX 2-3
movq mm0, [%2]
packsswb mm0, mm0
COEFF_LAST4 mmxext
%define LAST LAST_SSE4A
COEFF_LAST4 mmxext_lzcnt
-%endif ; X264_HIGH_BIT_DEPTH
+%endif ; HIGH_BIT_DEPTH
%macro COEFF_LAST 1
cglobal coeff_last15_%1, 1,3
mov [t1], t4d
.loop:
LZCOUNT t3d, t5d, 0x1f
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
mov t2d, [t0+t4*4]
mov [t1+t6 +4+16*4], t3b
mov [t1+t6*4+ 4], t2d
%assign SIZEOF_PIXEL 1
%assign SIZEOF_DCTCOEF 2
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%assign SIZEOF_PIXEL 2
%assign SIZEOF_DCTCOEF 4
%endif
%macro LOAD_DIFF 5
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
mova %1, %4
psubw %1, %5
%elifidn %3, none
packuswb %2, %1
%endmacro
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%macro STORE_DIFF 5
punpcklwd %2, %1
punpckhwd %3, %1
%endmacro
%macro FIX_STRIDES 1-*
-%ifdef X264_HIGH_BIT_DEPTH
+%ifdef HIGH_BIT_DEPTH
%rep %0
add %1, %1
%rotate 1
fi
if [ "$bit_depth" -gt "8" ]; then
- define X264_HIGH_BIT_DEPTH
- ASFLAGS="$ASFLAGS -DX264_HIGH_BIT_DEPTH"
+ define HIGH_BIT_DEPTH
+ ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH"
fi
define BIT_DEPTH $bit_depth
x264_log( h, X264_LOG_WARNING, "invalid DTS: PTS is less than DTS\n" );
pic_out->img.i_csp = X264_CSP_NV12;
-#if X264_HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
pic_out->img.i_csp |= X264_CSP_HIGH_DEPTH;
#endif
pic_out->img.i_plane = h->fdec->i_plane;