From e46bf243d4c05f9abb106573b4c46d4fe88caba2 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson
Date: Wed, 2 Jun 2010 02:08:45 +0200
Subject: [PATCH] Convert to a unified "dctcoef" type for DCT data

Necessary for future high bit-depth support.
---
 common/common.h      | 20 ++++++-----
 common/dct.c         | 80 ++++++++++++++++++++++----------------
 common/dct.h         | 42 +++++++++++------------
 common/macroblock.h  |  2 +-
 common/quant.c       | 38 ++++++++++-----------
 common/quant.h       | 26 +++++++-------
 encoder/cabac.c      |  8 ++---
 encoder/cavlc.c      |  2 +-
 encoder/macroblock.c | 52 ++++++++++++++--------------
 encoder/macroblock.h |  6 ++--
 encoder/rdo.c        | 14 ++++----
 tools/checkasm.c     | 32 +++++++++---------
 12 files changed, 163 insertions(+), 159 deletions(-)

diff --git a/common/common.h b/common/common.h index d0d43d26..06b6256c 100644 --- a/common/common.h +++ b/common/common.h @@ -102,11 +102,15 @@ typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; u typedef uint8_t pixel; typedef uint32_t pixel4; +typedef int16_t dctcoef; #define PIXEL_SPLAT_X4(x) ((x)*0x01010101U) #define MPIXEL_X4(src) M32(src) #define CPPIXEL_X4(dst,src) CP32(dst,src) #define CPPIXEL_X8(dst,src) CP64(dst,src) +#define MDCT_X2(dct) M32(dct) +#define CPDCT_X2(dst,src) CP32(dst,src) +#define CPDCT_X4(dst,src) CP64(dst,src) #define X264_SCAN8_SIZE (6*8) #define X264_SCAN8_LUMA_SIZE (5*8) @@ -502,11 +506,11 @@ struct x264_t /* Current MB DCT coeffs */ struct { - ALIGNED_16( int16_t luma16x16_dc[16] ); - ALIGNED_16( int16_t chroma_dc[2][4] ); + ALIGNED_16( dctcoef luma16x16_dc[16] ); + ALIGNED_16( dctcoef chroma_dc[2][4] ); // FIXME share memory? - ALIGNED_16( int16_t luma8x8[4][64] ); - ALIGNED_16( int16_t luma4x4[16+8][16] ); + ALIGNED_16( dctcoef luma8x8[4][64] ); + ALIGNED_16( dctcoef luma4x4[16+8][16] ); } dct; /* MB table and cache for current frame/mb */ @@ -625,16 +629,16 @@ struct x264_t /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */ ALIGNED_16( pixel i4x4_fdec_buf[16*16] ); ALIGNED_16( pixel i8x8_fdec_buf[16*16] ); - ALIGNED_16( int16_t i8x8_dct_buf[3][64] ); - ALIGNED_16( int16_t i4x4_dct_buf[15][16] ); + ALIGNED_16( dctcoef i8x8_dct_buf[3][64] ); + ALIGNED_16( dctcoef i4x4_dct_buf[15][16] ); uint32_t i4x4_nnz_buf[4]; uint32_t i8x8_nnz_buf[4]; int i4x4_cbp; int i8x8_cbp; /* Psy trellis DCT data */ - ALIGNED_16( int16_t fenc_dct8[4][64] ); - ALIGNED_16( int16_t fenc_dct4[16][16] ); + ALIGNED_16( dctcoef fenc_dct8[4][64] ); + ALIGNED_16( dctcoef fenc_dct4[16][16] ); /* Psy RD SATD/SA8D scores cache */ ALIGNED_16( uint64_t fenc_hadamard_cache[9] ); diff --git a/common/dct.c b/common/dct.c index 402f20d7..dc402b85 100644 --- a/common/dct.c +++ b/common/dct.c @@ -35,9 +35,9 @@ int x264_dct4_weight2_zigzag[2][16]; int x264_dct8_weight2_zigzag[2][64]; -static void dct4x4dc( int16_t d[16] ) +static void dct4x4dc( dctcoef d[16] ) { - int16_t tmp[16]; + dctcoef tmp[16]; for( int i = 0; i < 4; i++ ) { @@ -66,9 +66,9 @@ static void dct4x4dc( int16_t d[16] ) } } -static void idct4x4dc( int16_t d[16] ) +static void idct4x4dc( dctcoef d[16] ) { - int16_t tmp[16]; + dctcoef tmp[16]; for( int i = 0; i < 4; i++ ) { @@ -97,7 +97,7 @@ static void idct4x4dc( int16_t d[16] ) } } -static inline void pixel_sub_wxh( int16_t *diff, int i_size, +static inline void pixel_sub_wxh( dctcoef *diff, int i_size, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 ) { for( int y = 0; y < i_size; y++ ) @@ -109,10 +109,10 @@ static inline void pixel_sub_wxh( int16_t *diff, int i_size, } } -static void sub4x4_dct( 
int16_t dct[16], pixel *pix1, pixel *pix2 ) +static void sub4x4_dct( dctcoef dct[16], pixel *pix1, pixel *pix2 ) { - int16_t d[16]; - int16_t tmp[16]; + dctcoef d[16]; + dctcoef tmp[16]; pixel_sub_wxh( d, 4, pix1, FENC_STRIDE, pix2, FDEC_STRIDE ); @@ -143,7 +143,7 @@ static void sub4x4_dct( int16_t dct[16], pixel *pix1, pixel *pix2 ) } } -static void sub8x8_dct( int16_t dct[4][16], pixel *pix1, pixel *pix2 ) +static void sub8x8_dct( dctcoef dct[4][16], pixel *pix1, pixel *pix2 ) { sub4x4_dct( dct[0], &pix1[0], &pix2[0] ); sub4x4_dct( dct[1], &pix1[4], &pix2[4] ); @@ -151,7 +151,7 @@ static void sub8x8_dct( int16_t dct[4][16], pixel *pix1, pixel *pix2 ) sub4x4_dct( dct[3], &pix1[4*FENC_STRIDE+4], &pix2[4*FDEC_STRIDE+4] ); } -static void sub16x16_dct( int16_t dct[16][16], pixel *pix1, pixel *pix2 ) +static void sub16x16_dct( dctcoef dct[16][16], pixel *pix1, pixel *pix2 ) { sub8x8_dct( &dct[ 0], &pix1[0], &pix2[0] ); sub8x8_dct( &dct[ 4], &pix1[8], &pix2[8] ); @@ -161,7 +161,7 @@ static void sub16x16_dct( int16_t dct[16][16], pixel *pix1, pixel *pix2 ) static int sub4x4_dct_dc( pixel *pix1, pixel *pix2 ) { - int16_t d[16]; + dctcoef d[16]; int sum = 0; pixel_sub_wxh( d, 4, pix1, FENC_STRIDE, pix2, FDEC_STRIDE ); @@ -172,7 +172,7 @@ static int sub4x4_dct_dc( pixel *pix1, pixel *pix2 ) return sum; } -static void sub8x8_dct_dc( int16_t dct[4], pixel *pix1, pixel *pix2 ) +static void sub8x8_dct_dc( dctcoef dct[4], pixel *pix1, pixel *pix2 ) { dct[0] = sub4x4_dct_dc( &pix1[0], &pix2[0] ); dct[1] = sub4x4_dct_dc( &pix1[4], &pix2[4] ); @@ -190,10 +190,10 @@ static void sub8x8_dct_dc( int16_t dct[4], pixel *pix1, pixel *pix2 ) dct[3] = d2 - d3; } -static void add4x4_idct( pixel *p_dst, int16_t dct[16] ) +static void add4x4_idct( pixel *p_dst, dctcoef dct[16] ) { - int16_t d[16]; - int16_t tmp[16]; + dctcoef d[16]; + dctcoef tmp[16]; for( int i = 0; i < 4; i++ ) { @@ -230,7 +230,7 @@ static void add4x4_idct( pixel *p_dst, int16_t dct[16] ) } } -static void add8x8_idct( pixel *p_dst, int16_t dct[4][16] ) +static void add8x8_idct( pixel *p_dst, dctcoef dct[4][16] ) { add4x4_idct( &p_dst[0], dct[0] ); add4x4_idct( &p_dst[4], dct[1] ); @@ -238,7 +238,7 @@ static void add8x8_idct( pixel *p_dst, int16_t dct[4][16] ) add4x4_idct( &p_dst[4*FDEC_STRIDE+4], dct[3] ); } -static void add16x16_idct( pixel *p_dst, int16_t dct[16][16] ) +static void add16x16_idct( pixel *p_dst, dctcoef dct[16][16] ) { add8x8_idct( &p_dst[0], &dct[0] ); add8x8_idct( &p_dst[8], &dct[4] ); @@ -277,9 +277,9 @@ static void add16x16_idct( pixel *p_dst, int16_t dct[16][16] ) DST(7) = (a4>>2) - a7 ;\ } -static void sub8x8_dct8( int16_t dct[64], pixel *pix1, pixel *pix2 ) +static void sub8x8_dct8( dctcoef dct[64], pixel *pix1, pixel *pix2 ) { - int16_t tmp[64]; + dctcoef tmp[64]; pixel_sub_wxh( tmp, 8, pix1, FENC_STRIDE, pix2, FDEC_STRIDE ); @@ -298,7 +298,7 @@ static void sub8x8_dct8( int16_t dct[64], pixel *pix1, pixel *pix2 ) #undef DST } -static void sub16x16_dct8( int16_t dct[4][64], pixel *pix1, pixel *pix2 ) +static void sub16x16_dct8( dctcoef dct[4][64], pixel *pix1, pixel *pix2 ) { sub8x8_dct8( dct[0], &pix1[0], &pix2[0] ); sub8x8_dct8( dct[1], &pix1[8], &pix2[8] ); @@ -333,7 +333,7 @@ static void sub16x16_dct8( int16_t dct[4][64], pixel *pix1, pixel *pix2 ) DST(7, b0 - b7);\ } -static void add8x8_idct8( pixel *dst, int16_t dct[64] ) +static void add8x8_idct8( pixel *dst, dctcoef dct[64] ) { dct[0] += 32; // rounding for the >>6 at the end @@ -352,7 +352,7 @@ static void add8x8_idct8( pixel *dst, int16_t dct[64] ) #undef DST } 
-static void add16x16_idct8( pixel *dst, int16_t dct[4][64] ) +static void add16x16_idct8( pixel *dst, dctcoef dct[4][64] ) { add8x8_idct8( &dst[0], dct[0] ); add8x8_idct8( &dst[8], dct[1] ); @@ -360,7 +360,7 @@ static void add16x16_idct8( pixel *dst, int16_t dct[4][64] ) add8x8_idct8( &dst[8*FDEC_STRIDE+8], dct[3] ); } -static void inline add4x4_idct_dc( pixel *p_dst, int16_t dc ) +static void inline add4x4_idct_dc( pixel *p_dst, dctcoef dc ) { dc = (dc + 32) >> 6; for( int i = 0; i < 4; i++, p_dst += FDEC_STRIDE ) @@ -372,7 +372,7 @@ static void inline add4x4_idct_dc( pixel *p_dst, int16_t dc ) } } -static void add8x8_idct_dc( pixel *p_dst, int16_t dct[4] ) +static void add8x8_idct_dc( pixel *p_dst, dctcoef dct[4] ) { add4x4_idct_dc( &p_dst[0], dct[0] ); add4x4_idct_dc( &p_dst[4], dct[1] ); @@ -380,7 +380,7 @@ static void add8x8_idct_dc( pixel *p_dst, int16_t dct[4] ) add4x4_idct_dc( &p_dst[4*FDEC_STRIDE+4], dct[3] ); } -static void add16x16_idct_dc( pixel *p_dst, int16_t dct[16] ) +static void add16x16_idct_dc( pixel *p_dst, dctcoef dct[16] ) { for( int i = 0; i < 4; i++, dct += 4, p_dst += 4*FDEC_STRIDE ) { @@ -578,12 +578,12 @@ void x264_dct_init_weights( void ) ZIG( 8,0,2) ZIG( 9,1,2) ZIG(10,2,2) ZIG(11,3,2)\ ZIG(12,0,3) ZIG(13,1,3) ZIG(14,2,3) ZIG(15,3,3) -static void zigzag_scan_8x8_frame( int16_t level[64], int16_t dct[64] ) +static void zigzag_scan_8x8_frame( dctcoef level[64], dctcoef dct[64] ) { ZIGZAG8_FRAME } -static void zigzag_scan_8x8_field( int16_t level[64], int16_t dct[64] ) +static void zigzag_scan_8x8_field( dctcoef level[64], dctcoef dct[64] ) { ZIGZAG8_FIELD } @@ -592,18 +592,18 @@ static void zigzag_scan_8x8_field( int16_t level[64], int16_t dct[64] ) #define ZIG(i,y,x) level[i] = dct[x*4+y]; #define ZIGDC(i,y,x) ZIG(i,y,x) -static void zigzag_scan_4x4_frame( int16_t level[16], int16_t dct[16] ) +static void zigzag_scan_4x4_frame( dctcoef level[16], dctcoef dct[16] ) { ZIGZAG4_FRAME } -static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[16] ) +static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] ) { - CP32( level, dct ); + CPDCT_X2( level, dct ); ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1) - CP32( level+6, dct+6 ); - CP64( level+8, dct+8 ); - CP64( level+12, dct+12 ); + CPDCT_X2( level+6, dct+6 ); + CPDCT_X4( level+8, dct+8 ); + CPDCT_X4( level+12, dct+12 ); } #undef ZIG @@ -628,7 +628,7 @@ static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[16] ) CPPIXEL_X8( p_dst+6*FDEC_STRIDE, p_src+6*FENC_STRIDE );\ CPPIXEL_X8( p_dst+7*FDEC_STRIDE, p_src+7*FENC_STRIDE ); -static int zigzag_sub_4x4_frame( int16_t level[16], const pixel *p_src, pixel *p_dst ) +static int zigzag_sub_4x4_frame( dctcoef level[16], const pixel *p_src, pixel *p_dst ) { int nz = 0; ZIGZAG4_FRAME @@ -636,7 +636,7 @@ static int zigzag_sub_4x4_frame( int16_t level[16], const pixel *p_src, pixel *p return !!nz; } -static int zigzag_sub_4x4_field( int16_t level[16], const pixel *p_src, pixel *p_dst ) +static int zigzag_sub_4x4_field( dctcoef level[16], const pixel *p_src, pixel *p_dst ) { int nz = 0; ZIGZAG4_FIELD @@ -652,7 +652,7 @@ static int zigzag_sub_4x4_field( int16_t level[16], const pixel *p_src, pixel *p level[0] = 0;\ } -static int zigzag_sub_4x4ac_frame( int16_t level[16], const pixel *p_src, pixel *p_dst, int16_t *dc ) +static int zigzag_sub_4x4ac_frame( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc ) { int nz = 0; ZIGZAG4_FRAME @@ -660,7 +660,7 @@ static int zigzag_sub_4x4ac_frame( int16_t level[16], const pixel *p_src, pixel 
return !!nz; } -static int zigzag_sub_4x4ac_field( int16_t level[16], const pixel *p_src, pixel *p_dst, int16_t *dc ) +static int zigzag_sub_4x4ac_field( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc ) { int nz = 0; ZIGZAG4_FIELD @@ -668,14 +668,14 @@ static int zigzag_sub_4x4ac_field( int16_t level[16], const pixel *p_src, pixel return !!nz; } -static int zigzag_sub_8x8_frame( int16_t level[64], const pixel *p_src, pixel *p_dst ) +static int zigzag_sub_8x8_frame( dctcoef level[64], const pixel *p_src, pixel *p_dst ) { int nz = 0; ZIGZAG8_FRAME COPY8x8 return !!nz; } -static int zigzag_sub_8x8_field( int16_t level[64], const pixel *p_src, pixel *p_dst ) +static int zigzag_sub_8x8_field( dctcoef level[64], const pixel *p_src, pixel *p_dst ) { int nz = 0; ZIGZAG8_FIELD @@ -686,7 +686,7 @@ static int zigzag_sub_8x8_field( int16_t level[64], const pixel *p_src, pixel *p #undef ZIG #undef COPY4x4 -static void zigzag_interleave_8x8_cavlc( int16_t *dst, int16_t *src, uint8_t *nnz ) +static void zigzag_interleave_8x8_cavlc( dctcoef *dst, dctcoef *src, uint8_t *nnz ) { for( int i = 0; i < 4; i++ ) { diff --git a/common/dct.h b/common/dct.h index 1305d784..0215b5ff 100644 --- a/common/dct.h +++ b/common/dct.h @@ -91,37 +91,37 @@ typedef struct // pix1 stride = FENC_STRIDE // pix2 stride = FDEC_STRIDE // p_dst stride = FDEC_STRIDE - void (*sub4x4_dct) ( int16_t dct[16], pixel *pix1, pixel *pix2 ); - void (*add4x4_idct) ( pixel *p_dst, int16_t dct[16] ); + void (*sub4x4_dct) ( dctcoef dct[16], pixel *pix1, pixel *pix2 ); + void (*add4x4_idct) ( pixel *p_dst, dctcoef dct[16] ); - void (*sub8x8_dct) ( int16_t dct[4][16], pixel *pix1, pixel *pix2 ); - void (*sub8x8_dct_dc)( int16_t dct[4], pixel *pix1, pixel *pix2 ); - void (*add8x8_idct) ( pixel *p_dst, int16_t dct[4][16] ); - void (*add8x8_idct_dc) ( pixel *p_dst, int16_t dct[4] ); + void (*sub8x8_dct) ( dctcoef dct[4][16], pixel *pix1, pixel *pix2 ); + void (*sub8x8_dct_dc)( dctcoef dct[4], pixel *pix1, pixel *pix2 ); + void (*add8x8_idct) ( pixel *p_dst, dctcoef dct[4][16] ); + void (*add8x8_idct_dc) ( pixel *p_dst, dctcoef dct[4] ); - void (*sub16x16_dct) ( int16_t dct[16][16], pixel *pix1, pixel *pix2 ); - void (*add16x16_idct)( pixel *p_dst, int16_t dct[16][16] ); - void (*add16x16_idct_dc) ( pixel *p_dst, int16_t dct[16] ); + void (*sub16x16_dct) ( dctcoef dct[16][16], pixel *pix1, pixel *pix2 ); + void (*add16x16_idct)( pixel *p_dst, dctcoef dct[16][16] ); + void (*add16x16_idct_dc) ( pixel *p_dst, dctcoef dct[16] ); - void (*sub8x8_dct8) ( int16_t dct[64], pixel *pix1, pixel *pix2 ); - void (*add8x8_idct8) ( pixel *p_dst, int16_t dct[64] ); + void (*sub8x8_dct8) ( dctcoef dct[64], pixel *pix1, pixel *pix2 ); + void (*add8x8_idct8) ( pixel *p_dst, dctcoef dct[64] ); - void (*sub16x16_dct8) ( int16_t dct[4][64], pixel *pix1, pixel *pix2 ); - void (*add16x16_idct8)( pixel *p_dst, int16_t dct[4][64] ); + void (*sub16x16_dct8) ( dctcoef dct[4][64], pixel *pix1, pixel *pix2 ); + void (*add16x16_idct8)( pixel *p_dst, dctcoef dct[4][64] ); - void (*dct4x4dc) ( int16_t d[16] ); - void (*idct4x4dc)( int16_t d[16] ); + void (*dct4x4dc) ( dctcoef d[16] ); + void (*idct4x4dc)( dctcoef d[16] ); } x264_dct_function_t; typedef struct { - void (*scan_8x8)( int16_t level[64], int16_t dct[64] ); - void (*scan_4x4)( int16_t level[16], int16_t dct[16] ); - int (*sub_8x8) ( int16_t level[64], const pixel *p_src, pixel *p_dst ); - int (*sub_4x4) ( int16_t level[16], const pixel *p_src, pixel *p_dst ); - int (*sub_4x4ac)( int16_t level[16], const 
pixel *p_src, pixel *p_dst, int16_t *dc ); - void (*interleave_8x8_cavlc)( int16_t *dst, int16_t *src, uint8_t *nnz ); + void (*scan_8x8)( dctcoef level[64], dctcoef dct[64] ); + void (*scan_4x4)( dctcoef level[16], dctcoef dct[16] ); + int (*sub_8x8) ( dctcoef level[64], const pixel *p_src, pixel *p_dst ); + int (*sub_4x4) ( dctcoef level[16], const pixel *p_src, pixel *p_dst ); + int (*sub_4x4ac)( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc ); + void (*interleave_8x8_cavlc)( dctcoef *dst, dctcoef *src, uint8_t *nnz ); } x264_zigzag_function_t; diff --git a/common/macroblock.h b/common/macroblock.h index 76f14e22..8a617682 100644 --- a/common/macroblock.h +++ b/common/macroblock.h @@ -346,7 +346,7 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b ) #define array_non_zero(a) array_non_zero_int(a, sizeof(a)) #define array_non_zero_int array_non_zero_int -static ALWAYS_INLINE int array_non_zero_int( int16_t *v, int i_count ) +static ALWAYS_INLINE int array_non_zero_int( dctcoef *v, int i_count ) { if(i_count == 8) return !!M64( &v[0] ); diff --git a/common/quant.c b/common/quant.c index 8b1fc87f..d3ba7e36 100644 --- a/common/quant.c +++ b/common/quant.c @@ -42,7 +42,7 @@ nz |= (coef); \ } -static int quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] ) +static int quant_8x8( dctcoef dct[64], uint16_t mf[64], uint16_t bias[64] ) { int nz = 0; for( int i = 0; i < 64; i++ ) @@ -50,7 +50,7 @@ static int quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] ) return !!nz; } -static int quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ) +static int quant_4x4( dctcoef dct[16], uint16_t mf[16], uint16_t bias[16] ) { int nz = 0; for( int i = 0; i < 16; i++ ) @@ -58,7 +58,7 @@ static int quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ) return !!nz; } -static int quant_4x4_dc( int16_t dct[16], int mf, int bias ) +static int quant_4x4_dc( dctcoef dct[16], int mf, int bias ) { int nz = 0; for( int i = 0; i < 16; i++ ) @@ -66,7 +66,7 @@ static int quant_4x4_dc( int16_t dct[16], int mf, int bias ) return !!nz; } -static int quant_2x2_dc( int16_t dct[4], int mf, int bias ) +static int quant_2x2_dc( dctcoef dct[4], int mf, int bias ) { int nz = 0; QUANT_ONE( dct[0], mf, bias ); @@ -82,7 +82,7 @@ static int quant_2x2_dc( int16_t dct[4], int mf, int bias ) #define DEQUANT_SHR( x ) \ dct[x] = ( dct[x] * dequant_mf[i_mf][x] + f ) >> (-i_qbits) -static void dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp ) +static void dequant_4x4( dctcoef dct[16], int dequant_mf[6][16], int i_qp ) { const int i_mf = i_qp%6; const int i_qbits = i_qp/6 - 4; @@ -100,7 +100,7 @@ static void dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp ) } } -static void dequant_8x8( int16_t dct[64], int dequant_mf[6][64], int i_qp ) +static void dequant_8x8( dctcoef dct[64], int dequant_mf[6][64], int i_qp ) { const int i_mf = i_qp%6; const int i_qbits = i_qp/6 - 6; @@ -118,7 +118,7 @@ static void dequant_8x8( int16_t dct[64], int dequant_mf[6][64], int i_qp ) } } -static void dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp ) +static void dequant_4x4_dc( dctcoef dct[16], int dequant_mf[6][16], int i_qp ) { const int i_qbits = i_qp/6 - 6; @@ -137,7 +137,7 @@ static void dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp ) } } -static void x264_denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size ) +static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, uint16_t *offset, int size ) { 
for( int i = 1; i < size; i++ ) { @@ -171,14 +171,14 @@ const uint8_t x264_decimate_table8[64] = 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; -static int ALWAYS_INLINE x264_decimate_score_internal( int16_t *dct, int i_max ) +static int ALWAYS_INLINE x264_decimate_score_internal( dctcoef *dct, int i_max ) { const uint8_t *ds_table = (i_max == 64) ? x264_decimate_table8 : x264_decimate_table4; int i_score = 0; int idx = i_max - 1; /* Yes, dct[idx-1] is guaranteed to be 32-bit aligned. idx>=0 instead of 1 works correctly for the same reason */ - while( idx >= 0 && M32( &dct[idx-1] ) == 0 ) + while( idx >= 0 && MDCT_X2( &dct[idx-1] ) == 0 ) idx -= 2; if( idx >= 0 && dct[idx] == 0 ) idx--; @@ -201,20 +201,20 @@ static int ALWAYS_INLINE x264_decimate_score_internal( int16_t *dct, int i_max ) return i_score; } -static int x264_decimate_score15( int16_t *dct ) +static int x264_decimate_score15( dctcoef *dct ) { return x264_decimate_score_internal( dct+1, 15 ); } -static int x264_decimate_score16( int16_t *dct ) +static int x264_decimate_score16( dctcoef *dct ) { return x264_decimate_score_internal( dct, 16 ); } -static int x264_decimate_score64( int16_t *dct ) +static int x264_decimate_score64( dctcoef *dct ) { return x264_decimate_score_internal( dct, 64 ); } -static int ALWAYS_INLINE x264_coeff_last_internal( int16_t *l, int i_count ) +static int ALWAYS_INLINE x264_coeff_last_internal( dctcoef *l, int i_count ) { int i_last; for( i_last = i_count-1; i_last >= 3; i_last -= 4 ) @@ -225,25 +225,25 @@ static int ALWAYS_INLINE x264_coeff_last_internal( int16_t *l, int i_count ) return i_last; } -static int x264_coeff_last4( int16_t *l ) +static int x264_coeff_last4( dctcoef *l ) { return x264_coeff_last_internal( l, 4 ); } -static int x264_coeff_last15( int16_t *l ) +static int x264_coeff_last15( dctcoef *l ) { return x264_coeff_last_internal( l, 15 ); } -static int x264_coeff_last16( int16_t *l ) +static int x264_coeff_last16( dctcoef *l ) { return x264_coeff_last_internal( l, 16 ); } -static int x264_coeff_last64( int16_t *l ) +static int x264_coeff_last64( dctcoef *l ) { return x264_coeff_last_internal( l, 64 ); } #define level_run(num)\ -static int x264_coeff_level_run##num( int16_t *dct, x264_run_level_t *runlevel )\ +static int x264_coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel )\ {\ int i_last = runlevel->last = x264_coeff_last##num(dct);\ int i_total = 0;\ diff --git a/common/quant.h b/common/quant.h index 1cfe95d1..b8ba43eb 100644 --- a/common/quant.h +++ b/common/quant.h @@ -25,22 +25,22 @@ typedef struct { - int (*quant_8x8)( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] ); - int (*quant_4x4)( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ); - int (*quant_4x4_dc)( int16_t dct[16], int mf, int bias ); - int (*quant_2x2_dc)( int16_t dct[4], int mf, int bias ); + int (*quant_8x8)( dctcoef dct[64], uint16_t mf[64], uint16_t bias[64] ); + int (*quant_4x4)( dctcoef dct[16], uint16_t mf[16], uint16_t bias[16] ); + int (*quant_4x4_dc)( dctcoef dct[16], int mf, int bias ); + int (*quant_2x2_dc)( dctcoef dct[4], int mf, int bias ); - void (*dequant_8x8)( int16_t dct[64], int dequant_mf[6][64], int i_qp ); - void (*dequant_4x4)( int16_t dct[16], int dequant_mf[6][16], int i_qp ); - void (*dequant_4x4_dc)( int16_t dct[16], int dequant_mf[6][16], int i_qp ); + void (*dequant_8x8)( dctcoef dct[64], int dequant_mf[6][64], int i_qp ); + void (*dequant_4x4)( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); + void (*dequant_4x4_dc)( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); - 
void (*denoise_dct)( int16_t *dct, uint32_t *sum, uint16_t *offset, int size ); + void (*denoise_dct)( dctcoef *dct, uint32_t *sum, uint16_t *offset, int size ); - int (*decimate_score15)( int16_t *dct ); - int (*decimate_score16)( int16_t *dct ); - int (*decimate_score64)( int16_t *dct ); - int (*coeff_last[6])( int16_t *dct ); - int (*coeff_level_run[5])( int16_t *dct, x264_run_level_t *runlevel ); + int (*decimate_score15)( dctcoef *dct ); + int (*decimate_score16)( dctcoef *dct ); + int (*decimate_score64)( dctcoef *dct ); + int (*coeff_last[6])( dctcoef *dct ); + int (*coeff_level_run[5])( dctcoef *dct, x264_run_level_t *runlevel ); } x264_quant_function_t; void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf ); diff --git a/encoder/cabac.c b/encoder/cabac.c index dc18c9d4..ae363f36 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -551,7 +551,7 @@ static const uint8_t coeff_abs_level_transition[2][8] = { static const uint8_t count_cat_m1[5] = {15, 14, 15, 3, 14}; #if !RDO_SKIP_BS -static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l ) +static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, dctcoef *l ) { const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat]; const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat]; @@ -645,7 +645,7 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl * this is slightly incorrect because the sigmap is not reversible * (contexts are repeated). However, there is nearly no quality penalty * for this (~0.001db) and the speed boost (~30%) is worth it. */ -static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l, int b_8x8 ) +static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, dctcoef *l, int b_8x8 ) { const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat]; const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat]; @@ -726,11 +726,11 @@ static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_c } } -static void block_residual_write_cabac_8x8( x264_t *h, x264_cabac_t *cb, int16_t *l ) +static void block_residual_write_cabac_8x8( x264_t *h, x264_cabac_t *cb, dctcoef *l ) { block_residual_write_cabac_internal( h, cb, DCT_LUMA_8x8, l, 1 ); } -static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l ) +static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, dctcoef *l ) { block_residual_write_cabac_internal( h, cb, i_ctxBlockCat, l, 0 ); } diff --git a/encoder/cavlc.c b/encoder/cavlc.c index 7d7424c0..b2544652 100644 --- a/encoder/cavlc.c +++ b/encoder/cavlc.c @@ -113,7 +113,7 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_len return i_suffix_length; } -static int block_residual_write_cavlc( x264_t *h, int i_ctxBlockCat, int16_t *l, int nC ) +static int block_residual_write_cavlc( x264_t *h, int i_ctxBlockCat, dctcoef *l, int nC ) { bs_t *s = &h->out.bs; static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0}; diff --git a/encoder/macroblock.c b/encoder/macroblock.c index 19d2f85e..52847376 100644 --- a/encoder/macroblock.c +++ b/encoder/macroblock.c @@ -28,7 +28,7 @@ /* These chroma DC functions don't have assembly versions and are only used here. 
*/ #define ZIG(i,y,x) level[i] = dct[x*2+y]; -static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[4] ) +static inline void zigzag_scan_2x2_dc( dctcoef level[4], dctcoef dct[4] ) { ZIG(0,0,0) ZIG(1,0,1) @@ -44,7 +44,7 @@ static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[4] ) int d3 = dct[2] - dct[3]; \ int dmf = dequant_mf[i_qp%6][0] << i_qp/6; -static inline void idct_dequant_2x2_dc( int16_t dct[4], int16_t dct4x4[4][16], int dequant_mf[6][16], int i_qp ) +static inline void idct_dequant_2x2_dc( dctcoef dct[4], dctcoef dct4x4[4][16], int dequant_mf[6][16], int i_qp ) { IDCT_DEQUANT_START dct4x4[0][0] = (d0 + d1) * dmf >> 5; @@ -53,7 +53,7 @@ static inline void idct_dequant_2x2_dc( int16_t dct[4], int16_t dct4x4[4][16], i dct4x4[3][0] = (d2 - d3) * dmf >> 5; } -static inline void idct_dequant_2x2_dconly( int16_t out[4], int16_t dct[4], int dequant_mf[6][16], int i_qp ) +static inline void idct_dequant_2x2_dconly( dctcoef out[4], dctcoef dct[4], int dequant_mf[6][16], int i_qp ) { IDCT_DEQUANT_START out[0] = (d0 + d1) * dmf >> 5; @@ -62,7 +62,7 @@ static inline void idct_dequant_2x2_dconly( int16_t out[4], int16_t dct[4], int out[3] = (d2 - d3) * dmf >> 5; } -static inline void dct2x2dc( int16_t d[4], int16_t dct4x4[4][16] ) +static inline void dct2x2dc( dctcoef d[4], dctcoef dct4x4[4][16] ) { int d0 = dct4x4[0][0] + dct4x4[1][0]; int d1 = dct4x4[2][0] + dct4x4[3][0]; @@ -78,7 +78,7 @@ static inline void dct2x2dc( int16_t d[4], int16_t dct4x4[4][16] ) dct4x4[3][0] = 0; } -static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, int16_t dct[16], int i_qp, int i_ctxBlockCat, int b_intra, int idx ) +static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int i_ctxBlockCat, int b_intra, int idx ) { int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY; if( h->mb.b_trellis ) @@ -87,7 +87,7 @@ static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, int16_t dct[16], int i_qp, i return h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] ); } -static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, int16_t dct[64], int i_qp, int b_intra, int idx ) +static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int b_intra, int idx ) { int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY; if( h->mb.b_trellis ) @@ -112,7 +112,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp ) int nz; pixel *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]]; pixel *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]]; - ALIGNED_ARRAY_16( int16_t, dct4x4,[16] ); + ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] ); if( h->mb.b_lossless ) { @@ -158,7 +158,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp ) int nz; pixel *p_src = &h->mb.pic.p_fenc[0][8*x + 8*y*FENC_STRIDE]; pixel *p_dst = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE]; - ALIGNED_ARRAY_16( int16_t, dct8x8,[64] ); + ALIGNED_ARRAY_16( dctcoef, dct8x8,[64] ); if( h->mb.b_lossless ) { @@ -188,8 +188,8 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp ) pixel *p_src = h->mb.pic.p_fenc[0]; pixel *p_dst = h->mb.pic.p_fdec[0]; - ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[16] ); - ALIGNED_ARRAY_16( int16_t, dct_dc4x4,[16] ); + ALIGNED_ARRAY_16( dctcoef, dct4x4,[16],[16] ); + ALIGNED_ARRAY_16( dctcoef, dct_dc4x4,[16] ); int nz; int decimate_score = h->mb.b_dct_decimate ? 
0 : 9; @@ -264,9 +264,9 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp ) h->dctf.add16x16_idct_dc( p_dst, dct_dc4x4 ); } -static inline int idct_dequant_round_2x2_dc( int16_t ref[4], int16_t dct[4], int dequant_mf[6][16], int i_qp ) +static inline int idct_dequant_round_2x2_dc( dctcoef ref[4], dctcoef dct[4], int dequant_mf[6][16], int i_qp ) { - int16_t out[4]; + dctcoef out[4]; idct_dequant_2x2_dconly( out, dct, dequant_mf, i_qp ); return ((ref[0] ^ (out[0]+32)) | (ref[1] ^ (out[1]+32)) @@ -278,9 +278,9 @@ static inline int idct_dequant_round_2x2_dc( int16_t ref[4], int16_t dct[4], int * Unlike luma blocks, this can't be done with a lookup table or * other shortcut technique because of the interdependencies * between the coefficients due to the chroma DC transform. */ -static inline int x264_mb_optimize_chroma_dc( x264_t *h, int b_inter, int i_qp, int16_t dct2x2[4] ) +static inline int x264_mb_optimize_chroma_dc( x264_t *h, int b_inter, int i_qp, dctcoef dct2x2[4] ) { - int16_t dct2x2_orig[4]; + dctcoef dct2x2_orig[4]; int coeff, nz; /* If the QP is too high, there's no benefit to rounding optimization. */ @@ -323,7 +323,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp ) { int nz, nz_dc; int b_decimate = b_inter && h->mb.b_dct_decimate; - ALIGNED_ARRAY_16( int16_t, dct2x2,[4] ); + ALIGNED_ARRAY_16( dctcoef, dct2x2,[4] ); h->mb.i_cbp_chroma = 0; /* Early termination: check variance of chroma residual before encoding. @@ -381,7 +381,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp ) int i_decimate_score = 0; int nz_ac = 0; - ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[16] ); + ALIGNED_ARRAY_16( dctcoef, dct4x4,[4],[16] ); if( h->mb.b_lossless ) { @@ -751,7 +751,7 @@ void x264_macroblock_encode( x264_t *h ) } else if( h->mb.b_transform_8x8 ) { - ALIGNED_ARRAY_16( int16_t, dct8x8,[4],[64] ); + ALIGNED_ARRAY_16( dctcoef, dct8x8,[4],[64] ); b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] ); h->nr_count[1] += h->mb.b_noise_reduction * 4; @@ -803,7 +803,7 @@ void x264_macroblock_encode( x264_t *h ) } else { - ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[16] ); + ALIGNED_ARRAY_16( dctcoef, dct4x4,[16],[16] ); h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] ); h->nr_count[0] += h->mb.b_noise_reduction * 16; @@ -922,9 +922,9 @@ void x264_macroblock_encode( x264_t *h ) *****************************************************************************/ int x264_macroblock_probe_skip( x264_t *h, int b_bidir ) { - ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[16] ); - ALIGNED_ARRAY_16( int16_t, dct2x2,[4] ); - ALIGNED_ARRAY_16( int16_t, dctscan,[16] ); + ALIGNED_ARRAY_16( dctcoef, dct4x4,[4],[16] ); + ALIGNED_ARRAY_16( dctcoef, dct2x2,[4] ); + ALIGNED_ARRAY_16( dctcoef, dctscan,[16] ); ALIGNED_4( int16_t mvp[2] ); int i_qp = h->mb.i_qp; @@ -1090,7 +1090,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) } for( int ch = 0; ch < 2; ch++ ) { - int16_t dc; + dctcoef dc; p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + 4*y*FENC_STRIDE; p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + 4*y*FDEC_STRIDE; nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16+i8+ch*4], p_fenc, p_fdec, &dc ); @@ -1101,7 +1101,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) { if( h->mb.b_transform_8x8 ) { - ALIGNED_ARRAY_16( int16_t, dct8x8,[64] ); + ALIGNED_ARRAY_16( dctcoef, dct8x8,[64] ); h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec ); nnz8x8 = x264_quant_8x8( h, dct8x8, i_qp, 0, i8 
); if( nnz8x8 ) @@ -1126,7 +1126,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) else { int i_decimate_8x8 = 0; - ALIGNED_ARRAY_16( int16_t, dct4x4,[4],[16] ); + ALIGNED_ARRAY_16( dctcoef, dct4x4,[4],[16] ); h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec ); for( int i4 = 0; i4 < 4; i4++ ) { @@ -1155,7 +1155,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) for( int ch = 0; ch < 2; ch++ ) { - ALIGNED_ARRAY_16( int16_t, dct4x4,[16] ); + ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] ); p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + 4*y*FENC_STRIDE; p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + 4*y*FDEC_STRIDE; @@ -1200,7 +1200,7 @@ void x264_macroblock_encode_p4x4( x264_t *h, int i4 ) } else { - ALIGNED_ARRAY_16( int16_t, dct4x4,[16] ); + ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] ); h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec ); nz = x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 0, i4 ); h->mb.cache.non_zero_count[x264_scan8[i4]] = nz; diff --git a/encoder/macroblock.h b/encoder/macroblock.h index f42e6a4c..b1b02fa5 100644 --- a/encoder/macroblock.h +++ b/encoder/macroblock.h @@ -55,11 +55,11 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp ); void x264_cabac_mb_skip( x264_t *h, int b_skip ); -int x264_quant_dc_trellis( x264_t *h, int16_t *dct, int i_quant_cat, +int x264_quant_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma ); -int x264_quant_4x4_trellis( x264_t *h, int16_t *dct, int i_quant_cat, +int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma, int idx ); -int x264_quant_8x8_trellis( x264_t *h, int16_t *dct, int i_quant_cat, +int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, int i_qp, int b_intra, int idx ); void x264_noise_reduction_update( x264_t *h ); diff --git a/encoder/rdo.c b/encoder/rdo.c index 1fecea62..5dddd03f 100644 --- a/encoder/rdo.c +++ b/encoder/rdo.c @@ -407,7 +407,7 @@ typedef struct { // comparable to the input. so unquant is the direct inverse of quant, // and uses the dct scaling factors, not the idct ones. -static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, int16_t *dct, +static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct, const uint16_t *quant_mf, const int *unquant_mf, const int *coef_weight, const uint8_t *zigzag, int i_ctxBlockCat, int i_lambda2, int b_ac, int dc, int i_coefs, int idx ) @@ -634,8 +634,8 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, int16_t *dct, const static uint8_t x264_zigzag_scan2[4] = {0,1,2,3}; -int x264_quant_dc_trellis( x264_t *h, int16_t *dct, int i_quant_cat, - int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma ) +int x264_quant_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, + int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma ) { return quant_trellis_cabac( h, dct, h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp], @@ -643,8 +643,8 @@ int x264_quant_dc_trellis( x264_t *h, int16_t *dct, int i_quant_cat, i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 
4 : 16, 0 ); } -int x264_quant_4x4_trellis( x264_t *h, int16_t *dct, int i_quant_cat, - int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma, int idx ) +int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, + int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma, int idx ) { int b_ac = (i_ctxBlockCat == DCT_LUMA_AC || i_ctxBlockCat == DCT_CHROMA_AC); return quant_trellis_cabac( h, dct, @@ -654,8 +654,8 @@ int x264_quant_4x4_trellis( x264_t *h, int16_t *dct, int i_quant_cat, i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx ); } -int x264_quant_8x8_trellis( x264_t *h, int16_t *dct, int i_quant_cat, - int i_qp, int b_intra, int idx ) +int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, + int i_qp, int b_intra, int idx ) { return quant_trellis_cabac( h, dct, h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp], diff --git a/tools/checkasm.c b/tools/checkasm.c index e60dcde4..bdf7140f 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -515,11 +515,11 @@ static int check_dct( int cpu_ref, int cpu_new ) x264_dct_function_t dct_asm; x264_quant_function_t qf; int ret = 0, ok, used_asm, interlace; - ALIGNED_16( int16_t dct1[16][16] ); - ALIGNED_16( int16_t dct2[16][16] ); - ALIGNED_16( int16_t dct4[16][16] ); - ALIGNED_16( int16_t dct8[4][64] ); - ALIGNED_8( int16_t dctdc[2][4] ); + ALIGNED_16( dctcoef dct1[16][16] ); + ALIGNED_16( dctcoef dct2[16][16] ); + ALIGNED_16( dctcoef dct4[16][16] ); + ALIGNED_16( dctcoef dct8[4][64] ); + ALIGNED_8( dctcoef dctdc[2][4] ); x264_t h_buf; x264_t *h = &h_buf; @@ -645,18 +645,18 @@ static int check_dct( int cpu_ref, int cpu_new ) x264_zigzag_function_t zigzag_ref; x264_zigzag_function_t zigzag_asm; - ALIGNED_16( int16_t level1[64] ); - ALIGNED_16( int16_t level2[64] ); + ALIGNED_16( dctcoef level1[64] ); + ALIGNED_16( dctcoef level2[64] ); #define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \ if( zigzag_asm.name != zigzag_ref.name ) \ { \ set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \ used_asm = 1; \ - memcpy(dct, buf1, size*sizeof(int16_t)); \ + memcpy(dct, buf1, size*sizeof(dctcoef)); \ call_c( zigzag_c.name, t1, dct ); \ call_a( zigzag_asm.name, t2, dct ); \ - if( memcmp( t1, t2, size*sizeof(int16_t) ) ) \ + if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \ { \ ok = 0; \ fprintf( stderr, #name " [FAILED]\n" ); \ @@ -673,7 +673,7 @@ static int check_dct( int cpu_ref, int cpu_new ) memcpy( buf4, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \ nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3 ); \ nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4 ); \ - if( memcmp( t1, t2, size*sizeof(int16_t) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \ + if( memcmp( t1, t2, size*sizeof(dctcoef) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \ { \ ok = 0; \ fprintf( stderr, #name " [FAILED]\n" ); \ @@ -686,7 +686,7 @@ static int check_dct( int cpu_ref, int cpu_new ) if( zigzag_asm.name != zigzag_ref.name ) \ { \ int nz_a, nz_c; \ - int16_t dc_a, dc_c; \ + dctcoef dc_a, dc_c; \ set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \ used_asm = 1; \ for( int i = 0; i < 2; i++ ) \ @@ -700,7 +700,7 @@ static int check_dct( int cpu_ref, int cpu_new ) } \ nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3, &dc_c ); \ nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4, &dc_a ); \ - if( memcmp( t1+1, t2+1, 15*sizeof(int16_t) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \ + if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) 
) || memcmp( buf3, buf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \ { \ ok = 0; \ fprintf( stderr, #name " [FAILED]\n" ); \ @@ -718,13 +718,13 @@ static int check_dct( int cpu_ref, int cpu_new ) { \ set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \ used_asm = 1; \ - memcpy(dct, buf1, size*sizeof(int16_t)); \ + memcpy(dct, buf1, size*sizeof(dctcoef)); \ for( int i = 0; i < size; i++ ) \ dct[i] = rand()&0x1F ? 0 : dct[i]; \ memcpy(buf3, buf4, 10); \ call_c( zigzag_c.name, t1, dct, buf3 ); \ call_a( zigzag_asm.name, t2, dct, buf4 ); \ - if( memcmp( t1, t2, size*sizeof(int16_t) ) || memcmp( buf3, buf4, 10 ) ) \ + if( memcmp( t1, t2, size*sizeof(dctcoef) ) || memcmp( buf3, buf4, 10 ) ) \ { \ ok = 0; \ } \ @@ -1200,8 +1200,8 @@ static int check_quant( int cpu_ref, int cpu_new ) x264_quant_function_t qf_c; x264_quant_function_t qf_ref; x264_quant_function_t qf_a; - ALIGNED_16( int16_t dct1[64] ); - ALIGNED_16( int16_t dct2[64] ); + ALIGNED_16( dctcoef dct1[64] ); + ALIGNED_16( dctcoef dct2[64] ); ALIGNED_16( uint8_t cqm_buf[64] ); int ret = 0, ok, used_asm; int oks[2] = {1,1}, used_asms[2] = {0,0}; -- 2.40.0
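
For readers wondering what the unified dctcoef type buys: once every DCT buffer, function pointer and helper goes through the typedef (and through MDCT_X2/CPDCT_X2/CPDCT_X4 instead of raw M32/CP32/CP64), the coefficient width can be changed in a single place without touching the DCT, quantization or entropy-coding call sites again. The sketch below only illustrates that idea; the HIGH_BIT_DEPTH flag and the memcpy-based copy macros are assumptions made for this example and are not part of this patch, which keeps dctcoef at int16_t throughout.

/* Illustrative sketch only -- not code from this patch.  It shows how a
 * unified coefficient type could later be widened for high bit depth.
 * HIGH_BIT_DEPTH is a hypothetical build flag; the patch itself defines
 * CPDCT_X2/CPDCT_X4 on top of the fixed-width CP32/CP64 macros instead
 * of the width-agnostic memcpy stand-ins used here. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#ifdef HIGH_BIT_DEPTH
typedef int32_t dctcoef;   /* hypothetical: wider residuals for >8-bit input */
#else
typedef int16_t dctcoef;   /* as introduced by this patch */
#endif

/* Copy two or four coefficients at once, regardless of sizeof(dctcoef). */
#define CPDCT_X2( dst, src ) memcpy( dst, src, 2*sizeof(dctcoef) )
#define CPDCT_X4( dst, src ) memcpy( dst, src, 4*sizeof(dctcoef) )

int main( void )
{
    dctcoef src[4] = { 1, -2, 3, -4 };
    dctcoef dst[4] = { 0 };
    CPDCT_X4( dst, src );
    printf( "sizeof(dctcoef)=%zu dst={%d,%d,%d,%d}\n", sizeof(dctcoef),
            (int)dst[0], (int)dst[1], (int)dst[2], (int)dst[3] );
    return 0;
}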