From: Fiona Glaser Date: Tue, 10 Nov 2009 05:22:41 +0000 (-0800) Subject: Fix one (of possibly many) miscompilations in weightp X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3a4c7dae3deeeb729251e1098d70befab1ad4a0e;p=libx264 Fix one (of possibly many) miscompilations in weightp Use NOINLINE and some emms calls to fix emms reordering issues. This issue occurred with some GCC versions if threads > 1 and the phase of the moon was right. Also a cosmetic in x264.c. --- diff --git a/encoder/slicetype.c b/encoder/slicetype.c index 9323981a..9c65d03d 100644 --- a/encoder/slicetype.c +++ b/encoder/slicetype.c @@ -59,7 +59,7 @@ static void get_h264_weight( unsigned int weight_nonh264, int offset, x264_weigh /* due to a GCC bug on some platforms (win32), flat[16] may not actually be aligned. */ ALIGNED_16( static uint8_t flat[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; -static void weights_plane_analyse( x264_t *h, uint8_t *plane, int width, int height, int stride, unsigned int *sum, uint64_t *var ) +static NOINLINE void weights_plane_analyse( x264_t *h, uint8_t *plane, int width, int height, int stride, unsigned int *sum, uint64_t *var ) { int x,y; unsigned int sad = 0; @@ -85,7 +85,7 @@ static void weights_plane_analyse( x264_t *h, uint8_t *plane, int width, int hei (dst)[3] = &(src)[3][i_pel_offset]; \ } -static uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest, int b_lowres ) +static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest, int b_lowres ) { uint8_t **ref_planes = b_lowres ? ref->lowres : ref->filtered; int ref0_distance = fenc->i_frame - ref->i_frame - 1; @@ -114,13 +114,15 @@ static uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_ h->mc.mc_luma( pix, i_stride, src, i_stride, mvx, mvy, mbsize, mbsize, weight_none ); } + x264_emms(); return dest; } + x264_emms(); return ref_planes[0]; } #undef LOAD_HPELS_LUMA -static unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w, int b_lowres ) +static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w, int b_lowres ) { int x, y; unsigned int cost = 0; @@ -160,6 +162,7 @@ static unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, uint8_t *sr // Multiply by 2 as there will be a duplicate. 10 bits added as if there is a weighted frame, then an additional duplicate is used. cost += lambda * numslices * ( 10 + 2 * ( bs_size_ue( w[0].i_denom ) + bs_size_se( w[0].i_scale ) + bs_size_se( w[0].i_offset ) ) ); } + x264_emms(); return cost; } diff --git a/x264.c b/x264.c index dbad1ff4..115e66aa 100644 --- a/x264.c +++ b/x264.c @@ -273,10 +273,10 @@ static void Help( x264_param_t *defaults, int longhelp ) " - none, spatial, temporal, auto\n", strtable_lookup( x264_direct_pred_names, defaults->analyse.i_direct_mv_pred ) ); H2( " --no-weightb Disable weighted prediction for B-frames\n" ); - H1( " --weightp Weighted prediction for P-frames [2]\n" + H1( " --weightp Weighted prediction for P-frames [%d]\n" " - 0: Disabled\n" " - 1: Blind offset\n" - " - 2: Smart analysis\n"); + " - 2: Smart analysis\n", defaults->analyse.i_weighted_pred ); H1( " --me Integer pixel motion estimation method [\"%s\"]\n", strtable_lookup( x264_motion_est_names, defaults->analyse.i_me_method ) ); H2( " - dia: diamond search, radius 1 (fast)\n"