+++ /dev/null
-From 7737e6ad4acf1058aeb0f9802e2a3ca1e0a30d29 Mon Sep 17 00:00:00 2001
-From: Henrik Gramner <henrik@gramner.com>
-Date: Sat, 2 Jun 2018 20:35:10 +0200
-Subject: [PATCH 1/1] Fix clang stack alignment issues
-
-Clang emits aligned AVX stores for things like zeroing stack-allocated
-variables when using -mavx even with -fno-tree-vectorize set which can
-result in crashes if this occurs before we've realigned the stack.
-
-Previously we only ensured that the stack was realigned before calling
-assembly functions that accesses stack-allocated buffers but this is
-not sufficient. Fix the issue by changing the stack realignment to
-instead occur immediately in all CLI, API and thread entry points.
----
- common/base.c | 60 +++++++++++++++++++++++++++++++-------
- common/threadpool.c | 9 ++++--
- common/x86/cpu-a.asm | 80 ++++++++++++++++++++++++++++-----------------------
- encoder/api.c | 29 +++++++++++--------
- encoder/encoder.c | 8 +++---
- encoder/lookahead.c | 15 ++++++----
- encoder/ratecontrol.c | 2 +-
- tools/checkasm.c | 6 +++-
- x264.c | 7 ++++-
- 9 files changed, 144 insertions(+), 72 deletions(-)
-
-diff --git a/common/base.c b/common/base.c
-index a07d9c6b..3befe73d 100644
---- a/common/base.c
-+++ b/common/base.c
-@@ -196,7 +196,7 @@ error:
- /****************************************************************************
- * x264_picture_init:
- ****************************************************************************/
--void x264_picture_init( x264_picture_t *pic )
-+static void picture_init( x264_picture_t *pic )
- {
- memset( pic, 0, sizeof( x264_picture_t ) );
- pic->i_type = X264_TYPE_AUTO;
-@@ -204,10 +204,15 @@ void x264_picture_init( x264_picture_t *pic )
- pic->i_pic_struct = PIC_STRUCT_AUTO;
- }
-
-+void x264_picture_init( x264_picture_t *pic )
-+{
-+ x264_stack_align( picture_init, pic );
-+}
-+
- /****************************************************************************
- * x264_picture_alloc:
- ****************************************************************************/
--int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
-+static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
- {
- typedef struct
- {
-@@ -237,7 +242,7 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
- int csp = i_csp & X264_CSP_MASK;
- if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
- return -1;
-- x264_picture_init( pic );
-+ picture_init( pic );
- pic->img.i_csp = i_csp;
- pic->img.i_plane = csp_tab[csp].planes;
- int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
-@@ -259,10 +264,15 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
- return 0;
- }
-
-+int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
-+{
-+ return x264_stack_align( picture_alloc, pic, i_csp, i_width, i_height );
-+}
-+
- /****************************************************************************
- * x264_picture_clean:
- ****************************************************************************/
--void x264_picture_clean( x264_picture_t *pic )
-+static void picture_clean( x264_picture_t *pic )
- {
- x264_free( pic->img.plane[0] );
-
-@@ -270,10 +280,15 @@ void x264_picture_clean( x264_picture_t *pic )
- memset( pic, 0, sizeof( x264_picture_t ) );
- }
-
-+void x264_picture_clean( x264_picture_t *pic )
-+{
-+ x264_stack_align( picture_clean, pic );
-+}
-+
- /****************************************************************************
- * x264_param_default:
- ****************************************************************************/
--void x264_param_default( x264_param_t *param )
-+static void param_default( x264_param_t *param )
- {
- /* */
- memset( param, 0, sizeof( x264_param_t ) );
-@@ -416,6 +431,11 @@ void x264_param_default( x264_param_t *param )
- param->psz_clbin_file = NULL;
- }
-
-+void x264_param_default( x264_param_t *param )
-+{
-+ x264_stack_align( param_default, param );
-+}
-+
- static int param_apply_preset( x264_param_t *param, const char *preset )
- {
- char *end;
-@@ -643,9 +663,9 @@ static int param_apply_tune( x264_param_t *param, const char *tune )
- return 0;
- }
-
--int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
-+static int param_default_preset( x264_param_t *param, const char *preset, const char *tune )
- {
-- x264_param_default( param );
-+ param_default( param );
-
- if( preset && param_apply_preset( param, preset ) < 0 )
- return -1;
-@@ -654,7 +674,12 @@ int x264_param_default_preset( x264_param_t *param, const char *preset, const ch
- return 0;
- }
-
--void x264_param_apply_fastfirstpass( x264_param_t *param )
-+int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
-+{
-+ return x264_stack_align( param_default_preset, param, preset, tune );
-+}
-+
-+static void param_apply_fastfirstpass( x264_param_t *param )
- {
- /* Set faster options in case of turbo firstpass. */
- if( param->rc.b_stat_write && !param->rc.b_stat_read )
-@@ -669,6 +694,11 @@ void x264_param_apply_fastfirstpass( x264_param_t *param )
- }
- }
-
-+void x264_param_apply_fastfirstpass( x264_param_t *param )
-+{
-+ x264_stack_align( param_apply_fastfirstpass, param );
-+}
-+
- static int profile_string_to_int( const char *str )
- {
- if( !strcasecmp( str, "baseline" ) )
-@@ -686,7 +716,7 @@ static int profile_string_to_int( const char *str )
- return -1;
- }
-
--int x264_param_apply_profile( x264_param_t *param, const char *profile )
-+static int param_apply_profile( x264_param_t *param, const char *profile )
- {
- if( !profile )
- return 0;
-@@ -748,6 +778,11 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
- return 0;
- }
-
-+int x264_param_apply_profile( x264_param_t *param, const char *profile )
-+{
-+ return x264_stack_align( param_apply_profile, param, profile );
-+}
-+
- static int parse_enum( const char *arg, const char * const *names, int *dst )
- {
- for( int i = 0; names[i]; i++ )
-@@ -809,7 +844,7 @@ static double atof_internal( const char *str, int *b_error )
- #define atoi(str) atoi_internal( str, &b_error )
- #define atof(str) atof_internal( str, &b_error )
-
--int x264_param_parse( x264_param_t *p, const char *name, const char *value )
-+static int param_parse( x264_param_t *p, const char *name, const char *value )
- {
- char *name_buf = NULL;
- int b_error = 0;
-@@ -1308,6 +1343,11 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
- return b_error ? errortype : 0;
- }
-
-+int x264_param_parse( x264_param_t *param, const char *name, const char *value )
-+{
-+ return x264_stack_align( param_parse, param, name, value );
-+}
-+
- /****************************************************************************
- * x264_param2string:
- ****************************************************************************/
-diff --git a/common/threadpool.c b/common/threadpool.c
-index 5a71feb1..7f98f778 100644
---- a/common/threadpool.c
-+++ b/common/threadpool.c
-@@ -47,7 +47,7 @@ struct x264_threadpool_t
- x264_sync_frame_list_t done; /* list of jobs that have finished processing */
- };
-
--static void *threadpool_thread( x264_threadpool_t *pool )
-+static void *threadpool_thread_internal( x264_threadpool_t *pool )
- {
- if( pool->init_func )
- pool->init_func( pool->init_arg );
-@@ -66,12 +66,17 @@ static void *threadpool_thread( x264_threadpool_t *pool )
- x264_pthread_mutex_unlock( &pool->run.mutex );
- if( !job )
- continue;
-- job->ret = (void*)x264_stack_align( job->func, job->arg ); /* execute the function */
-+ job->ret = job->func( job->arg );
- x264_sync_frame_list_push( &pool->done, (void*)job );
- }
- return NULL;
- }
-
-+static void *threadpool_thread( x264_threadpool_t *pool )
-+{
-+ return (void*)x264_stack_align( threadpool_thread_internal, pool );
-+}
-+
- int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
- void (*init_func)(void *), void *init_arg )
- {
-diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
-index ad42c26d..d94f7d54 100644
---- a/common/x86/cpu-a.asm
-+++ b/common/x86/cpu-a.asm
-@@ -64,23 +64,42 @@ cglobal cpu_xgetbv
- %endif
- ret
-
-+;-----------------------------------------------------------------------------
-+; void cpu_emms( void )
-+;-----------------------------------------------------------------------------
-+cglobal cpu_emms
-+ emms
-+ ret
-+
-+;-----------------------------------------------------------------------------
-+; void cpu_sfence( void )
-+;-----------------------------------------------------------------------------
-+cglobal cpu_sfence
-+ sfence
-+ ret
-+
- %if ARCH_X86_64
-
- ;-----------------------------------------------------------------------------
--; void stack_align( void (*func)(void*), void *arg );
-+; intptr_t stack_align( void (*func)(void*), ... ); (up to 5 args)
- ;-----------------------------------------------------------------------------
- cglobal stack_align
-- push rbp
-- mov rbp, rsp
-+ mov rax, r0mp
-+ mov r0, r1mp
-+ mov r1, r2mp
-+ mov r2, r3mp
-+ mov r3, r4mp
-+ mov r4, r5mp
-+ push rbp
-+ mov rbp, rsp
-+%if WIN64
-+ sub rsp, 40 ; shadow space + r4
-+%endif
-+ and rsp, ~(STACK_ALIGNMENT-1)
- %if WIN64
-- sub rsp, 32 ; shadow space
-+ mov [rsp+32], r4
- %endif
-- and rsp, ~(STACK_ALIGNMENT-1)
-- mov rax, r0
-- mov r0, r1
-- mov r1, r2
-- mov r2, r3
-- call rax
-+ call rax
- leave
- ret
-
-@@ -113,33 +132,22 @@ cglobal cpu_cpuid_test
- ret
-
- cglobal stack_align
-- push ebp
-- mov ebp, esp
-- sub esp, 12
-- and esp, ~(STACK_ALIGNMENT-1)
-- mov ecx, [ebp+8]
-- mov edx, [ebp+12]
-- mov [esp], edx
-- mov edx, [ebp+16]
-- mov [esp+4], edx
-- mov edx, [ebp+20]
-- mov [esp+8], edx
-- call ecx
-+ push ebp
-+ mov ebp, esp
-+ sub esp, 20
-+ and esp, ~(STACK_ALIGNMENT-1)
-+ mov r0, [ebp+12]
-+ mov r1, [ebp+16]
-+ mov r2, [ebp+20]
-+ mov [esp+ 0], r0
-+ mov [esp+ 4], r1
-+ mov [esp+ 8], r2
-+ mov r0, [ebp+24]
-+ mov r1, [ebp+28]
-+ mov [esp+12], r0
-+ mov [esp+16], r1
-+ call [ebp+ 8]
- leave
- ret
-
- %endif
--
--;-----------------------------------------------------------------------------
--; void cpu_emms( void )
--;-----------------------------------------------------------------------------
--cglobal cpu_emms
-- emms
-- ret
--
--;-----------------------------------------------------------------------------
--; void cpu_sfence( void )
--;-----------------------------------------------------------------------------
--cglobal cpu_sfence
-- sfence
-- ret
-diff --git a/encoder/api.c b/encoder/api.c
-index e247f3e4..b97612b7 100644
---- a/encoder/api.c
-+++ b/encoder/api.c
-@@ -73,7 +73,7 @@ typedef struct x264_api_t
- int (*encoder_invalidate_reference)( x264_t *, int64_t pts );
- } x264_api_t;
-
--x264_t *x264_encoder_open( x264_param_t *param )
-+static x264_api_t *encoder_open( x264_param_t *param )
- {
- x264_api_t *api = calloc( 1, sizeof( x264_api_t ) );
- if( !api )
-@@ -118,15 +118,20 @@ x264_t *x264_encoder_open( x264_param_t *param )
- return NULL;
- }
-
-+ return api;
-+}
-+
-+x264_t *x264_encoder_open( x264_param_t *param )
-+{
- /* x264_t is opaque */
-- return (x264_t *)api;
-+ return (x264_t *)x264_stack_align( encoder_open, param );
- }
-
- void x264_encoder_close( x264_t *h )
- {
- x264_api_t *api = (x264_api_t *)h;
-
-- api->encoder_close( api->x264 );
-+ x264_stack_align( api->encoder_close, api->x264 );
- free( api );
- }
-
-@@ -134,61 +139,61 @@ void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
- {
- x264_api_t *api = (x264_api_t *)h;
-
-- api->nal_encode( api->x264, dst, nal );
-+ x264_stack_align( api->nal_encode, api->x264, dst, nal );
- }
-
- int x264_encoder_reconfig( x264_t *h, x264_param_t *param)
- {
- x264_api_t *api = (x264_api_t *)h;
-
-- return api->encoder_reconfig( api->x264, param );
-+ return x264_stack_align( api->encoder_reconfig, api->x264, param );
- }
-
- void x264_encoder_parameters( x264_t *h, x264_param_t *param )
- {
- x264_api_t *api = (x264_api_t *)h;
-
-- api->encoder_parameters( api->x264, param );
-+ x264_stack_align( api->encoder_parameters, api->x264, param );
- }
-
- int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
- {
- x264_api_t *api = (x264_api_t *)h;
-
-- return api->encoder_headers( api->x264, pp_nal, pi_nal );
-+ return x264_stack_align( api->encoder_headers, api->x264, pp_nal, pi_nal );
- }
-
- int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out )
- {
- x264_api_t *api = (x264_api_t *)h;
-
-- return api->encoder_encode( api->x264, pp_nal, pi_nal, pic_in, pic_out );
-+ return x264_stack_align( api->encoder_encode, api->x264, pp_nal, pi_nal, pic_in, pic_out );
- }
-
- int x264_encoder_delayed_frames( x264_t *h )
- {
- x264_api_t *api = (x264_api_t *)h;
-
-- return api->encoder_delayed_frames( api->x264 );
-+ return x264_stack_align( api->encoder_delayed_frames, api->x264 );
- }
-
- int x264_encoder_maximum_delayed_frames( x264_t *h )
- {
- x264_api_t *api = (x264_api_t *)h;
-
-- return api->encoder_maximum_delayed_frames( api->x264 );
-+ return x264_stack_align( api->encoder_maximum_delayed_frames, api->x264 );
- }
-
- void x264_encoder_intra_refresh( x264_t *h )
- {
- x264_api_t *api = (x264_api_t *)h;
-
-- api->encoder_intra_refresh( api->x264 );
-+ x264_stack_align( api->encoder_intra_refresh, api->x264 );
- }
-
- int x264_encoder_invalidate_reference( x264_t *h, int64_t pts )
- {
- x264_api_t *api = (x264_api_t *)h;
-
-- return api->encoder_invalidate_reference( api->x264, pts );
-+ return x264_stack_align( api->encoder_invalidate_reference, api->x264, pts );
- }
-diff --git a/encoder/encoder.c b/encoder/encoder.c
-index 243a87a5..286b112b 100644
---- a/encoder/encoder.c
-+++ b/encoder/encoder.c
-@@ -1564,7 +1564,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
- if( h->param.b_cabac )
- x264_cabac_init( h );
- else
-- x264_stack_align( x264_cavlc_init, h );
-+ x264_cavlc_init( h );
-
- mbcmp_init( h );
- chroma_dsp_init( h );
-@@ -3087,7 +3087,7 @@ static void *slices_write( x264_t *h )
- }
- }
- h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb );
-- if( x264_stack_align( slice_write, h ) )
-+ if( slice_write( h ) )
- goto fail;
- h->sh.i_first_mb = h->sh.i_last_mb + 1;
- // if i_first_mb is not the last mb in a row then go to the next mb in MBAFF order
-@@ -3122,7 +3122,7 @@ static int threaded_slices_write( x264_t *h )
- t->sh.i_last_mb = t->i_threadslice_end * h->mb.i_mb_width - 1;
- }
-
-- x264_stack_align( x264_analyse_weight_frame, h, h->mb.i_mb_height*16 + 16 );
-+ x264_analyse_weight_frame( h, h->mb.i_mb_height*16 + 16 );
-
- x264_threads_distribute_ratecontrol( h );
-
-@@ -3300,7 +3300,7 @@ int x264_encoder_encode( x264_t *h,
- return -1;
- }
- else
-- x264_stack_align( x264_adaptive_quant_frame, h, fenc, pic_in->prop.quant_offsets );
-+ x264_adaptive_quant_frame( h, fenc, pic_in->prop.quant_offsets );
-
- if( pic_in->prop.quant_offsets_free )
- pic_in->prop.quant_offsets_free( pic_in->prop.quant_offsets );
-diff --git a/encoder/lookahead.c b/encoder/lookahead.c
-index da8e6c2e..5c948cfb 100644
---- a/encoder/lookahead.c
-+++ b/encoder/lookahead.c
-@@ -67,7 +67,7 @@ static void lookahead_update_last_nonb( x264_t *h, x264_frame_t *new_nonb )
- #if HAVE_THREAD
- static void lookahead_slicetype_decide( x264_t *h )
- {
-- x264_stack_align( x264_slicetype_decide, h );
-+ x264_slicetype_decide( h );
-
- lookahead_update_last_nonb( h, h->lookahead->next.list[0] );
- int shift_frames = h->lookahead->next.list[0]->i_bframes + 1;
-@@ -82,12 +82,12 @@ static void lookahead_slicetype_decide( x264_t *h )
-
- /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
- if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) )
-- x264_stack_align( x264_slicetype_analyse, h, shift_frames );
-+ x264_slicetype_analyse( h, shift_frames );
-
- x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
- }
-
--static void *lookahead_thread( x264_t *h )
-+static void *lookahead_thread_internal( x264_t *h )
- {
- while( !h->lookahead->b_exit_thread )
- {
-@@ -121,6 +121,11 @@ static void *lookahead_thread( x264_t *h )
- x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
- return NULL;
- }
-+
-+static void *lookahead_thread( x264_t *h )
-+{
-+ return (void*)x264_stack_align( lookahead_thread_internal, h );
-+}
- #endif
-
- int x264_lookahead_init( x264_t *h, int i_slicetype_length )
-@@ -230,14 +235,14 @@ void x264_lookahead_get_frames( x264_t *h )
- if( h->frames.current[0] || !h->lookahead->next.i_size )
- return;
-
-- x264_stack_align( x264_slicetype_decide, h );
-+ x264_slicetype_decide( h );
- lookahead_update_last_nonb( h, h->lookahead->next.list[0] );
- int shift_frames = h->lookahead->next.list[0]->i_bframes + 1;
- lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames );
-
- /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
- if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) )
-- x264_stack_align( x264_slicetype_analyse, h, shift_frames );
-+ x264_slicetype_analyse( h, shift_frames );
-
- lookahead_encoder_shift( h );
- }
-diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
-index 85548f0b..b7f0ee07 100644
---- a/encoder/ratecontrol.c
-+++ b/encoder/ratecontrol.c
-@@ -574,7 +574,7 @@ int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offs
- rc->mbtree.qpbuf_pos--;
- }
- else
-- x264_stack_align( x264_adaptive_quant_frame, h, frame, quant_offsets );
-+ x264_adaptive_quant_frame( h, frame, quant_offsets );
- return 0;
- fail:
- x264_log( h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n" );
-diff --git a/tools/checkasm.c b/tools/checkasm.c
-index 440e1d23..5f1e275f 100644
---- a/tools/checkasm.c
-+++ b/tools/checkasm.c
-@@ -2913,7 +2913,7 @@ static int check_all_flags( void )
- return ret;
- }
-
--int main(int argc, char *argv[])
-+static int main_internal( int argc, char **argv )
- {
- #ifdef _WIN32
- /* Disable the Windows Error Reporting dialog */
-@@ -2973,3 +2973,7 @@ int main(int argc, char *argv[])
- return 0;
- }
-
-+int main( int argc, char **argv )
-+{
-+ return x264_stack_align( main_internal, argc, argv );
-+}
-diff --git a/x264.c b/x264.c
-index b02ba49a..83bc9660 100644
---- a/x264.c
-+++ b/x264.c
-@@ -351,7 +351,7 @@ static void print_version_info( void )
- #endif
- }
-
--int main( int argc, char **argv )
-+static int main_internal( int argc, char **argv )
- {
- x264_param_t param;
- cli_opt_t opt = {0};
-@@ -403,6 +403,11 @@ int main( int argc, char **argv )
- return ret;
- }
-
-+int main( int argc, char **argv )
-+{
-+ return x264_stack_align( main_internal, argc, argv );
-+}
-+
- static char const *strtable_lookup( const char * const table[], int idx )
- {
- int i = 0; while( table[i] ) i++;
---
-2.11.0
-