From a8a3248bc0894e33fd548565d2d67bd6ce564c35 Mon Sep 17 00:00:00 2001 From: Bradley Sepos Date: Sat, 14 Apr 2018 17:12:38 -0400 Subject: [PATCH] contrib: Add x264 patch to fix AVX-512 alignment. --- contrib/x264/A01-avx512-alignment.patch | 74 +++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 contrib/x264/A01-avx512-alignment.patch diff --git a/contrib/x264/A01-avx512-alignment.patch b/contrib/x264/A01-avx512-alignment.patch new file mode 100644 index 000000000..8c744e432 --- /dev/null +++ b/contrib/x264/A01-avx512-alignment.patch @@ -0,0 +1,74 @@ +From 9384a7389b251b59a079ccc3d1af9edd42e3d5e6 Mon Sep 17 00:00:00 2001 +From: Henrik Gramner +Date: Mon, 15 Jan 2018 21:42:59 +0100 +Subject: [PATCH] Correctly align buffers for AVX and AVX-512 + +Fixes segfaults on Windows where the stack is only 16-byte aligned. +--- + common/common.h | 8 ++++---- + encoder/analyse.c | 4 ++-- + encoder/ratecontrol.c | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/common/common.h b/common/common.h +index 27a56fbd..84d39ce3 100644 +--- a/common/common.h ++++ b/common/common.h +@@ -569,16 +569,16 @@ struct x264_t + ALIGNED_64( pixel fdec_buf[54*FDEC_STRIDE] ); + + /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */ +- ALIGNED_16( pixel i4x4_fdec_buf[16*16] ); +- ALIGNED_16( pixel i8x8_fdec_buf[16*16] ); ++ ALIGNED_32( pixel i4x4_fdec_buf[16*16] ); ++ ALIGNED_32( pixel i8x8_fdec_buf[16*16] ); + ALIGNED_64( dctcoef i8x8_dct_buf[3][64] ); + ALIGNED_64( dctcoef i4x4_dct_buf[15][16] ); + uint32_t i4x4_nnz_buf[4]; + uint32_t i8x8_nnz_buf[4]; + + /* Psy trellis DCT data */ +- ALIGNED_16( dctcoef fenc_dct8[4][64] ); +- ALIGNED_16( dctcoef fenc_dct4[16][16] ); ++ ALIGNED_64( dctcoef fenc_dct8[4][64] ); ++ ALIGNED_64( dctcoef fenc_dct4[16][16] ); + + /* Psy RD SATD/SA8D scores cache */ + ALIGNED_64( uint32_t fenc_satd_cache[32] ); +diff --git a/encoder/analyse.c b/encoder/analyse.c +index ecd6dae1..3577e5bc 100644 +--- a/encoder/analyse.c ++++ b/encoder/analyse.c +@@ -558,7 +558,7 @@ static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, + /* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */ + static void inline psy_trellis_init( x264_t *h, int do_both_dct ) + { +- ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0}; ++ ALIGNED_64( static pixel zero[16*FDEC_STRIDE] ) = {0}; + + if( do_both_dct || h->mb.b_transform_8x8 ) + h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero ); +@@ -2011,7 +2011,7 @@ static void mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) + } + else + { +- ALIGNED_ARRAY_32( pixel, pixuv, [2],[16*FENC_STRIDE] ); ++ ALIGNED_ARRAY_64( pixel, pixuv, [2],[16*FENC_STRIDE] ); + int chromapix = h->luma2chroma_pixel[PIXEL_16x16]; + int v_shift = CHROMA_V_SHIFT; + +diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c +index b09c2434..99803718 100644 +--- a/encoder/ratecontrol.c ++++ b/encoder/ratecontrol.c +@@ -243,7 +243,7 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2 + stride <<= b_field; + if( b_chroma ) + { +- ALIGNED_ARRAY_32( pixel, pix,[FENC_STRIDE*16] ); ++ ALIGNED_ARRAY_64( pixel, pix,[FENC_STRIDE*16] ); + int chromapix = h->luma2chroma_pixel[PIXEL_16x16]; + int shift = 7 - CHROMA_V_SHIFT; + +-- +2.11.0 -- 2.40.0