contrib: Add x264 patch to fix AVX-512 alignment.

author Bradley Sepos <bradley@bradleysepos.com>

Sat, 14 Apr 2018 21:12:38 +0000 (17:12 -0400)

committer Bradley Sepos <bradley@bradleysepos.com>

Sat, 14 Apr 2018 21:12:50 +0000 (17:12 -0400)
author Bradley Sepos <bradley@bradleysepos.com>
Sat, 14 Apr 2018 21:12:38 +0000 (17:12 -0400)
committer Bradley Sepos <bradley@bradleysepos.com>
Sat, 14 Apr 2018 21:12:50 +0000 (17:12 -0400)
diff --git a/contrib/x264/A01-avx512-alignment.patch b/contrib/x264/A01-avx512-alignment.patch

new file mode 100644 (file)

index 0000000..8c744e4
--- /dev/null
+++ b/contrib/x264/A01-avx512-alignment.patch
@@ -0,0 +1,74 @@
+From 9384a7389b251b59a079ccc3d1af9edd42e3d5e6 Mon Sep 17 00:00:00 2001
+From: Henrik Gramner <henrik@gramner.com>
+Date: Mon, 15 Jan 2018 21:42:59 +0100
+Subject: [PATCH] Correctly align buffers for AVX and AVX-512
+
+Fixes segfaults on Windows where the stack is only 16-byte aligned.
+---
+ common/common.h       | 8 ++++----
+ encoder/analyse.c     | 4 ++--
+ encoder/ratecontrol.c | 2 +-
+ 3 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/common/common.h b/common/common.h
+index 27a56fbd..84d39ce3 100644
+--- a/common/common.h
++++ b/common/common.h
+@@ -569,16 +569,16 @@ struct x264_t
+             ALIGNED_64( pixel fdec_buf[54*FDEC_STRIDE] );
+ 
+             /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
+-            ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
+-            ALIGNED_16( pixel i8x8_fdec_buf[16*16] );
++            ALIGNED_32( pixel i4x4_fdec_buf[16*16] );
++            ALIGNED_32( pixel i8x8_fdec_buf[16*16] );
+             ALIGNED_64( dctcoef i8x8_dct_buf[3][64] );
+             ALIGNED_64( dctcoef i4x4_dct_buf[15][16] );
+             uint32_t i4x4_nnz_buf[4];
+             uint32_t i8x8_nnz_buf[4];
+ 
+             /* Psy trellis DCT data */
+-            ALIGNED_16( dctcoef fenc_dct8[4][64] );
+-            ALIGNED_16( dctcoef fenc_dct4[16][16] );
++            ALIGNED_64( dctcoef fenc_dct8[4][64] );
++            ALIGNED_64( dctcoef fenc_dct4[16][16] );
+ 
+             /* Psy RD SATD/SA8D scores cache */
+             ALIGNED_64( uint32_t fenc_satd_cache[32] );
+diff --git a/encoder/analyse.c b/encoder/analyse.c
+index ecd6dae1..3577e5bc 100644
+--- a/encoder/analyse.c
++++ b/encoder/analyse.c
+@@ -558,7 +558,7 @@ static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra,
+ /* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
+ static void inline psy_trellis_init( x264_t *h, int do_both_dct )
+ {
+-    ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0};
++    ALIGNED_64( static pixel zero[16*FDEC_STRIDE] ) = {0};
+ 
+     if( do_both_dct || h->mb.b_transform_8x8 )
+         h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
+@@ -2011,7 +2011,7 @@ static void mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
+             }
+             else
+             {
+-                ALIGNED_ARRAY_32( pixel, pixuv, [2],[16*FENC_STRIDE] );
++                ALIGNED_ARRAY_64( pixel, pixuv, [2],[16*FENC_STRIDE] );
+                 int chromapix = h->luma2chroma_pixel[PIXEL_16x16];
+                 int v_shift = CHROMA_V_SHIFT;
+ 
+diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
+index b09c2434..99803718 100644
+--- a/encoder/ratecontrol.c
++++ b/encoder/ratecontrol.c
+@@ -243,7 +243,7 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
+     stride <<= b_field;
+     if( b_chroma )
+     {
+-        ALIGNED_ARRAY_32( pixel, pix,[FENC_STRIDE*16] );
++        ALIGNED_ARRAY_64( pixel, pix,[FENC_STRIDE*16] );
+         int chromapix = h->luma2chroma_pixel[PIXEL_16x16];
+         int shift = 7 - CHROMA_V_SHIFT;
+ 
+-- 
+2.11.0
author	Bradley Sepos <bradley@bradleysepos.com>
	Sat, 14 Apr 2018 21:12:38 +0000 (17:12 -0400)
committer	Bradley Sepos <bradley@bradleysepos.com>
	Sat, 14 Apr 2018 21:12:50 +0000 (17:12 -0400)