Fix undefined behavior: access within misaligned address

author Anton Mitrofanov <BugMaster@narod.ru>
Sun, 1 Mar 2020 11:42:50 +0000 (14:42 +0300)
committer Anton Mitrofanov <BugMaster@narod.ru>
Thu, 9 Apr 2020 12:36:22 +0000 (15:36 +0300)
diff --git a/common/base.h b/common/base.h

index cc0bce4ea212b08e0887dd7382079ec7741fcfad..d164d128e8f5b665d26365c25557026f37fa91dd 100644 (file)
--- a/common/base.h
+++ b/common/base.h
@@ -61,11 +61,11 @@
   * Mn: load or store n bits, aligned, native-endian
   * CPn: copy n bits, aligned, native-endian
   * we don't use memcpy for CPn because memcpy's args aren't assumed to be aligned */
-typedef union { uint16_t i; uint8_t  c[2]; } MAY_ALIAS x264_union16_t;
-typedef union { uint32_t i; uint16_t b[2]; uint8_t  c[4]; } MAY_ALIAS x264_union32_t;
-typedef union { uint64_t i; uint32_t a[2]; uint16_t b[4]; uint8_t c[8]; } MAY_ALIAS x264_union64_t;
+typedef union { uint16_t i; uint8_t  b[2]; } MAY_ALIAS x264_union16_t;
+typedef union { uint32_t i; uint16_t w[2]; uint8_t  b[4]; } MAY_ALIAS x264_union32_t;
+typedef union { uint64_t i; uint32_t d[2]; uint16_t w[4]; uint8_t b[8]; } MAY_ALIAS x264_union64_t;
  typedef struct { uint64_t i[2]; } x264_uint128_t;
-typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_t;
+typedef union { x264_uint128_t i; uint64_t q[2]; uint32_t d[4]; uint16_t w[8]; uint8_t b[16]; } MAY_ALIAS x264_union128_t;
  #define M16(src) (((x264_union16_t*)(src))->i)
  #define M32(src) (((x264_union32_t*)(src))->i)
  #define M64(src) (((x264_union64_t*)(src))->i)
diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c

index 4efa9047c5173c4b5ed16bf308c379c719355857..f96483e529a17b2b690880b55f3ff3fac60efc15 100644 (file)
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -739,28 +739,32 @@ PLANE_INTERLEAVE(avx)
  #define MC_CLIP_ADD(s,x)\
  do\
  {\
-    int temp;\
+    int temp_s = s;\
+    int temp_x = x;\
      asm("movd       %0, %%xmm0     \n"\
-        "movd       %2, %%xmm1     \n"\
+        "movd       %1, %%xmm1     \n"\
          "paddsw %%xmm1, %%xmm0     \n"\
-        "movd   %%xmm0, %1         \n"\
-        :"+m"(s), "=&r"(temp)\
-        :"m"(x)\
+        "movd   %%xmm0, %0         \n"\
+        :"+&r"(temp_s)\
+        :"r"(temp_x)\
      );\
-    s = temp;\
+    s = temp_s;\
  } while( 0 )
  
  #undef MC_CLIP_ADD2
  #define MC_CLIP_ADD2(s,x)\
  do\
  {\
+    x264_union32_t temp = { .w={ (s)[0], (s)[1] } };\
      asm("movd       %0, %%xmm0     \n"\
          "movd       %1, %%xmm1     \n"\
          "paddsw %%xmm1, %%xmm0     \n"\
          "movd   %%xmm0, %0         \n"\
-        :"+m"(M32(s))\
+        :"+&r"(temp)\
          :"m"(M32(x))\
      );\
+    (s)[0] = temp.w[0];\
+    (s)[1] = temp.w[1];\
  } while( 0 )
  #endif
  
diff --git a/common/x86/util.h b/common/x86/util.h

index 12b251a282bc9dbf0b10ed4784eb1fead7ed9dc3..d928b3d8edf33fa83d444d713b2dd22c346c36b9 100644 (file)
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -33,7 +33,7 @@
  #undef M128_ZERO
  #define M128_ZERO ((__m128){0,0,0,0})
  #define x264_union128_t x264_union128_sse_t
-typedef union { __m128 i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_sse_t;
+typedef union { __m128 i; uint64_t q[2]; uint32_t d[4]; uint16_t w[8]; uint8_t b[16]; } MAY_ALIAS x264_union128_sse_t;
  #if HAVE_VECTOREXT
  typedef uint32_t v4si __attribute__((vector_size (16)));
  #endif
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 75518547af2fcaa771d622d597ceddc66bcc12e3..4d7d6428e705b342f488b6249f08f8afad931a79 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -49,8 +49,8 @@ typedef struct
      int i_cost4x8[4]; /* cost per 8x8 partition */
      int i_cost16x8;
      int i_cost8x16;
-    /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
-    ALIGNED_4( int16_t mvc[32][5][2] );
+    /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3], [ref][5] is for alignment */
+    ALIGNED_8( int16_t mvc[32][6][2] );
  } x264_mb_analysis_list_t;
  
  typedef struct
@@ -1256,7 +1256,7 @@ static void mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  {
      x264_me_t m;
      int i_mvc;
-    ALIGNED_4( int16_t mvc[8][2] );
+    ALIGNED_ARRAY_8( int16_t, mvc,[8],[2] );
      int i_halfpel_thresh = INT_MAX;
      int *p_halfpel_thresh = (a->b_early_terminate && h->mb.pic.i_fref[0]>1) ? &i_halfpel_thresh : NULL;
  
@@ -1481,7 +1481,7 @@ static void mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i_best
  {
      x264_me_t m;
      pixel **p_fenc = h->mb.pic.p_fenc;
-    ALIGNED_4( int16_t mvc[3][2] );
+    ALIGNED_ARRAY_8( int16_t, mvc,[3],[2] );
  
      /* XXX Needed for x264_mb_predict_mv */
      h->mb.i_partition = D_16x8;
@@ -1547,7 +1547,7 @@ static void mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i_best
  {
      x264_me_t m;
      pixel **p_fenc = h->mb.pic.p_fenc;
-    ALIGNED_4( int16_t mvc[3][2] );
+    ALIGNED_ARRAY_8( int16_t, mvc,[3],[2] );
  
      /* XXX Needed for x264_mb_predict_mv */
      h->mb.i_partition = D_8x16;
@@ -1894,7 +1894,7 @@ static void mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
      pixel *src0, *src1;
      intptr_t stride0 = 16, stride1 = 16;
      int i_ref, i_mvc;
-    ALIGNED_4( int16_t mvc[9][2] );
+    ALIGNED_ARRAY_8( int16_t, mvc,[9],[2] );
      int try_skip = a->b_try_skip;
      int list1_skipped = 0;
      int i_halfpel_thresh[2] = {INT_MAX, INT_MAX};
@@ -2360,7 +2360,7 @@ static void mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
  static void mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
  {
      ALIGNED_ARRAY_32( pixel, pix,[2],[16*8] );
-    ALIGNED_4( int16_t mvc[3][2] );
+    ALIGNED_ARRAY_8( int16_t, mvc,[3],[2] );
  
      h->mb.i_partition = D_16x8;
      a->i_cost16x8bi = 0;
@@ -2454,7 +2454,7 @@ static void mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i_best
  static void mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
  {
      ALIGNED_ARRAY_16( pixel, pix,[2],[8*16] );
-    ALIGNED_4( int16_t mvc[3][2] );
+    ALIGNED_ARRAY_8( int16_t, mvc,[3],[2] );
  
      h->mb.i_partition = D_8x16;
      a->i_cost8x16bi = 0;
diff --git a/encoder/me.h b/encoder/me.h

index 5bc4d72952ac40a9a4156cdaa6468d982930b472..c2eff106ddfb728d671385585467ad2acd058d49 100644 (file)
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -52,7 +52,7 @@ typedef struct
      /* output */
      int cost_mv;        /* lambda * nbits for the chosen mv */
      int cost;           /* satd + lambda * nbits */
-    ALIGNED_4( int16_t mv[2] );
+    ALIGNED_8( int16_t mv[2] );
  } ALIGNED_64( x264_me_t );
  
  #define x264_me_search_ref x264_template(me_search_ref)
diff --git a/encoder/rdo.c b/encoder/rdo.c

index aa29db5ecc62ef63cb87f4564a8e3d53be0837ef..548560673b93df62d95d28ffcde9cae82a6945f3 100644 (file)
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -705,8 +705,12 @@ int quant_trellis_cabac( x264_t *h, dctcoef *dct,
      }
  
  #if HAVE_MMX && ARCH_X86_64 && !defined( __MACH__ )
+    uint64_t level_state0;
+    memcpy( &level_state0, cabac_state, sizeof(uint64_t) );
+    uint16_t level_state1;
+    memcpy( &level_state1, cabac_state+8, sizeof(uint16_t) );
  #define TRELLIS_ARGS unquant_mf, zigzag, lambda2, last_nnz, orig_coefs, quant_coefs, dct,\
-                     cabac_state_sig, cabac_state_last, M64(cabac_state), M16(cabac_state+8)
+                     cabac_state_sig, cabac_state_last, level_state0, level_state1
      if( num_coefs == 16 && !dc )
          if( b_chroma || !h->mb.i_psy_trellis )
              return h->quantf.trellis_cabac_4x4( TRELLIS_ARGS, b_ac );
diff --git a/encoder/slicetype.c b/encoder/slicetype.c

index de76795f729bc86c0972c12bf0d8564bc28aec61..d7827c50f0431166f6584dcadc26147f54450bbd 100644 (file)
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -658,7 +658,7 @@ static void slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
          {
              int i_mvc = 0;
              int16_t (*fenc_mv)[2] = fenc_mvs[l];
-            ALIGNED_4( int16_t mvc[4][2] );
+            ALIGNED_ARRAY_8( int16_t, mvc,[4],[2] );
  
              /* Reverse-order MV prediction. */
              M32( mvc[0] ) = 0;
diff --git a/tools/checkasm-a.asm b/tools/checkasm-a.asm

index 0faa83a5c07321ea7166d7f9d9db92d4494447b2..b69abc401854b33e545e7f5aad763874c1652615 100644 (file)
--- a/tools/checkasm-a.asm
+++ b/tools/checkasm-a.asm
@@ -58,7 +58,6 @@ SECTION .text
  cextern_naked puts
  
  ; max number of args used by any x264 asm function.
-; (max_args % 4) must equal 3 for stack alignment
  %define max_args 15
  
  %if ARCH_X86_64
@@ -88,7 +87,7 @@ cglobal checkasm_stack_clobber, 1,2
  ; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
  ;-----------------------------------------------------------------------------
  INIT_XMM
-cglobal checkasm_call, 2,15,16,max_args*8+8
+cglobal checkasm_call, 2,15,16,-1*(((max_args+1)*8+STACK_ALIGNMENT-1) & ~(STACK_ALIGNMENT-1))
      mov  r6, r0
      mov  [rsp+max_args*8], r1
  
@@ -103,14 +102,14 @@ cglobal checkasm_call, 2,15,16,max_args*8+8
      mov  r5, r11mp
      %assign i 6
      %rep max_args-6
-        mov  r9, [rsp+stack_offset+(i+1)*8]
+        mov  r9, [rstk+stack_offset+(i+1)*8]
          mov  [rsp+(i-6)*8], r9
          %assign i i+1
      %endrep
  %else
      %assign i 4
      %rep max_args-4
-        mov  r9, [rsp+stack_offset+(i+7)*8]
+        mov  r9, [rstk+stack_offset+(i+7)*8]
          mov  [rsp+i*8], r9
          %assign i i+1
      %endrep
@@ -176,16 +175,19 @@ cglobal checkasm_call, 2,15,16,max_args*8+8
  ;-----------------------------------------------------------------------------
  ; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
  ;-----------------------------------------------------------------------------
-cglobal checkasm_call, 1,7
+cglobal checkasm_call, 2,7,0,-1*(((max_args+1)*4+STACK_ALIGNMENT-1) & ~(STACK_ALIGNMENT-1))
+    mov  [esp+max_args*4], r1
+%assign i 0
+%rep max_args
+    mov  r1, [rstk+stack_offset+12+i*4]
+    mov  [esp+i*4], r1
+    %assign i i+1
+%endrep
      mov  r3, n3
      mov  r4, n4
      mov  r5, n5
      mov  r6, n6
-%rep max_args
-    push dword [esp+24+max_args*4]
-%endrep
      call r0
-    add  esp, max_args*4
      xor  r3, n3
      xor  r4, n4
      xor  r5, n5
@@ -197,10 +199,9 @@ cglobal checkasm_call, 1,7
      mov  r3, eax
      mov  r4, edx
      lea  r1, [error_message]
-    push r1
+    mov  [esp], r1
      call puts
-    add  esp, 4
-    mov  r1, r1m
+    mov  r1, [esp+max_args*4]
      mov  dword [r1], 0
      mov  edx, r4
      mov  eax, r3
author	Anton Mitrofanov <BugMaster@narod.ru>
	Sun, 1 Mar 2020 11:42:50 +0000 (14:42 +0300)
committer	Anton Mitrofanov <BugMaster@narod.ru>
	Thu, 9 Apr 2020 12:36:22 +0000 (15:36 +0300)
common/base.h		patch | blob | history
common/x86/mc-c.c		patch | blob | history
common/x86/util.h		patch | blob | history
encoder/analyse.c		patch | blob | history
encoder/me.h		patch | blob | history
encoder/rdo.c		patch | blob | history
encoder/slicetype.c		patch | blob | history
tools/checkasm-a.asm		patch | blob | history