Drop ALIGNED_N and ALIGNED_ARRAY_N in favor of using explicit alignment.
This will allow us to increase the native alignment without unnecessarily
increasing the alignment of everything that's currently 32-byte aligned.
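For reference, a condensed sketch of how the explicit macros fit together after this change (taken from the preprocessor hunks below; only the GCC-style DECLARE_ALIGNED branch appears in this excerpt):

    #define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))

    #define ALIGNED_4( var )  DECLARE_ALIGNED( var, 4 )
    #define ALIGNED_8( var )  DECLARE_ALIGNED( var, 8 )
    #define ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )

    #if ARCH_X86 || ARCH_X86_64
    #define NATIVE_ALIGN 32
    #define ALIGNED_32( var ) DECLARE_ALIGNED( var, 32 )
    #else
    /* Platforms without 32-byte (AVX2) vectors only need 16-byte alignment. */
    #define NATIVE_ALIGN 16
    #define ALIGNED_32       ALIGNED_16
    #define ALIGNED_ARRAY_32 ALIGNED_ARRAY_16
    #endif

With sufficient stack alignment, a declaration such as ALIGNED_ARRAY_32( dctcoef, dct4x4,[16],[16] ) therefore expands to dctcoef dct4x4[16][16] __attribute__((aligned(32))) on x86, while other architectures keep their existing 16-byte alignment instead of silently inheriting whatever ALIGNED_N happened to resolve to.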
/* Current MB DCT coeffs */
struct
{
- ALIGNED_N( dctcoef luma16x16_dc[3][16] );
+ ALIGNED_32( dctcoef luma16x16_dc[3][16] );
ALIGNED_16( dctcoef chroma_dc[2][8] );
// FIXME share memory?
- ALIGNED_N( dctcoef luma8x8[12][64] );
- ALIGNED_N( dctcoef luma4x4[16*3][16] );
+ ALIGNED_32( dctcoef luma8x8[12][64] );
+ ALIGNED_32( dctcoef luma4x4[16*3][16] );
} dct;
/* MB table and cache for current frame/mb */
/* space for p_fenc and p_fdec */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
- ALIGNED_N( pixel fenc_buf[48*FENC_STRIDE] );
- ALIGNED_N( pixel fdec_buf[52*FDEC_STRIDE] );
+ ALIGNED_32( pixel fenc_buf[48*FENC_STRIDE] );
+ ALIGNED_32( pixel fdec_buf[52*FDEC_STRIDE] );
/* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
ALIGNED_16( dctcoef fenc_dct4[16][16] );
/* Psy RD SATD/SA8D scores cache */
- ALIGNED_N( uint64_t fenc_hadamard_cache[9] );
- ALIGNED_N( uint32_t fenc_satd_cache[32] );
+ ALIGNED_32( uint64_t fenc_hadamard_cache[9] );
+ ALIGNED_32( uint32_t fenc_satd_cache[32] );
/* pointer over mb of the frame to be compressed */
pixel *p_fenc[3]; /* y,u,v */
uint32_t (*nr_residual_sum)[64];
uint32_t *nr_count;
- ALIGNED_N( udctcoef nr_offset_denoise[4][64] );
- ALIGNED_N( uint32_t nr_residual_sum_buf[2][4][64] );
+ ALIGNED_32( udctcoef nr_offset_denoise[4][64] );
+ ALIGNED_32( uint32_t nr_residual_sum_buf[2][4][64] );
uint32_t nr_count_buf[2][4];
uint8_t luma2chroma_pixel[7]; /* Subsampled pixel size */
int mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
int i_mode = x264_size2pixel[height][width];
intptr_t i_stride0 = 16, i_stride1 = 16;
- ALIGNED_ARRAY_N( pixel, tmp0,[16*16] );
- ALIGNED_ARRAY_N( pixel, tmp1,[16*16] );
+ ALIGNED_ARRAY_32( pixel, tmp0,[16*16] );
+ ALIGNED_ARRAY_32( pixel, tmp1,[16*16] );
pixel *src0, *src1;
MC_LUMA_BI( 0 );
#else
#define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
#endif
-#define ALIGNED_32( var ) DECLARE_ALIGNED( var, 32 )
-#define ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
-#define ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 )
+
#define ALIGNED_4( var ) DECLARE_ALIGNED( var, 4 )
+#define ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 )
+#define ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
// ARM compilers don't reliably align stack variables
// - EABI requires only 8 byte stack alignment to be maintained
#if ARCH_ARM && SYS_MACOSX
#define ALIGNED_ARRAY_8( ... ) ALIGNED_ARRAY_EMU( 7, __VA_ARGS__ )
#else
-#define ALIGNED_ARRAY_8( type, name, sub1, ... )\
- ALIGNED_8( type name sub1 __VA_ARGS__ )
+#define ALIGNED_ARRAY_8( type, name, sub1, ... ) ALIGNED_8( type name sub1 __VA_ARGS__ )
#endif
#if ARCH_ARM
#define ALIGNED_ARRAY_16( ... ) ALIGNED_ARRAY_EMU( 15, __VA_ARGS__ )
#else
-#define ALIGNED_ARRAY_16( type, name, sub1, ... )\
- ALIGNED_16( type name sub1 __VA_ARGS__ )
+#define ALIGNED_ARRAY_16( type, name, sub1, ... ) ALIGNED_16( type name sub1 __VA_ARGS__ )
#endif
#define EXPAND(x) x
+#if ARCH_X86 || ARCH_X86_64
+#define NATIVE_ALIGN 32
+#define ALIGNED_32( var ) DECLARE_ALIGNED( var, 32 )
#if STACK_ALIGNMENT >= 32
-#define ALIGNED_ARRAY_32( type, name, sub1, ... )\
- ALIGNED_32( type name sub1 __VA_ARGS__ )
+#define ALIGNED_ARRAY_32( type, name, sub1, ... ) ALIGNED_32( type name sub1 __VA_ARGS__ )
#else
#define ALIGNED_ARRAY_32( ... ) EXPAND( ALIGNED_ARRAY_EMU( 31, __VA_ARGS__ ) )
#endif
-
#define ALIGNED_ARRAY_64( ... ) EXPAND( ALIGNED_ARRAY_EMU( 63, __VA_ARGS__ ) )
-
-/* For AVX2 */
-#if ARCH_X86 || ARCH_X86_64
-#define NATIVE_ALIGN 32
-#define ALIGNED_N ALIGNED_32
-#define ALIGNED_ARRAY_N ALIGNED_ARRAY_32
#else
#define NATIVE_ALIGN 16
-#define ALIGNED_N ALIGNED_16
-#define ALIGNED_ARRAY_N ALIGNED_ARRAY_16
+#define ALIGNED_32 ALIGNED_16
+#define ALIGNED_ARRAY_32 ALIGNED_ARRAY_16
+#define ALIGNED_ARRAY_64 ALIGNED_ARRAY_16
#endif
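The ALIGNED_ARRAY_EMU fallback referenced above is defined outside this excerpt. As a hedged sketch of the usual emulation technique (assuming the actual definition follows the same pattern), it over-allocates a raw byte buffer and rounds the address up by hand, which is why the ARM comment above matters: the compiler's own stack alignment cannot be trusted past 8 or 16 bytes there.

    /* Sketch only; the real macro may differ in detail. Requires <stdint.h>.
     * Reserve `mask` spare bytes, then round the buffer address up to the next
     * (mask+1)-byte boundary and access the array through the aligned pointer. */
    #define ALIGNED_ARRAY_EMU( mask, type, name, sub1, ... )\
        uint8_t name##_u [sizeof(type sub1 __VA_ARGS__) + mask];\
        type (*name) __VA_ARGS__ = (void*)(((intptr_t)name##_u + mask) & ~(intptr_t)(mask))

Under that scheme, ALIGNED_ARRAY_32( dctcoef, dct4x4,[16],[16] ) on a target without 32-byte stack alignment becomes a dctcoef (*dct4x4)[16] pointing into a slightly oversized local buffer, so dct4x4[i][j] indexing is unchanged for the callers below.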
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
static ALWAYS_INLINE int x264_mb_analyse_inter_p4x4_chroma_internal( x264_t *h, x264_mb_analysis_t *a,
pixel **p_fref, int i8x8, int size, int chroma )
{
- ALIGNED_ARRAY_N( pixel, pix1,[16*16] );
+ ALIGNED_ARRAY_32( pixel, pix1,[16*16] );
pixel *pix2 = pix1+8;
int i_stride = h->mb.pic.i_stride[1];
int chroma_h_shift = chroma <= CHROMA_422;
static ALWAYS_INLINE int x264_analyse_bi_chroma( x264_t *h, x264_mb_analysis_t *a, int idx, int i_pixel )
{
- ALIGNED_ARRAY_N( pixel, pix, [4],[16*16] );
- ALIGNED_ARRAY_N( pixel, bi, [2],[16*16] );
+ ALIGNED_ARRAY_32( pixel, pix, [4],[16*16] );
+ ALIGNED_ARRAY_32( pixel, bi, [2],[16*16] );
int i_chroma_cost = 0;
int chromapix = h->luma2chroma_pixel[i_pixel];
static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
{
- ALIGNED_ARRAY_N( pixel, pix0,[16*16] );
- ALIGNED_ARRAY_N( pixel, pix1,[16*16] );
+ ALIGNED_ARRAY_32( pixel, pix0,[16*16] );
+ ALIGNED_ARRAY_32( pixel, pix1,[16*16] );
pixel *src0, *src1;
intptr_t stride0 = 16, stride1 = 16;
int i_ref, i_mvc;
}
else
{
- ALIGNED_ARRAY_N( pixel, pixuv, [2],[16*FENC_STRIDE] );
+ ALIGNED_ARRAY_32( pixel, pixuv, [2],[16*FENC_STRIDE] );
int chromapix = h->luma2chroma_pixel[PIXEL_16x16];
int v_shift = CHROMA_V_SHIFT;
static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
{
- ALIGNED_ARRAY_N( pixel, pix,[2],[16*8] );
+ ALIGNED_ARRAY_32( pixel, pix,[2],[16*8] );
ALIGNED_4( int16_t mvc[3][2] );
h->mb.i_partition = D_16x8;
pixel *p_src = h->mb.pic.p_fenc[p];
pixel *p_dst = h->mb.pic.p_fdec[p];
- ALIGNED_ARRAY_N( dctcoef, dct4x4,[16],[16] );
- ALIGNED_ARRAY_N( dctcoef, dct_dc4x4,[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct4x4,[16],[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct_dc4x4,[16] );
int nz, block_cbp = 0;
int decimate_score = h->mb.b_dct_decimate ? 0 : 9;
int i_decimate_score = b_decimate ? 0 : 7;
int nz_ac = 0;
- ALIGNED_ARRAY_N( dctcoef, dct4x4,[8],[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct4x4,[8],[16] );
if( h->mb.b_lossless )
{
}
else if( h->mb.b_transform_8x8 )
{
- ALIGNED_ARRAY_N( dctcoef, dct8x8,[4],[64] );
+ ALIGNED_ARRAY_32( dctcoef, dct8x8,[4],[64] );
b_decimate &= !h->mb.b_trellis || !h->param.b_cabac; // 8x8 trellis is inherently optimal decimation for CABAC
for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
}
else
{
- ALIGNED_ARRAY_N( dctcoef, dct4x4,[16],[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct4x4,[16],[16] );
for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
{
int quant_cat = p ? CQM_4PC : CQM_4PY;
*****************************************************************************/
static ALWAYS_INLINE int x264_macroblock_probe_skip_internal( x264_t *h, int b_bidir, int plane_count, int chroma )
{
- ALIGNED_ARRAY_N( dctcoef, dct4x4,[8],[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct4x4,[8],[16] );
ALIGNED_ARRAY_16( dctcoef, dctscan,[16] );
ALIGNED_4( int16_t mvp[2] );
int i_qp = h->mb.i_qp;
int quant_cat = p ? CQM_8PC : CQM_8PY;
pixel *p_fenc = h->mb.pic.p_fenc[p] + 8*x + 8*y*FENC_STRIDE;
pixel *p_fdec = h->mb.pic.p_fdec[p] + 8*x + 8*y*FDEC_STRIDE;
- ALIGNED_ARRAY_N( dctcoef, dct8x8,[64] );
+ ALIGNED_ARRAY_32( dctcoef, dct8x8,[64] );
h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
int nnz8x8 = x264_quant_8x8( h, dct8x8, i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 0, p, i8 );
pixel *p_fenc = h->mb.pic.p_fenc[p] + 8*x + 8*y*FENC_STRIDE;
pixel *p_fdec = h->mb.pic.p_fdec[p] + 8*x + 8*y*FDEC_STRIDE;
int i_decimate_8x8 = b_decimate ? 0 : 4;
- ALIGNED_ARRAY_N( dctcoef, dct4x4,[4],[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct4x4,[4],[16] );
int nnz8x8 = 0;
h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
i_qp = h->mb.i_chroma_qp;
for( int ch = 0; ch < 2; ch++ )
{
- ALIGNED_ARRAY_N( dctcoef, dct4x4,[2],[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct4x4,[2],[16] );
pixel *p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + (chroma422?8:4)*y*FENC_STRIDE;
pixel *p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + (chroma422?8:4)*y*FDEC_STRIDE;
}
else
{
- ALIGNED_ARRAY_N( dctcoef, dct4x4,[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct4x4,[16] );
h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
nz = x264_quant_4x4( h, dct4x4, i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 0, p, i4 );
h->mb.cache.non_zero_count[x264_scan8[p*16+i4]] = nz;
int nz;
pixel *p_src = &h->mb.pic.p_fenc[p][block_idx_xy_fenc[idx]];
pixel *p_dst = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[idx]];
- ALIGNED_ARRAY_N( dctcoef, dct4x4,[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct4x4,[16] );
if( b_predict )
{
int nz;
pixel *p_src = &h->mb.pic.p_fenc[p][8*x + 8*y*FENC_STRIDE];
pixel *p_dst = &h->mb.pic.p_fdec[p][8*x + 8*y*FDEC_STRIDE];
- ALIGNED_ARRAY_N( dctcoef, dct8x8,[64] );
+ ALIGNED_ARRAY_32( dctcoef, dct8x8,[64] );
ALIGNED_ARRAY_32( pixel, edge_buf,[36] );
if( b_predict )
int omx, omy, pmx, pmy;
pixel *p_fenc = m->p_fenc[0];
pixel *p_fref_w = m->p_fref_w;
- ALIGNED_ARRAY_N( pixel, pix,[16*16] );
+ ALIGNED_ARRAY_32( pixel, pix,[16*16] );
ALIGNED_ARRAY_8( int16_t, mvc_temp,[16],[2] );
ALIGNED_ARRAY_16( int, costs,[16] );
int chroma_v_shift = CHROMA_V_SHIFT;
int mvy_offset = chroma_v_shift & MB_INTERLACED & m->i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
- ALIGNED_ARRAY_N( pixel, pix,[64*18] ); // really 17x17x2, but round up for alignment
+ ALIGNED_ARRAY_32( pixel, pix,[64*18] ); // really 17x17x2, but round up for alignment
ALIGNED_ARRAY_16( int, costs,[4] );
int bmx = m->mv[0];
const int i_pixel = m0->i_pixel;
const int bw = x264_pixel_size[i_pixel].w;
const int bh = x264_pixel_size[i_pixel].h;
- ALIGNED_ARRAY_N( pixel, pixy_buf,[2],[9][16*16] );
- ALIGNED_ARRAY_N( pixel, pixu_buf,[2],[9][16*16] );
- ALIGNED_ARRAY_N( pixel, pixv_buf,[2],[9][16*16] );
+ ALIGNED_ARRAY_32( pixel, pixy_buf,[2],[9][16*16] );
+ ALIGNED_ARRAY_32( pixel, pixu_buf,[2],[9][16*16] );
+ ALIGNED_ARRAY_32( pixel, pixv_buf,[2],[9][16*16] );
pixel *src[3][2][9];
int chromapix = h->luma2chroma_pixel[i_pixel];
int chroma_v_shift = CHROMA_V_SHIFT;
uint64_t bcostrd = COST_MAX64;
uint16_t amvd;
/* each byte of visited represents 8 possible m1y positions, so a 4D array isn't needed */
- ALIGNED_ARRAY_N( uint8_t, visited,[8],[8][8] );
+ ALIGNED_ARRAY_32( uint8_t, visited,[8],[8][8] );
/* all permutations of an offset in up to 2 of the dimensions */
ALIGNED_4( static const int8_t dia4d[33][4] ) =
{
stride <<= b_field;
if( b_chroma )
{
- ALIGNED_ARRAY_N( pixel, pix,[FENC_STRIDE*16] );
+ ALIGNED_ARRAY_32( pixel, pix,[FENC_STRIDE*16] );
int chromapix = h->luma2chroma_pixel[PIXEL_16x16];
int shift = 7 - CHROMA_V_SHIFT;
const uint8_t *zigzag, int ctx_block_cat, int lambda2, int b_ac,
int b_chroma, int dc, int num_coefs, int idx )
{
- ALIGNED_ARRAY_N( dctcoef, orig_coefs, [64] );
- ALIGNED_ARRAY_N( dctcoef, quant_coefs, [64] );
+ ALIGNED_ARRAY_32( dctcoef, orig_coefs, [64] );
+ ALIGNED_ARRAY_32( dctcoef, quant_coefs, [64] );
const uint32_t *coef_weight1 = num_coefs == 64 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
const uint32_t *coef_weight2 = num_coefs == 64 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
const int b_interlaced = MB_INTERLACED;
x264_dct_function_t dct_asm;
x264_quant_function_t qf;
int ret = 0, ok, used_asm, interlace = 0;
- ALIGNED_ARRAY_N( dctcoef, dct1, [16],[16] );
- ALIGNED_ARRAY_N( dctcoef, dct2, [16],[16] );
- ALIGNED_ARRAY_N( dctcoef, dct4, [16],[16] );
- ALIGNED_ARRAY_N( dctcoef, dct8, [4],[64] );
+ ALIGNED_ARRAY_32( dctcoef, dct1, [16],[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct2, [16],[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct4, [16],[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct8, [4],[64] );
ALIGNED_16( dctcoef dctdc[2][8] );
x264_t h_buf;
x264_t *h = &h_buf;
ALIGNED_ARRAY_16( uint8_t, nnz, [X264_SCAN8_SIZE] );
ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
ALIGNED_ARRAY_16( int16_t, mv, [2],[X264_SCAN8_LUMA_SIZE][2] );
- ALIGNED_ARRAY_N( uint8_t, bs, [2],[2][8][4] );
+ ALIGNED_ARRAY_32( uint8_t, bs, [2],[2][8][4] );
memset( bs, 99, sizeof(uint8_t)*2*4*8*2 );
for( int j = 0; j < X264_SCAN8_SIZE; j++ )
nnz[j] = ((rand()&7) == 7) * rand() & 0xf;
x264_quant_function_t qf_c;
x264_quant_function_t qf_ref;
x264_quant_function_t qf_a;
- ALIGNED_ARRAY_N( dctcoef, dct1,[64] );
- ALIGNED_ARRAY_N( dctcoef, dct2,[64] );
- ALIGNED_ARRAY_N( dctcoef, dct3,[8],[16] );
- ALIGNED_ARRAY_N( dctcoef, dct4,[8],[16] );
- ALIGNED_ARRAY_N( uint8_t, cqm_buf,[64] );
+ ALIGNED_ARRAY_32( dctcoef, dct1,[64] );
+ ALIGNED_ARRAY_32( dctcoef, dct2,[64] );
+ ALIGNED_ARRAY_32( dctcoef, dct3,[8],[16] );
+ ALIGNED_ARRAY_32( dctcoef, dct4,[8],[16] );
+ ALIGNED_ARRAY_32( uint8_t, cqm_buf,[64] );
int ret = 0, ok, used_asm;
int oks[3] = {1,1,1}, used_asms[3] = {0,0,0};
x264_t h_buf;
{\
for( int j = 0; j < 256; j++ )\
{\
- ALIGNED_ARRAY_N( dctcoef, dct, [2],[64] );\
+ ALIGNED_ARRAY_32( dctcoef, dct, [2],[64] );\
uint8_t bitstream[2][1<<16];\
static const uint8_t ctx_ac[14] = {0,1,0,0,1,0,0,1,0,0,0,1,0,0};\
int ac = ctx_ac[ctx_block_cat];\