fix some strides that weren't a multiple of 16.

author Loren Merritt <pengvado@videolan.org>

Mon, 2 Oct 2006 02:46:23 +0000 (02:46 +0000)

committer Loren Merritt <pengvado@videolan.org>

Mon, 2 Oct 2006 02:46:23 +0000 (02:46 +0000)
author Loren Merritt <pengvado@videolan.org>
Mon, 2 Oct 2006 02:46:23 +0000 (02:46 +0000)
committer Loren Merritt <pengvado@videolan.org>
Mon, 2 Oct 2006 02:46:23 +0000 (02:46 +0000)
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 8d1c6c0f150139d61496cbbe7a8a7a4c3969a896..2093ed1fe5748f4bd55f886cff05358f0132071d 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -1162,15 +1162,15 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
  
  static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
  {
-    DECLARE_ALIGNED( uint8_t, pix1[8*8], 8 );
-    DECLARE_ALIGNED( uint8_t, pix2[8*8], 8 );
+    DECLARE_ALIGNED( uint8_t, pix1[16*8], 8 );
+    uint8_t *pix2 = pix1+8;
      const int i_stride = h->mb.pic.i_stride[1];
      const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
      const int oe = 4*(i8x8&1) + 2*(i8x8&2)*FENC_STRIDE;
  
  #define CHROMA4x4MC( width, height, me, x, y ) \
-    h->mc.mc_chroma( &p_fref[4][or+x+y*i_stride], i_stride, &pix1[x+y*8], 8, (me).mv[0], (me).mv[1], width, height ); \
-    h->mc.mc_chroma( &p_fref[5][or+x+y*i_stride], i_stride, &pix2[x+y*8], 8, (me).mv[0], (me).mv[1], width, height );
+    h->mc.mc_chroma( &p_fref[4][or+x+y*i_stride], i_stride, &pix1[x+y*16], 16, (me).mv[0], (me).mv[1], width, height ); \
+    h->mc.mc_chroma( &p_fref[5][or+x+y*i_stride], i_stride, &pix2[x+y*16], 16, (me).mv[0], (me).mv[1], width, height );
  
      if( pixel == PIXEL_4x4 )
      {
@@ -1190,8 +1190,8 @@ static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a,
          CHROMA4x4MC( 2,4, a->l0.me4x8[i8x8][1], 2,0 );
      }
  
-    return h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][oe], FENC_STRIDE, pix1, 8 )
-         + h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 8 );
+    return h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][oe], FENC_STRIDE, pix1, 16 )
+         + h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 16 );
  }
  
  static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
diff --git a/encoder/slicetype_decision.c b/encoder/slicetype_decision.c

index 8a1db9c9ebaac63f5710180f59e257c04ea034ab..e3411b5e4a4cc724fc26b70e3d174d3229588e62 100644 (file)
--- a/encoder/slicetype_decision.c
+++ b/encoder/slicetype_decision.c
@@ -55,7 +55,8 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
      const int i_stride = fenc->i_stride_lowres;
      const int i_pel_offset = 8 * ( i_mb_x + i_mb_y * i_stride );
  
-    uint8_t pix1[9*9], pix2[8*8];
+    DECLARE_ALIGNED( uint8_t, pix1[9*FDEC_STRIDE], 8 );
+    uint8_t *pix2 = pix1+8;
      x264_me_t m[2];
      int i_bcost = COST_MAX;
      int i_cost_bak;
@@ -104,16 +105,16 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
      }
  #define TRY_BIDIR( mv0, mv1, penalty ) \
      { \
-        int stride2 = 8; \
+        int stride2 = 16; \
          uint8_t *src2; \
          int i_cost; \
-        h->mc.mc_luma( m[0].p_fref, m[0].i_stride[0], pix1, 8, \
+        h->mc.mc_luma( m[0].p_fref, m[0].i_stride[0], pix1, 16, \
                         (mv0)[0], (mv0)[1], 8, 8 ); \
          src2 = h->mc.get_ref( m[1].p_fref, m[1].i_stride[0], pix2, &stride2, \
                         (mv1)[0], (mv1)[1], 8, 8 ); \
-        h->mc.avg[PIXEL_8x8]( pix1, 8, src2, stride2 ); \
+        h->mc.avg[PIXEL_8x8]( pix1, 16, src2, stride2 ); \
          i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
-                           m[0].p_fenc[0], FENC_STRIDE, pix1, 8 ); \
+                           m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
          if( i_bcost > i_cost ) \
          { \
              i_bcost = i_cost; \
@@ -196,8 +197,7 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  
  lowres_intra_mb:
      {
-        DECLARE_ALIGNED( uint8_t, pix_buf[9*FDEC_STRIDE], 8 );
-        uint8_t *pix = &pix_buf[8+FDEC_STRIDE - 1];
+        uint8_t *pix = &pix1[8+FDEC_STRIDE - 1];
          uint8_t *src = &fenc->lowres[0][i_pel_offset - 1];
          int intra_penalty = 5 + 10 * b_bidir;
          int satds[4], i_icost;
diff --git a/tools/checkasm.c b/tools/checkasm.c

index 888cd14c4bbbffb82d878c43bbc1db2f17aa1e09..cc22f8b12e283ad7b1cdc4ba1d8059bca58f24f5 100644 (file)
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -56,8 +56,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
          if( pixel_asm.name[i] != pixel_ref.name[i] ) \
          { \
              used_asm = 1; \
-            res_c   = pixel_c.name[i]( buf1, 32, buf2, 24 ); \
-            res_asm = pixel_asm.name[i]( buf1, 32, buf2, 24 ); \
+            res_c   = pixel_c.name[i]( buf1, 32, buf2, 16 ); \
+            res_asm = pixel_asm.name[i]( buf1, 32, buf2, 16 ); \
              if( res_c != res_asm ) \
              { \
                  ok = 0; \
@@ -79,16 +79,16 @@ static int check_pixel( int cpu_ref, int cpu_new )
          if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \
          { \
              used_asm = 1; \
-            res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 24 ); \
-            res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 24 ); \
-            res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 24 ); \
+            res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 32 ); \
+            res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 32 ); \
+            res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 32 ); \
              if(N==4) \
              { \
-                res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 24 ); \
-                pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 24, res_asm ); \
+                res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 32 ); \
+                pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 32, res_asm ); \
              } \
              else \
-                pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 24, res_asm ); \
+                pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 32, res_asm ); \
              if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
              { \
                  ok = 0; \
@@ -387,8 +387,8 @@ static int check_mc( int cpu_ref, int cpu_new )
          if( mc_a.name[i] != mc_ref.name[i] ) \
          { \
              used_asm = 1; \
-            mc_c.name[i]( buf3, 32, buf2, 24, ##__VA_ARGS__ ); \
-            mc_a.name[i]( buf4, 32, buf2, 24, ##__VA_ARGS__ ); \
+            mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \
+            mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \
              if( memcmp( buf3, buf4, 1024 ) )               \
              { \
                  ok = 0; \
author	Loren Merritt <pengvado@videolan.org>
	Mon, 2 Oct 2006 02:46:23 +0000 (02:46 +0000)
committer	Loren Merritt <pengvado@videolan.org>
	Mon, 2 Oct 2006 02:46:23 +0000 (02:46 +0000)
encoder/analyse.c		patch | blob | history
encoder/slicetype_decision.c		patch | blob | history
tools/checkasm.c		patch | blob | history