]> granicus.if.org Git - libx264/commitdiff
Before, we eliminated dct blocks containing only a small single coefficient. Now...
authorLoren Merritt <pengvado@videolan.org>
Wed, 19 Apr 2006 09:02:19 +0000 (09:02 +0000)
committerLoren Merritt <pengvado@videolan.org>
Wed, 19 Apr 2006 09:02:19 +0000 (09:02 +0000)
based on a patch by Alex Wright.

git-svn-id: svn://svn.videolan.org/x264/trunk@503 df754926-b1dd-0310-bc7b-ec298dee348c

common/common.c
encoder/macroblock.c
x264.c
x264.h

index e4d5ef69e9c10821873e4bc9ac85f6bed87574bb..f48c50a5a6b5a4064cd21f4eda95bec6a585221c 100644 (file)
@@ -121,6 +121,7 @@ void    x264_param_default( x264_param_t *param )
     param->analyse.i_mv_range = -1; // set from level_idc
     param->analyse.i_chroma_qp_offset = 0;
     param->analyse.b_fast_pskip = 1;
+    param->analyse.b_dct_decimate = 1;
     param->analyse.b_psnr = 1;
 
     param->i_cqm_preset = X264_CQM_FLAT;
@@ -479,6 +480,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
     s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset );
     s += sprintf( s, " slices=%d", p->i_threads );
     s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction );
+    s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate );
 
     s += sprintf( s, " bframes=%d", p->i_bframe );
     if( p->i_bframe )
index 2ca0e655d173bcafcc202693a052e6feb8040c11..0888d5d7e1abc86953b978197fc7414deb829a5b 100644 (file)
@@ -297,6 +297,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
 static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
 {
     int i, ch;
+    int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
 
     for( ch = 0; ch < 2; ch++ )
     {
@@ -331,7 +332,7 @@ static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
             quant_4x4( h, dct4x4[i], h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
             scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
 
-            if( b_inter )
+            if( b_decimate )
             {
                 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
             }
@@ -345,11 +346,13 @@ static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
         h->dctf.idct2x2dc( dct2x2 );
         x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qscale );  /* XXX not inversed */
 
-        if( b_inter && i_decimate_score < 7 )
+        if( b_decimate && i_decimate_score < 7 )
         {
             /* Near null chroma 8x8 block so make it null (bits saving) */
-            memset( dct4x4, 0, sizeof( dct4x4 ) );
             memset( &h->dct.block[16+ch*4], 0, 4 * sizeof( *h->dct.block ) );
+            if( !array_non_zero( (int*)dct2x2, sizeof(dct2x2)/sizeof(int) ) )
+                continue;
+            memset( dct4x4, 0, sizeof( dct4x4 ) );
         }
         else
         {
@@ -357,12 +360,8 @@ static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
                 h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale );
         }
 
-        /* calculate dct coeffs */
         for( i = 0; i < 4; i++ )
-        {
-            /* copy dc coeff */
             dct4x4[i][0][0] = dct2x2[0][i];
-        }
         h->dctf.add8x8_idct( p_dst, dct4x4 );
     }
 }
@@ -417,6 +416,7 @@ void x264_macroblock_encode( x264_t *h )
 {
     int i_cbp_dc = 0;
     int i_qp = h->mb.i_qp;
+    int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
     int i;
 
     if( h->mb.i_type == P_SKIP )
@@ -494,6 +494,7 @@ void x264_macroblock_encode( x264_t *h )
         {
             int16_t dct8x8[4][8][8];
             int nnz8x8[4] = {1,1,1,1};
+            b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
             h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
 
             for( idx = 0; idx < 4; idx++ )
@@ -507,7 +508,7 @@ void x264_macroblock_encode( x264_t *h )
 
                 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
 
-                if( !h->mb.b_trellis )
+                if( b_decimate )
                 {
                     int i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
                     i_decimate_mb += i_decimate_8x8;
@@ -518,9 +519,11 @@ void x264_macroblock_encode( x264_t *h )
                         nnz8x8[idx] = 0;
                     }
                 }
+                else
+                    nnz8x8[idx] = array_non_zero( (int*)dct8x8[idx], sizeof(*dct8x8)/sizeof(int) );
             }
 
-            if( i_decimate_mb < 6 && !h->mb.b_trellis )
+            if( i_decimate_mb < 6 && b_decimate )
                 memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
             else
             {
@@ -556,13 +559,14 @@ void x264_macroblock_encode( x264_t *h )
                         quant_4x4( h, dct4x4[idx], h->quant4_mf[CQM_4PY], i_qp, 0 );
 
                     scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
-
-                    i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
+                    
+                    if( b_decimate )
+                        i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
                 }
 
                 /* decimate this 8x8 block */
                 i_decimate_mb += i_decimate_8x8;
-                if( i_decimate_8x8 < 4 )
+                if( i_decimate_8x8 < 4 && b_decimate )
                 {
                     memset( &dct4x4[i8x8*4], 0, 4 * sizeof( *dct4x4 ) );
                     memset( &h->dct.block[i8x8*4], 0, 4 * sizeof( *h->dct.block ) );
@@ -570,7 +574,7 @@ void x264_macroblock_encode( x264_t *h )
                 }
             }
 
-            if( i_decimate_mb < 6 )
+            if( i_decimate_mb < 6 && b_decimate )
                 memset( h->dct.block, 0, 16 * sizeof( *h->dct.block ) );
             else
             {
@@ -855,8 +859,8 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
     int i_qp = h->mb.i_qp;
     uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE;
     uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE;
-    int i_decimate_8x8 = 0;
-    int nnz8x8 = 1;
+    int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
+    int nnz8x8;
     int ch;
 
     x264_mb_mc_8x8( h, i8 );
@@ -865,16 +869,14 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
     {
         int16_t dct8x8[8][8];
         h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
-
         quant_8x8( h, dct8x8, h->quant8_mf[CQM_8PY], i_qp, 0 );
         scan_zigzag_8x8full( h->dct.luma8x8[i8], dct8x8 );
-        i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[i8], 64 );
 
-        if( i_decimate_8x8 < 4 )
-        {
-            memset( h->dct.luma8x8[i8], 0, sizeof(h->dct.luma8x8[i8]) );
-            nnz8x8 = 0;
-        }
+        if( b_decimate )
+            nnz8x8 = 4 <= x264_mb_decimate_score( h->dct.luma8x8[i8], 64 );
+        else
+            nnz8x8 = array_non_zero( (int*)dct8x8, sizeof(dct8x8)/sizeof(int) );
+
         if( nnz8x8 )
         {
             h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8PY], i_qp );
@@ -883,28 +885,30 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
     }
     else
     {
-        int i4, idx;
+        int i4;
         int16_t dct4x4[4][4][4];
         h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
-
+        quant_4x4( h, dct4x4[0], h->quant4_mf[CQM_4PY], i_qp, 0 );
+        quant_4x4( h, dct4x4[1], h->quant4_mf[CQM_4PY], i_qp, 0 );
+        quant_4x4( h, dct4x4[2], h->quant4_mf[CQM_4PY], i_qp, 0 );
+        quant_4x4( h, dct4x4[3], h->quant4_mf[CQM_4PY], i_qp, 0 );
         for( i4 = 0; i4 < 4; i4++ )
-        {
-            idx = i8 * 4 + i4;
-
-            quant_4x4( h, dct4x4[i4], h->quant4_mf[CQM_4PY], i_qp, 0 );
-            scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[i4] );
-            i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
-        }
+            scan_zigzag_4x4full( h->dct.block[i8*4+i4].luma4x4, dct4x4[i4] );
 
-        if( i_decimate_8x8 < 4 )
+        if( b_decimate )
         {
-            memset( &h->dct.block[i8*4], 0, 4 * sizeof(*h->dct.block) );
-            nnz8x8 = 0;
+            int i_decimate_8x8 = 0;
+            for( i4 = 0; i4 < 4 && i_decimate_8x8 < 4; i4++ )
+                i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[i8*4+i4].luma4x4, 16 );
+            nnz8x8 = 4 <= i_decimate_8x8;
         }
+        else
+            nnz8x8 = array_non_zero( (int*)dct4x4, sizeof(dct4x4)/sizeof(int) );
+
         if( nnz8x8 )
         {
             for( i4 = 0; i4 < 4; i4++ )
-                 h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[CQM_4PY], i_qp );
+                h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[CQM_4PY], i_qp );
             h->dctf.add8x8_idct( p_fdec, dct4x4 );
         }
     }
@@ -920,8 +924,11 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
         h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
         quant_4x4( h, dct4x4, h->quant4_mf[CQM_4PC], i_qp, 0 );
         scan_zigzag_4x4( h->dct.block[16+i8+ch*4].residual_ac, dct4x4 );
-        h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
-        h->dctf.add4x4_idct( p_fdec, dct4x4 );
+        if( array_non_zero( (int*)dct4x4, sizeof(dct4x4)/sizeof(int) ) )
+        {
+            h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
+            h->dctf.add4x4_idct( p_fdec, dct4x4 );
+        }
     }
 
     if( nnz8x8 )
diff --git a/x264.c b/x264.c
index 6f9a4858e70936c7a71c20c60272ff0d82e59455..4d894ad1a960f1a242e30b85f02bf8effa92b157 100644 (file)
--- a/x264.c
+++ b/x264.c
@@ -221,6 +221,7 @@ static void Help( x264_param_t *defaults )
              "                                  - 1: enabled only on the final encode of a MB\n"
              "                                  - 2: enabled on all mode decisions\n"
              "      --no-fast-pskip         Disables early SKIP detection on P-frames\n"
+             "      --no-dct-decimate       Disables coefficient thresholding on P-frames\n"
              "      --nr <integer>          Noise reduction [%d]\n"
              "\n"
              "      --cqm <string>          Preset quant matrices [\"flat\"]\n"
@@ -447,6 +448,7 @@ static int  Parse( int argc, char **argv,
 #define OPT_BIME 318
 #define OPT_NR 319
 #define OPT_THREAD_INPUT 320
+#define OPT_NO_DCT_DECIMATE 321
 
         static struct option long_options[] =
         {
@@ -487,6 +489,7 @@ static int  Parse( int argc, char **argv,
             { "8x8dct",  no_argument,       NULL, '8' },
             { "trellis", required_argument, NULL, 't' },
             { "no-fast-pskip", no_argument, NULL, OPT_NO_FAST_PSKIP },
+            { "no-dct-decimate", no_argument, NULL, OPT_NO_DCT_DECIMATE },
             { "level",   required_argument, NULL, OPT_LEVEL },
             { "ratetol", required_argument, NULL, OPT_RATETOL },
             { "vbv-maxrate", required_argument, NULL, OPT_VBVMAXRATE },
@@ -729,6 +732,9 @@ static int  Parse( int argc, char **argv,
             case OPT_NO_FAST_PSKIP:
                 param->analyse.b_fast_pskip = 0;
                 break;
+            case OPT_NO_DCT_DECIMATE:
+                param->analyse.b_dct_decimate = 0;
+                break;
             case OPT_LEVEL:
                 if( atof(optarg) < 6 )
                     param->i_level_idc = (int)(10*atof(optarg)+.5);
diff --git a/x264.h b/x264.h
index 97bfde9b9c4aacc5a71ccfd41c9f3a7b0519c53f..45c52da4b378c90a117384f610a5200867336aa4 100644 (file)
--- a/x264.h
+++ b/x264.h
@@ -35,7 +35,7 @@
 
 #include <stdarg.h>
 
-#define X264_BUILD 45
+#define X264_BUILD 46
 
 /* x264_t:
  *      opaque handler for decoder and encoder */
@@ -203,6 +203,7 @@ typedef struct
         int          b_mixed_references; /* allow each mb partition in P-frames to have it's own reference number */
         int          i_trellis;  /* trellis RD quantization */
         int          b_fast_pskip; /* early SKIP detection on P-frames */
+        int          b_dct_decimate; /* transform coefficient thresholding on P-frames */
         int          i_noise_reduction; /* adaptive pseudo-deadzone */
 
         int          b_psnr;    /* Do we compute PSNR stats (save a few % of cpu) */