Extend trellis to support luma/chroma DC and chroma AC

author Fiona Glaser <fiona@x264.com>

Thu, 16 Oct 2008 10:17:53 +0000 (03:17 -0700)

committer Fiona Glaser <fiona@x264.com>

Sat, 18 Oct 2008 10:40:46 +0000 (03:40 -0700)
author Fiona Glaser <fiona@x264.com>
Thu, 16 Oct 2008 10:17:53 +0000 (03:17 -0700)
committer Fiona Glaser <fiona@x264.com>
Sat, 18 Oct 2008 10:40:46 +0000 (03:40 -0700)
diff --git a/encoder/macroblock.c b/encoder/macroblock.c

index 76af071360a0d39c3d1a2d953c9f06ee6146213e..0562153dc8984cd985960a75b26b59aafbe7b942 100644 (file)
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -187,7 +187,10 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
      }
  
      h->dctf.dct4x4dc( dct_dc4x4 );
-    h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
+    if( h->mb.b_trellis )
+        x264_quant_dc_trellis( h, (int16_t*)dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1);
+    else
+        h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
      h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );
  
      /* output samples to fdec */
@@ -239,8 +242,10 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
              dct2x2[i>>1][i&1] = dct4x4[i][0][0];
              dct4x4[i][0][0] = 0;
  
-            /* no trellis; it doesn't seem to help chroma noticeably */
-            h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
+            if( h->mb.b_trellis )
+                x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IC+b_inter, i_qp, DCT_CHROMA_AC, !b_inter, 0 );
+            else
+                h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
              h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
  
              if( b_decimate )
@@ -248,7 +253,10 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
          }
  
          h->dctf.dct2x2dc( dct2x2 );
-        h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
+        if( h->mb.b_trellis )
+            x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter );
+        else
+            h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
          zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
  
          /* output samples to fdec */
@@ -937,9 +945,14 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
              p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
  
              h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
-            h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
+            dct4x4[0][0] = 0;
+
+            if( h->mb.b_trellis )
+                x264_quant_4x4_trellis( h, dct4x4, CQM_4PC, i_qp, DCT_CHROMA_AC, 0, 0 );
+            else
+                h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
+
              h->zigzagf.scan_4x4( h->dct.luma4x4[16+i8+ch*4], dct4x4 );
-            h->dct.luma4x4[16+i8+ch*4][0] = 0;
              if( array_non_zero( dct4x4 ) )
              {
                  h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
diff --git a/encoder/macroblock.h b/encoder/macroblock.h

index 9fda183de3df1789b3c75ff31054a2af39a498fd..b25509a895f349a329159072084ba7c0e44be5d4 100644 (file)
--- a/encoder/macroblock.h
+++ b/encoder/macroblock.h
@@ -54,6 +54,8 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp );
  
  void x264_cabac_mb_skip( x264_t *h, int b_skip );
  
+void x264_quant_dc_trellis( x264_t *h, int16_t *dct, int i_quant_cat,
+                             int i_qp, int i_ctxBlockCat, int b_intra );
  void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
                               int i_qp, int i_ctxBlockCat, int b_intra, int idx );
  void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
diff --git a/encoder/rdo.c b/encoder/rdo.c

index c56b6e090339fdc8e0b9d7cfd0881793f01403bc..dbdcbe8f05f9af62a4effeff5159876aa7922436 100644 (file)
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -333,7 +333,6 @@ typedef struct {
  } trellis_node_t;
  
  // TODO:
-// support chroma and i16x16 DC
  // save cabac state between blocks?
  // use trellis' RD score instead of x264_mb_decimate_score?
  // code 8x8 sig/last flags forwards with deadzone and save the contexts at
@@ -353,10 +352,10 @@ typedef struct {
  // comparable to the input. so unquant is the direct inverse of quant,
  // and uses the dct scaling factors, not the idct ones.
  
-static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
+static ALWAYS_INLINE void quant_trellis_cabac( x264_t *h, int16_t *dct,
                                   const uint16_t *quant_mf, const int *unquant_mf,
                                   const int *coef_weight, const uint8_t *zigzag,
-                                 int i_ctxBlockCat, int i_lambda2, int b_ac, int i_coefs, int idx )
+                                 int i_ctxBlockCat, int i_lambda2, int b_ac, int dc, int i_coefs, int idx )
  {
      int abs_coefs[64], signs[64];
      trellis_node_t nodes[2][8];
@@ -381,7 +380,7 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
  
      /* init coefs */
      for( i = i_coefs-1; i >= b_ac; i-- )
-        if( (unsigned)(dct[zigzag[i]] * quant_mf[zigzag[i]] + f-1) >= 2*f )
+        if( (unsigned)(dct[zigzag[i]] * (dc?quant_mf[0]>>1:quant_mf[zigzag[i]]) + f-1) >= 2*f )
              break;
  
      if( i < b_ac )
@@ -425,17 +424,22 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
              cabac_state_last[i] = ctx_last[ last_coeff_flag_offset_8x8[i] ];
          }
      }
-    else
+    else if( !dc || i_ctxBlockCat != DCT_CHROMA_DC )
      {
          memcpy( cabac_state_sig,  &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 15 );
          memcpy( cabac_state_last, &h->cabac.state[ last_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 15 );
      }
+    else
+    {
+        memcpy( cabac_state_sig,  &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 3 );
+        memcpy( cabac_state_last, &h->cabac.state[ last_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 3 );
+    }
      memcpy( nodes_cur[0].cabac_state, &h->cabac.state[ coeff_abs_level_m1_offset[i_ctxBlockCat] ], 10 );
  
      for( i = i_last_nnz; i >= b_ac; i-- )
      {
          int i_coef = abs_coefs[i];
-        int q = ( f + i_coef * quant_mf[zigzag[i]] ) >> 16;
+        int q = ( f + i_coef * (dc?quant_mf[0]>>1:quant_mf[zigzag[i]]) ) >> 16;
          int abs_level;
          int cost_sig[2], cost_last[2];
          trellis_node_t n;
@@ -488,11 +492,11 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
          // that are better left coded, especially at QP > 40.
          for( abs_level = q; abs_level >= q-1; abs_level-- )
          {
-            int unquant_abs_level = ((unquant_mf[zigzag[i]] * abs_level + 128) >> 8);
+            int unquant_abs_level = (((dc?unquant_mf[0]<<1:unquant_mf[zigzag[i]]) * abs_level + 128) >> 8);
              int d = i_coef - unquant_abs_level;
              int64_t ssd;
              /* Psy trellis: bias in favor of higher AC coefficients in the reconstructed frame. */
-            if( h->mb.i_psy_trellis && i )
+            if( h->mb.i_psy_trellis && i && !dc && i_ctxBlockCat != DCT_CHROMA_AC )
              {
                  int orig_coef = (i_coefs == 64) ? h->mb.pic.fenc_dct8[idx][i] : h->mb.pic.fenc_dct4[idx][i];
                  int predicted_coef = orig_coef - i_coef * signs[i];
@@ -501,7 +505,8 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
                  ssd = (int64_t)d*d * coef_weight[i] - psy_weight * psy_value;
              }
              else
-                ssd = (int64_t)d*d * coef_weight[i];
+            /* FIXME: for i16x16 dc is this weight optimal? */
+                ssd = (int64_t)d*d * (dc?256:coef_weight[i]);
  
              for( j = 0; j < 8; j++ )
              {
@@ -563,19 +568,28 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
      }
  }
  
+const static uint8_t x264_zigzag_scan2[4] = {0,1,2,3}; /* identity scan order; used below for the 4-coefficient chroma DC case */
+/* Run the CABAC trellis quantizer on a DC block: 4 coefficients when i_ctxBlockCat is DCT_CHROMA_DC, otherwise 16 coefficients scanned with x264_zigzag_scan4. */
+void x264_quant_dc_trellis( x264_t *h, int16_t *dct, int i_quant_cat,
+                            int i_qp, int i_ctxBlockCat, int b_intra ) /* b_intra selects the lambda2_tab row; i_quant_cat and i_qp select the quant tables */
+{
+    quant_trellis_cabac( h, (int16_t*)dct,
+        h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp], /* quant_mf / unquant_mf; with dc set only entry [0] is read (>>1 and <<1 respectively) */
+        NULL, i_ctxBlockCat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[h->mb.b_interlaced], /* coef_weight is NULL: with dc set the SSD uses the constant 256 and the psy branch is skipped */
+        i_ctxBlockCat, lambda2_tab[b_intra][i_qp], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0 ); /* b_ac=0, dc=1, i_coefs=4 or 16, idx=0 */
+}
  
  void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat, /* 4x4 entry point: forwards to quant_trellis_cabac with 16 coefficients */
                               int i_qp, int i_ctxBlockCat, int b_intra, int idx )
  {
-    int b_ac = (i_ctxBlockCat == DCT_LUMA_AC);
+    int b_ac = (i_ctxBlockCat == DCT_LUMA_AC || i_ctxBlockCat == DCT_CHROMA_AC); /* 1 for AC-only categories: the coefficient loops in quant_trellis_cabac iterate down to index b_ac, so scan position 0 is not visited */
      quant_trellis_cabac( h, (int16_t*)dct,
          h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
          x264_dct4_weight2_zigzag[h->mb.b_interlaced],
          x264_zigzag_scan4[h->mb.b_interlaced],
-        i_ctxBlockCat, lambda2_tab[b_intra][i_qp], b_ac, 16, idx );
+        i_ctxBlockCat, lambda2_tab[b_intra][i_qp], b_ac, 0, 16, idx ); /* new 4th-from-last argument dc=0: per-coefficient quant/unquant factors and coef_weight are used */
  }
  
-
  void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
                               int i_qp, int b_intra, int idx )
  {
@@ -583,6 +597,6 @@ void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
          h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
          x264_dct8_weight2_zigzag[h->mb.b_interlaced],
          x264_zigzag_scan8[h->mb.b_interlaced],
-        DCT_LUMA_8x8, lambda2_tab[b_intra][i_qp], 0, 64, idx );
+        DCT_LUMA_8x8, lambda2_tab[b_intra][i_qp], 0, 0, 64, idx );
  }
author	Fiona Glaser <fiona@x264.com>
	Thu, 16 Oct 2008 10:17:53 +0000 (03:17 -0700)
committer	Fiona Glaser <fiona@x264.com>
	Sat, 18 Oct 2008 10:40:46 +0000 (03:40 -0700)
encoder/macroblock.c		patch | blob | history
encoder/macroblock.h		patch | blob | history
encoder/rdo.c		patch | blob | history