From: Fiona Glaser <fiona@x264.com>
Date: Thu, 23 Jul 2009 19:20:39 +0000 (-0700)
Subject: Add QPRD support as subme=10
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4304c427fd6419b205c42aa139bfd8cebbdf60bf;p=libx264

Add QPRD support as subme=10
Refactor trellis lambda selection to be done in analyse_init instead of in trellis.
This will allow for easier adaptation of lambda later on; for now it allows constant lambda across variable QPs.
QPRD is only available with adaptive quantization enabled and generally improves SSIM and visual quality.
Additionally, weight the SSD values from RD based on the relative QP offset for chroma; helps visually at high QPs where chroma has a lower QP than luma.
This fixes some visual artifacts created by QPRD at high QPs.
Note that this generally hurts PSNR and SSIM, and so is only on when psy-RD is on.
---

diff --git a/Makefile b/Makefile
index a4cc170d..e0656118 100644
--- a/Makefile
+++ b/Makefile
@@ -110,7 +110,7 @@ OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decim
 OPT2 = --crf 26 -b4 -m5 -r2 --me hex --cqm jvt --nr 100 --psnr --no-mixed-refs --b-adapt 2
 OPT3 = --crf 18 -b3 -m9 -r5 --me umh -t1 -A all --b-pyramid --direct auto --no-fast-pskip
 OPT4 = --crf 22 -b3 -m7 -r4 --me esa -t2 -A all --psy-rd 1.0:1.0
-OPT5 = --frames 50 --crf 24 -b3 -m9 -r3 --me tesa -t1
+OPT5 = --frames 50 --crf 24 -b3 -m10 -r3 --me tesa -t1
 OPT6 = --frames 50 -q0 -m9 -r2 --me hex -Aall
 OPT7 = --frames 50 -q0 -m2 -r1 --me hex --no-cabac
 
diff --git a/common/common.h b/common/common.h
index 1e46ae8e..8a25a130 100644
--- a/common/common.h
+++ b/common/common.h
@@ -554,6 +554,11 @@ struct x264_t
         int     b_direct_auto_read; /* take stats for --direct auto from the 2pass log */
         int     b_direct_auto_write; /* analyse direct modes, to use and/or save */
 
+        /* lambda values */
+        int     i_trellis_lambda2[2][2]; /* [luma,chroma][inter,intra] */
+        int     i_psy_rd_lambda;
+        int     i_chroma_lambda2_offset;
+
         /* B_direct and weighted prediction */
         int16_t dist_scale_factor[16][2];
         int16_t bipred_weight[32][4];
diff --git a/encoder/analyse.c b/encoder/analyse.c
index afa61c6b..dc75fb16 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -156,6 +156,41 @@ const int x264_lambda2_tab[52] = {
 943718, 1189010, 1498059, 1887436                                  /* 48 - 51 */
 };
 
+// should the intra and inter lambdas be different?
+// I'm just matching the behaviour of deadzone quant.
+static const int x264_trellis_lambda2_tab[2][52] = {
+    // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
+    {    46,      58,      73,      92,     117,     147,
+        185,     233,     294,     370,     466,     587,
+        740,     932,    1174,    1480,    1864,    2349,
+       2959,    3728,    4697,    5918,    7457,    9395,
+      11837,   14914,   18790,   23674,   29828,   37581,
+      47349,   59656,   75163,   94699,  119313,  150326,
+     189399,  238627,  300652,  378798,  477255,  601304,
+     757596,  954511, 1202608, 1515192, 1909022, 2405217,
+    3030384, 3818045, 4810435, 6060769 },
+    // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
+    {    27,      34,      43,      54,      68,      86,
+        108,     136,     172,     216,     273,     343,
+        433,     545,     687,     865,    1090,    1374,
+       1731,    2180,    2747,    3461,    4361,    5494,
+       6922,    8721,   10988,   13844,   17442,   21976,
+      27688,   34885,   43953,   55377,   69771,   87906,
+     110755,  139543,  175813,  221511,  279087,  351627,
+     443023,  558174,  703255,  886046, 1116348, 1406511,
+    1772093, 2232697, 2813022, 3544186 }
+};
+
+static const uint16_t x264_chroma_lambda2_offset_tab[] = { /* ~256 * 2**((i-12)/3.), i = qp - chroma_qp + 12; 256 == unity; last entry clipped to fit uint16_t */
+       16,    20,    25,    32,    40,    50,
+       64,    80,   101,   128,   161,   203,
+      256,   322,   406,   512,   645,   812,
+     1024,  1290,  1625,  2048,  2580,  3250,
+     4096,  5160,  6501,  8192, 10321, 13003,
+    16384, 20642, 26007, 32768, 41285, 52015,
+    65535
+};
+
 /* TODO: calculate CABAC costs */
 static const int i_mb_b_cost_table[X264_MBTYPE_MAX] = {
     9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
@@ -219,19 +254,36 @@ static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
 static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
 {
     int i = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);
+
     /* mbrd == 1 -> RD mode decision */
     /* mbrd == 2 -> RD refinement */
-    a->i_mbrd = (i>=6) + (i>=8);
+    /* mbrd == 3 -> QPRD */
+    a->i_mbrd = (i>=6) + (i>=8) + (h->param.analyse.i_subpel_refine>=10);
+
     /* conduct the analysis using this lamda and QP */
     a->i_qp = h->mb.i_qp = i_qp;
     h->mb.i_chroma_qp = h->chroma_qp_table[i_qp];
+
     a->i_lambda = x264_lambda_tab[i_qp];
     a->i_lambda2 = x264_lambda2_tab[i_qp];
+
+    h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
+    if( h->mb.b_trellis )
+    {
+        h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][h->mb.i_qp];
+        h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][h->mb.i_qp];
+        h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][h->mb.i_chroma_qp];
+        h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
+    }
+    h->mb.i_psy_rd_lambda = a->i_lambda;
+    /* Adjusting chroma lambda based on QP offset hurts PSNR, so we'll leave it as part of psy-RD. */
+    h->mb.i_chroma_lambda2_offset = h->mb.i_psy_rd ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
+
     h->mb.i_me_method = h->param.analyse.i_me_method;
     h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
     h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
                         && h->mb.i_subpel_refine >= 5;
-    h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
+
     h->mb.b_transform_8x8 = 0;
     h->mb.b_noise_reduction = 0;
 
@@ -2123,7 +2175,7 @@ static inline void x264_mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t *
     {
         int i_rd8;
         x264_analyse_update_cache( h, a );
-        h->mb.b_transform_8x8 = !h->mb.b_transform_8x8;
+        h->mb.b_transform_8x8 ^= 1;
         /* FIXME only luma is needed, but the score for comparison already includes chroma */
         i_rd8 = x264_rd_cost_mb( h, a->i_lambda2 );
 
@@ -2134,10 +2186,70 @@ static inline void x264_mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t *
             *i_rd = i_rd8;
         }
         else
-            h->mb.b_transform_8x8 = !h->mb.b_transform_8x8;
+            h->mb.b_transform_8x8 ^= 1;
     }
 }
 
+/* Rate-distortion optimal QP selection.
+ * FIXME: More than half of the benefit of this function seems to be
+ * in the way it improves the coding of chroma DC (by decimating or
+ * finding a better way to code a single DC coefficient.)
+ * There must be a more efficient way to get that portion of the benefit
+ * without doing full QP-RD, but RD-decimation doesn't seem to do the
+ * trick. */
+static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
+{
+    int bcost, cost, direction, failures, prevcost, origcost;
+    int orig_qp = h->mb.i_qp, bqp = h->mb.i_qp;
+    origcost = bcost = x264_rd_cost_mb( h, a->i_lambda2 );
+
+    /* Search upwards (+1) then downwards (-1). If CBP is already zero, don't raise the quantizer any higher. */
+    for( direction = h->mb.cbp[h->mb.i_mb_xy] ? 1 : -1; direction >= -1; direction-=2 )
+    {
+        h->mb.i_qp = orig_qp;
+        failures = 0;
+        prevcost = origcost;
+        while( direction > 0 ? h->mb.i_qp < h->param.rc.i_qp_max : h->mb.i_qp > h->param.rc.i_qp_min ) /* stay inside the configured QP range */
+        {
+            h->mb.i_qp += direction;
+            h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
+            cost = x264_rd_cost_mb( h, a->i_lambda2 );
+            COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp );
+
+            /* We can't assume that the costs are monotonic over QPs.
+             * Tie case-as-failure seems to give better results. */
+            if( cost < prevcost )
+                failures = 0;
+            else
+                failures++;
+            prevcost = cost;
+
+            /* Without psy-RD, require monotonicity when lowering
+             * quant, allow 1 failure when raising quant.
+             * With psy-RD, allow 1 failure when lowering quant,
+             * allow 2 failures when raising quant.
+             * Psy-RD generally seems to result in more chaotic
+             * RD score-vs-quantizer curves. */
+            if( failures > ((direction + 1)>>1)+(!!h->mb.i_psy_rd) )
+                break;
+            if( direction == 1 && !h->mb.cbp[h->mb.i_mb_xy] )
+                break;
+        }
+    }
+
+    h->mb.i_qp = bqp;
+    h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
+
+    /* Check transform again; decision from before may no longer be optimal. */
+    if( h->mb.i_qp != orig_qp && x264_mb_transform_8x8_allowed( h ) &&
+        h->param.analyse.b_transform_8x8 )
+    {
+        h->mb.b_transform_8x8 ^= 1;
+        cost = x264_rd_cost_mb( h, a->i_lambda2 );
+        if( cost > bcost )
+            h->mb.b_transform_8x8 ^= 1;
+    }
+}
 
 /*****************************************************************************
  * x264_macroblock_analyse:
@@ -2150,7 +2262,13 @@ void x264_macroblock_analyse( x264_t *h )
 
     h->mb.i_qp = x264_ratecontrol_qp( h );
     if( h->param.rc.i_aq_mode )
+    {
         x264_adaptive_quant( h );
+        /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
+         * to lower the bit cost of the qp_delta.  Skip this for QPRD (subme>=10); analysis.i_mbrd isn't set until analyse_init. */
+        if( h->param.analyse.i_subpel_refine < 10 && abs(h->mb.i_qp - h->mb.i_last_qp) == 1 )
+            h->mb.i_qp = h->mb.i_last_qp;
+    }
 
     x264_mb_analyse_init( h, &analysis, h->mb.i_qp );
 
@@ -2745,6 +2863,9 @@ void x264_macroblock_analyse( x264_t *h )
     if( !analysis.i_mbrd )
         x264_mb_analyse_transform( h );
 
+    if( analysis.i_mbrd == 3 && !IS_SKIP(h->mb.i_type) )
+        x264_mb_analyse_qp_rd( h, &analysis );
+
     h->mb.b_trellis = h->param.analyse.i_trellis;
     h->mb.b_noise_reduction = !!h->param.analyse.i_noise_reduction;
     if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 )
diff --git a/encoder/encoder.c b/encoder/encoder.c
index ac13260b..5005db73 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -497,7 +497,7 @@ static int x264_validate_parameters( x264_t *h )
     if( h->param.analyse.i_me_method == X264_ME_TESA &&
         (h->mb.b_lossless || h->param.analyse.i_subpel_refine <= 1) )
         h->param.analyse.i_me_method = X264_ME_ESA;
-    h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 9 );
+    h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 10 );
     h->param.analyse.b_mixed_references = h->param.analyse.b_mixed_references && h->param.i_frame_reference > 1;
     h->param.analyse.inter &= X264_ANALYSE_PSUB16x16|X264_ANALYSE_PSUB8x8|X264_ANALYSE_BSUB16x16|
                               X264_ANALYSE_I4x4|X264_ANALYSE_I8x8;
@@ -538,6 +538,8 @@ static int x264_validate_parameters( x264_t *h )
     if( h->param.rc.f_aq_strength == 0 )
         h->param.rc.i_aq_mode = 0;
     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
+    if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
+        h->param.analyse.i_subpel_refine = 9;
 
     {
         const x264_level_t *l = x264_levels;
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index 459737b0..7e3f91f7 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -100,7 +100,7 @@ static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, int16_t dct[4][4], int i_qp,
 {
     int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
     if( h->mb.b_trellis )
-        return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, i_ctxBlockCat, b_intra, idx );
+        return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, i_ctxBlockCat, b_intra, 0, idx );
     else
         return h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
 }
@@ -252,7 +252,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
 
     h->dctf.dct4x4dc( dct_dc4x4 );
     if( h->mb.b_trellis )
-        nz = x264_quant_dc_trellis( h, (int16_t*)dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1);
+        nz = x264_quant_dc_trellis( h, (int16_t*)dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1, 0 );
     else
         nz = h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
 
@@ -311,7 +311,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
                     h->dctf.sub8x8_dct_dc( dct2x2, h->mb.pic.p_fenc[1+ch], h->mb.pic.p_fdec[1+ch] );
                     dct2x2dc_dconly( dct2x2 );
                     if( h->mb.b_trellis )
-                        nz_dc = x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter );
+                        nz_dc = x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter, 1 );
                     else
                         nz_dc = h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<
     1 );
@@ -358,7 +358,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
         for( i = 0; i < 4; i++ )
         {
             if( h->mb.b_trellis )
-                nz = x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IC+b_inter, i_qp, DCT_CHROMA_AC, !b_inter, 0 );
+                nz = x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IC+b_inter, i_qp, DCT_CHROMA_AC, !b_inter, 1, 0 );
             else
                 nz = h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
             h->mb.cache.non_zero_count[x264_scan8[16+i+ch*4]] = nz;
@@ -373,7 +373,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
         }
 
         if( h->mb.b_trellis )
-            nz_dc = x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter );
+            nz_dc = x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter, 1 );
         else
             nz_dc = h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
 
@@ -1074,7 +1074,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
             dct4x4[0][0] = 0;
 
             if( h->mb.b_trellis )
-                nz = x264_quant_4x4_trellis( h, dct4x4, CQM_4PC, i_qp, DCT_CHROMA_AC, 0, 0 );
+                nz = x264_quant_4x4_trellis( h, dct4x4, CQM_4PC, i_qp, DCT_CHROMA_AC, 0, 1, 0 );
             else
                 nz = h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
 
diff --git a/encoder/macroblock.h b/encoder/macroblock.h
index 7b9f08a3..94545779 100644
--- a/encoder/macroblock.h
+++ b/encoder/macroblock.h
@@ -56,9 +56,9 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp );
 void x264_cabac_mb_skip( x264_t *h, int b_skip );
 
 int x264_quant_dc_trellis( x264_t *h, int16_t *dct, int i_quant_cat,
-                             int i_qp, int i_ctxBlockCat, int b_intra );
+                             int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma );
 int x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
-                             int i_qp, int i_ctxBlockCat, int b_intra, int idx );
+                             int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma, int idx );
 int x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
                              int i_qp, int b_intra, int idx );
 
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index 0d5e3d16..f33c63ea 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -288,10 +288,6 @@ void x264_adaptive_quant( x264_t *h )
 {
     x264_emms();
     h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
-    /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
-     * to lower the bit cost of the qp_delta. */
-    if( abs(h->mb.i_qp - h->mb.i_last_qp) == 1 )
-        h->mb.i_qp = h->mb.i_last_qp;
 }
 
 int x264_ratecontrol_new( x264_t *h )
diff --git a/encoder/rdo.c b/encoder/rdo.c
index d625929f..7a381c54 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -123,16 +123,16 @@ static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
             int dc = h->pixf.sad[size]( fdec, FDEC_STRIDE, zero, 0 ) >> 1;
             satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, zero, 0 ) - dc - sum_satd( h, size, x, y ));
         }
-        satd = (satd * h->mb.i_psy_rd * x264_lambda_tab[h->mb.i_qp] + 128) >> 8;
+        satd = (satd * h->mb.i_psy_rd * h->mb.i_psy_rd_lambda + 128) >> 8;
     }
     return h->pixf.ssd[size](fenc, FENC_STRIDE, fdec, FDEC_STRIDE) + satd;
 }
 
 static inline int ssd_mb( x264_t *h )
 {
-    return ssd_plane(h, PIXEL_16x16, 0, 0, 0)
-         + ssd_plane(h, PIXEL_8x8,   1, 0, 0)
-         + ssd_plane(h, PIXEL_8x8,   2, 0, 0);
+    int chromassd = ssd_plane(h, PIXEL_8x8, 1, 0, 0) + ssd_plane(h, PIXEL_8x8, 2, 0, 0);
+    chromassd = ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8; /* 8.8 fixed-point weight; 64-bit product since the weight can reach 65535 */
+    return ssd_plane(h, PIXEL_16x16, 0, 0, 0) + chromassd;
 }
 
 static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
@@ -202,6 +202,7 @@ uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i4, int i_pixel )
 {
     uint64_t i_ssd, i_bits;
     int i8 = i4 >> 2;
+    int chromassd;
 
     if( i_pixel == PIXEL_16x16 )
     {
@@ -222,9 +223,10 @@ uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i4, int i_pixel )
     if( i_pixel == PIXEL_8x16 )
         x264_macroblock_encode_p8x8( h, i8+2 );
 
-    i_ssd = ssd_plane( h, i_pixel,   0, (i8&1)*8, (i8>>1)*8 )
-          + ssd_plane( h, i_pixel+3, 1, (i8&1)*4, (i8>>1)*4 )
-          + ssd_plane( h, i_pixel+3, 2, (i8&1)*4, (i8>>1)*4 );
+    chromassd = ssd_plane( h, i_pixel+3, 1, (i8&1)*4, (i8>>1)*4 )
+              + ssd_plane( h, i_pixel+3, 2, (i8&1)*4, (i8>>1)*4 );
+    chromassd = ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8; /* 8.8 fixed-point weight; 64-bit product since the weight can reach 65535 */
+    i_ssd = ssd_plane( h, i_pixel,   0, (i8&1)*8, (i8>>1)*8 ) + chromassd;
 
     if( h->param.b_cabac )
     {
@@ -356,31 +358,6 @@ void x264_rdo_init( void )
     }
 }
 
-// should the intra and inter lambdas be different?
-// I'm just matching the behaviour of deadzone quant.
-static const int lambda2_tab[2][52] = {
-    // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
-    {    46,      58,      73,      92,     117,     147,
-        185,     233,     294,     370,     466,     587,
-        740,     932,    1174,    1480,    1864,    2349,
-       2959,    3728,    4697,    5918,    7457,    9395,
-      11837,   14914,   18790,   23674,   29828,   37581,
-      47349,   59656,   75163,   94699,  119313,  150326,
-     189399,  238627,  300652,  378798,  477255,  601304,
-     757596,  954511, 1202608, 1515192, 1909022, 2405217,
-    3030384, 3818045, 4810435, 6060769 },
-    // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
-    {    27,      34,      43,      54,      68,      86,
-        108,     136,     172,     216,     273,     343,
-        433,     545,     687,     865,    1090,    1374,
-       1731,    2180,    2747,    3461,    4361,    5494,
-       6922,    8721,   10988,   13844,   17442,   21976,
-      27688,   34885,   43953,   55377,   69771,   87906,
-     110755,  139543,  175813,  221511,  279087,  351627,
-     443023,  558174,  703255,  886046, 1116348, 1406511,
-    1772093, 2232697, 2813022, 3544186 }
-};
-
 typedef struct {
     int64_t score;
     int level_idx; // index into level_tree[]
@@ -623,23 +600,23 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, int16_t *dct,
 const static uint8_t x264_zigzag_scan2[4] = {0,1,2,3};
 
 int x264_quant_dc_trellis( x264_t *h, int16_t *dct, int i_quant_cat,
-                            int i_qp, int i_ctxBlockCat, int b_intra )
+                            int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma )
 {
     return quant_trellis_cabac( h, (int16_t*)dct,
         h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
         NULL, i_ctxBlockCat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[h->mb.b_interlaced],
-        i_ctxBlockCat, lambda2_tab[b_intra][i_qp], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0 );
+        i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0 );
 }
 
 int x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
-                             int i_qp, int i_ctxBlockCat, int b_intra, int idx )
+                             int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma, int idx )
 {
     int b_ac = (i_ctxBlockCat == DCT_LUMA_AC || i_ctxBlockCat == DCT_CHROMA_AC);
     return quant_trellis_cabac( h, (int16_t*)dct,
         h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
         x264_dct4_weight2_zigzag[h->mb.b_interlaced],
         x264_zigzag_scan4[h->mb.b_interlaced],
-        i_ctxBlockCat, lambda2_tab[b_intra][i_qp], b_ac, 0, 16, idx );
+        i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx );
 }
 
 int x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
@@ -649,6 +626,6 @@ int x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
         h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
         x264_dct8_weight2_zigzag[h->mb.b_interlaced],
         x264_zigzag_scan8[h->mb.b_interlaced],
-        DCT_LUMA_8x8, lambda2_tab[b_intra][i_qp], 0, 0, 64, idx );
+        DCT_LUMA_8x8, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 64, idx );
 }
 
diff --git a/x264.c b/x264.c
index e9eb91d2..0a140704 100644
--- a/x264.c
+++ b/x264.c
@@ -269,8 +269,9 @@ static void Help( x264_param_t *defaults, int b_longhelp )
         "                                  - 6: RD mode decision for I/P-frames\n"
         "                                  - 7: RD mode decision for all frames\n"
         "                                  - 8: RD refinement for I/P-frames\n"
-        "                                  - 9: RD refinement for all frames\n" );
-    else H0( "                                  decision quality: 1=fast, 9=best.\n"  );
+        "                                  - 9: RD refinement for all frames\n"
+        "                                  - 10: QP-RD - requires trellis=2, aq-mode>0\n" );
+    else H0( "                                  decision quality: 1=fast, 10=best.\n"  );
     H0( "      --psy-rd                Strength of psychovisual optimization [\"%.1f:%.1f\"]\n"
         "                                  #1: RD (requires subme>=6)\n"
         "                                  #2: Trellis (requires trellis, experimental)\n",
@@ -581,7 +582,7 @@ static int  Parse( int argc, char **argv,
             else if( !strcasecmp( optarg, "placebo" ) )
             {
                 param->analyse.i_me_method = X264_ME_TESA;
-                param->analyse.i_subpel_refine = 9;
+                param->analyse.i_subpel_refine = 10;
                 param->analyse.i_me_range = 24;
                 param->i_frame_reference = 16;
                 param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;