}
}
-static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
+/* i_ctxBlockCat: 0-> DC 16x16 i_idx = 0
+ * 1-> AC 16x16 i_idx = luma4x4idx
+ * 2-> Luma4x4 i_idx = luma4x4idx
+ * 3-> DC Chroma i_idx = iCbCr
+ * 4-> AC Chroma i_idx = 4 * iCbCr + chroma4x4idx
+ * 5-> Luma8x8 i_idx = luma8x8idx
+ */
+
+static int ALWAYS_INLINE x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
{
- /* i_ctxBlockCat: 0-> DC 16x16 i_idx = 0
- * 1-> AC 16x16 i_idx = luma4x4idx
- * 2-> Luma4x4 i_idx = luma4x4idx
- * 3-> DC Chroma i_idx = iCbCr
- * 4-> AC Chroma i_idx = 4 * iCbCr + chroma4x4idx
- * 5-> Luma8x8 i_idx = luma8x8idx
- */
-
- int i_mba_xy = -1;
- int i_mbb_xy = -1;
- int i_nza = 0;
- int i_nzb = 0;
+ int i_nza;
+ int i_nzb;
+ int b_intra = IS_INTRA( h->mb.i_type );
switch( i_cat )
{
- case DCT_LUMA_DC:
- if( h->mb.i_neighbour & MB_LEFT )
- {
- i_mba_xy = h->mb.i_mb_xy - 1;
- i_nza = h->mb.cbp[i_mba_xy] & 0x100;
- }
- if( h->mb.i_neighbour & MB_TOP )
- {
- i_mbb_xy = h->mb.i_mb_top_xy;
- i_nzb = h->mb.cbp[i_mbb_xy] & 0x100;
- }
- break;
case DCT_LUMA_AC:
case DCT_LUMA_4x4:
- if( i_idx & ~10 ) // block_idx_x > 0
- i_mba_xy = h->mb.i_mb_xy;
- else if( h->mb.i_neighbour & MB_LEFT )
- i_mba_xy = h->mb.i_mb_xy - 1;
-
- if( i_idx & ~5 ) // block_idx_y > 0
- i_mbb_xy = h->mb.i_mb_xy;
- else if( h->mb.i_neighbour & MB_TOP )
- i_mbb_xy = h->mb.i_mb_top_xy;
-
+ case DCT_CHROMA_AC:
/* no need to test for skip/pcm */
- if( i_mba_xy >= 0 )
- i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
- if( i_mbb_xy >= 0 )
- i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
- break;
+ i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
+ i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
+ i_nza &= 0x7f + (b_intra << 7);
+ i_nzb &= 0x7f + (b_intra << 7);
+ return 4*i_cat + 2*!!i_nzb + !!i_nza;
+ case DCT_LUMA_DC:
+ /* Note: this depends on the exact values of MB_LEFT and MB_TOP enums */
+ i_nza = ((h->mb.cbp[h->mb.i_mb_xy - 1] >> 8) | ~h->mb.i_neighbour) & 1;
+ i_nzb = ((h->mb.cbp[h->mb.i_mb_top_xy] >> 7) | ~h->mb.i_neighbour) & 2;
+ return 4*i_cat + i_nzb + i_nza;
case DCT_CHROMA_DC:
/* no need to test skip/pcm */
i_idx -= 25;
- if( h->mb.i_neighbour & MB_LEFT )
- {
- i_mba_xy = h->mb.i_mb_xy - 1;
- i_nza = h->mb.cbp[i_mba_xy] & (0x200 << i_idx);
- }
- if( h->mb.i_neighbour & MB_TOP )
- {
- i_mbb_xy = h->mb.i_mb_top_xy;
- i_nzb = h->mb.cbp[i_mbb_xy] & (0x200 << i_idx);
- }
- break;
- case DCT_CHROMA_AC:
- if( i_idx & 1 )
- i_mba_xy = h->mb.i_mb_xy;
- else if( h->mb.i_neighbour & MB_LEFT )
- i_mba_xy = h->mb.i_mb_xy - 1;
-
- if( i_idx & 2 )
- i_mbb_xy = h->mb.i_mb_xy;
- else if( h->mb.i_neighbour & MB_TOP )
- i_mbb_xy = h->mb.i_mb_top_xy;
-
- /* no need to test skip/pcm */
- if( i_mba_xy >= 0 )
- i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
- if( i_mbb_xy >= 0 )
- i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
+ i_nza = h->mb.i_neighbour & MB_LEFT ? (h->mb.cbp[h->mb.i_mb_xy - 1] >> (9 + i_idx)) & 1 : b_intra;
+ i_nzb = h->mb.i_neighbour & MB_TOP ? (h->mb.cbp[h->mb.i_mb_top_xy] >> (9 + i_idx)) & 1 : b_intra;
+ return 4*i_cat + 2*i_nzb + i_nza;
}
-
- if( IS_INTRA( h->mb.i_type ) )
- {
- i_nza |= i_mba_xy < 0;
- i_nzb |= i_mbb_xy < 0;
- }
-
- return 4*i_cat + 2*!!i_nzb + !!i_nza;
}
};
#if !RDO_SKIP_BS
-static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int i_idx, int16_t *l, int i_count )
+static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int i_idx, int16_t *l, int i_count, int ctxidxinc )
{
const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
if( i_count != 64 )
{
/* coded block flag */
- int ctx = 85 + x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx );
+ int ctx = 85 + ctxidxinc;
if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )
x264_cabac_encode_decision( cb, ctx, 1 );
else
* this is slightly incorrect because the sigmap is not reversible
* (contexts are repeated). However, there is nearly no quality penalty
* for this (~0.001db) and the speed boost (~30%) is worth it. */
-static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int i_idx, int16_t *l, int i_count, int b_8x8 )
+static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int i_idx, int16_t *l, int i_count, int ctxidxinc, int b_8x8 )
{
const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
if( !b_8x8 )
{
/* coded block flag */
- ctx = 85 + x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx );
+ ctx = 85 + ctxidxinc;
if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )
x264_cabac_encode_decision( cb, ctx, 1 );
else
static void block_residual_write_cabac_8x8( x264_t *h, x264_cabac_t *cb, int i_idx, int16_t *l )
{
- block_residual_write_cabac_internal( h, cb, DCT_LUMA_8x8, i_idx, l, 64, 1 );
+ block_residual_write_cabac_internal( h, cb, DCT_LUMA_8x8, i_idx, l, 64, 0, 1 );
}
-static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int i_idx, int16_t *l, int i_count )
+static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int i_idx, int16_t *l, int i_count, int ctxidxinc )
{
- block_residual_write_cabac_internal( h, cb, i_ctxBlockCat, i_idx, l, i_count, 0 );
+ block_residual_write_cabac_internal( h, cb, i_ctxBlockCat, i_idx, l, i_count, ctxidxinc, 0 );
}
#endif
+#define block_residual_write_cabac( h, cb, i_ctxBlockCat, i_idx, l, i_count ) \
+{ \
+ int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx); \
+ block_residual_write_cabac( h, cb, i_ctxBlockCat, i_idx, l, i_count, ctxidxinc ); \
+}
+
void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
{
const int i_mb_type = h->mb.i_type;