From: Fiona Glaser Date: Mon, 16 Feb 2009 13:56:12 +0000 (-0800) Subject: Optimize neighbor CBP calculation and fix related regression X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=56967517b7003192e9ac9e3110d566b2a05839f9;p=libx264 Optimize neighbor CBP calculation and fix related regression r1105 introduced array overflow in cbp handling --- diff --git a/common/common.h b/common/common.h index 394f9dab..c8fb1d99 100644 --- a/common/common.h +++ b/common/common.h @@ -529,6 +529,10 @@ struct x264_t /* number of neighbors (top and left) that used 8x8 dct */ int i_neighbour_transform_size; int i_neighbour_interlaced; + + /* neighbor CBPs */ + int i_cbp_top; + int i_cbp_left; } cache; /* */ diff --git a/common/macroblock.c b/common/macroblock.c index 6c74ef2e..54bc09c9 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -897,6 +897,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) { h->mb.i_mb_type_top = i_top_type= h->mb.type[i_top_xy]; + h->mb.cache.i_cbp_top = h->mb.cbp[i_top_xy]; h->mb.i_neighbour |= MB_TOP; @@ -912,6 +913,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) else { h->mb.i_mb_type_top = -1; + h->mb.cache.i_cbp_top = -1; /* load intra4x4 */ h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] = @@ -935,6 +937,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) i_left_xy = i_mb_xy - 1; h->mb.i_mb_type_left = i_left_type = h->mb.type[i_left_xy]; + h->mb.cache.i_cbp_left = h->mb.cbp[h->mb.i_mb_xy - 1]; h->mb.i_neighbour |= MB_LEFT; @@ -959,6 +962,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) else { h->mb.i_mb_type_left = -1; + h->mb.cache.i_cbp_left = -1; h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = diff --git a/encoder/cabac.c b/encoder/cabac.c index b44905e1..2cd7cd38 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -233,8 +233,8 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb ) static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb ) { int cbp = h->mb.i_cbp_luma; - int cbp_l = h->mb.i_neighbour & MB_LEFT ? h->mb.cbp[h->mb.i_mb_xy - 1] : -1; - int cbp_t = h->mb.i_neighbour & MB_TOP ? h->mb.cbp[h->mb.i_mb_top_xy] : -1; + int cbp_l = h->mb.cache.i_cbp_left; + int cbp_t = h->mb.cache.i_cbp_top; x264_cabac_encode_decision( cb, 76 - ((cbp_l >> 1) & 1) - ((cbp_t >> 1) & 2), (h->mb.i_cbp_luma >> 0) & 1 ); x264_cabac_encode_decision( cb, 76 - ((cbp >> 0) & 1) - ((cbp_t >> 2) & 2), (h->mb.i_cbp_luma >> 1) & 1 ); x264_cabac_encode_decision( cb, 76 - ((cbp_l >> 3) & 1) - ((cbp << 1) & 2), (h->mb.i_cbp_luma >> 2) & 1 ); @@ -243,20 +243,12 @@ static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb ) static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb ) { - int cbp_a = -1; - int cbp_b = -1; - int ctx; - - /* No need to test for SKIP/PCM */ - if( h->mb.i_neighbour & MB_LEFT ) - cbp_a = (h->mb.cbp[h->mb.i_mb_xy - 1] >> 4)&0x3; + int cbp_a = h->mb.cache.i_cbp_left & 0x30; + int cbp_b = h->mb.cache.i_cbp_top & 0x30; + int ctx = 0; - if( h->mb.i_neighbour & MB_TOP ) - cbp_b = (h->mb.cbp[h->mb.i_mb_top_xy] >> 4)&0x3; - - ctx = 0; - if( cbp_a > 0 ) ctx++; - if( cbp_b > 0 ) ctx += 2; + if( cbp_a && h->mb.cache.i_cbp_left != -1 ) ctx++; + if( cbp_b && h->mb.cache.i_cbp_top != -1 ) ctx+=2; if( h->mb.i_cbp_chroma == 0 ) x264_cabac_encode_decision_noup( cb, 77 + ctx, 0 ); else @@ -264,8 +256,8 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb ) x264_cabac_encode_decision_noup( cb, 77 + ctx, 1 ); ctx = 4; - if( cbp_a == 2 ) ctx++; - if( cbp_b == 2 ) ctx += 2; + if( cbp_a == 0x20 ) ctx++; + if( cbp_b == 0x20 ) ctx += 2; x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma > 1 ); } } @@ -531,15 +523,14 @@ static int ALWAYS_INLINE x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_nzb &= 0x7f + (b_intra << 7); return 4*i_cat + 2*!!i_nzb + !!i_nza; case DCT_LUMA_DC: - /* Note: this depends on the exact values of MB_LEFT and MB_TOP enums */ - i_nza = ((h->mb.cbp[h->mb.i_mb_xy - 1] >> 8) | ~h->mb.i_neighbour) & 1; - i_nzb = ((h->mb.cbp[h->mb.i_mb_top_xy] >> 7) | ~h->mb.i_neighbour) & 2; - return 4*i_cat + i_nzb + i_nza; + i_nza = (h->mb.cache.i_cbp_left >> 8) & 1; + i_nzb = (h->mb.cache.i_cbp_top >> 8) & 1; + return 4*i_cat + 2*i_nzb + i_nza; case DCT_CHROMA_DC: /* no need to test skip/pcm */ i_idx -= 25; - i_nza = h->mb.i_neighbour & MB_LEFT ? (h->mb.cbp[h->mb.i_mb_xy - 1] >> (9 + i_idx)) & 1 : b_intra; - i_nzb = h->mb.i_neighbour & MB_TOP ? (h->mb.cbp[h->mb.i_mb_top_xy] >> (9 + i_idx)) & 1 : b_intra; + i_nza = h->mb.cache.i_cbp_left != -1 ? (h->mb.cache.i_cbp_left >> (9 + i_idx)) & 1 : b_intra; + i_nzb = h->mb.cache.i_cbp_top != -1 ? (h->mb.cache.i_cbp_top >> (9 + i_idx)) & 1 : b_intra; return 4*i_cat + 2*i_nzb + i_nza; default: return 0;