# include "arm/dct.h"
#endif
+/* The inverse of the scaling factors introduced by the 8x8 fdct: 64 entries built from the six W() weight classes below. */
+/* The element type is uint32_t for the asm implementation of trellis; the actual values fit in uint16_t. */
+#define W(i) (i==0 ? FIX8(1.0000) :\
+ i==1 ? FIX8(0.8859) :\
+ i==2 ? FIX8(1.6000) :\
+ i==3 ? FIX8(0.9415) :\
+ i==4 ? FIX8(1.2651) :\
+ i==5 ? FIX8(1.1910) :0)
+const uint32_t x264_dct8_weight_tab[64] = {
+ W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3),
+ W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
+ W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5),
+ W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
+
+ W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3),
+ W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
+ W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5),
+ W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1)
+};
+#undef W /* W is a per-table helper; it is redefined with different weights for the next table */
+
+#define W(i) (i==0 ? FIX8(1.76777) :\
+ i==1 ? FIX8(1.11803) :\
+ i==2 ? FIX8(0.70711) :0)
+const uint32_t x264_dct4_weight_tab[16] = { /* NOTE(review): presumably the 4x4 counterpart of x264_dct8_weight_tab (inverse fdct scaling) -- confirm */
+ W(0), W(1), W(0), W(1),
+ W(1), W(2), W(1), W(2),
+ W(0), W(1), W(0), W(1),
+ W(1), W(2), W(1), W(2)
+};
+#undef W
+
+/* Inverse squared: each weight is the square of the matching x264_dct4_weight_tab weight (1.76777^2 ~= 3.125, 1.11803^2 ~= 1.25, 0.70711^2 ~= 0.5). */
+#define W(i) (i==0 ? FIX8(3.125) :\
+ i==1 ? FIX8(1.25) :\
+ i==2 ? FIX8(0.5) :0)
+const uint32_t x264_dct4_weight2_tab[16] = {
+ W(0), W(1), W(0), W(1),
+ W(1), W(2), W(1), W(2),
+ W(0), W(1), W(0), W(1),
+ W(1), W(2), W(1), W(2)
+};
+#undef W
+
+#define W(i) (i==0 ? FIX8(1.00000) :\
+ i==1 ? FIX8(0.78487) :\
+ i==2 ? FIX8(2.56132) :\
+ i==3 ? FIX8(0.88637) :\
+ i==4 ? FIX8(1.60040) :\
+ i==5 ? FIX8(1.41850) :0)
+const uint32_t x264_dct8_weight2_tab[64] = { /* weights are approximately the squares of the x264_dct8_weight_tab weights; same W() index layout */
+ W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3),
+ W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
+ W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5),
+ W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
+
+ W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3),
+ W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
+ W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5),
+ W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1)
+};
+#undef W
+
+
static void dct4x4dc( dctcoef d[16] )
{
dctcoef tmp[16];
#ifndef X264_DCT_H
#define X264_DCT_H
-/* the inverse of the scaling factors introduced by 8x8 fdct */
-#define W(i) (i==0 ? FIX8(1.0000) :\
- i==1 ? FIX8(0.8859) :\
- i==2 ? FIX8(1.6000) :\
- i==3 ? FIX8(0.9415) :\
- i==4 ? FIX8(1.2651) :\
- i==5 ? FIX8(1.1910) :0)
-static const uint16_t x264_dct8_weight_tab[64] = {
- W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3),
- W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
- W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5),
- W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
-
- W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3),
- W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
- W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5),
- W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1)
-};
-#undef W
-
-#define W(i) (i==0 ? FIX8(1.76777) :\
- i==1 ? FIX8(1.11803) :\
- i==2 ? FIX8(0.70711) :0)
-static const uint16_t x264_dct4_weight_tab[16] = {
- W(0), W(1), W(0), W(1),
- W(1), W(2), W(1), W(2),
- W(0), W(1), W(0), W(1),
- W(1), W(2), W(1), W(2)
-};
-#undef W
-
-/* inverse squared */
-#define W(i) (i==0 ? FIX8(3.125) :\
- i==1 ? FIX8(1.25) :\
- i==2 ? FIX8(0.5) :0)
-static const uint16_t x264_dct4_weight2_tab[16] = {
- W(0), W(1), W(0), W(1),
- W(1), W(2), W(1), W(2),
- W(0), W(1), W(0), W(1),
- W(1), W(2), W(1), W(2)
-};
-#undef W
-
-#define W(i) (i==0 ? FIX8(1.00000) :\
- i==1 ? FIX8(0.78487) :\
- i==2 ? FIX8(2.56132) :\
- i==3 ? FIX8(0.88637) :\
- i==4 ? FIX8(1.60040) :\
- i==5 ? FIX8(1.41850) :0)
-static const uint16_t x264_dct8_weight2_tab[64] = {
- W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3),
- W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
- W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5),
- W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
-
- W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3),
- W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1),
- W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5),
- W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1)
-};
-#undef W
+extern const uint32_t x264_dct4_weight_tab[16]; /* declarations only: each table is defined once as a non-static const uint32_t array */
+extern const uint32_t x264_dct8_weight_tab[64];
+extern const uint32_t x264_dct4_weight2_tab[16]; /* the weight2 tables hold the squared weights */
+extern const uint32_t x264_dct8_weight2_tab[64];
typedef struct
{
x264_cabac_encode_decision( cb, ctxbase + 5, 1 );
if( i_abs < 9 )
{
- cb->f8_bits_encoded += cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]];
- cb->state[ctxbase+6] = cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]];
+ cb->f8_bits_encoded += x264_cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]];
+ cb->state[ctxbase+6] = x264_cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]];
}
else
{
{
227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
};
-static const uint8_t significant_coeff_flag_offset_8x8[2][63] =
+#if RDO_SKIP_BS
+extern const uint8_t x264_significant_coeff_flag_offset_8x8[2][63];
+extern const uint8_t x264_last_coeff_flag_offset_8x8[63];
+extern const uint8_t x264_coeff_flag_offset_chroma_422_dc[7];
+#else
+const uint8_t x264_significant_coeff_flag_offset_8x8[2][63] =
{{
0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14
}};
-static const uint8_t last_coeff_flag_offset_8x8[63] =
+const uint8_t x264_last_coeff_flag_offset_8x8[63] = /* indexed by coefficient position; added to ctx_last when coding 8x8 blocks */
{
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
-static const uint8_t coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* MIN( i/2, 2 ) */
+const uint8_t x264_coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* MIN( i/2, 2 ); used as both the sig and the last context offset when chroma422dc is set */
+#endif
// node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
// 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
if( chroma422dc )
{
int count_m1 = 7;
- WRITE_SIGMAP( coeff_flag_offset_chroma_422_dc[i], coeff_flag_offset_chroma_422_dc[i] )
+ WRITE_SIGMAP( x264_coeff_flag_offset_chroma_422_dc[i], x264_coeff_flag_offset_chroma_422_dc[i] )
}
else
{
int count_m1 = count_cat_m1[ctx_block_cat];
if( count_m1 == 63 )
{
- const uint8_t *sig_offset = significant_coeff_flag_offset_8x8[MB_INTERLACED];
- WRITE_SIGMAP( sig_offset[i], last_coeff_flag_offset_8x8[i] )
+ const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
+ WRITE_SIGMAP( sig_offset[i], x264_last_coeff_flag_offset_8x8[i] )
}
else
WRITE_SIGMAP( i, i )
* is nearly no quality penalty for this (~0.001db) and the speed boost (~30%) is worth it. */
static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc )
{
- const uint8_t *sig_offset = significant_coeff_flag_offset_8x8[MB_INTERLACED];
+ const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
int ctx_sig = significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
int ctx_last = last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
int ctx_level = coeff_abs_level_m1_offset[ctx_block_cat];
if( last != (b_8x8 ? 63 : chroma422dc ? 7 : count_cat_m1[ctx_block_cat]) )
{
x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[last] :
- chroma422dc ? coeff_flag_offset_chroma_422_dc[last] : last), 1 );
- x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? last_coeff_flag_offset_8x8[last] :
- chroma422dc ? coeff_flag_offset_chroma_422_dc[last] : last), 1 );
+ chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
+ x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? x264_last_coeff_flag_offset_8x8[last] :
+ chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
}
if( coeff_abs > 1 )
ctx = levelgt1_ctx[0] + ctx_level;
if( coeff_abs < 15 )
{
- cb->f8_bits_encoded += cabac_size_unary[coeff_abs-1][cb->state[ctx]];
- cb->state[ctx] = cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
+ cb->f8_bits_encoded += x264_cabac_size_unary[coeff_abs-1][cb->state[ctx]];
+ cb->state[ctx] = x264_cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
}
else
{
- cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]];
- cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]];
+ cb->f8_bits_encoded += x264_cabac_size_unary[14][cb->state[ctx]];
+ cb->state[ctx] = x264_cabac_transition_unary[14][cb->state[ctx]];
x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
}
node_ctx = coeff_abs_level_transition[1][0];
{
coeff_abs = abs(l[i]);
x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] :
- chroma422dc ? coeff_flag_offset_chroma_422_dc[i] : i), 1 );
- x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? last_coeff_flag_offset_8x8[i] :
- chroma422dc ? coeff_flag_offset_chroma_422_dc[i] : i), 0 );
+ chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 1 );
+ x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? x264_last_coeff_flag_offset_8x8[i] :
+ chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 0 );
ctx = coeff_abs_level1_ctx[node_ctx] + ctx_level;
if( coeff_abs > 1 )
ctx = levelgt1_ctx[node_ctx] + ctx_level;
if( coeff_abs < 15 )
{
- cb->f8_bits_encoded += cabac_size_unary[coeff_abs-1][cb->state[ctx]];
- cb->state[ctx] = cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
+ cb->f8_bits_encoded += x264_cabac_size_unary[coeff_abs-1][cb->state[ctx]];
+ cb->state[ctx] = x264_cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
}
else
{
- cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]];
- cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]];
+ cb->f8_bits_encoded += x264_cabac_size_unary[14][cb->state[ctx]];
+ cb->state[ctx] = x264_cabac_transition_unary[14][cb->state[ctx]];
x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
}
node_ctx = coeff_abs_level_transition[1][node_ctx];
}
else
x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] :
- chroma422dc ? coeff_flag_offset_chroma_422_dc[i] : i), 0 );
+ chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 0 );
}
}
{
int dct8x8 = cat&1;
int size = dct8x8 ? 64 : 16;
- const uint16_t *weight = dct8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
+ const uint32_t *weight = dct8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
if( h->nr_count[cat] > (dct8x8 ? (1<<16) : (1<<18)) )
{
/* Transition and size tables for abs<9 MVD and residual coding */
/* Consist of i_prefix-2 1s, one zero, and a bypass sign bit */
-static uint8_t cabac_transition_unary[15][128];
-static uint16_t cabac_size_unary[15][128];
+uint8_t x264_cabac_transition_unary[15][128];
+uint16_t x264_cabac_size_unary[15][128];
/* Transition and size tables for abs>9 MVD */
/* Consist of 5 1s and a bypass sign bit */
static uint8_t cabac_transition_5ones[128];
f8_bits += x264_cabac_size_decision2( &ctx, 0 );
f8_bits += 1 << CABAC_SIZE_BITS; //sign
- cabac_size_unary[i_prefix][i_ctx] = f8_bits;
- cabac_transition_unary[i_prefix][i_ctx] = ctx;
+ x264_cabac_size_unary[i_prefix][i_ctx] = f8_bits;
+ x264_cabac_transition_unary[i_prefix][i_ctx] = ctx;
}
}
for( int i_ctx = 0; i_ctx < 128; i_ctx++ )
unsigned f8_bits = cost_sig;
int prefix = X264_MIN( abs_level - 1, 14 );
f8_bits += x264_cabac_size_decision_noup2( cabac_state+1, prefix > 0 );
- f8_bits += cabac_size_unary[prefix][cabac_state[5]];
+ f8_bits += x264_cabac_size_unary[prefix][cabac_state[5]];
if( abs_level >= 15 )
f8_bits += bs_size_ue_big( abs_level - 15 ) << CABAC_SIZE_BITS;
score += (uint64_t)f8_bits * lambda2 >> ( CABAC_SIZE_BITS - LAMBDA_BITS );
if( const_level > 1 )
{
levelgt1_state = j >= 6 ? nodes_prev[j].cabac_state[levelgt1_ctx-6] : level_state[levelgt1_ctx];
- f8_bits += cabac_size_unary[prefix][levelgt1_state] + suffix_cost;
+ f8_bits += x264_cabac_size_unary[prefix][levelgt1_state] + suffix_cost;
}
else
f8_bits += 1 << CABAC_SIZE_BITS;
if( j >= 3 ) // skip the transition if we're not going to reuse the context
nodes_cur[node_ctx].cabac_state[level1_ctx>>2] = x264_cabac_transition[level1_state][const_level > 1];
if( const_level > 1 && node_ctx == 7 )
- nodes_cur[node_ctx].cabac_state[levelgt1_ctx-6] = cabac_transition_unary[prefix][levelgt1_state];
+ nodes_cur[node_ctx].cabac_state[levelgt1_ctx-6] = x264_cabac_transition_unary[prefix][levelgt1_state];
nodes_cur[node_ctx].level_idx = nodes_prev[j].level_idx;
SET_LEVEL( nodes_cur[node_ctx], nodes_prev[j], abs_level );
}
{
ALIGNED_ARRAY_16( dctcoef, orig_coefs, [64] );
ALIGNED_ARRAY_16( dctcoef, quant_coefs, [64] );
- const uint16_t *coef_weight1 = num_coefs == 64 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
- const uint16_t *coef_weight2 = num_coefs == 64 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
+ const uint32_t *coef_weight1 = num_coefs == 64 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
+ const uint32_t *coef_weight2 = num_coefs == 64 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
const int b_interlaced = MB_INTERLACED;
uint8_t *cabac_state_sig = &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][ctx_block_cat] ];
uint8_t *cabac_state_last = &h->cabac.state[ last_coeff_flag_offset[b_interlaced][ctx_block_cat] ];
* subtracting from one score is equivalent to adding to the rest. */\
if( !ctx_hi )\
{\
- int sigindex = !dc && num_coefs == 64 ? significant_coeff_flag_offset_8x8[b_interlaced][i] :\
- b_chroma && dc && num_coefs == 8 ? coeff_flag_offset_chroma_422_dc[i] : i;\
+ int sigindex = !dc && num_coefs == 64 ? x264_significant_coeff_flag_offset_8x8[b_interlaced][i] :\
+ b_chroma && dc && num_coefs == 8 ? x264_coeff_flag_offset_chroma_422_dc[i] : i;\
uint64_t cost_sig0 = x264_cabac_size_decision_noup2( &cabac_state_sig[sigindex], 0 )\
* (uint64_t)lambda2 >> ( CABAC_SIZE_BITS - LAMBDA_BITS );\
nodes_cur[0].score -= cost_sig0;\
\
if( i < num_coefs-1 || ctx_hi )\
{\
- int sigindex = !dc && num_coefs == 64 ? significant_coeff_flag_offset_8x8[b_interlaced][i] :\
- b_chroma && dc && num_coefs == 8 ? coeff_flag_offset_chroma_422_dc[i] : i;\
- int lastindex = !dc && num_coefs == 64 ? last_coeff_flag_offset_8x8[i] :\
- b_chroma && dc && num_coefs == 8 ? coeff_flag_offset_chroma_422_dc[i] : i;\
+ int sigindex = !dc && num_coefs == 64 ? x264_significant_coeff_flag_offset_8x8[b_interlaced][i] :\
+ b_chroma && dc && num_coefs == 8 ? x264_coeff_flag_offset_chroma_422_dc[i] : i;\
+ int lastindex = !dc && num_coefs == 64 ? x264_last_coeff_flag_offset_8x8[i] :\
+ b_chroma && dc && num_coefs == 8 ? x264_coeff_flag_offset_chroma_422_dc[i] : i;\
cost_siglast[0] = x264_cabac_size_decision_noup2( &cabac_state_sig[sigindex], 0 );\
int cost_sig1 = x264_cabac_size_decision_noup2( &cabac_state_sig[sigindex], 1 );\
cost_siglast[1] = x264_cabac_size_decision_noup2( &cabac_state_last[lastindex], 0 ) + cost_sig1;\
{
ALIGNED_16( dctcoef quant_coefs[2][16] );
ALIGNED_16( dctcoef coefs[16] ) = {0};
- const uint16_t *coef_weight1 = b_8x8 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
- const uint16_t *coef_weight2 = b_8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
+ const uint32_t *coef_weight1 = b_8x8 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
+ const uint32_t *coef_weight2 = b_8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
int delta_distortion[16];
int64_t score = 1ULL<<62;
int i, j;