granicus.if.org Git - libvpx/commitdiff
New vp8_decode_mb_tokens()
author Scott LaVarnway <slavarnway@google.com>
Fri, 23 Mar 2012 19:50:08 +0000 (15:50 -0400)
committer Scott LaVarnway <slavarnway@google.com>
Fri, 23 Mar 2012 19:50:08 +0000 (15:50 -0400)
This new vp8_decode_mb_tokens() uses a modified version of
WebP's GetCoeffs function.  For now, the dequant does not
occur in GetCoeffs.
Tests showed performance improvements up to 2.5% depending
on material.

Change-Id: Ia24d78627e16ffee5eb4d777ee8379a9270f07c5

vp8/decoder/detokenize.c

index ba94c58bbb80b6a4b4e49fedf29631612f707890..c5752ee0b188e82ef9e4b5a2eef810c44f71078e 100644 (file)
 #include "vpx_ports/mem.h"
 #include "detokenize.h"
 
-#define BOOL_DATA unsigned char
-
-#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) =
-{
-    0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X,
-    6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X,
-    6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
-    6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X
-};
-#define EOB_CONTEXT_NODE            0
-#define ZERO_CONTEXT_NODE           1
-#define ONE_CONTEXT_NODE            2
-#define LOW_VAL_CONTEXT_NODE        3
-#define TWO_CONTEXT_NODE            4
-#define THREE_CONTEXT_NODE          5
-#define HIGH_LOW_CONTEXT_NODE       6
-#define CAT_ONE_CONTEXT_NODE        7
-#define CAT_THREEFOUR_CONTEXT_NODE  8
-#define CAT_THREE_CONTEXT_NODE      9
-#define CAT_FIVE_CONTEXT_NODE       10
-
-#define CAT1_MIN_VAL    5
-#define CAT2_MIN_VAL    7
-#define CAT3_MIN_VAL   11
-#define CAT4_MIN_VAL   19
-#define CAT5_MIN_VAL   35
-#define CAT6_MIN_VAL   67
-
-#define CAT1_PROB0    159
-#define CAT2_PROB0    145
-#define CAT2_PROB1    165
-
-#define CAT3_PROB0 140
-#define CAT3_PROB1 148
-#define CAT3_PROB2 173
-
-#define CAT4_PROB0 135
-#define CAT4_PROB1 140
-#define CAT4_PROB2 155
-#define CAT4_PROB3 176
-
-#define CAT5_PROB0 130
-#define CAT5_PROB1 134
-#define CAT5_PROB2 141
-#define CAT5_PROB3 157
-#define CAT5_PROB4 180
-
-static const unsigned char cat6_prob[12] =
-{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 };
-
-
 void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
 {
     /* Clear entropy contexts for Y2 blocks */
@@ -83,302 +31,216 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
     }
 }
 
-DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]);
-#define FILL \
-    if(count < 0) \
-        VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+/*
+    ------------------------------------------------------------------------------
+    Residual decoding (Paragraph 13.2 / 13.3)
+*/
+static const uint8_t kBands[16 + 1] = {
+  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+  0  /* extra entry as sentinel */
+};
 
-#define NORMALIZE \
-    /*if(range < 0x80)*/                            \
-    { \
-        shift = vp8_norm[range]; \
-        range <<= shift; \
-        value <<= shift; \
-        count -= shift; \
-    }
+static const uint8_t kCat3[] = { 173, 148, 140, 0 };
+static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
+static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
+static const uint8_t kCat6[] =
+  { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
+static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 };
+static const uint8_t kZigzag[16] = {
+  0, 1, 4, 8,  5, 2, 3, 6,  9, 12, 13, 10,  7, 11, 14, 15
+};
 
-#define DECODE_AND_APPLYSIGN(value_to_sign) \
-    split = (range + 1) >> 1; \
-    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
-    FILL \
-    if ( value < bigsplit ) \
-    { \
-        range = split; \
-        v= value_to_sign; \
-    } \
-    else \
-    { \
-        range = range-split; \
-        value = value-bigsplit; \
-        v = -value_to_sign; \
-    } \
-    range +=range;                   \
-    value +=value;                   \
-    count--;
+#define VP8GetBit vp8dx_decode_bool
+#define NUM_PROBAS  11
+#define NUM_CTX  3
 
-#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
-    { \
-        split = 1 +  ((( probability*(range-1) ) )>> 8); \
-        bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
-        FILL \
-        if ( value < bigsplit ) \
-        { \
-            range = split; \
-            NORMALIZE \
-            goto branch; \
-        } \
-        value -= bigsplit; \
-        range = range - split; \
-        NORMALIZE \
-    }
+typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS];  // for const-casting
 
-#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
-    { \
-        split = 1 + ((( probability*(range-1) ) ) >> 8); \
-        bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
-        FILL \
-        if ( value < bigsplit ) \
-        { \
-            range = split; \
-            NORMALIZE \
-            Prob = coef_probs; \
-            if(c<15) {\
-            ++c; \
-            Prob += coef_bands_x[c]; \
-            goto branch; \
-            } goto BLOCK_FINISHED; /*for malformed input */\
-        } \
-        value -= bigsplit; \
-        range = range - split; \
-        NORMALIZE \
-    }
+static int GetSigned(BOOL_DECODER *br, int value_to_sign)
+{
+    int split = (br->range + 1) >> 1;
+    VP8_BD_VALUE bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
+    int v;
 
-#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
-    DECODE_AND_APPLYSIGN(val) \
-    Prob = coef_probs + (ENTROPY_NODES*2); \
-    if(c < 15){\
-        qcoeff_ptr [ scan[c] ] = (int16_t) v; \
-        ++c; \
-        goto DO_WHILE; }\
-    qcoeff_ptr [ 15 ] = (int16_t) v; \
-    goto BLOCK_FINISHED;
+    if(br->count < 0)
+        vp8dx_bool_decoder_fill(br);
 
+    if ( br->value < bigsplit )
+    {
+        br->range = split;
+        v= value_to_sign;
+    }
+    else
+    {
+        br->range = br->range-split;
+        br->value = br->value-bigsplit;
+        v = -value_to_sign;
+    }
+    br->range +=br->range;
+    br->value +=br->value;
+    br->count--;
 
-#define DECODE_EXTRABIT_AND_ADJUST_VAL(prob, bits_count)\
-    split = 1 +  (((range-1) * prob) >> 8); \
-    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
-    FILL \
-    if(value >= bigsplit)\
-    {\
-        range = range-split;\
-        value = value-bigsplit;\
-        val += ((uint16_t)1<<bits_count);\
-    }\
-    else\
-    {\
-        range = split;\
-    }\
-    NORMALIZE
+    return v;
+}
+/*
+   Returns the position of the last non-zero coeff plus one
+   (and 0 if there's no coeff at all)
+*/
+static int GetCoeffs(BOOL_DECODER *br, ProbaArray prob,
+                     int ctx, int n, int16_t* out)
+{
+    const uint8_t* p = prob[n][ctx];
+    if (!VP8GetBit(br, p[0]))
+    {   /* first EOB is more a 'CBP' bit. */
+        return 0;
+    }
+    while (1)
+    {
+        ++n;
+        if (!VP8GetBit(br, p[1]))
+        {
+            p = prob[kBands[n]][0];
+        }
+        else
+        {  /* non zero coeff */
+            int v, j;
+            if (!VP8GetBit(br, p[2]))
+            {
+                p = prob[kBands[n]][1];
+                v = 1;
+            }
+            else
+            {
+                if (!VP8GetBit(br, p[3]))
+                {
+                    if (!VP8GetBit(br, p[4]))
+                    {
+                        v = 2;
+                    }
+                    else
+                    {
+                        v = 3 + VP8GetBit(br, p[5]);
+                    }
+                }
+                else
+                {
+                    if (!VP8GetBit(br, p[6]))
+                    {
+                        if (!VP8GetBit(br, p[7]))
+                        {
+                            v = 5 + VP8GetBit(br, 159);
+                        } else
+                        {
+                            v = 7 + 2 * VP8GetBit(br, 165);
+                            v += VP8GetBit(br, 145);
+                        }
+                    }
+                    else
+                    {
+                        const uint8_t* tab;
+                        const int bit1 = VP8GetBit(br, p[8]);
+                        const int bit0 = VP8GetBit(br, p[9 + bit1]);
+                        const int cat = 2 * bit1 + bit0;
+                        v = 0;
+                        for (tab = kCat3456[cat]; *tab; ++tab)
+                        {
+                            v += v + VP8GetBit(br, *tab);
+                        }
+                        v += 3 + (8 << cat);
+                    }
+                }
+                p = prob[kBands[n]][2];
+            }
+            j = kZigzag[n - 1];
+
+            out[j] = GetSigned(br, v);
+
+            if (n == 16 || !VP8GetBit(br, p[0]))
+            {   /* EOB */
+                return n;
+            }
+        }
+        if (n == 16)
+        {
+            return 16;
+        }
+    }
+}
 
 int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
 {
-    ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
-    ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
-    const FRAME_CONTEXT * const fc = &dx->common.fc;
-
     BOOL_DECODER *bc = x->current_bc;
-
+    const FRAME_CONTEXT * const fc = &dx->common.fc;
     char *eobs = x->eobs;
 
-    ENTROPY_CONTEXT *a;
-    ENTROPY_CONTEXT *l;
     int i;
-
+    int nonzeros;
     int eobtotal = 0;
 
-    register int count;
-
-    const BOOL_DATA *bufptr;
-    const BOOL_DATA *bufend;
-    register unsigned int range;
-    VP8_BD_VALUE value;
-    const int *scan;
-    register unsigned int shift;
-    unsigned int split;
-    VP8_BD_VALUE bigsplit;
     short *qcoeff_ptr;
+    ProbaArray coef_probs;
+    ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context);
+    ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context);
+    ENTROPY_CONTEXT *a;
+    ENTROPY_CONTEXT *l;
+    int skip_dc = 0;
 
-    const vp8_prob *coef_probs;
-    int stop;
-    int val, bits_count;
-    int c;
-    int v;
-    const vp8_prob *Prob;
-    int start_coeff;
-
-
-    i = 0;
-    stop = 16;
-
-    scan = vp8_default_zig_zag1d;
     qcoeff_ptr = &x->qcoeff[0];
-    coef_probs = fc->coef_probs [3] [ 0 ] [0];
 
     if (x->mode_info_context->mbmi.mode != B_PRED &&
         x->mode_info_context->mbmi.mode != SPLITMV)
     {
-        i = 24;
-        stop = 24;
-        qcoeff_ptr += 24*16;
-        eobtotal -= 16;
-        coef_probs = fc->coef_probs [1] [ 0 ] [0];
-    }
-
-    bufend  = bc->user_buffer_end;
-    bufptr  = bc->user_buffer;
-    value   = bc->value;
-    count   = bc->count;
-    range   = bc->range;
-
-    start_coeff = 0;
-
-BLOCK_LOOP:
-    a = A + vp8_block2above[i];
-    l = L + vp8_block2left[i];
-
-    c = start_coeff;
+        a = a_ctx + 8;
+        l = l_ctx + 8;
 
-    VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
+        coef_probs = fc->coef_probs [1];
 
-    Prob = coef_probs;
-    Prob += v * ENTROPY_NODES;
-    *a = *l = 0;
+        nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr + 24 * 16);
+        *a = *l = (nonzeros > 0);
 
-DO_WHILE:
-    Prob += coef_bands_x[c];
-    DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
-    *a = *l = 1;
+        eobs[24] = nonzeros;
+        eobtotal += nonzeros - 16;
 
-CHECK_0_:
-    DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE],
-                              LOW_VAL_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE],
-                              HIGH_LOW_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE],
-                              CAT_THREEFOUR_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE],
-                              CAT_FIVE_CONTEXT_NODE_0_);
-
-    val = CAT6_MIN_VAL;
-    bits_count = 10;
-
-    do
+        coef_probs = fc->coef_probs [0];
+        skip_dc = 1;
+    }
+    else
     {
-        DECODE_EXTRABIT_AND_ADJUST_VAL(cat6_prob[bits_count], bits_count);
-        bits_count -- ;
+        coef_probs = fc->coef_probs [3];
+        skip_dc = 0;
     }
-    while (bits_count >= 0);
 
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_FIVE_CONTEXT_NODE_0_:
-    val = CAT5_MIN_VAL;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB4, 4);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB3, 3);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB2, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB1, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB0, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREEFOUR_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE],
-                              CAT_THREE_CONTEXT_NODE_0_);
-    val = CAT4_MIN_VAL;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB3, 3);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB2, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB1, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB0, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREE_CONTEXT_NODE_0_:
-    val = CAT3_MIN_VAL;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB2, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB1, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB0, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-HIGH_LOW_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE],
-                              CAT_ONE_CONTEXT_NODE_0_);
-
-    val = CAT2_MIN_VAL;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB1, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB0, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_ONE_CONTEXT_NODE_0_:
-    val = CAT1_MIN_VAL;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT1_PROB0, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-LOW_VAL_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
-
-THREE_CONTEXT_NODE_0_:
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
-
-TWO_CONTEXT_NODE_0_:
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
-
-ONE_CONTEXT_NODE_0_:
-    DECODE_AND_APPLYSIGN(1);
-    Prob = coef_probs + ENTROPY_NODES;
-
-    if (c < 15)
+    for (i = 0; i < 16; ++i)
     {
-        qcoeff_ptr [ scan[c] ] = (int16_t) v;
-        ++c;
-        goto DO_WHILE;
-    }
+        a = a_ctx + (i&3);
+        l = l_ctx + ((i&0xc)>>2);
 
-    qcoeff_ptr [ 15 ] = (int16_t) v;
-BLOCK_FINISHED:
-    eobs[i] = c;
-    eobtotal += c;
-    qcoeff_ptr += 16;
+        nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), skip_dc, qcoeff_ptr);
+        *a = *l = (nonzeros > 0);
 
-    i++;
+        nonzeros += skip_dc;
+        eobs[i] = nonzeros;
+        eobtotal += nonzeros;
+        qcoeff_ptr += 16;
+    }
 
-    if (i < stop)
-        goto BLOCK_LOOP;
+    coef_probs = fc->coef_probs [2];
 
-    if (i == 25)
+    a_ctx += 4;
+    l_ctx += 4;
+    for (i = 16; i < 24; ++i)
     {
-        start_coeff = 1;
-        i = 0;
-        stop = 16;
-        coef_probs = fc->coef_probs [0] [ 0 ] [0];
-        qcoeff_ptr -= (24*16 + 16);
-        goto BLOCK_LOOP;
-    }
+        a = a_ctx + ((i > 19)<<1) + (i&1);
+        l = l_ctx + ((i > 19)<<1) + ((i&3)>1);
 
-    if (i == 16)
-    {
-        start_coeff = 0;
-        coef_probs = fc->coef_probs [2] [ 0 ] [0];
-        stop = 24;
-        goto BLOCK_LOOP;
+        nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr);
+        *a = *l = (nonzeros > 0);
+
+        eobs[i] = nonzeros;
+        eobtotal += nonzeros;
+        qcoeff_ptr += 16;
     }
 
-    FILL
-    bc->user_buffer = bufptr;
-    bc->value = value;
-    bc->count = count;
-    bc->range = range;
     return eobtotal;
-
 }
+