From: Adrian Grange
Date: Thu, 2 Feb 2012 17:04:40 +0000 (-0800)
Subject: Added encoding in Superblock Order
X-Git-Tag: v1.3.0~1217^2~380^2~79
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5d0b5a17d9a73b3f4588ae95fbd4b18e7af79f35;p=libvpx

Added encoding in Superblock Order

As a precursor to encoding 32x32 blocks, this CL adds the ability to encode the frame one superblock (= 32x32 block) at a time. Within an SB the 4 individual MBs are encoded in raster order (NW, NE, SW, SE).

This functionality is added as an experiment, which can be enabled by specifying --enable-superblocks on the command line passed to configure (the CONFIG_SUPERBLOCKS macro in the code).

To make this work I had to disable the two intra prediction modes that use data from the top-right of the MB.

On the tests that I have run, the results produce almost exactly the same PSNRs & SSIMs, with a very slightly higher average data rate (and a slightly higher data rate than just disabling the two intra modes in the original code).

NOTE: This will also break the multi-threaded code.

This replaces the abandoned change: Iebebe0d1a50ce8c15c79862c537b765a2f67e162

Change-Id: I1bc1a00f236abc1a373c7210d756e25f970fcad8
---
diff --git a/configure b/configure index cdb5f4d1f..f1ccaa19b 100755 --- a/configure +++ b/configure @@ -229,6 +229,7 @@ EXPERIMENT_LIST=" compred newlpf enhanced_interp + superblocks " CONFIG_LIST=" external_build diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c index 73a20a2f0..1dee3beea 100644 --- a/vp8/decoder/decodemv.c +++ b/vp8/decoder/decodemv.c @@ -58,7 +58,7 @@ static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p) return i; } -// This function reads the current macro block's segnment id to from bitstream +// This function reads the current macro block's segment id from the bitstream // It should only be called if a segment map update is indicated.
static void vp8_read_mb_segid(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x) { @@ -923,6 +923,108 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, } +#if CONFIG_SUPERBLOCKS +void vp8_decode_mode_mvs(VP8D_COMP *pbi) +{ + int i; + VP8_COMMON *cm = &pbi->common; + int sb_row, sb_col; + int sb_rows = (cm->mb_rows + 1)>>1; + int sb_cols = (cm->mb_cols + 1)>>1; + MODE_INFO *mi = cm->mi; + int row_delta[4] = {-1, 0, +1, 0}; + int col_delta[4] = {+1, +1, -1, +1}; + +#if CONFIG_NEWNEAR + MODE_INFO *prev_mi = cm->prev_mi; +#endif + + mb_mode_mv_init(pbi); + +#if CONFIG_QIMODE + if(cm->frame_type==KEY_FRAME && !cm->kf_ymode_probs_update) + { + cm->kf_ymode_probs_index = vp8_read_literal(&pbi->bc, 3); + } +#endif + + for (sb_row=0; sb_row<sb_rows; sb_row++) + { + int mb_row = (sb_row << 1) + 1; + int mb_col = -1; + + for (sb_col=0; sb_col<sb_cols; sb_col++) + { + for (i=0; i<4; i++) + { + int mb_to_top_edge; + int mb_to_bottom_edge; +#if CONFIG_ERROR_CONCEALMENT + int mb_num; +#endif + int offset_extended = row_delta[(i+1) & 0x3] * cm->mode_info_stride + col_delta[(i+1) & 0x3]; + int dy = row_delta[i]; + int dx = col_delta[i]; + + mb_row += dy; + mb_col += dx; + + if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) + { +#if CONFIG_NEWNEAR + prev_mi += offset_extended; +#endif + mi += offset_extended; /* next macroblock */ + continue; + } + + pbi->mb.mb_to_top_edge = + mb_to_top_edge = -((mb_row * 16)) << 3; + mb_to_top_edge -= LEFT_TOP_MARGIN; + + pbi->mb.mb_to_bottom_edge = + mb_to_bottom_edge = ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3; + mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN; + +#if CONFIG_ERROR_CONCEALMENT + mb_num = mb_row * pbi->common.mb_cols + mb_col; +#endif + /*read_mb_modes_mv(pbi, cm->mode_info_context, &cm->mode_info_context->mbmi, mb_row, mb_col);*/ + if(pbi->common.frame_type == KEY_FRAME) + vp8_kfread_modes(pbi, mi, mb_row, mb_col); + else + read_mb_modes_mv(pbi, mi, &mi->mbmi, +#if CONFIG_NEWNEAR + prev_mi, +#endif + mb_row, mb_col); + +#if CONFIG_ERROR_CONCEALMENT + /* look for corruption. set mvs_corrupt_from_mb to the current + * mb_num if the frame is corrupt from this macroblock. */ + if (vp8dx_bool_error(&pbi->bc) && mb_num < pbi->mvs_corrupt_from_mb) + { + pbi->mvs_corrupt_from_mb = mb_num; + /* no need to continue since the partition is corrupt from + * here on.
+ */ + return; + } +#endif + +#if CONFIG_NEWNEAR + prev_mi += offset_extended; +#endif + mi += offset_extended; /* next macroblock */ + } + } + + mi += cm->mode_info_stride + (1 - (cm->mb_cols & 0x1)); + } +} +#else void vp8_decode_mode_mvs(VP8D_COMP *pbi) { MODE_INFO *mi = pbi->common.mi; @@ -1036,3 +1138,5 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) } +#endif /* CONFIG_SUPERBLOCKS */ + diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 8224316c4..7cf5eae38 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -248,7 +248,9 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mby)(xd); } else { +#if !CONFIG_SUPERBLOCKS vp8_intra_prediction_down_copy(xd); +#endif } } } @@ -464,8 +466,163 @@ static int get_delta_q(vp8_reader *bc, int prev, int *q_update) FILE *vpxlog = 0; #endif +#if CONFIG_SUPERBLOCKS +static void +decode_sb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mbrow, MACROBLOCKD *xd) +{ + int i; + int recon_yoffset, recon_uvoffset; + int mb_row, mb_col; + int ref_fb_idx = pc->lst_fb_idx; + int dst_fb_idx = pc->new_fb_idx; + int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; + int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; + int sb_col; + int row_delta[4] = { 0, +1, 0, -1}; + int col_delta[4] = {+1, -1, +1, +1}; + int sb_cols = (pc->mb_cols + 1)>>1; + ENTROPY_CONTEXT_PLANES left_context[2]; + + vpx_memset(left_context, 0, sizeof(left_context)); + + mb_row = mbrow; + mb_col = 0; + + for (sb_col=0; sb_col<sb_cols; sb_col++) + { + for (i=0; i<4; i++) + { + int dy = row_delta[i]; + int dx = col_delta[i]; + int offset_extended = dy * xd->mode_info_stride + dx; + + if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) + { + // Skip on to the next MB + mb_row += dy; + mb_col += dx; + xd->mode_info_context += offset_extended; + continue; + } + + // Copy in the appropriate left context + vpx_memcpy (&pc->left_context, + &left_context[(i>>1) & 0x1], + sizeof(ENTROPY_CONTEXT_PLANES)); + + // reset above block coeffs + xd->above_context = pc->above_context + mb_col; + + /* Distance of Mb to the various image edges. + * These are specified to 8th pel as they are always compared to + * values that are in 1/8th pel units + */ + xd->mb_to_top_edge = -((mb_row * 16)) << 3; + xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; + + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; + + xd->up_available = (mb_row != 0); + xd->left_available = (mb_col != 0); + +#if CONFIG_ERROR_CONCEALMENT + { + int corrupt_residual = (!pbi->independent_partitions && + pbi->frame_corrupt_residual) || + vp8dx_bool_error(xd->current_bc); + if (pbi->ec_active && + xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME && + corrupt_residual) + { + /* We have an intra block with corrupt coefficients, better + * to conceal with an inter block. Interpolate MVs from + * neighboring MBs. + * + * Note that for the first mb with corrupt residual in a + * frame, we might not discover that before decoding the + * residual. That happens after this check, and therefore + * no inter concealment will be done.
+ */ + vp8_interpolate_motion(xd, + mb_row, mb_col, + pc->mb_rows, pc->mb_cols, + pc->mode_info_stride); + } + } +#endif + + update_blockd_bmi(xd); + + recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); + recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); + xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; + /* Select the appropriate reference frame for this MB */ + if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) + ref_fb_idx = pc->lst_fb_idx; + else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) + ref_fb_idx = pc->gld_fb_idx; + else + ref_fb_idx = pc->alt_fb_idx; + + xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer +recon_yoffset; + xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer +recon_uvoffset; + xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer +recon_uvoffset; + +#if CONFIG_DUALPRED + if (xd->mode_info_context->mbmi.second_ref_frame) + { + int second_ref_fb_idx; + + /* Select the appropriate reference frame for this MB */ + if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) + second_ref_fb_idx = pc->lst_fb_idx; + else if (xd->mode_info_context->mbmi.second_ref_frame == + GOLDEN_FRAME) + second_ref_fb_idx = pc->gld_fb_idx; + else + second_ref_fb_idx = pc->alt_fb_idx; + + xd->second_pre.y_buffer = + pc->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset; + xd->second_pre.u_buffer = + pc->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset; + xd->second_pre.v_buffer = + pc->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset; + } +#endif /* CONFIG_DUALPRED */ + + if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) + { + /* propagate errors from reference frames */ + xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted; + } + + decode_macroblock(pbi, xd, mb_row * pc->mb_cols + mb_col); + + /* check if the boolean decoder has suffered an error */ + xd->corrupted |= vp8dx_bool_error(xd->current_bc); + + // Copy back the updated left context + vpx_memcpy (&left_context[(i>>1) & 0x1], + &pc->left_context, + sizeof(ENTROPY_CONTEXT_PLANES)); + + // skip to next MB + xd->mode_info_context += offset_extended; + mb_row += dy; + mb_col += dx; + } + } + + /* skip prediction column */ + xd->mode_info_context += 1 - (pc->mb_cols & 0x1) + xd->mode_info_stride; +} +#else static void decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd) { @@ -591,7 +748,7 @@ decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd) ++xd->mode_info_context; /* skip prediction column */ } - +#endif // CONFIG_SUPERBLOCKS static unsigned int read_partition_size(const unsigned char *cx_size) { @@ -1376,7 +1533,23 @@ int vp8_decode_frame(VP8D_COMP *pbi) int num_part = 1 << pc->multi_token_partition; pbi->frame_corrupt_residual = 0; - /* Decode the individual macro block */ +#if CONFIG_SUPERBLOCKS + /* Decode a row of super-blocks */ + for (mb_row = 0; mb_row < pc->mb_rows; mb_row+=2) + { + if (num_part > 1) + { + xd->current_bc = & pbi->mbc[ibc]; + ibc++; + + if (ibc == num_part) + ibc = 0; + } + + decode_sb_row(pbi, pc, mb_row, xd); + } +#else + /* Decode a row of macro blocks */ for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) { @@ -1391,6 +1564,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) decode_mb_row(pbi, pc, mb_row, xd); } +#endif /* CONFIG_SUPERBLOCKS */ corrupt_tokens |= xd->corrupted; } @@ -1441,3 +1615,4 @@ int vp8_decode_frame(VP8D_COMP *pbi)
return 0; } + diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index 37bf515ea..2aee8249d 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -960,6 +960,422 @@ static void encode_ref_frame( vp8_writer *const w, } } +#if CONFIG_SUPERBLOCKS +static void pack_inter_mode_mvs(VP8_COMP *const cpi) +{ + VP8_COMMON *const pc = & cpi->common; + vp8_writer *const w = & cpi->bc; + const MV_CONTEXT *mvc = pc->fc.mvc; + MACROBLOCKD *xd = &cpi->mb.e_mbd; + +#if CONFIG_DUALPRED + int i; +#endif + int pred_context; + + const int *const rfct = cpi->count_mb_ref_frame_usage; + const int rf_intra = rfct[INTRA_FRAME]; + const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; + MODE_INFO *m = pc->mi; +#if CONFIG_NEWNEAR + MODE_INFO *prev_m = pc->prev_mi; +#endif + + const int mis = pc->mode_info_stride; + int mb_row, mb_col; + int row, col; + + int prob_skip_false = 0; +#if CONFIG_DUALPRED + int prob_dual_pred[3]; +#endif /* CONFIG_DUALPRED */ + + // Values used in prediction model coding + vp8_prob pred_prob; + unsigned char prediction_flag; + + int row_delta[4] = { 0, +1, 0, -1}; + int col_delta[4] = {+1, -1, +1, +1}; + + cpi->mb.partition_info = cpi->mb.pi; + + // Calculate the probabilities to be used to code the reference frame + // based on actual usage this frame +//#if CONFIG_SEGFEATURES + pc->prob_intra_coded = (rf_intra + rf_inter) + ? rf_intra * 255 / (rf_intra + rf_inter) : 1; + + if (!pc->prob_intra_coded) + pc->prob_intra_coded = 1; + + pc->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128; + + if (!pc->prob_last_coded) + pc->prob_last_coded = 1; + + pc->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) + ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128; + + if (!pc->prob_gf_coded) + pc->prob_gf_coded = 1; + +#if CONFIG_COMPRED + // Compute a modified set of probabilities to use when prediction of the + // reference frame fails + compute_mod_refprobs( pc ); +#endif + +#ifdef ENTROPY_STATS + active_section = 1; +#endif + + if (pc->mb_no_coeff_skip) + { + // Divide by 0 check.
0 case possible with segment features + if ( (cpi->skip_false_count + cpi->skip_true_count) ) + { + prob_skip_false = cpi->skip_false_count * 256 / + (cpi->skip_false_count + cpi->skip_true_count); + + if (prob_skip_false <= 1) + prob_skip_false = 1; + + if (prob_skip_false > 255) + prob_skip_false = 255; + } + else + prob_skip_false = 255; + + cpi->prob_skip_false = prob_skip_false; + vp8_write_literal(w, prob_skip_false, 8); + } + + vp8_write_literal(w, pc->prob_intra_coded, 8); + vp8_write_literal(w, pc->prob_last_coded, 8); + vp8_write_literal(w, pc->prob_gf_coded, 8); + +#if CONFIG_DUALPRED + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + vp8_write(w, 1, 128); + vp8_write(w, 1, 128); + for (i = 0; i < 3; i++) { + if (cpi->single_pred_count[i] + cpi->dual_pred_count[i]) + { + prob_dual_pred[i] = cpi->single_pred_count[i] * 256 / + (cpi->single_pred_count[i] + cpi->dual_pred_count[i]); + if (prob_dual_pred[i] < 1) + prob_dual_pred[i] = 1; + else if (prob_dual_pred[i] > 255) + prob_dual_pred[i] = 255; + } + else + { + prob_dual_pred[i] = 128; + } + vp8_write_literal(w, prob_dual_pred[i], 8); + } + } + else if (cpi->common.dual_pred_mode == SINGLE_PREDICTION_ONLY) + { + vp8_write(w, 0, 128); + } + else /* dual prediction only */ + { + vp8_write(w, 1, 128); + vp8_write(w, 0, 128); + } +#endif /* CONFIG_DUALPRED */ + + update_mbintra_mode_probs(cpi); + + vp8_write_mvprobs(cpi); + + mb_row = 0; + for (row=0; row < pc->mb_rows; row += 2) + { + m = pc->mi + row * mis; + + mb_col = 0; + for (col=0; col < pc->mb_cols; col += 2) + { + int i; + + for (i=0; i<4; i++) + { + const MB_MODE_INFO *const mi = & m->mbmi; + const MV_REFERENCE_FRAME rf = mi->ref_frame; + const MB_PREDICTION_MODE mode = mi->mode; + const int segment_id = mi->segment_id; + + int dy = row_delta[i]; + int dx = col_delta[i]; + int offset_extended = dy * mis + dx; + + if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) + { + mb_row += dy; + mb_col += dx; + m += offset_extended; + cpi->mb.partition_info += offset_extended; + continue; + } + + // Distance of Mb to the various image edges. + // These are specified to 8th pel as they are always compared to MV + // values that are in 1/8th pel units + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; + xd->mb_to_top_edge = -((mb_row * 16)) << 3; + xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; + + // Make sure the MacroBlockD mode info pointer is set correctly + xd->mode_info_context = m; +#if CONFIG_NEWNEAR + xd->prev_mode_info_context = prev_m; +#endif + +#ifdef ENTROPY_STATS + active_section = 9; +#endif + if (cpi->mb.e_mbd.update_mb_segmentation_map) + { + // Is temporal coding of the segment map enabled + if (pc->temporal_update) + { + prediction_flag = + get_pred_flag( xd, PRED_SEG_ID ); + pred_prob = + get_pred_prob( pc, xd, PRED_SEG_ID); + + // Code the segment id prediction flag for this mb + vp8_write( w, prediction_flag, pred_prob ); + + // If the mb's segment id was not predicted, code it explicitly + if (!prediction_flag) + write_mb_segid(w, mi, &cpi->mb.e_mbd); + } + else + { + // Normal unpredicted coding + write_mb_segid(w, mi, &cpi->mb.e_mbd); + } + } + +//#if CONFIG_SEGFEATURES + if ( pc->mb_no_coeff_skip && + ( !segfeature_active( xd, segment_id, SEG_LVL_EOB ) || + ( get_segdata( xd, segment_id, SEG_LVL_EOB ) != 0 ) ) ) + { + vp8_encode_bool(w, mi->mb_skip_coeff, prob_skip_false); + } + + // Encode the reference frame.
+ encode_ref_frame( w, pc, xd, + segment_id, rf ); + + if (rf == INTRA_FRAME) + { +#ifdef ENTROPY_STATS + active_section = 6; +#endif + +//#if CONFIG_SEGFEATURES + if ( !segfeature_active( xd, segment_id, SEG_LVL_MODE ) ) + write_ymode(w, mode, pc->fc.ymode_prob); + + if (mode == B_PRED) + { + int j = 0; + + do + write_bmode(w, m->bmi[j].as_mode, pc->fc.bmode_prob); + while (++j < 16); + } + + if(mode == I8X8_PRED) + { + write_i8x8_mode(w, m->bmi[0].as_mode, pc->i8x8_mode_prob); + write_i8x8_mode(w, m->bmi[2].as_mode, pc->i8x8_mode_prob); + write_i8x8_mode(w, m->bmi[8].as_mode, pc->i8x8_mode_prob); + write_i8x8_mode(w, m->bmi[10].as_mode, pc->i8x8_mode_prob); + } + else + { +#if CONFIG_UVINTRA + write_uv_mode(w, mi->uv_mode, pc->fc.uv_mode_prob[mode]); +#ifdef MODE_STATS + if(mode!=B_PRED) + ++cpi->y_uv_mode_count[mode][mi->uv_mode]; +#endif + +#else + write_uv_mode(w, mi->uv_mode, pc->fc.uv_mode_prob); +#endif /*CONFIG_UVINTRA*/ + } + + } + else + { + int_mv best_mv; + vp8_prob mv_ref_p [VP8_MVREFS-1]; + + { + int_mv n1, n2; + int ct[4]; + + vp8_find_near_mvs(xd, m, +#if CONFIG_NEWNEAR + prev_m, +#endif + &n1, &n2, &best_mv, ct, rf, + cpi->common.ref_frame_sign_bias); + vp8_mv_ref_probs(&cpi->common, mv_ref_p, ct); + +#ifdef ENTROPY_STATS + accum_mv_refs(mode, ct); +#endif + } + +#ifdef ENTROPY_STATS + active_section = 3; +#endif + +//#if CONFIG_SEGFEATURES + // Is the segment coding of mode enabled + if ( !segfeature_active( xd, segment_id, SEG_LVL_MODE ) ) + { + write_mv_ref(w, mode, mv_ref_p); +#if CONFIG_NEWNEAR + vp8_accum_mv_refs(&cpi->common, mode, ct); +#endif + } + + + { + switch (mode) /* new, split require MVs */ + { + case NEWMV: + +#ifdef ENTROPY_STATS + active_section = 5; +#endif + + write_mv(w, &mi->mv.as_mv, &best_mv, mvc); +#if CONFIG_DUALPRED + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + int t = m[-mis].mbmi.second_ref_frame != INTRA_FRAME; + int l = m[-1 ].mbmi.second_ref_frame != INTRA_FRAME; + vp8_write(w, mi->second_ref_frame != INTRA_FRAME, + prob_dual_pred[t + l]); + } + if (mi->second_ref_frame) + { + const int second_rf = mi->second_ref_frame; + int_mv n1, n2; + int ct[4]; + vp8_find_near_mvs(xd, m, +#if CONFIG_NEWNEAR + prev_m, +#endif + &n1, &n2, &best_mv, + ct, second_rf, + cpi->common.ref_frame_sign_bias); + write_mv(w, &mi->second_mv.as_mv, &best_mv, mvc); + } +#endif /* CONFIG_DUALPRED */ + break; + + case SPLITMV: + { + int j = 0; + +#ifdef MODE_STATS + ++count_mb_seg [mi->partitioning]; +#endif + + write_split(w, mi->partitioning); + + do + { + B_PREDICTION_MODE blockmode; + int_mv blockmv; + const int *const L = vp8_mbsplits [mi->partitioning]; + int k = -1; /* first block in subset j */ + int mv_contz; + int_mv leftmv, abovemv; + + blockmode = cpi->mb.partition_info->bmi[j].mode; + blockmv = cpi->mb.partition_info->bmi[j].mv; +#if CONFIG_DEBUG + while (j != L[++k]) + if (k >= 16) + assert(0); +#else + while (j != L[++k]); +#endif + leftmv.as_int = left_block_mv(m, k); + abovemv.as_int = above_block_mv(m, k, mis); + mv_contz = vp8_mv_cont(&leftmv, &abovemv); + + write_sub_mv_ref(w, blockmode, vp8_sub_mv_ref_prob2 [mv_contz]); + + if (blockmode == NEW4X4) + { +#ifdef ENTROPY_STATS + active_section = 11; +#endif + write_mv(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT *) mvc); + } + } + while (++j < cpi->mb.partition_info->count); + } + break; + default: +#if CONFIG_DUALPRED + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + int t = m[-mis].mbmi.second_ref_frame != INTRA_FRAME; + int l = m[-1 ].mbmi.second_ref_frame != 
INTRA_FRAME; + vp8_write(w, mi->second_ref_frame != INTRA_FRAME, + prob_dual_pred[t + l]); + } +#endif /* CONFIG_DUALPRED */ + break; + } + } + } + +#if CONFIG_NEWNEAR + prev_m += offset_extended; + assert((prev_m-cpi->common.prev_mip)==(m-cpi->common.mip)); + assert((prev_m-cpi->common.prev_mi)==(m-cpi->common.mi)); +#endif + + // skip to next MB + mb_row += dy; + mb_col += dx; + m += offset_extended; + cpi->mb.partition_info += offset_extended; + } + } + + mb_row += 2; + m += mis + (1- (pc->mb_cols & 0x1)); + cpi->mb.partition_info += mis + (1- (pc->mb_cols & 0x1)); + } + +#if CONFIG_DUALPRED + if (cpi->common.dual_pred_mode == HYBRID_PREDICTION) + { + cpi->prob_dualpred[0] = (prob_dual_pred[0] + cpi->prob_dualpred[0] + 1) >> 1; + cpi->prob_dualpred[1] = (prob_dual_pred[1] + cpi->prob_dualpred[1] + 1) >> 1; + cpi->prob_dualpred[2] = (prob_dual_pred[2] + cpi->prob_dualpred[2] + 1) >> 1; + } +#endif /* CONFIG_DUALPRED */ +} +#else static void pack_inter_mode_mvs(VP8_COMP *const cpi) { VP8_COMMON *const pc = & cpi->common; @@ -1348,8 +1764,140 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) } #endif /* CONFIG_DUALPRED */ } +#endif // CONFIG_SUPERBLOCKS + +#if CONFIG_SUPERBLOCKS +static void write_kfmodes(VP8_COMP *cpi) +{ + vp8_writer *const bc = & cpi->bc; + const VP8_COMMON *const c = & cpi->common; + MODE_INFO *m; + int i; + int row, col; + int mb_row, mb_col; + int prob_skip_false = 0; + int row_delta[4] = { 0, +1, 0, -1}; + int col_delta[4] = {+1, -1, +1, +1}; + const int mis = c->mode_info_stride; + +//#if CONFIG_SEGFEATURES + MACROBLOCKD *xd = &cpi->mb.e_mbd; + + if (c->mb_no_coeff_skip) + { + // Divide by 0 check. 0 case possible with segment features + if ( (cpi->skip_false_count + cpi->skip_true_count) ) + { + prob_skip_false = cpi->skip_false_count * 256 / + (cpi->skip_false_count + cpi->skip_true_count); + + if (prob_skip_false <= 1) + prob_skip_false = 1; + + if (prob_skip_false > 255) + prob_skip_false = 255; + } + else + prob_skip_false = 255; + + cpi->prob_skip_false = prob_skip_false; + vp8_write_literal(bc, prob_skip_false, 8); + } + +#if CONFIG_QIMODE + if(!c->kf_ymode_probs_update) + { + vp8_write_literal(bc, c->kf_ymode_probs_index, 3); + } +#endif + + mb_row = 0; + for (row=0; row < c->mb_rows; row += 2) + { + m = c->mi + row * mis; + + mb_col = 0; + for (col=0; col < c->mb_cols; col += 2) + { + for (i=0; i<4; i++) + { + int ym; + int segment_id; + int dy = row_delta[i]; + int dx = col_delta[i]; + int offset_extended = dy * mis + dx; + + if ((mb_row >= c->mb_rows) || (mb_col >= c->mb_cols)) + { + mb_row += dy; + mb_col += dx; + m += offset_extended; + continue; + } + + ym = m->mbmi.mode; + segment_id = m->mbmi.segment_id; + + if (cpi->mb.e_mbd.update_mb_segmentation_map) + { + write_mb_segid(bc, &m->mbmi, &cpi->mb.e_mbd); + } +//#if CONFIG_SEGFEATURES + if ( c->mb_no_coeff_skip && + ( !segfeature_active( xd, segment_id, SEG_LVL_EOB ) || + (get_segdata( xd, segment_id, SEG_LVL_EOB ) != 0) ) ) + { + vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false); + } +#if CONFIG_QIMODE + kfwrite_ymode(bc, ym, c->kf_ymode_prob[c->kf_ymode_probs_index]); +#else + kfwrite_ymode(bc, ym, c->kf_ymode_prob); +#endif + if (ym == B_PRED) + { + int i = 0; + do + { + const B_PREDICTION_MODE A = above_block_mode(m, i, mis); + const B_PREDICTION_MODE L = left_block_mode(m, i); + const int bm = m->bmi[i].as_mode; + +#ifdef ENTROPY_STATS + ++intra_mode_stats [A] [L] [bm]; +#endif + + write_bmode(bc, bm, c->kf_bmode_prob [A] [L]); + } + while (++i < 16); + } + + if(ym == I8X8_PRED) 
+ { + write_i8x8_mode(bc, m->bmi[0].as_mode, c->i8x8_mode_prob); + write_i8x8_mode(bc, m->bmi[2].as_mode, c->i8x8_mode_prob); + write_i8x8_mode(bc, m->bmi[8].as_mode, c->i8x8_mode_prob); + write_i8x8_mode(bc, m->bmi[10].as_mode, c->i8x8_mode_prob); + } + else +#if CONFIG_UVINTRA + write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); +#else + write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob); +#endif + // skip to next MB + mb_row += dy; + mb_col += dx; + m += offset_extended; + } + } + + mb_row += 2; + } +} +#else static void write_kfmodes(VP8_COMP *cpi) { vp8_writer *const bc = & cpi->bc; @@ -1454,6 +2002,8 @@ static void write_kfmodes(VP8_COMP *cpi) m++; // skip L prediction border } } +#endif /* CONFIG_SUPERBLOCKS */ + /* This function is used for debugging probability trees. */ static void print_prob_tree(vp8_prob diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index c221b2f6f..a0a024592 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -563,6 +563,268 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) adjust_act_zbin(cpi, x); } +#if CONFIG_SUPERBLOCKS +static +void encode_sb_row (VP8_COMP *cpi, + VP8_COMMON *cm, + int mbrow, + MACROBLOCK *x, + MACROBLOCKD *xd, + TOKENEXTRA **tp, + int *totalrate) +{ + int i; + int map_index; + int mb_row, mb_col; + int recon_yoffset, recon_uvoffset; + int ref_fb_idx = cm->lst_fb_idx; + int dst_fb_idx = cm->new_fb_idx; + int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; + int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; + int row_delta[4] = {-1, 0, +1, 0}; + int col_delta[4] = {+1, +1, -1, +1}; + int sb_cols = (cm->mb_cols + 1)>>1; + int sb_col; + ENTROPY_CONTEXT_PLANES left_context[2]; + + vpx_memset (left_context, 0, sizeof(left_context)); + + // TODO put NULL into MB rows that have no tokens? 
+ cpi->tplist[mbrow].start = *tp; + + x->src.y_buffer -= 16 * (col_delta[0] + row_delta[0]*x->src.y_stride); + x->src.u_buffer -= 8 * (col_delta[0] + row_delta[0]*x->src.uv_stride); + x->src.v_buffer -= 8 * (col_delta[0] + row_delta[0]*x->src.uv_stride); + mb_row = mbrow - row_delta[0]; + mb_col = 0 - col_delta[0]; + + for (sb_col=0; sb_col<sb_cols; sb_col++) + { + for (i=0; i<4; i++) + { + int offset_extended = row_delta[(i+1) & 0x3] * xd->mode_info_stride + col_delta[(i+1) & 0x3]; + int offset_unextended = row_delta[(i+1) & 0x3] * + cm->mb_cols + col_delta[(i+1) & 0x3]; + int dy = row_delta[i]; + int dx = col_delta[i]; + + mb_row += dy; + mb_col += dx; + + x->src.y_buffer += 16 * (dx + dy*x->src.y_stride); + x->src.u_buffer += 8 * (dx + dy*x->src.uv_stride); + x->src.v_buffer += 8 * (dx + dy*x->src.uv_stride); + + if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) + { + // Skip on to the next MB + x->gf_active_ptr += offset_unextended; + x->partition_info += offset_extended; + xd->mode_info_context += offset_extended; + +#if CONFIG_NEWNEAR + xd->prev_mode_info_context += offset_extended; + + assert((xd->prev_mode_info_context - cpi->common.prev_mip) + ==(xd->mode_info_context - cpi->common.mip)); +#endif + continue; + } + + // Copy in the appropriate left context + vpx_memcpy (&cm->left_context, + &left_context[(i>>1) & 0x1], + sizeof(ENTROPY_CONTEXT_PLANES)); + + map_index = (mb_row * cpi->common.mb_cols) + mb_col; + x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; + + // reset above block coeffs + xd->above_context = cm->above_context + mb_col; + + // Distance of Mb to the top & bottom edges, specified in 1/8th pel + // units as they are always compared to values in 1/8th pel units + xd->mb_to_top_edge = -((mb_row * 16) << 3); + xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; + + // Set up limit values for motion vector components + // to prevent them extending beyond the UMV borders + x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + + (VP8BORDERINPIXELS - 16); + x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + + (VP8BORDERINPIXELS - 16); + + // Distance of Mb to the left & right edges, specified in + // 1/8th pel units as they are always compared to values + // that are in 1/8th pel units + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; + + xd->up_available = (mb_row != 0); + xd->left_available = (mb_col != 0); + + recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); + recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); + + xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; + + x->rddiv = cpi->RDDIV; + x->rdmult = cpi->RDMULT; + + // Copy current mb to a buffer + RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, + x->src.y_stride, + x->thismb, 16); + + if(cpi->oxcf.tuning == VP8_TUNE_SSIM) + vp8_activity_masking(cpi, x); + + // Is segmentation enabled + if (xd->segmentation_enabled) + { + // Code to set segment id in xd->mbmi.segment_id + if (cpi->segmentation_map[map_index] <= 3) + xd->mode_info_context->mbmi.segment_id = + cpi->segmentation_map[map_index]; + else + xd->mode_info_context->mbmi.segment_id = 0; + + vp8cx_mb_init_quantizer(cpi, x); + } + else + // Set to Segment 0 by default + xd->mode_info_context->mbmi.segment_id = 0; + + x->active_ptr = cpi->active_map +
map_index; + + if (cm->frame_type == KEY_FRAME) + { + *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp); + //Note the encoder may have changed the segment_id + +#ifdef MODE_STATS + y_modes[xd->mode_info_context->mbmi.mode] ++; +#endif + } + else + { + *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, + recon_yoffset, recon_uvoffset); + //Note the encoder may have changed the segment_id + +#ifdef MODE_STATS + inter_y_modes[xd->mode_info_context->mbmi.mode] ++; + + if (xd->mode_info_context->mbmi.mode == SPLITMV) + { + int b; + + for (b = 0; b < x->partition_info->count; b++) + { + inter_b_modes[x->partition_info->bmi[b].mode] ++; + } + } + +#endif + + // Count of last ref frame 0,0 usage + if ((xd->mode_info_context->mbmi.mode == ZEROMV) && + (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) + cpi->inter_zz_count ++; + + // Actions required if segmentation enabled + if ( xd->segmentation_enabled ) + { + // Special case code for cyclic refresh + // If cyclic update enabled then copy xd->mbmi.segment_id; + // (which may have been updated based on mode during + // vp8cx_encode_inter_macroblock()) back into the global + // segmentation map + if (cpi->cyclic_refresh_mode_enabled) + { + cpi->segmentation_map[map_index] = + xd->mode_info_context->mbmi.segment_id; + + // If the block has been refreshed mark it as clean (the + // magnitude of the -ve influences how long it will be + // before we consider another refresh): + // Else if it was coded (last frame 0,0) and has not + // already been refreshed then mark it as a candidate + // for cleanup next time (marked 0) + // else mark it as dirty (1). + if (xd->mode_info_context->mbmi.segment_id) + cpi->cyclic_refresh_map[map_index] = -1; + + else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && + (xd->mode_info_context->mbmi.ref_frame == + LAST_FRAME)) + { + if (cpi->cyclic_refresh_map[map_index] == 1) + cpi->cyclic_refresh_map[map_index] = 0; + } + else + cpi->cyclic_refresh_map[map_index] = 1; + } + } + } + + // TODO Make sure partitioning works with this new scheme + cpi->tplist[mbrow].stop = *tp; + + // Copy back updated left context + vpx_memcpy (&left_context[(i>>1) & 0x1], + &cm->left_context, + sizeof(ENTROPY_CONTEXT_PLANES)); + + // skip to next mb + x->gf_active_ptr += offset_unextended; + x->partition_info += offset_extended; + xd->mode_info_context += offset_extended; + +#if CONFIG_NEWNEAR + xd->prev_mode_info_context += offset_extended; + + assert((xd->prev_mode_info_context - cpi->common.prev_mip) + ==(xd->mode_info_context - cpi->common.mip)); +#endif + } + } + + // Intra-pred modes requiring top-right data have been disabled, + // so we don't need this: + // extend the recon for intra prediction + /*vp8_extend_mb_row( + &cm->yv12_fb[dst_fb_idx], + xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, + xd->dst.v_buffer + 8);*/ + + // this is to account for the border +#if CONFIG_NEWNEAR + xd->prev_mode_info_context += 1 - (cm->mb_cols & 0x1) + xd->mode_info_stride; +#endif + xd->mode_info_context += 1 - (cm->mb_cols & 0x1) + xd->mode_info_stride; + x->partition_info += 1 - (cm->mb_cols & 0x1) + xd->mode_info_stride; + x->gf_active_ptr += cm->mb_cols - (cm->mb_cols & 0x1); + +//#if CONFIG_SEGFEATURES +// debug output +#if DBG_PRNT_SEGMAP + { + FILE *statsfile; + statsfile = fopen("segmap2.stt", "a"); + fprintf(statsfile, "\n" ); + fclose(statsfile); + } +#endif +} +#else static void encode_mb_row(VP8_COMP *cpi, VP8_COMMON *cm, @@ -590,6 +852,8 @@ void encode_mb_row(VP8_COMP *cpi, else last_row_current_mb_col = 
&rightmost_col; #endif + // Reset the left context + vp8_zero(cm->left_context) + // reset above block coeffs xd->above_context = cm->above_context; @@ -818,6 +1082,7 @@ void encode_mb_row(VP8_COMP *cpi, } #endif } +#endif /* CONFIG_SUPERBLOCKS */ void init_encode_frame_mb_context(VP8_COMP *cpi) { @@ -1060,7 +1325,7 @@ static void encode_frame_internal(VP8_COMP *cpi) for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) { - vp8_zero(cm->left_context) + //vp8_zero(cm->left_context) tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24); @@ -1101,19 +1366,31 @@ static void encode_frame_internal(VP8_COMP *cpi) else #endif { - // for each macroblock row in image +#if CONFIG_SUPERBLOCKS + // for each superblock row in the image + for (mb_row = 0; mb_row < cm->mb_rows; mb_row+=2) + { + int offset = cm->mb_cols - 1 + (cm->mb_cols & 0x1); + + encode_sb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate); + + // adjust to the next row of SBs + x->src.y_buffer += 16 * x->src.y_stride - 16 * offset; + x->src.u_buffer += 8 * x->src.uv_stride - 8 * offset; + x->src.v_buffer += 8 * x->src.uv_stride - 8 * offset; + } +#else + // for each macroblock row in the image for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - - vp8_zero(cm->left_context) - encode_mb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate); - // adjust to the next row of mbs + // adjust to the next row of MBs x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; } +#endif // CONFIG_SUPERBLOCKS cpi->tok_count = tp - cpi->tok; diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 2064491a6..6f825ff59 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -86,7 +86,9 @@ void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb) int i; MACROBLOCKD *x = &mb->e_mbd; +#if !CONFIG_SUPERBLOCKS vp8_intra_prediction_down_copy(x); +#endif for (i = 0; i < 16; i++) vp8_encode_intra4x4block(rtcd, mb, i); diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 5de312d82..469152e21 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -160,6 +160,13 @@ static int pick_intra4x4block( { int this_rd; +#if CONFIG_SUPERBLOCKS + // Pre-empt mode range being restored to B_HU_PRED in the loop above: + // Ignore modes that need the above-right data + if (mode==B_LD_PRED || mode==B_VL_PRED) + continue; +#endif + rate = mode_costs[mode]; RECON_INVOKE(&rtcd->common->recon, intra4x4_predict) (b, mode, b->predictor); @@ -196,7 +203,9 @@ static int pick_intra4x4mby_modes int distortion = 0; unsigned int *bmode_costs; +#if !CONFIG_SUPERBLOCKS vp8_intra_prediction_down_copy(xd); +#endif bmode_costs = mb->inter_bmode_costs; diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index e5f1146ed..86d53d324 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -738,6 +738,11 @@ static int rd_pick_intra4x4block( int this_rd; int ratey; +#if CONFIG_SUPERBLOCKS + // Ignore modes that need the above-right data + if (mode==B_LD_PRED || mode==B_VL_PRED) + continue; +#endif rate = bmode_costs[mode]; RECON_INVOKE(&cpi->rtcd.common->recon, intra4x4_predict) @@ -796,7 +801,9 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; +#if !CONFIG_SUPERBLOCKS vp8_intra_prediction_down_copy(xd); +#endif bmode_costs = mb->inter_bmode_costs; @@ -1728,6 +1735,7 @@ static int
vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, return bsi.segment_rd; } +/* Sort arr into increasing order */ static void insertsortmv(int arr[], int len) { int i, j, k; @@ -2321,7 +2329,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int vpx_memcpy(mdcounts, frame_mdcounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts)); } - // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise + // Experimental code. Special case for gf and arf zeromv modes. + // Increase zbin size to suppress noise if (cpi->zbin_mode_boost_enabled) { if ( vp8_ref_frame_order[mode_index] == INTRA_FRAME ) diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c index 1d2e98bd0..91dffe166 100644 --- a/vp8/encoder/segmentation.c +++ b/vp8/encoder/segmentation.c @@ -214,7 +214,7 @@ void choose_segmap_coding_method( VP8_COMP *cpi ) // First of all generate stats regarding how well the last segment map // predicts this one - // Initialize macroblod decoder mode info context for to the first mb + // Initialize macroblock decoder mode info context for the first mb // in the frame xd->mode_info_context = cm->mi; @@ -244,7 +244,7 @@ void choose_segmap_coding_method( VP8_COMP *cpi ) temporal_predictor_count[pred_context][seg_predicted]++; if ( !seg_predicted ) - // Update the "undpredicted" segment count + // Update the "unpredicted" segment count t_unpred_seg_counts[segment_id]++; }
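
A note on the traversal order used throughout this patch: each of the added functions (vp8_decode_mode_mvs, decode_sb_row, pack_inter_mode_mvs, write_kfmodes, encode_sb_row) walks the frame two MB rows at a time and visits the four MBs of each superblock in NW, NE, SW, SE order, skipping positions that fall outside the frame when mb_rows or mb_cols is odd. The following is a minimal standalone sketch of that walk, not code from the patch; the frame size and the printf are purely illustrative.

#include <stdio.h>

/* Per-step deltas, applied after visiting each MB: right, down-left,
 * right, then up-right into the next superblock. This reproduces the
 * NW, NE, SW, SE order used by decode_sb_row()/encode_sb_row(). */
static const int row_delta[4] = { 0, +1, 0, -1};
static const int col_delta[4] = {+1, -1, +1, +1};

int main(void)
{
    const int mb_rows = 3, mb_cols = 5;    /* odd sizes exercise the skip */
    const int sb_rows = (mb_rows + 1) >> 1;
    const int sb_cols = (mb_cols + 1) >> 1;
    int sb_row, sb_col, i;

    for (sb_row = 0; sb_row < sb_rows; sb_row++)
    {
        int mb_row = sb_row << 1;
        int mb_col = 0;

        for (sb_col = 0; sb_col < sb_cols; sb_col++)
        {
            for (i = 0; i < 4; i++)
            {
                /* Out-of-frame positions are stepped over, but the walk
                 * continues so the bookkeeping stays in sync with the
                 * full superblock grid. */
                if (mb_row < mb_rows && mb_col < mb_cols)
                    printf("visit MB (%d,%d)\n", mb_row, mb_col);

                mb_row += row_delta[i];
                mb_col += col_delta[i];
            }
        }
    }
    return 0;
}

The patch itself additionally keeps its MODE_INFO pointer in lock-step with this walk by advancing it after each MB by the next step's delta (offset_extended = row_delta[(i+1) & 0x3] * mode_info_stride + col_delta[(i+1) & 0x3]) and by stepping over the border column at the end of each SB row (the 1 - (mb_cols & 0x1) + mode_info_stride adjustment).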
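
The left entropy context also changes shape under this scheme: decode_sb_row() and encode_sb_row() keep a saved pair of left contexts, one per MB row spanned by the SB row, selected by (i>>1) & 0x1 (steps 0 and 1 are the top two MBs, steps 2 and 3 the bottom two), copying the active context in before each MB and back out after it. Below is a hedged sketch of that bookkeeping, with ctx_t standing in for ENTROPY_CONTEXT_PLANES, whose definition is not part of this patch.

#include <string.h>

/* Stand-in for ENTROPY_CONTEXT_PLANES. */
typedef struct { int y, u, v; } ctx_t;

static ctx_t active_left;      /* plays the role of pc->left_context  */
static ctx_t left_context[2];  /* slot 0: top MB row, slot 1: bottom  */

/* i is the visit step within the superblock: 0,1 = top row, 2,3 = bottom. */
static void visit_mb(int i)
{
    /* Restore the left context belonging to this MB's row... */
    memcpy(&active_left, &left_context[(i >> 1) & 0x1], sizeof(ctx_t));

    /* ...code the macroblock here, which updates active_left... */

    /* ...then save it back for the horizontally adjacent MB in the
     * next superblock. */
    memcpy(&left_context[(i >> 1) & 0x1], &active_left, sizeof(ctx_t));
}

int main(void)
{
    int i;
    memset(left_context, 0, sizeof(left_context)); /* start of an SB row */
    for (i = 0; i < 4; i++)
        visit_mb(i);
    return 0;
}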
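
Finally, the prob_skip_false value coded by the new pack_inter_mode_mvs() and write_kfmodes() is a plain frequency estimate clamped to the 1..255 range that the 8-bit boolean coder can represent, with a divide-by-zero guard for the segment-feature case where no MB codes a skip flag. Restated on its own below; the function name is illustrative, not from the patch.

#include <stdio.h>

/* Mirrors the prob_skip_false computation in pack_inter_mode_mvs() and
 * write_kfmodes(): estimate P(mb_skip_coeff == 0) on a 1..255 scale. */
static int skip_false_prob(int skip_false_count, int skip_true_count)
{
    const int total = skip_false_count + skip_true_count;
    int prob;

    if (!total)     /* divide-by-0 guard: possible with segment features */
        return 255;

    prob = skip_false_count * 256 / total;
    if (prob <= 1)
        prob = 1;
    if (prob > 255)
        prob = 255;
    return prob;
}

int main(void)
{
    /* e.g. 300 skipped vs 100 coded MBs -> P(not skipped) = 64/256 */
    printf("%d\n", skip_false_prob(100, 300));
    return 0;
}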