From d12376aa2c71a7e3ecca479ab436457ec4ba79a3 Mon Sep 17 00:00:00 2001
From: John Koleszar
Date: Fri, 19 Apr 2013 15:52:17 -0700
Subject: [PATCH] Move dst to per-plane MACROBLOCKD data

First in a series of commits moving the framebuffer pointers to
per-plane data, so that they can be indexed numerically rather than by
name.

Change-Id: I6e0d60fd4d51e6375c384eb7321776564df21775
---
 vp9/common/vp9_blockd.h                 |   8 +-
 vp9/common/vp9_mbpitch.c                |  10 +--
 vp9/common/vp9_recon.c                  |  10 +--
 vp9/common/vp9_reconinter.c             |  30 +++----
 vp9/common/vp9_reconinter.h             |  35 +++++++-
 vp9/common/vp9_reconintra.c             |  30 +++----
 vp9/common/x86/vp9_recon_wrapper_sse2.c |  22 ++---
 vp9/decoder/vp9_decodframe.c            | 113 +++++++++++++-----
 vp9/encoder/vp9_encodeframe.c           |  38 ++++----
 vp9/encoder/vp9_encodeintra.c           |   5 +-
 vp9/encoder/vp9_encodemb.c              |  10 ++-
 vp9/encoder/vp9_firstpass.c             |  14 +--
 vp9/encoder/vp9_mbgraph.c               |  18 ++--
 vp9/encoder/vp9_rdopt.c                 |  70 ++++++-------
 14 files changed, 238 insertions(+), 175 deletions(-)

diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 265c1f21a..be48ab31b 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -338,6 +338,11 @@ struct scale_factors {
 
 enum { MAX_MB_PLANE = 3 };
 
+struct buf_2d {
+  uint8_t *buf;
+  int stride;
+};
+
 struct mb_plane {
   DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]);
   DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]);
@@ -346,6 +351,8 @@ struct mb_plane {
   PLANE_TYPE plane_type;
   int subsampling_x;
   int subsampling_y;
+  struct buf_2d dst;
+  struct buf_2d pre[2];
 };
 
 #define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))
@@ -366,7 +373,6 @@ typedef struct macroblockd {
 
   YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
   YV12_BUFFER_CONFIG second_pre;
-  YV12_BUFFER_CONFIG dst;
 
   struct scale_factors scale_factor[2];
   struct scale_factors scale_factor_uv[2];
diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c
index 00fe9aa15..957068f01 100644
--- a/vp9/common/vp9_mbpitch.c
+++ b/vp9/common/vp9_mbpitch.c
@@ -36,9 +36,9 @@ static void setup_macroblock(MACROBLOCKD *mb, BLOCKSET bs) {
   int i, stride;
 
   if (bs == DEST) {
-    y = &mb->dst.y_buffer;
-    u = &mb->dst.u_buffer;
-    v = &mb->dst.v_buffer;
+    y = &mb->plane[0].dst.buf;
+    u = &mb->plane[1].dst.buf;
+    v = &mb->plane[2].dst.buf;
 
     y2 = NULL;
     u2 = NULL;
@@ -54,14 +54,14 @@ static void setup_macroblock(MACROBLOCKD *mb, BLOCKSET bs) {
   }
 
   // luma
-  stride = mb->dst.y_stride;
+  stride = mb->plane[0].dst.stride;
   for (i = 0; i < 16; ++i) {
     const int offset = (i >> 2) * 4 * stride + (i & 3) * 4;
     setup_block(&blockd[i], y, y2, stride, offset, bs);
   }
 
   // chroma
-  stride = mb->dst.uv_stride;
+  stride = mb->plane[1].dst.stride;
   for (i = 16; i < 20; i++) {
     const int offset = ((i - 16) >> 1) * 4 * stride + (i & 1) * 4;
     setup_block(&blockd[i], u, u2, stride, offset, bs);
diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c
index 08cd5f807..cc44afed8 100644
--- a/vp9/common/vp9_recon.c
+++ b/vp9/common/vp9_recon.c
@@ -53,8 +53,8 @@ void vp9_recon2b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
 void vp9_recon_sby_c(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
   const int bw = 16 << mb_width_log2(bsize), bh = 16 << mb_height_log2(bsize);
   int x, y;
-  const int stride = mb->dst.y_stride;
-  uint8_t *dst = mb->dst.y_buffer;
+  const int stride = mb->plane[0].dst.stride;
+  uint8_t *dst = mb->plane[0].dst.buf;
   const int16_t *diff = mb->plane[0].diff;
 
   for (y = 0; y < bh; y++) {
@@ -70,9 +70,9 @@ void vp9_recon_sbuv_c(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
   const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
   const int bw = 8 << bwl, bh = 8 << bhl;
   int x, y;
-  const int stride = mb->dst.uv_stride;
-  uint8_t *u_dst = mb->dst.u_buffer;
-  uint8_t *v_dst = mb->dst.v_buffer;
+  const int stride = mb->plane[1].dst.stride;
+  uint8_t *u_dst = mb->plane[1].dst.buf;
+  uint8_t *v_dst = mb->plane[2].dst.buf;
   const int16_t *u_diff = mb->plane[1].diff;
   const int16_t *v_diff = mb->plane[2].diff;
 
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 549993200..c5b677f0f 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -614,11 +614,11 @@ void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd,
 void vp9_build_inter_predictors_sb(MACROBLOCKD *xd,
                                    int mb_row, int mb_col,
                                    BLOCK_SIZE_TYPE bsize) {
-  uint8_t *const y = xd->dst.y_buffer;
-  uint8_t *const u = xd->dst.u_buffer;
-  uint8_t *const v = xd->dst.v_buffer;
-  const int y_stride = xd->dst.y_stride;
-  const int uv_stride = xd->dst.uv_stride;
+  uint8_t *const y = xd->plane[0].dst.buf;
+  uint8_t *const u = xd->plane[1].dst.buf;
+  uint8_t *const v = xd->plane[2].dst.buf;
+  const int y_stride = xd->plane[0].dst.stride;
+  const int uv_stride = xd->plane[1].dst.stride;
 
   vp9_build_inter_predictors_sby(xd, y, y_stride, mb_row, mb_col, bsize);
   vp9_build_inter_predictors_sbuv(xd, u, v, uv_stride, mb_row, mb_col, bsize);
@@ -670,8 +670,8 @@ static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
 static int64_t get_consistency_metric(MACROBLOCKD *xd,
                                       uint8_t *tmp_y, int tmp_ystride) {
   int block_size = 16 << xd->mode_info_context->mbmi.sb_type;
-  uint8_t *rec_y = xd->dst.y_buffer;
-  int rec_ystride = xd->dst.y_stride;
+  uint8_t *rec_y = xd->plane[0].dst.buf;
+  int rec_ystride = xd->plane[0].dst.stride;
   int64_t metric = 0;
   int i;
   if (xd->up_available) {
@@ -1182,11 +1182,11 @@ void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd,
 void vp9_build_inter_predictors_sb(MACROBLOCKD *mb,
                                    int mb_row, int mb_col,
                                    BLOCK_SIZE_TYPE bsize) {
-  uint8_t *const y = mb->dst.y_buffer;
-  uint8_t *const u = mb->dst.u_buffer;
-  uint8_t *const v = mb->dst.v_buffer;
-  const int y_stride = mb->dst.y_stride;
-  const int uv_stride = mb->dst.uv_stride;
+  uint8_t *const y = mb->plane[0].dst.buf;
+  uint8_t *const u = mb->plane[1].dst.buf;
+  uint8_t *const v = mb->plane[2].dst.buf;
+  const int y_stride = mb->plane[0].dst.stride;
+  const int uv_stride = mb->plane[1].dst.stride;
 
   vp9_build_inter_predictors_sby(mb, y, y_stride, mb_row, mb_col, bsize);
   vp9_build_inter_predictors_sbuv(mb, u, v, uv_stride, mb_row, mb_col, bsize);
@@ -1233,9 +1233,9 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
 /*encoder only*/
 void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
                                         int mb_row, int mb_col) {
-  uint8_t *const u = xd->dst.u_buffer;
-  uint8_t *const v = xd->dst.v_buffer;
-  const int uv_stride = xd->dst.uv_stride;
+  uint8_t *const u = xd->plane[1].dst.buf;
+  uint8_t *const v = xd->plane[2].dst.buf;
+  const int uv_stride = xd->plane[1].dst.stride;
 
   vp9_build_inter_predictors_sbuv(xd, u, v, uv_stride, mb_row, mb_col,
                                   BLOCK_SIZE_MB16X16);
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index ee34fc5d2..8ffdfd13b 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -83,8 +83,39 @@
 static int scaled_buffer_offset(int x_offset,
                                 int y_offset,
                                 int stride,
                                 const struct scale_factors *scale) {
-  return scale->scale_value_y(y_offset, scale) * stride +
-         scale->scale_value_x(x_offset, scale);
+  if (scale)
+    return scale->scale_value_y(y_offset, scale) * stride +
+           scale->scale_value_x(x_offset, scale);
+  return y_offset * stride + x_offset;
+}
+
+static void setup_pred_plane(struct buf_2d *dst,
+                             uint8_t *src, int stride,
+                             int mb_row, int mb_col,
+                             const struct scale_factors *scale,
+                             int subsampling_x, int subsampling_y) {
+  const int x = (16 * mb_col) >> subsampling_x;
+  const int y = (16 * mb_row) >> subsampling_y;
+  dst->buf = src + scaled_buffer_offset(x, y, stride, scale);
+  dst->stride = stride;
+}
+
+// TODO(jkoleszar): audit all uses of this that don't set mb_row, mb_col
+static void setup_dst_planes(MACROBLOCKD *xd,
+                             const YV12_BUFFER_CONFIG *src,
+                             int mb_row, int mb_col) {
+  setup_pred_plane(&xd->plane[0].dst,
+                   src->y_buffer, src->y_stride,
+                   mb_row, mb_col, NULL,
+                   xd->plane[0].subsampling_x, xd->plane[0].subsampling_y);
+  setup_pred_plane(&xd->plane[1].dst,
+                   src->u_buffer, src->uv_stride,
+                   mb_row, mb_col, NULL,
+                   xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
+  setup_pred_plane(&xd->plane[2].dst,
+                   src->v_buffer, src->uv_stride,
+                   mb_row, mb_col, NULL,
+                   xd->plane[2].subsampling_x, xd->plane[2].subsampling_y);
 }
 
 static void setup_pred_block(YV12_BUFFER_CONFIG *dst,
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 88c3f191e..1031be7ba 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -609,7 +609,7 @@ void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd,
                                                int ystride) {
   uint8_t intrapredictor[256];
   vp9_build_intra_predictors(
-      xd->dst.y_buffer, xd->dst.y_stride,
+      xd->plane[0].dst.buf, xd->plane[0].dst.stride,
       intrapredictor, 16,
       xd->mode_info_context->mbmi.interintra_mode, 16, 16,
       xd->up_available, xd->left_available, xd->right_available);
@@ -624,12 +624,12 @@ void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd,
   uint8_t uintrapredictor[64];
   uint8_t vintrapredictor[64];
   vp9_build_intra_predictors(
-      xd->dst.u_buffer, xd->dst.uv_stride,
+      xd->plane[1].dst.buf, xd->plane[1].dst.stride,
       uintrapredictor, 8,
       xd->mode_info_context->mbmi.interintra_uv_mode, 8, 8,
       xd->up_available, xd->left_available, xd->right_available);
   vp9_build_intra_predictors(
-      xd->dst.v_buffer, xd->dst.uv_stride,
+      xd->plane[2].dst.buf, xd->plane[1].dst.stride,
       vintrapredictor, 8,
       xd->mode_info_context->mbmi.interintra_uv_mode, 8, 8,
       xd->up_available, xd->left_available, xd->right_available);
@@ -644,7 +644,7 @@ void vp9_build_interintra_32x32_predictors_sby(MACROBLOCKD *xd,
                                                int ystride) {
   uint8_t intrapredictor[1024];
   vp9_build_intra_predictors(
-      xd->dst.y_buffer, xd->dst.y_stride,
+      xd->plane[0].dst.buf, xd->plane[0].dst.stride,
       intrapredictor, 32,
       xd->mode_info_context->mbmi.interintra_mode, 32, 32,
       xd->up_available, xd->left_available, xd->right_available);
@@ -659,12 +659,12 @@ void vp9_build_interintra_32x32_predictors_sbuv(MACROBLOCKD *xd,
   uint8_t uintrapredictor[256];
   uint8_t vintrapredictor[256];
   vp9_build_intra_predictors(
-      xd->dst.u_buffer, xd->dst.uv_stride,
+      xd->plane[1].dst.buf, xd->plane[1].dst.stride,
       uintrapredictor, 16,
       xd->mode_info_context->mbmi.interintra_uv_mode, 16, 16,
       xd->up_available, xd->left_available, xd->right_available);
   vp9_build_intra_predictors(
-      xd->dst.v_buffer, xd->dst.uv_stride,
+      xd->plane[2].dst.buf, xd->plane[1].dst.stride,
       vintrapredictor, 16,
       xd->mode_info_context->mbmi.interintra_uv_mode, 16, 16,
       xd->up_available, xd->left_available, xd->right_available);
@@ -689,7 +689,7 @@ void vp9_build_interintra_64x64_predictors_sby(MACROBLOCKD *xd,
                                                int ystride) {
   uint8_t intrapredictor[4096];
   const int mode = xd->mode_info_context->mbmi.interintra_mode;
-  vp9_build_intra_predictors(xd->dst.y_buffer, xd->dst.y_stride,
+  vp9_build_intra_predictors(xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                              intrapredictor, 64, mode, 64, 64,
                              xd->up_available, xd->left_available,
                              xd->right_available);
@@ -704,11 +704,11 @@ void vp9_build_interintra_64x64_predictors_sbuv(MACROBLOCKD *xd,
   uint8_t uintrapredictor[1024];
   uint8_t vintrapredictor[1024];
   const int mode = xd->mode_info_context->mbmi.interintra_uv_mode;
-  vp9_build_intra_predictors(xd->dst.u_buffer, xd->dst.uv_stride,
+  vp9_build_intra_predictors(xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                              uintrapredictor, 32, mode, 32, 32,
                              xd->up_available, xd->left_available,
                              xd->right_available);
-  vp9_build_intra_predictors(xd->dst.v_buffer, xd->dst.uv_stride,
+  vp9_build_intra_predictors(xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                              vintrapredictor, 32, mode, 32, 32,
                              xd->up_available, xd->left_available,
                              xd->right_available);
@@ -734,8 +734,8 @@ void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd,
   const int bwl = b_width_log2(bsize),  bw = 4 << bwl;
   const int bhl = b_height_log2(bsize), bh = 4 << bhl;
 
-  vp9_build_intra_predictors(xd->dst.y_buffer, xd->dst.y_stride,
-                             xd->dst.y_buffer, xd->dst.y_stride,
+  vp9_build_intra_predictors(xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                             xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                              xd->mode_info_context->mbmi.mode,
                              bw, bh,
                              xd->up_available, xd->left_available,
@@ -747,13 +747,13 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd,
   const int bwl = b_width_log2(bsize) - 1, bw = 4 << bwl;
   const int bhl = b_height_log2(bsize) - 1, bh = 4 << bhl;
 
-  vp9_build_intra_predictors(xd->dst.u_buffer, xd->dst.uv_stride,
-                             xd->dst.u_buffer, xd->dst.uv_stride,
+  vp9_build_intra_predictors(xd->plane[1].dst.buf, xd->plane[1].dst.stride,
+                             xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                              xd->mode_info_context->mbmi.uv_mode,
                              bw, bh, xd->up_available,
                              xd->left_available, xd->right_available);
-  vp9_build_intra_predictors(xd->dst.v_buffer, xd->dst.uv_stride,
-                             xd->dst.v_buffer, xd->dst.uv_stride,
+  vp9_build_intra_predictors(xd->plane[2].dst.buf, xd->plane[1].dst.stride,
+                             xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                              xd->mode_info_context->mbmi.uv_mode,
                              bw, bh, xd->up_available,
                              xd->left_available, xd->right_available);
diff --git a/vp9/common/x86/vp9_recon_wrapper_sse2.c b/vp9/common/x86/vp9_recon_wrapper_sse2.c
index 12d2f970c..97148fbb8 100644
--- a/vp9/common/x86/vp9_recon_wrapper_sse2.c
+++ b/vp9/common/x86/vp9_recon_wrapper_sse2.c
@@ -35,7 +35,7 @@ static void build_intra_predictors_mbuv_x86(MACROBLOCKD *xd,
                                             build_intra_pred_mbuv_fn_t ho_fn) {
   int mode = xd->mode_info_context->mbmi.uv_mode;
   build_intra_pred_mbuv_fn_t fn;
-  int src_stride = xd->dst.uv_stride;
+  int src_stride = xd->plane[1].dst.stride;
 
   switch (mode) {
     case V_PRED:
@@ -68,34 +68,34 @@ static void build_intra_predictors_mbuv_x86(MACROBLOCKD *xd,
       return;
   }
 
-  fn(dst_u, dst_stride, xd->dst.u_buffer, src_stride);
-  fn(dst_v, dst_stride, xd->dst.v_buffer, src_stride);
+  fn(dst_u, dst_stride, xd->plane[1].dst.buf, src_stride);
+  fn(dst_v, dst_stride, xd->plane[2].dst.buf, src_stride);
 }
 
 void vp9_build_intra_predictors_mbuv_sse2(MACROBLOCKD *xd) {
-  build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer,
-                                  xd->dst.v_buffer, xd->dst.uv_stride,
+  build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf,
+                                  xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                                   vp9_intra_pred_uv_tm_sse2,
                                   vp9_intra_pred_uv_ho_mmx2);
 }
 
 void vp9_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *xd) {
-  build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer,
-                                  xd->dst.v_buffer, xd->dst.uv_stride,
+  build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf,
+                                  xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                                   vp9_intra_pred_uv_tm_ssse3,
                                   vp9_intra_pred_uv_ho_ssse3);
 }
 
 void vp9_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *xd) {
-  build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer,
-                                  xd->dst.v_buffer, xd->dst.uv_stride,
+  build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf,
+                                  xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                                   vp9_intra_pred_uv_tm_sse2,
                                   vp9_intra_pred_uv_ho_mmx2);
 }
 
 void vp9_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *xd) {
-  build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer,
-                                  xd->dst.v_buffer, xd->dst.uv_stride,
+  build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf,
+                                  xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                                   vp9_intra_pred_uv_tm_ssse3,
                                   vp9_intra_pred_uv_ho_ssse3);
 }
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index f8ef6c030..98378cfdb 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -250,15 +250,15 @@ static void decode_16x16(MACROBLOCKD *xd) {
   const TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
 
   vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff,
-                              xd->block[0].dequant, xd->dst.y_buffer,
-                              xd->dst.y_stride, xd->plane[0].eobs[0]);
+                              xd->block[0].dequant, xd->plane[0].dst.buf,
+                              xd->plane[0].dst.stride, xd->plane[0].eobs[0]);
 
   vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
-                           xd->dst.u_buffer, xd->dst.uv_stride,
+                           xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                            xd->plane[1].eobs[0]);
 
   vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[20].dequant,
-                           xd->dst.v_buffer, xd->dst.uv_stride,
+                           xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                            xd->plane[2].eobs[0]);
 }
 
@@ -275,7 +275,7 @@ static void decode_8x8(MACROBLOCKD *xd) {
       int16_t *q = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16);
       int16_t *dq = xd->block[0].dequant;
       uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst;
-      int stride = xd->dst.y_stride;
+      int stride = xd->plane[0].dst.stride;
       if (mode == I8X8_PRED) {
         BLOCKD *b = &xd->block[ib];
         int i8x8mode = b->bmi.as_mode.first;
@@ -287,8 +287,8 @@ static void decode_8x8(MACROBLOCKD *xd) {
     }
   } else {
     vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff,
-                                     xd->block[0].dequant, xd->dst.y_buffer,
-                                     xd->dst.y_stride, xd);
+                                     xd->block[0].dequant, xd->plane[0].dst.buf,
+                                     xd->plane[0].dst.stride, xd);
   }
 
   // chroma
@@ -315,16 +315,16 @@ static void decode_8x8(MACROBLOCKD *xd) {
     }
   } else if (mode == SPLITMV) {
     xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
-        xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs);
+        xd->plane[1].dst.buf, xd->plane[1].dst.stride, xd->plane[1].eobs);
     xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
-        xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs);
+        xd->plane[2].dst.buf, xd->plane[1].dst.stride, xd->plane[2].eobs);
   } else {
     vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
-                             xd->dst.u_buffer, xd->dst.uv_stride,
+                             xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                              xd->plane[1].eobs[0]);
     vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant,
-                             xd->dst.v_buffer, xd->dst.uv_stride,
+                             xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                              xd->plane[2].eobs[0]);
   }
 }
@@ -396,27 +396,27 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_reader *r) {
 #endif
     vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
     xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
-         xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs);
+         xd->plane[1].dst.buf, xd->plane[1].dst.stride, xd->plane[1].eobs);
     xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
-         xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs);
+         xd->plane[2].dst.buf, xd->plane[1].dst.stride, xd->plane[2].eobs);
   } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) {
     xd->itxm_add_y_block(xd->plane[0].qcoeff, xd->block[0].dequant,
-        xd->dst.y_buffer, xd->dst.y_stride, xd);
+        xd->plane[0].dst.buf, xd->plane[0].dst.stride, xd);
     xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
-         xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs);
+         xd->plane[1].dst.buf, xd->plane[1].dst.stride, xd->plane[1].eobs);
     xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
-         xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs);
+         xd->plane[2].dst.buf, xd->plane[1].dst.stride, xd->plane[2].eobs);
   } else {
     for (i = 0; i < 16; i++) {
      tx_type = get_tx_type_4x4(xd, i);
      dequant_add_y(xd, tx_type, i);
    }
    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
-                          xd->dst.u_buffer, xd->dst.uv_stride,
+                          xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                           xd->plane[1].eobs);
    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
-                          xd->dst.v_buffer, xd->dst.uv_stride,
+                          xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                           xd->plane[2].eobs);
   }
 }
@@ -430,10 +430,11 @@ static INLINE void decode_sby_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
   for (n = 0; n < y_count; n++) {
     const int x_idx = n & (bw - 1);
     const int y_idx = n >> bwl;
-    const int y_offset = (y_idx * 32) * mb->dst.y_stride + (x_idx * 32);
+    const int y_offset = (y_idx * 32) * mb->plane[0].dst.stride + (x_idx * 32);
     vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 1024),
                                mb->block[0].dequant ,
-                               mb->dst.y_buffer + y_offset, mb->dst.y_stride,
+                               mb->plane[0].dst.buf + y_offset,
+                               mb->plane[0].dst.stride,
                                mb->plane[0].eobs[n * 64]);
   }
 }
@@ -446,15 +447,18 @@ static INLINE void decode_sbuv_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
   for (n = 0; n < uv_count; n++) {
     const int x_idx = n & (bw - 1);
     const int y_idx = n >> (bwl - 1);
-    const int uv_offset = (y_idx * 32) * mb->dst.uv_stride + (x_idx * 32);
+    const int uv_offset = (y_idx * 32) * mb->plane[1].dst.stride +
+        (x_idx * 32);
     vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 1024),
                                mb->block[16].dequant,
-                               mb->dst.u_buffer + uv_offset,
-                               mb->dst.uv_stride, mb->plane[1].eobs[n * 64]);
+                               mb->plane[1].dst.buf + uv_offset,
+                               mb->plane[1].dst.stride,
+                               mb->plane[1].eobs[n * 64]);
     vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 1024),
                                mb->block[20].dequant,
-                               mb->dst.v_buffer + uv_offset,
-                               mb->dst.uv_stride, mb->plane[2].eobs[n * 64]);
+                               mb->plane[2].dst.buf + uv_offset,
+                               mb->plane[1].dst.stride,
+                               mb->plane[2].eobs[n * 64]);
   }
 }
@@ -467,14 +471,14 @@ static INLINE void decode_sby_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
   for (n = 0; n < y_count; n++) {
     const int x_idx = n & (bw - 1);
     const int y_idx = n >> bwl;
-    const int y_offset = (y_idx * 16) * mb->dst.y_stride + (x_idx * 16);
+    const int y_offset = (y_idx * 16) * mb->plane[0].dst.stride + (x_idx * 16);
     const TX_TYPE tx_type = get_tx_type_16x16(mb,
                                               (y_idx * (4 * bw) + x_idx) * 4);
     vp9_dequant_iht_add_16x16_c(tx_type,
                                 BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
                                 mb->block[0].dequant,
-                                mb->dst.y_buffer + y_offset,
-                                mb->dst.y_stride,
+                                mb->plane[0].dst.buf + y_offset,
+                                mb->plane[0].dst.stride,
                                 mb->plane[0].eobs[n * 16]);
   }
 }
@@ -490,14 +494,16 @@ static INLINE void decode_sbuv_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
   for (n = 0; n < uv_count; n++) {
     const int x_idx = n & (bw - 1);
     const int y_idx = n >> (bwl - 1);
-    const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16);
+    const int uv_offset = (y_idx * 16) * mb->plane[1].dst.stride + (x_idx * 16);
     vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 256),
                                mb->block[16].dequant,
-                               mb->dst.u_buffer + uv_offset, mb->dst.uv_stride,
+                               mb->plane[1].dst.buf + uv_offset,
+                               mb->plane[1].dst.stride,
                                mb->plane[1].eobs[n * 16]);
     vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 256),
                                mb->block[20].dequant,
-                               mb->dst.v_buffer + uv_offset, mb->dst.uv_stride,
+                               mb->plane[2].dst.buf + uv_offset,
+                               mb->plane[1].dst.stride,
                                mb->plane[2].eobs[n * 16]);
   }
 }
@@ -512,14 +518,15 @@ static INLINE void decode_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
   for (n = 0; n < y_count; n++) {
     const int x_idx = n & (bw - 1);
     const int y_idx = n >> bwl;
-    const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8);
+    const int y_offset = (y_idx * 8) * xd->plane[0].dst.stride + (x_idx * 8);
     const TX_TYPE tx_type = get_tx_type_8x8(xd,
                                             (y_idx * (2 * bw) + x_idx) * 2);
 
     vp9_dequant_iht_add_8x8_c(tx_type,
                               BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
                               xd->block[0].dequant,
-                              xd->dst.y_buffer + y_offset, xd->dst.y_stride,
+                              xd->plane[0].dst.buf + y_offset,
+                              xd->plane[0].dst.stride,
                               xd->plane[0].eobs[n * 4]);
   }
 }
@@ -534,14 +541,16 @@ static INLINE void decode_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
   for (n = 0; n < uv_count; n++) {
     const int x_idx = n & (bw - 1);
     const int y_idx = n >> (bwl - 1);
-    const int uv_offset = (y_idx * 8) * xd->dst.uv_stride + (x_idx * 8);
+    const int uv_offset = (y_idx * 8) * xd->plane[1].dst.stride + (x_idx * 8);
     vp9_dequant_idct_add_8x8(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64),
                              xd->block[16].dequant,
-                             xd->dst.u_buffer + uv_offset, xd->dst.uv_stride,
+                             xd->plane[1].dst.buf + uv_offset,
+                             xd->plane[1].dst.stride,
                              xd->plane[1].eobs[n * 4]);
     vp9_dequant_idct_add_8x8(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64),
                              xd->block[20].dequant,
-                             xd->dst.v_buffer + uv_offset, xd->dst.uv_stride,
+                             xd->plane[2].dst.buf + uv_offset,
+                             xd->plane[1].dst.stride,
                              xd->plane[2].eobs[n * 4]);
   }
 }
@@ -555,18 +564,19 @@ static INLINE void decode_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
   for (n = 0; n < y_count; n++) {
     const int x_idx = n & (bw - 1);
     const int y_idx = n >> bwl;
-    const int y_offset = (y_idx * 4) * xd->dst.y_stride + (x_idx * 4);
+    const int y_offset = (y_idx * 4) * xd->plane[0].dst.stride + (x_idx * 4);
     const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
     if (tx_type == DCT_DCT) {
       xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
                    xd->block[0].dequant,
-                   xd->dst.y_buffer + y_offset, xd->dst.y_stride,
+                   xd->plane[0].dst.buf + y_offset, xd->plane[0].dst.stride,
                    xd->plane[0].eobs[n]);
     } else {
       vp9_dequant_iht_add_c(tx_type,
                             BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
-                            xd->block[0].dequant, xd->dst.y_buffer + y_offset,
-                            xd->dst.y_stride, xd->plane[0].eobs[n]);
+                            xd->block[0].dequant,
+                            xd->plane[0].dst.buf + y_offset,
+                            xd->plane[0].dst.stride, xd->plane[0].eobs[n]);
     }
   }
 }
@@ -580,13 +590,15 @@ static INLINE void decode_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
   for (n = 0; n < uv_count; n++) {
     const int x_idx = n & (bw - 1);
     const int y_idx = n >> (bwl - 1);
-    const int uv_offset = (y_idx * 4) * xd->dst.uv_stride + (x_idx * 4);
+    const int uv_offset = (y_idx * 4) * xd->plane[1].dst.stride + (x_idx * 4);
     xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 16),
                  xd->block[16].dequant,
-                 xd->dst.u_buffer + uv_offset, xd->dst.uv_stride, xd->plane[1].eobs[n]);
+                 xd->plane[1].dst.buf + uv_offset, xd->plane[1].dst.stride,
+                 xd->plane[1].eobs[n]);
     xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 16),
                  xd->block[20].dequant,
-                 xd->dst.v_buffer + uv_offset, xd->dst.uv_stride, xd->plane[2].eobs[n]);
+                 xd->plane[2].dst.buf + uv_offset, xd->plane[1].dst.stride,
+                 xd->plane[2].eobs[n]);
   }
 }
@@ -758,21 +770,21 @@ static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
     printf("final y\n");
     for (i = 0; i < 16; i++) {
       for (j = 0; j < 16; j++)
-        printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]);
+        printf("%3d ", xd->plane[0].dst.buf[i * xd->plane[0].dst.stride + j]);
       printf("\n");
     }
     printf("\n");
     printf("final u\n");
     for (i = 0; i < 8; i++) {
       for (j = 0; j < 8; j++)
-        printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]);
+        printf("%3d ", xd->plane[1].dst.buf[i * xd->plane[1].dst.stride + j]);
       printf("\n");
     }
     printf("\n");
     printf("final v\n");
     for (i = 0; i < 8; i++) {
       for (j = 0; j < 8; j++)
-        printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]);
+        printf("%3d ", xd->plane[2].dst.buf[i * xd->plane[1].dst.stride + j]);
       printf("\n");
     }
     fflush(stdout);
@@ -820,9 +832,9 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE_TYPE bsize,
   set_mb_row(cm, xd, mb_row, bh);
   set_mb_col(cm, xd, mb_col, bw);
 
-  xd->dst.y_buffer = dst_fb->y_buffer + recon_yoffset;
-  xd->dst.u_buffer = dst_fb->u_buffer + recon_uvoffset;
-  xd->dst.v_buffer = dst_fb->v_buffer + recon_uvoffset;
+  xd->plane[0].dst.buf = dst_fb->y_buffer + recon_yoffset;
+  xd->plane[1].dst.buf = dst_fb->u_buffer + recon_uvoffset;
+  xd->plane[2].dst.buf = dst_fb->v_buffer + recon_uvoffset;
 }
 
 static void set_refs(VP9D_COMP *pbi, int mb_row, int mb_col) {
@@ -1651,8 +1663,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
   // Initialize xd pointers. Any reference should do for xd->pre, so use 0.
   vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->active_ref_idx[0]],
              sizeof(YV12_BUFFER_CONFIG));
-  vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx],
-             sizeof(YV12_BUFFER_CONFIG));
+  setup_dst_planes(xd, &pc->yv12_fb[pc->new_fb_idx], 0, 0);
 
   // Create the segmentation map structure and set to 0
   if (!pc->last_frame_seg_map)
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index dc5e1299e..c563e8e33 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -278,7 +278,7 @@ static void build_activity_map(VP9_COMP *cpi) {
     // for each macroblock col in image
     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
 #if ALT_ACT_MEASURE
-      xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
+      xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
       xd->left_available = (mb_col != 0);
       recon_yoffset += 16;
 #endif
@@ -305,8 +305,8 @@ static void build_activity_map(VP9_COMP *cpi) {
 #if ALT_ACT_MEASURE
     // extend the recon for intra prediction
-    vp9_extend_mb_row(new_yv12, xd->dst.y_buffer + 16,
-                      xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+    vp9_extend_mb_row(new_yv12, xd->plane[0].dst.buf + 16,
+                      xd->plane[1].dst.buf + 8, xd->plane[2].dst.buf + 8);
 #endif
   }
 
@@ -565,9 +565,7 @@ static void set_offsets(VP9_COMP *cpi,
   xd->prev_mode_info_context = cm->prev_mi + idx_str;
 
   // Set up destination pointers
-  setup_pred_block(&xd->dst,
-                   &cm->yv12_fb[dst_fb_idx],
-                   mb_row, mb_col, NULL, NULL);
+  setup_dst_planes(xd, &cm->yv12_fb[dst_fb_idx], mb_row, mb_col);
 
   /* Set up limit values for MV components to prevent them from
    * extending beyond the UMV borders assuming 16x16 block size */
@@ -1244,7 +1242,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
   // Copy data over into macro block data structures.
   x->src = *cpi->Source;
   xd->pre = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]];
-  xd->dst = cm->yv12_fb[cm->new_fb_idx];
+  setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], 0, 0);
 
   // set up frame for intra coded blocks
   vp9_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
@@ -2131,11 +2129,11 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
 #if CONFIG_COMP_INTERINTRA_PRED
     if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
       vp9_build_interintra_16x16_predictors_mb(xd,
-                                               xd->dst.y_buffer,
-                                               xd->dst.u_buffer,
-                                               xd->dst.v_buffer,
-                                               xd->dst.y_stride,
-                                               xd->dst.uv_stride);
+                                               xd->plane[0].dst.buf,
+                                               xd->plane[1].dst.buf,
+                                               xd->plane[2].dst.buf,
+                                               xd->plane[0].dst.stride,
+                                               xd->plane[1].dst.stride);
     }
 #endif
   }
@@ -2173,21 +2171,21 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
     printf("final y\n");
     for (i = 0; i < 16; i++) {
       for (j = 0; j < 16; j++)
-        printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]);
+        printf("%3d ", xd->plane[0].dst.buf[i * xd->plane[0].dst.stride + j]);
       printf("\n");
     }
     printf("\n");
     printf("final u\n");
     for (i = 0; i < 8; i++) {
       for (j = 0; j < 8; j++)
-        printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]);
+        printf("%3d ", xd->plane[1].dst.buf[i * xd->plane[1].dst.stride + j]);
       printf("\n");
     }
     printf("\n");
     printf("final v\n");
     for (i = 0; i < 8; i++) {
       for (j = 0; j < 8; j++)
-        printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]);
+        printf("%3d ", xd->plane[2].dst.buf[i * xd->plane[1].dst.stride + j]);
       printf("\n");
     }
     fflush(stdout);
@@ -2245,13 +2243,13 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *src = x->src.y_buffer;
-  uint8_t *dst = xd->dst.y_buffer;
+  uint8_t *dst = xd->plane[0].dst.buf;
   const uint8_t *usrc = x->src.u_buffer;
-  uint8_t *udst = xd->dst.u_buffer;
+  uint8_t *udst = xd->plane[1].dst.buf;
   const uint8_t *vsrc = x->src.v_buffer;
-  uint8_t *vdst = xd->dst.v_buffer;
-  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
-  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+  uint8_t *vdst = xd->plane[2].dst.buf;
+  int src_y_stride = x->src.y_stride, dst_y_stride = xd->plane[0].dst.stride;
+  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->plane[1].dst.stride;
   int n;
   MODE_INFO *mi = x->e_mbd.mode_info_context;
   unsigned int segment_id = mi->mbmi.segment_id;
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index 3c100f2d7..95befb72d 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -88,7 +88,7 @@ void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) {
 
   vp9_build_intra_predictors_sby_s(xd, BLOCK_SIZE_MB16X16);
   vp9_subtract_sby_s_c(x->src_diff, x->src.y_buffer, x->src.y_stride,
-                       xd->dst.y_buffer, xd->dst.y_stride,
+                       xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                        BLOCK_SIZE_MB16X16);
 
   switch (tx_size) {
@@ -125,7 +125,8 @@ void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) {
   vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
   vp9_subtract_sbuv_s_c(x->src_diff,
                         x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
-                        xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride,
+                        xd->plane[1].dst.buf, xd->plane[2].dst.buf,
+                        xd->plane[1].dst.stride,
                         BLOCK_SIZE_MB16X16);
 
   switch (tx_size) {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 4d4b4249f..ea19fbfa8 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -107,11 +107,12 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc,
 static void subtract_mb(MACROBLOCK *x) {
   MACROBLOCKD *xd = &x->e_mbd;
   vp9_subtract_sby_s_c(x->src_diff, x->src.y_buffer, x->src.y_stride,
-                       xd->dst.y_buffer, xd->dst.y_stride,
+                       xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                        BLOCK_SIZE_MB16X16);
   vp9_subtract_sbuv_s_c(x->src_diff,
                         x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
-                        xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride,
+                        xd->plane[1].dst.buf, xd->plane[2].dst.buf,
+                        xd->plane[1].dst.stride,
                         BLOCK_SIZE_MB16X16);
 }
 
@@ -913,10 +914,11 @@ void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x,
 void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) {
   MACROBLOCKD *xd = &x->e_mbd;
 
-  vp9_build_inter_predictors_sby(xd, xd->dst.y_buffer, xd->dst.y_stride,
+  vp9_build_inter_predictors_sby(xd, xd->plane[0].dst.buf,
+                                 xd->plane[0].dst.stride,
                                  mb_row, mb_col, BLOCK_SIZE_MB16X16);
   vp9_subtract_sby_s_c(x->src_diff, x->src.y_buffer, x->src.y_stride,
-                       xd->dst.y_buffer, xd->dst.y_stride,
+                       xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                        BLOCK_SIZE_MB16X16);
 
   vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index f2e004608..91d7f055f 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -32,6 +32,8 @@
 #include "vp9/common/vp9_entropymv.h"
 #include "vp9/encoder/vp9_encodemv.h"
 #include "./vpx_scale_rtcd.h"
+// TODO(jkoleszar): for setup_dst_planes
+#include "vp9/common/vp9_reconinter.h"
 
 #define OUTPUT_FPF 0
 
@@ -484,7 +486,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
 
   x->src = * cpi->Source;
   xd->pre = *lst_yv12;
-  xd->dst = *new_yv12;
+  setup_dst_planes(xd, new_yv12, 0, 0);
 
   x->partition_info = x->pi;
 
@@ -533,9 +535,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
       int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
       set_mb_col(cm, xd, mb_col, 1 << mb_height_log2(BLOCK_SIZE_MB16X16));
 
-      xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
-      xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset;
-      xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;
+      xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
+      xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
+      xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
       xd->left_available = (mb_col != 0);
 
       // do intra 16x16 prediction
@@ -700,8 +702,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
     x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 
     // extend the recon for intra prediction
-    vp9_extend_mb_row(new_yv12, xd->dst.y_buffer + 16,
-                      xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+    vp9_extend_mb_row(new_yv12, xd->plane[0].dst.buf + 16,
+                      xd->plane[1].dst.buf + 8, xd->plane[2].dst.buf + 8);
     vp9_clear_system_state();  // __asm emms;
   }
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index e9da395bc..9784d2df8 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -71,10 +71,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
   }
 
   vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
-  vp9_build_inter_predictors_sby(xd, xd->dst.y_buffer, xd->dst.y_stride,
+  vp9_build_inter_predictors_sby(xd, xd->plane[0].dst.buf,
+                                 xd->plane[0].dst.stride,
                                  mb_row, mb_col, BLOCK_SIZE_MB16X16);
   best_err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride,
-                          xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
+                          xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                          INT_MAX);
 
   /* restore UMV window */
   x->mv_col_min = tmp_col_min;
@@ -189,13 +191,13 @@ static int find_best_16x16_intra
     xd->mode_info_context->mbmi.mode = mode;
     vp9_build_intra_predictors(x->src.y_buffer, x->src.y_stride,
-                               xd->dst.y_buffer, xd->dst.y_stride,
+                               xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                                xd->mode_info_context->mbmi.mode,
                                bw, bh,
                                xd->up_available, xd->left_available,
                                xd->right_available);
     err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride,
-                       xd->dst.y_buffer, xd->dst.y_stride, best_err);
+                       xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err);
 
     // find best
     if (err < best_err) {
@@ -234,8 +236,8 @@ static void update_mbgraph_mb_stats
   x->src.y_buffer = buf->y_buffer + mb_y_offset;
   x->src.y_stride = buf->y_stride;
 
-  xd->dst.y_buffer = cm->yv12_fb[cm->new_fb_idx].y_buffer + mb_y_offset;
-  xd->dst.y_stride = cm->yv12_fb[cm->new_fb_idx].y_stride;
+  xd->plane[0].dst.buf = cm->yv12_fb[cm->new_fb_idx].y_buffer + mb_y_offset;
+  xd->plane[0].dst.stride = cm->yv12_fb[cm->new_fb_idx].y_stride;
 
   // do intra 16x16 prediction
   intra_error = find_best_16x16_intra(cpi, buf, mb_y_offset,
@@ -303,9 +305,9 @@ static void update_mbgraph_frame_stats
   x->mv_row_max = (cm->mb_rows - 1) * 16 + VP9BORDERINPIXELS
                   - 16 - VP9_INTERP_EXTEND;
   xd->up_available = 0;
-  xd->dst.y_stride = buf->y_stride;
+  xd->plane[0].dst.stride = buf->y_stride;
   xd->pre.y_stride = buf->y_stride;
-  xd->dst.uv_stride = buf->uv_stride;
+  xd->plane[1].dst.stride = buf->uv_stride;
   xd->mode_info_context = &mi_local;
 
   for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index cf37626f9..b940e9849 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -835,8 +835,8 @@ static void super_block_yrd(VP9_COMP *cpi,
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
-  uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
-  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
+  uint8_t *src = x->src.y_buffer, *dst = xd->plane[0].dst.buf;
+  int src_y_stride = x->src.y_stride, dst_y_stride = xd->plane[0].dst.stride;
 
   vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride, bs);
 
@@ -1519,9 +1519,9 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
                              BLOCK_SIZE_TYPE bsize) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
-  uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
-  uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
-  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+  uint8_t *usrc = x->src.u_buffer, *udst = xd->plane[1].dst.buf;
+  uint8_t *vsrc = x->src.v_buffer, *vdst = xd->plane[2].dst.buf;
+  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->plane[1].dst.stride;
 
   vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
                         udst, vdst, dst_uv_stride, bsize);
@@ -3035,7 +3035,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       int tmp_dist_y, tmp_dist_u, tmp_dist_v;
       vp9_build_inter_predictors_sb(xd, mb_row, mb_col, bsize);
       var = cpi->fn_ptr[block_size].vf(*(b->base_src), b->src_stride,
-                                       xd->dst.y_buffer, xd->dst.y_stride,
+                                       xd->plane[0].dst.buf,
+                                       xd->plane[0].dst.stride,
                                        &sse);
       // Note our transform coeffs are 8 times an orthogonal transform.
       // Hence quantizer step is also 8 times. To get effective quantizer
      // we need to divide by 8 before sending to modeling function.
      model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
                               xd->block[0].dequant[1] >> 3,
                               &tmp_rate_y, &tmp_dist_y);
      var = cpi->fn_ptr[uv_block_size].vf(x->src.u_buffer, x->src.uv_stride,
-                                          xd->dst.u_buffer, xd->dst.uv_stride,
+                                          xd->plane[1].dst.buf,
+                                          xd->plane[1].dst.stride,
                                           &sse);
      model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
                               xd->block[16].dequant[1] >> 3,
                               &tmp_rate_u, &tmp_dist_u);
      var = cpi->fn_ptr[uv_block_size].vf(x->src.v_buffer, x->src.uv_stride,
-                                          xd->dst.v_buffer, xd->dst.uv_stride,
+                                          xd->plane[2].dst.buf,
+                                          xd->plane[1].dst.stride,
                                           &sse);
      model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
                               xd->block[20].dequant[1] >> 3,
@@ -3079,15 +3082,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       int i;
       for (i = 0; i < 16 * bh; ++i)
         vpx_memcpy(tmp_ybuf + i * 16 * bw,
-                   xd->dst.y_buffer + i * xd->dst.y_stride,
+                   xd->plane[0].dst.buf + i * xd->plane[0].dst.stride,
                    sizeof(unsigned char) * 16 * bw);
       for (i = 0; i < 8 * bh; ++i)
         vpx_memcpy(tmp_ubuf + i * 8 * bw,
-                   xd->dst.u_buffer + i * xd->dst.uv_stride,
+                   xd->plane[1].dst.buf + i * xd->plane[1].dst.stride,
                    sizeof(unsigned char) * 8 * bw);
       for (i = 0; i < 8 * bh; ++i)
         vpx_memcpy(tmp_vbuf + i * 8 * bw,
-                   xd->dst.v_buffer + i * xd->dst.uv_stride,
+                   xd->plane[2].dst.buf + i * xd->plane[1].dst.stride,
                    sizeof(unsigned char) * 8 * bw);
       pred_exists = 1;
     }
@@ -3120,18 +3123,21 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       int tmp_dist_y, tmp_dist_u, tmp_dist_v;
       vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16);
       var = vp9_variance16x16(*(b->base_src), b->src_stride,
-                              xd->dst.y_buffer, xd->dst.y_stride, &sse);
+                              xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                              &sse);
       // Note our transform coeffs are 8 times an orthogonal transform.
       // Hence quantizer step is also 8 times. To get effective quantizer
       // we need to divide by 8 before sending to modeling function.
      model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3,
                               &tmp_rate_y, &tmp_dist_y);
      var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride,
-                            xd->dst.u_buffer, xd->dst.uv_stride, &sse);
+                            xd->plane[1].dst.buf, xd->plane[1].dst.stride,
+                            &sse);
      model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3,
                               &tmp_rate_u, &tmp_dist_u);
      var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride,
-                            xd->dst.v_buffer, xd->dst.uv_stride, &sse);
+                            xd->plane[2].dst.buf, xd->plane[1].dst.stride,
+                            &sse);
      model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3,
                               &tmp_rate_v, &tmp_dist_v);
      rd = RDCOST(x->rdmult, x->rddiv,
@@ -3158,15 +3164,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       int i;
       for (i = 0; i < 16 * bh; ++i)
         vpx_memcpy(tmp_ybuf + i * 16 * bw,
-                   xd->dst.y_buffer + i * xd->dst.y_stride,
+                   xd->plane[0].dst.buf + i * xd->plane[0].dst.stride,
                    sizeof(unsigned char) * 16 * bw);
       for (i = 0; i < 8 * bh; ++i)
         vpx_memcpy(tmp_ubuf + i * 8 * bw,
-                   xd->dst.u_buffer + i * xd->dst.uv_stride,
+                   xd->plane[1].dst.buf + i * xd->plane[1].dst.stride,
                    sizeof(unsigned char) * 8 * bw);
       for (i = 0; i < 8 * bh; ++i)
         vpx_memcpy(tmp_vbuf + i * 8 * bw,
-                   xd->dst.v_buffer + i * xd->dst.uv_stride,
+                   xd->plane[2].dst.buf + i * xd->plane[1].dst.stride,
                    sizeof(unsigned char) * 8 * bw);
       pred_exists = 1;
     }
@@ -3185,13 +3191,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   if (pred_exists) {
     // FIXME(rbultje): mb code still predicts into xd->predictor
     for (i = 0; i < bh * 16; ++i)
-      vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride,
+      vpx_memcpy(xd->plane[0].dst.buf + i * xd->plane[0].dst.stride,
                  tmp_ybuf + i * bw * 16, sizeof(unsigned char) * bw * 16);
     for (i = 0; i < bh * 8; ++i)
-      vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride,
+      vpx_memcpy(xd->plane[1].dst.buf + i * xd->plane[1].dst.stride,
                  tmp_ubuf + i * bw * 8, sizeof(unsigned char) * bw * 8);
     for (i = 0; i < bh * 8; ++i)
-      vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride,
+      vpx_memcpy(xd->plane[2].dst.buf + i * xd->plane[1].dst.stride,
                  tmp_vbuf + i * bw * 8, sizeof(unsigned char) * bw * 8);
   } else {
     // Handles the special case when a filter that is not in the
@@ -3217,11 +3223,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
     if (bsize != BLOCK_SIZE_MB16X16) {
       var = cpi->fn_ptr[block_size].vf(*(b->base_src), b->src_stride,
-                                       xd->dst.y_buffer, xd->dst.y_stride,
+                                       xd->plane[0].dst.buf,
+                                       xd->plane[0].dst.stride,
                                        &sse);
     } else {
       var = vp9_variance16x16(*(b->base_src), b->src_stride,
-                              xd->dst.y_buffer, xd->dst.y_stride, &sse);
+                              xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                              &sse);
     }
 
     if ((int)sse < threshold) {
@@ -3237,18 +3245,20 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         unsigned int sse2u, sse2v;
         // FIXME(rbultje): mb predictors predict into xd->predictor
         var = cpi->fn_ptr[uv_block_size].vf(x->src.u_buffer, x->src.uv_stride,
-                                            xd->dst.u_buffer,
-                                            xd->dst.uv_stride, &sse2u);
+                                            xd->plane[1].dst.buf,
+                                            xd->plane[1].dst.stride, &sse2u);
         var = cpi->fn_ptr[uv_block_size].vf(x->src.v_buffer, x->src.uv_stride,
-                                            xd->dst.v_buffer,
-                                            xd->dst.uv_stride, &sse2v);
+                                            xd->plane[2].dst.buf,
+                                            xd->plane[1].dst.stride, &sse2v);
         sse2 = sse2u + sse2v;
       } else {
        unsigned int sse2u, sse2v;
        var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride,
-                              xd->dst.u_buffer, xd->dst.uv_stride, &sse2u);
+                              xd->plane[1].dst.buf, xd->plane[1].dst.stride,
+                              &sse2u);
        var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride,
-                              xd->dst.v_buffer, xd->dst.uv_stride, &sse2v);
+                              xd->plane[2].dst.buf, xd->plane[1].dst.stride,
+                              &sse2v);
        sse2 = sse2u + sse2v;
      }
@@ -3764,8 +3774,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
       vp9_subtract_sbuv_s_c(x->src_diff,
                             x->src.u_buffer,
                             x->src.v_buffer, x->src.uv_stride,
-                            xd->dst.u_buffer,
-                            xd->dst.v_buffer, xd->dst.uv_stride,
+                            xd->plane[1].dst.buf,
+                            xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                             BLOCK_SIZE_MB16X16);
 
       super_block_uvrd_4x4(cm, x, &rate_uv, &distortion_uv,
-- 
2.40.0
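
Note (illustrative sketch, not part of the patch): the payoff of keying the
destination buffers by plane index is that follow-up commits can replace
hand-written y/u/v cases with a single loop over planes. The structs below
are simplified stand-ins for the real MACROBLOCKD/buf_2d layout, with
hypothetical names; only buf_2d itself mirrors the patch.

    #include <stdint.h>

    #define MAX_MB_PLANE 3

    /* Same shape as the buf_2d added to vp9_blockd.h by this patch. */
    struct buf_2d {
      uint8_t *buf;
      int stride;
    };

    /* Simplified stand-ins for mb_plane / MACROBLOCKD. */
    struct plane_sketch {
      struct buf_2d dst;
    };

    struct mbd_sketch {
      struct plane_sketch plane[MAX_MB_PLANE];
    };

    /* Fill an 8x8 block at the top-left of every plane's dst buffer:
     * one loop over plane indices instead of three named y/u/v cases. */
    static void fill_8x8_all_planes(struct mbd_sketch *xd, uint8_t value) {
      int i, r, c;
      for (i = 0; i < MAX_MB_PLANE; i++) {
        struct buf_2d *const d = &xd->plane[i].dst;
        for (r = 0; r < 8; r++)
          for (c = 0; c < 8; c++)
            d->buf[r * d->stride + c] = value;
      }
    }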