granicus.if.org Git - libvpx/commitdiff
Implicit weighted prediction experiment
author Deb Mukherjee <debargha@google.com>
Tue, 12 Mar 2013 21:21:08 +0000 (14:21 -0700)
committer Deb Mukherjee <debargha@google.com>
Tue, 26 Mar 2013 23:58:56 +0000 (16:58 -0700)
Adds an experiment that uses a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), chosen implicitly based on
how consistent each predictor is with the already reconstructed
pixels above and to the left of the current macroblock or
superblock.
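
As a sketch of the combining step (illustrative only; combine_k_by_8
is a hypothetical name, and the commit instead defines one fixed
combine_* helper per weight, per the TODO in vp9_convolve.c), a
selected pair (k/8, (8 - k)/8) blends the two 8-bit predictions per
pixel roughly as:

    /* Hypothetical sketch: weight k/8 on the first prediction and
       (8 - k)/8 on the second, with round-to-nearest. The committed
       combine_* helpers below are this function specialized for
       fixed k in {1, 2, 3, 5, 6, 7}. */
    static uint8_t combine_k_by_8(uint8_t p1, uint8_t p2, int k) {
      return (uint8_t)((p1 * k + p2 * (8 - k) + 4) >> 3);
    }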

Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting; the code for that case
is in place, however, controlled by a macro. The same weighting is
used for the Y and UV components, with the weight derived by
analyzing the Y component only.
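
The implicit choice compares, for each predictor separately, how well
its top row and left column match the already reconstructed pixels
bordering the block (get_consistency_metric() in vp9_reconinter.c
below), then maps the two border SADs to a weight index into the
extended predict[2][2][8] table. A condensed, hypothetical rendering
of get_weight() (choose_weight is an assumed name):

    /* Hypothetical condensation of get_weight(): m1/m2 are border SADs
       for the first/second predictor; smaller means more consistent
       with the reconstructed neighborhood. Index 4 selects the plain
       (1/2, 1/2) average. */
    static int choose_weight(int64_t m1, int64_t m2) {
      if (2 * m1 < m2)     return 6;  /* (3/4, 1/4) */
      if (4 * m1 < 3 * m2) return 5;  /* (5/8, 3/8) */
      if (2 * m2 < m1)     return 2;  /* (1/4, 3/4) */
      if (4 * m2 < 3 * m1) return 3;  /* (3/8, 5/8) */
      return 4;                       /* (1/2, 1/2) */
    }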

Results (over the compound inter-intra experiment):
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%

The experiment suggests a bigger benefit from explicitly signaled weights.

Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a

12 files changed:
configure
vp9/common/vp9_blockd.h
vp9/common/vp9_convolve.c
vp9/common/vp9_convolve.h
vp9/common/vp9_reconinter.c
vp9/common/vp9_rtcd_defs.sh
vp9/decoder/vp9_decodemv.c
vp9/decoder/vp9_decodframe.c
vp9/encoder/vp9_encodeframe.c
vp9/encoder/vp9_rdopt.c
vp9/encoder/vp9_temporal_filter.c
vpxenc.c

index a791ae5418ead591330366ff35f10bfe775241ca..dea79dd23e3419f79352cab3ad0a9bbc154f3cc1 100755 (executable)
--- a/configure
+++ b/configure
@@ -249,6 +249,7 @@ EXPERIMENT_LIST="
     useselectrefmv
     modelcoefprob
     loop_dering
+    implicit_compoundinter_weight
 "
 CONFIG_LIST="
     external_build
index 3dfef41b2686968753616aeed7f0c968ae413474..23d0bfd593374c8a9970157d52aaaff244dfb1c5 100644 (file)
@@ -288,7 +288,11 @@ struct scale_factors {
   int y_den;
   int y_offset_q4;
   int y_step_q4;
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+  convolve_fn_t predict[2][2][8];  // horiz, vert, weight (0 - 7)
+#else
   convolve_fn_t predict[2][2][2];  // horiz, vert, avg
+#endif
 };
 
 typedef struct macroblockd {
index b062e7dc7fe2a530f2a6b2a72e3099012aec0cc3..3ab8bec7a162d0b16c107dbf1b5dac9a67ed85bb 100644 (file)
@@ -122,6 +122,78 @@ static void convolve_avg_horiz_c(const uint8_t *src, int src_stride,
   }
 }
 
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+
+static inline uint8_t combine_qtr(uint8_t a, uint8_t b) {
+  return (((a) + (b) * 3 + 2) >> 2);
+}
+
+static inline uint8_t combine_3qtr(uint8_t a, uint8_t b) {
+  return (((a) * 3 + (b) + 2) >> 2);
+}
+
+static inline uint8_t combine_1by8(uint8_t a, uint8_t b) {
+  return (((a) * 1 + (b) * 7 + 4) >> 3);
+}
+
+static inline uint8_t combine_3by8(uint8_t a, uint8_t b) {
+  return (((a) * 3 + (b) * 5 + 4) >> 3);
+}
+
+static inline uint8_t combine_5by8(uint8_t a, uint8_t b) {
+  return (((a) * 5 + (b) * 3 + 4) >> 3);
+}
+
+static inline uint8_t combine_7by8(uint8_t a, uint8_t b) {
+  return (((a) * 7 + (b) * 1 + 4) >> 3);
+}
+
+// TODO(debargha): Implement with a separate weight parameter
+static void convolve_wtd_horiz_c(const uint8_t *src, int src_stride,
+                                 uint8_t *dst, int dst_stride,
+                                 const int16_t *filter_x0, int x_step_q4,
+                                 const int16_t *filter_y, int y_step_q4,
+                                 int w, int h, int taps,
+                                 uint8_t (*combine)(uint8_t a, uint8_t b)) {
+  int x, y, k, sum;
+  const int16_t *filter_x_base = filter_x0;
+
+#if ALIGN_FILTERS_256
+  filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
+#endif
+
+  /* Adjust base pointer address for this source line */
+  src -= taps / 2 - 1;
+
+  for (y = 0; y < h; ++y) {
+    /* Pointer to filter to use */
+    const int16_t *filter_x = filter_x0;
+
+    /* Initial phase offset */
+    int x0_q4 = (filter_x - filter_x_base) / taps;
+    int x_q4 = x0_q4;
+
+    for (x = 0; x < w; ++x) {
+      /* Per-pixel src offset */
+      int src_x = (x_q4 - x0_q4) >> 4;
+
+      for (sum = 0, k = 0; k < taps; ++k) {
+        sum += src[src_x + k] * filter_x[k];
+      }
+      sum += (VP9_FILTER_WEIGHT >> 1);
+      dst[x] = combine(dst[x], clip_pixel(sum >> VP9_FILTER_SHIFT));
+
+      /* Adjust source and filter to use for the next pixel */
+      x_q4 += x_step_q4;
+      filter_x = filter_x_base + (x_q4 & 0xf) * taps;
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+#endif
+
 static void convolve_vert_c(const uint8_t *src, int src_stride,
                             uint8_t *dst, int dst_stride,
                             const int16_t *filter_x, int x_step_q4,
@@ -207,6 +279,52 @@ static void convolve_avg_vert_c(const uint8_t *src, int src_stride,
   }
 }
 
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void convolve_wtd_vert_c(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride,
+                                const int16_t *filter_x, int x_step_q4,
+                                const int16_t *filter_y0, int y_step_q4,
+                                int w, int h, int taps,
+                                uint8_t (*combine)(uint8_t a, uint8_t b)) {
+  int x, y, k, sum;
+
+  const int16_t *filter_y_base = filter_y0;
+
+#if ALIGN_FILTERS_256
+  filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
+#endif
+
+  /* Adjust base pointer address for this source column */
+  src -= src_stride * (taps / 2 - 1);
+  for (x = 0; x < w; ++x) {
+    /* Pointer to filter to use */
+    const int16_t *filter_y = filter_y0;
+
+    /* Initial phase offset */
+    int y0_q4 = (filter_y - filter_y_base) / taps;
+    int y_q4 = y0_q4;
+
+    for (y = 0; y < h; ++y) {
+      /* Per-pixel src offset */
+      int src_y = (y_q4 - y0_q4) >> 4;
+
+      for (sum = 0, k = 0; k < taps; ++k) {
+        sum += src[(src_y + k) * src_stride] * filter_y[k];
+      }
+      sum += (VP9_FILTER_WEIGHT >> 1);
+      dst[y * dst_stride] = combine(dst[y * dst_stride],
+                                    clip_pixel(sum >> VP9_FILTER_SHIFT));
+
+      /* Adjust source and filter to use for the next pixel */
+      y_q4 += y_step_q4;
+      filter_y = filter_y_base + (y_q4 & 0xf) * taps;
+    }
+    ++src;
+    ++dst;
+  }
+}
+#endif
+
 static void convolve_c(const uint8_t *src, int src_stride,
                        uint8_t *dst, int dst_stride,
                        const int16_t *filter_x, int x_step_q4,
@@ -285,6 +403,68 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, int src_stride,
                        w, h, 8);
 }
 
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+void vp9_convolve8_1by8_horiz_c(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride,
+                                const int16_t *filter_x, int x_step_q4,
+                                const int16_t *filter_y, int y_step_q4,
+                                int w, int h) {
+  convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+                       filter_x, x_step_q4, filter_y, y_step_q4,
+                       w, h, 8, combine_1by8);
+}
+
+void vp9_convolve8_qtr_horiz_c(const uint8_t *src, int src_stride,
+                               uint8_t *dst, int dst_stride,
+                               const int16_t *filter_x, int x_step_q4,
+                               const int16_t *filter_y, int y_step_q4,
+                               int w, int h) {
+  convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+                       filter_x, x_step_q4, filter_y, y_step_q4,
+                       w, h, 8, combine_qtr);
+}
+
+void vp9_convolve8_3by8_horiz_c(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride,
+                                const int16_t *filter_x, int x_step_q4,
+                                const int16_t *filter_y, int y_step_q4,
+                                int w, int h) {
+  convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+                       filter_x, x_step_q4, filter_y, y_step_q4,
+                       w, h, 8, combine_3by8);
+}
+
+void vp9_convolve8_5by8_horiz_c(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride,
+                                const int16_t *filter_x, int x_step_q4,
+                                const int16_t *filter_y, int y_step_q4,
+                                int w, int h) {
+  convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+                       filter_x, x_step_q4, filter_y, y_step_q4,
+                       w, h, 8, combine_5by8);
+}
+
+void vp9_convolve8_3qtr_horiz_c(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride,
+                                const int16_t *filter_x, int x_step_q4,
+                                const int16_t *filter_y, int y_step_q4,
+                                int w, int h) {
+  convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+                       filter_x, x_step_q4, filter_y, y_step_q4,
+                       w, h, 8, combine_3qtr);
+}
+
+void vp9_convolve8_7by8_horiz_c(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride,
+                                const int16_t *filter_x, int x_step_q4,
+                                const int16_t *filter_y, int y_step_q4,
+                                int w, int h) {
+  convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+                       filter_x, x_step_q4, filter_y, y_step_q4,
+                       w, h, 8, combine_7by8);
+}
+#endif
+
 void vp9_convolve8_vert_c(const uint8_t *src, int src_stride,
                           uint8_t *dst, int dst_stride,
                           const int16_t *filter_x, int x_step_q4,
@@ -305,6 +485,68 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, int src_stride,
                       w, h, 8);
 }
 
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+void vp9_convolve8_1by8_vert_c(const uint8_t *src, int src_stride,
+                               uint8_t *dst, int dst_stride,
+                               const int16_t *filter_x, int x_step_q4,
+                               const int16_t *filter_y, int y_step_q4,
+                               int w, int h) {
+  convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+                      filter_x, x_step_q4, filter_y, y_step_q4,
+                      w, h, 8, combine_1by8);
+}
+
+void vp9_convolve8_qtr_vert_c(const uint8_t *src, int src_stride,
+                              uint8_t *dst, int dst_stride,
+                              const int16_t *filter_x, int x_step_q4,
+                              const int16_t *filter_y, int y_step_q4,
+                              int w, int h) {
+  convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+                      filter_x, x_step_q4, filter_y, y_step_q4,
+                      w, h, 8, combine_qtr);
+}
+
+void vp9_convolve8_3by8_vert_c(const uint8_t *src, int src_stride,
+                               uint8_t *dst, int dst_stride,
+                               const int16_t *filter_x, int x_step_q4,
+                               const int16_t *filter_y, int y_step_q4,
+                               int w, int h) {
+  convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+                      filter_x, x_step_q4, filter_y, y_step_q4,
+                      w, h, 8, combine_3by8);
+}
+
+void vp9_convolve8_5by8_vert_c(const uint8_t *src, int src_stride,
+                               uint8_t *dst, int dst_stride,
+                               const int16_t *filter_x, int x_step_q4,
+                               const int16_t *filter_y, int y_step_q4,
+                               int w, int h) {
+  convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+                      filter_x, x_step_q4, filter_y, y_step_q4,
+                      w, h, 8, combine_5by8);
+}
+
+void vp9_convolve8_3qtr_vert_c(const uint8_t *src, int src_stride,
+                               uint8_t *dst, int dst_stride,
+                               const int16_t *filter_x, int x_step_q4,
+                               const int16_t *filter_y, int y_step_q4,
+                               int w, int h) {
+  convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+                      filter_x, x_step_q4, filter_y, y_step_q4,
+                      w, h, 8, combine_3qtr);
+}
+
+void vp9_convolve8_7by8_vert_c(const uint8_t *src, int src_stride,
+                               uint8_t *dst, int dst_stride,
+                               const int16_t *filter_x, int x_step_q4,
+                               const int16_t *filter_y, int y_step_q4,
+                               int w, int h) {
+  convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+                      filter_x, x_step_q4, filter_y, y_step_q4,
+                      w, h, 8, combine_7by8);
+}
+#endif
+
 void vp9_convolve8_c(const uint8_t *src, int src_stride,
                      uint8_t *dst, int dst_stride,
                      const int16_t *filter_x, int x_step_q4,
@@ -337,6 +579,140 @@ void vp9_convolve8_avg_c(const uint8_t *src, int src_stride,
                    w, h);
 }
 
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+void vp9_convolve8_1by8_c(const uint8_t *src, int src_stride,
+                         uint8_t *dst, int dst_stride,
+                         const int16_t *filter_x, int x_step_q4,
+                         const int16_t *filter_y, int y_step_q4,
+                         int w, int h) {
+  /* Fixed size intermediate buffer places limits on parameters. */
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+  assert(w <= 16);
+  assert(h <= 16);
+
+  vp9_convolve8(src, src_stride,
+                temp, 16,
+                filter_x, x_step_q4,
+                filter_y, y_step_q4,
+                w, h);
+  vp9_convolve_1by8(temp, 16,
+                    dst, dst_stride,
+                    NULL, 0, /* These unused parameters should be removed! */
+                    NULL, 0, /* These unused parameters should be removed! */
+                    w, h);
+}
+
+void vp9_convolve8_qtr_c(const uint8_t *src, int src_stride,
+                         uint8_t *dst, int dst_stride,
+                         const int16_t *filter_x, int x_step_q4,
+                         const int16_t *filter_y, int y_step_q4,
+                         int w, int h) {
+  /* Fixed size intermediate buffer places limits on parameters. */
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+  assert(w <= 16);
+  assert(h <= 16);
+
+  vp9_convolve8(src, src_stride,
+                temp, 16,
+                filter_x, x_step_q4,
+                filter_y, y_step_q4,
+                w, h);
+  vp9_convolve_qtr(temp, 16,
+                   dst, dst_stride,
+                   NULL, 0, /* These unused parameters should be removed! */
+                   NULL, 0, /* These unused parameters should be removed! */
+                   w, h);
+}
+
+void vp9_convolve8_3by8_c(const uint8_t *src, int src_stride,
+                         uint8_t *dst, int dst_stride,
+                         const int16_t *filter_x, int x_step_q4,
+                         const int16_t *filter_y, int y_step_q4,
+                         int w, int h) {
+  /* Fixed size intermediate buffer places limits on parameters. */
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+  assert(w <= 16);
+  assert(h <= 16);
+
+  vp9_convolve8(src, src_stride,
+                temp, 16,
+                filter_x, x_step_q4,
+                filter_y, y_step_q4,
+                w, h);
+  vp9_convolve_3by8(temp, 16,
+                    dst, dst_stride,
+                    NULL, 0, /* These unused parameters should be removed! */
+                    NULL, 0, /* These unused parameters should be removed! */
+                    w, h);
+}
+
+void vp9_convolve8_5by8_c(const uint8_t *src, int src_stride,
+                         uint8_t *dst, int dst_stride,
+                         const int16_t *filter_x, int x_step_q4,
+                         const int16_t *filter_y, int y_step_q4,
+                         int w, int h) {
+  /* Fixed size intermediate buffer places limits on parameters. */
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+  assert(w <= 16);
+  assert(h <= 16);
+
+  vp9_convolve8(src, src_stride,
+                temp, 16,
+                filter_x, x_step_q4,
+                filter_y, y_step_q4,
+                w, h);
+  vp9_convolve_5by8(temp, 16,
+                    dst, dst_stride,
+                    NULL, 0, /* These unused parameters should be removed! */
+                    NULL, 0, /* These unused parameters should be removed! */
+                    w, h);
+}
+
+void vp9_convolve8_3qtr_c(const uint8_t *src, int src_stride,
+                          uint8_t *dst, int dst_stride,
+                          const int16_t *filter_x, int x_step_q4,
+                          const int16_t *filter_y, int y_step_q4,
+                          int w, int h) {
+  /* Fixed size intermediate buffer places limits on parameters. */
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+  assert(w <= 16);
+  assert(h <= 16);
+
+  vp9_convolve8(src, src_stride,
+                temp, 16,
+                filter_x, x_step_q4,
+                filter_y, y_step_q4,
+                w, h);
+  vp9_convolve_3qtr(temp, 16,
+                    dst, dst_stride,
+                    NULL, 0, /* These unused parameters should be removed! */
+                    NULL, 0, /* These unused parameters should be removed! */
+                    w, h);
+}
+
+void vp9_convolve8_7by8_c(const uint8_t *src, int src_stride,
+                         uint8_t *dst, int dst_stride,
+                         const int16_t *filter_x, int x_step_q4,
+                         const int16_t *filter_y, int y_step_q4,
+                         int w, int h) {
+  /* Fixed size intermediate buffer places limits on parameters. */
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+  assert(w <= 16);
+  assert(h <= 16);
+
+  vp9_convolve8(src, src_stride,
+                temp, 16,
+                filter_x, x_step_q4,
+                filter_y, y_step_q4,
+                w, h);
+  vp9_convolve_7by8(temp, 16,
+                    dst, dst_stride,
+                    NULL, 0, /* These unused parameters should be removed! */
+                    NULL, 0, /* These unused parameters should be removed! */
+                    w, h);
+}
+#endif
+
 void vp9_convolve_copy(const uint8_t *src, int src_stride,
                        uint8_t *dst, int dst_stride,
                        const int16_t *filter_x, int filter_x_stride,
@@ -374,3 +750,101 @@ void vp9_convolve_avg(const uint8_t *src, int src_stride,
     dst += dst_stride;
   }
 }
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+void vp9_convolve_1by8(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int filter_x_stride,
+                       const int16_t *filter_y, int filter_y_stride,
+                       int w, int h) {
+  int x, y;
+
+  for (y = 0; y < h; ++y) {
+    for (x = 0; x < w; ++x) {
+      dst[x] = combine_1by8(dst[x], src[x]);
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void vp9_convolve_qtr(const uint8_t *src, int src_stride,
+                      uint8_t *dst, int dst_stride,
+                      const int16_t *filter_x, int filter_x_stride,
+                      const int16_t *filter_y, int filter_y_stride,
+                      int w, int h) {
+  int x, y;
+
+  for (y = 0; y < h; ++y) {
+    for (x = 0; x < w; ++x) {
+      dst[x] = combine_qtr(dst[x], src[x]);
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void vp9_convolve_3by8(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int filter_x_stride,
+                       const int16_t *filter_y, int filter_y_stride,
+                       int w, int h) {
+  int x, y;
+
+  for (y = 0; y < h; ++y) {
+    for (x = 0; x < w; ++x) {
+      dst[x] = combine_3by8(dst[x], src[x]);
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void vp9_convolve_5by8(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int filter_x_stride,
+                       const int16_t *filter_y, int filter_y_stride,
+                       int w, int h) {
+  int x, y;
+
+  for (y = 0; y < h; ++y) {
+    for (x = 0; x < w; ++x) {
+      dst[x] = combine_5by8(dst[x], src[x]);
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void vp9_convolve_3qtr(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int filter_x_stride,
+                       const int16_t *filter_y, int filter_y_stride,
+                       int w, int h) {
+  int x, y;
+
+  for (y = 0; y < h; ++y) {
+    for (x = 0; x < w; ++x) {
+      dst[x] = combine_3qtr(dst[x], src[x]);
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void vp9_convolve_7by8(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int filter_x_stride,
+                       const int16_t *filter_y, int filter_y_stride,
+                       int w, int h) {
+  int x, y;
+
+  for (y = 0; y < h; ++y) {
+    for (x = 0; x < w; ++x) {
+      dst[x] = combine_7by8(dst[x], src[x]);
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+#endif
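
For reference, the new vp9_convolve8_<w>_c wrappers above all share
one two-stage pattern: run the ordinary 8-tap convolve into a small
intermediate buffer, then merge that buffer into dst at the fixed
weight. A condensed sketch under that reading (convolve8_wtd_c and
its runtime k parameter are hypothetical; the commit defines one
wrapper per weight instead):

    /* Hypothetical condensed form of the vp9_convolve8_<w>_c wrappers:
       convolve into a 16x16 temp, then blend with weight k/8 kept on
       the existing dst. Assumes w, h <= 16, as the wrappers assert. */
    static void convolve8_wtd_c(const uint8_t *src, int src_stride,
                                uint8_t *dst, int dst_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y, int y_step_q4,
                                int w, int h, int k) {
      uint8_t temp[16 * 16];
      int x, y;
      assert(w <= 16 && h <= 16);
      vp9_convolve8(src, src_stride, temp, 16,
                    filter_x, x_step_q4, filter_y, y_step_q4, w, h);
      for (y = 0; y < h; ++y)
        for (x = 0; x < w; ++x)
          dst[y * dst_stride + x] =
              (uint8_t)((dst[y * dst_stride + x] * k +
                         temp[y * 16 + x] * (8 - k) + 4) >> 3);
    }
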
index 8c485618730fb9765f91881c105d16b3748dfa6e..bef2d85641783ef1a14c4cccd493bb23d8033a19 100644 (file)
@@ -10,6 +10,7 @@
 #ifndef VP9_COMMON_CONVOLVE_H_
 #define VP9_COMMON_CONVOLVE_H_
 
+#include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
 
 typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
@@ -32,6 +33,50 @@ void vp9_convolve_avg(const uint8_t *src, int src_stride,
                       const int16_t *filter_y, int y_step_q4,
                       int w, int h);
 
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+// Not a convolution, a block wtd (1/8, 7/8) average for (dst, src)
+void vp9_convolve_1by8(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int x_step_q4,
+                       const int16_t *filter_y, int y_step_q4,
+                       int w, int h);
+
+// Not a convolution, a block wtd (1/4, 3/4) average for (dst, src)
+void vp9_convolve_qtr(const uint8_t *src, int src_stride,
+                      uint8_t *dst, int dst_stride,
+                      const int16_t *filter_x, int x_step_q4,
+                      const int16_t *filter_y, int y_step_q4,
+                      int w, int h);
+
+// Not a convolution, a block wtd (3/8, 5/8) average for (dst, src)
+void vp9_convolve_3by8(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int x_step_q4,
+                       const int16_t *filter_y, int y_step_q4,
+                       int w, int h);
+
+// Not a convolution, a block wtd (5/8, 3/8) average for (dst, src)
+void vp9_convolve_5by8(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int x_step_q4,
+                       const int16_t *filter_y, int y_step_q4,
+                       int w, int h);
+
+// Not a convolution, a block wtd (3/4, 1/4) average for (dst, src)
+void vp9_convolve_3qtr(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int x_step_q4,
+                       const int16_t *filter_y, int y_step_q4,
+                       int w, int h);
+
+// Not a convolution, a block wtd (7/8, 1/8) average for (dst, src)
+void vp9_convolve_7by8(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int x_step_q4,
+                       const int16_t *filter_y, int y_step_q4,
+                       int w, int h);
+#endif
+
 struct subpix_fn_table {
   const int16_t (*filter_x)[8];
   const int16_t (*filter_y)[8];
index bd4c83f44e48713b986a676d08582d3fc18f66fd..110af5e325ed68c2e94d319933b57207663f466b 100644 (file)
@@ -39,6 +39,127 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
   // applied in one direction only, and not at all for 0,0, seems to give the
   // best quality, but it may be worth trying an additional mode that does
   // do the filtering on full-pel.
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+  if (scale->x_step_q4 == 16) {
+    if (scale->y_step_q4 == 16) {
+      // No scaling in either direction.
+      scale->predict[0][0][0] = vp9_convolve_copy;
+      scale->predict[0][0][1] = vp9_convolve_1by8;
+      scale->predict[0][0][2] = vp9_convolve_qtr;
+      scale->predict[0][0][3] = vp9_convolve_3by8;
+      scale->predict[0][0][4] = vp9_convolve_avg;
+      scale->predict[0][0][5] = vp9_convolve_5by8;
+      scale->predict[0][0][6] = vp9_convolve_3qtr;
+      scale->predict[0][0][7] = vp9_convolve_7by8;
+      scale->predict[0][1][0] = vp9_convolve8_vert;
+      scale->predict[0][1][1] = vp9_convolve8_1by8_vert;
+      scale->predict[0][1][2] = vp9_convolve8_qtr_vert;
+      scale->predict[0][1][3] = vp9_convolve8_3by8_vert;
+      scale->predict[0][1][4] = vp9_convolve8_avg_vert;
+      scale->predict[0][1][5] = vp9_convolve8_5by8_vert;
+      scale->predict[0][1][6] = vp9_convolve8_3qtr_vert;
+      scale->predict[0][1][7] = vp9_convolve8_7by8_vert;
+      scale->predict[1][0][0] = vp9_convolve8_horiz;
+      scale->predict[1][0][1] = vp9_convolve8_1by8_horiz;
+      scale->predict[1][0][2] = vp9_convolve8_qtr_horiz;
+      scale->predict[1][0][3] = vp9_convolve8_3by8_horiz;
+      scale->predict[1][0][4] = vp9_convolve8_avg_horiz;
+      scale->predict[1][0][5] = vp9_convolve8_5by8_horiz;
+      scale->predict[1][0][6] = vp9_convolve8_3qtr_horiz;
+      scale->predict[1][0][7] = vp9_convolve8_7by8_horiz;
+    } else {
+      // No scaling in x direction. Must always scale in the y direction.
+      scale->predict[0][0][0] = vp9_convolve8_vert;
+      scale->predict[0][0][1] = vp9_convolve8_1by8_vert;
+      scale->predict[0][0][2] = vp9_convolve8_qtr_vert;
+      scale->predict[0][0][3] = vp9_convolve8_3by8_vert;
+      scale->predict[0][0][4] = vp9_convolve8_avg_vert;
+      scale->predict[0][0][5] = vp9_convolve8_5by8_vert;
+      scale->predict[0][0][6] = vp9_convolve8_3qtr_vert;
+      scale->predict[0][0][7] = vp9_convolve8_7by8_vert;
+      scale->predict[0][1][0] = vp9_convolve8_vert;
+      scale->predict[0][1][1] = vp9_convolve8_1by8_vert;
+      scale->predict[0][1][2] = vp9_convolve8_qtr_vert;
+      scale->predict[0][1][3] = vp9_convolve8_3by8_vert;
+      scale->predict[0][1][4] = vp9_convolve8_avg_vert;
+      scale->predict[0][1][5] = vp9_convolve8_5by8_vert;
+      scale->predict[0][1][6] = vp9_convolve8_3qtr_vert;
+      scale->predict[0][1][7] = vp9_convolve8_7by8_vert;
+      scale->predict[1][0][0] = vp9_convolve8;
+      scale->predict[1][0][1] = vp9_convolve8_1by8;
+      scale->predict[1][0][2] = vp9_convolve8_qtr;
+      scale->predict[1][0][3] = vp9_convolve8_3by8;
+      scale->predict[1][0][4] = vp9_convolve8_avg;
+      scale->predict[1][0][5] = vp9_convolve8_5by8;
+      scale->predict[1][0][6] = vp9_convolve8_3qtr;
+      scale->predict[1][0][7] = vp9_convolve8_7by8;
+    }
+  } else {
+    if (scale->y_step_q4 == 16) {
+      // No scaling in the y direction. Must always scale in the x direction.
+      scale->predict[0][0][0] = vp9_convolve8_horiz;
+      scale->predict[0][0][1] = vp9_convolve8_1by8_horiz;
+      scale->predict[0][0][2] = vp9_convolve8_qtr_horiz;
+      scale->predict[0][0][3] = vp9_convolve8_3by8_horiz;
+      scale->predict[0][0][4] = vp9_convolve8_avg_horiz;
+      scale->predict[0][0][5] = vp9_convolve8_5by8_horiz;
+      scale->predict[0][0][6] = vp9_convolve8_3qtr_horiz;
+      scale->predict[0][0][7] = vp9_convolve8_7by8_horiz;
+      scale->predict[0][1][0] = vp9_convolve8;
+      scale->predict[0][1][1] = vp9_convolve8_1by8;
+      scale->predict[0][1][2] = vp9_convolve8_qtr;
+      scale->predict[0][1][3] = vp9_convolve8_3by8;
+      scale->predict[0][1][4] = vp9_convolve8_avg;
+      scale->predict[0][1][5] = vp9_convolve8_5by8;
+      scale->predict[0][1][6] = vp9_convolve8_3qtr;
+      scale->predict[0][1][7] = vp9_convolve8_7by8;
+      scale->predict[1][0][0] = vp9_convolve8_horiz;
+      scale->predict[1][0][1] = vp9_convolve8_1by8_horiz;
+      scale->predict[1][0][2] = vp9_convolve8_qtr_horiz;
+      scale->predict[1][0][3] = vp9_convolve8_3by8_horiz;
+      scale->predict[1][0][4] = vp9_convolve8_avg_horiz;
+      scale->predict[1][0][5] = vp9_convolve8_5by8_horiz;
+      scale->predict[1][0][6] = vp9_convolve8_3qtr_horiz;
+      scale->predict[1][0][7] = vp9_convolve8_7by8_horiz;
+    } else {
+      // Must always scale in both directions.
+      scale->predict[0][0][0] = vp9_convolve8;
+      scale->predict[0][0][1] = vp9_convolve8_1by8;
+      scale->predict[0][0][2] = vp9_convolve8_qtr;
+      scale->predict[0][0][3] = vp9_convolve8_3by8;
+      scale->predict[0][0][4] = vp9_convolve8_avg;
+      scale->predict[0][0][5] = vp9_convolve8_5by8;
+      scale->predict[0][0][6] = vp9_convolve8_3qtr;
+      scale->predict[0][0][7] = vp9_convolve8_7by8;
+      scale->predict[0][1][0] = vp9_convolve8;
+      scale->predict[0][1][1] = vp9_convolve8_1by8;
+      scale->predict[0][1][2] = vp9_convolve8_qtr;
+      scale->predict[0][1][3] = vp9_convolve8_3by8;
+      scale->predict[0][1][4] = vp9_convolve8_avg;
+      scale->predict[0][1][5] = vp9_convolve8_5by8;
+      scale->predict[0][1][6] = vp9_convolve8_3qtr;
+      scale->predict[0][1][7] = vp9_convolve8_7by8;
+      scale->predict[1][0][0] = vp9_convolve8;
+      scale->predict[1][0][1] = vp9_convolve8_1by8;
+      scale->predict[1][0][2] = vp9_convolve8_qtr;
+      scale->predict[1][0][3] = vp9_convolve8_3by8;
+      scale->predict[1][0][4] = vp9_convolve8_avg;
+      scale->predict[1][0][5] = vp9_convolve8_5by8;
+      scale->predict[1][0][6] = vp9_convolve8_3qtr;
+      scale->predict[1][0][7] = vp9_convolve8_7by8;
+    }
+  }
+  // 2D subpel motion always gets filtered in both directions
+  scale->predict[1][1][0] = vp9_convolve8;
+  scale->predict[1][1][1] = vp9_convolve8_1by8;
+  scale->predict[1][1][2] = vp9_convolve8_qtr;
+  scale->predict[1][1][3] = vp9_convolve8_3by8;
+  scale->predict[1][1][4] = vp9_convolve8_avg;
+  scale->predict[1][1][5] = vp9_convolve8_5by8;
+  scale->predict[1][1][6] = vp9_convolve8_3qtr;
+  scale->predict[1][1][7] = vp9_convolve8_7by8;
+}
+#else
   if (scale->x_step_q4 == 16) {
     if (scale->y_step_q4 == 16) {
       // No scaling in either direction.
@@ -80,6 +201,7 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
   scale->predict[1][1][0] = vp9_convolve8;
   scale->predict[1][1][1] = vp9_convolve8_avg;
 }
+#endif
 
 void vp9_setup_interp_filters(MACROBLOCKD *xd,
                               INTERPOLATIONFILTERTYPE mcomp_filter_type,
@@ -269,12 +391,11 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
                                uint8_t *dst, int dst_stride,
                                const int_mv *mv_q3,
                                const struct scale_factors *scale,
-                               int w, int h, int do_avg,
+                               int w, int h, int weight,
                                const struct subpix_fn_table *subpix) {
   int_mv32 mv = scale_motion_vector_q3_to_q4(mv_q3, scale);
   src += (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4);
-
-  scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][do_avg](
+  scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][weight](
       src, src_stride, dst, dst_stride,
       subpix->filter_x[mv.as_mv.col & 15], scale->x_step_q4,
       subpix->filter_y[mv.as_mv.row & 15], scale->y_step_q4,
@@ -289,7 +410,7 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
                                   const int_mv *fullpel_mv_q3,
                                   const int_mv *frac_mv_q4,
                                   const struct scale_factors *scale,
-                                  int w, int h, int do_avg,
+                                  int w, int h, int weight,
                                   const struct subpix_fn_table *subpix) {
   const int mv_row_q4 = ((fullpel_mv_q3->as_mv.row >> 3) << 4)
                         + (frac_mv_q4->as_mv.row & 0xf);
@@ -305,18 +426,21 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
   const int subpel_y = scaled_mv_row_q4 & 15;
 
   src += (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4);
-  scale->predict[!!subpel_x][!!subpel_y][do_avg](
+  scale->predict[!!subpel_x][!!subpel_y][weight](
       src, src_stride, dst, dst_stride,
       subpix->filter_x[subpel_x], scale->x_step_q4,
       subpix->filter_y[subpel_y], scale->y_step_q4,
       w, h);
 }
 
-static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
-                                      struct scale_factors *scale,
-                                      int block_size, int stride, int which_mv,
-                                      const struct subpix_fn_table *subpix,
-                                      int row, int col) {
+static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1,
+                                         struct scale_factors *scale,
+                                         uint8_t *predictor,
+                                         int block_size, int stride,
+                                         int which_mv, int weight,
+                                         int width, int height,
+                                         const struct subpix_fn_table *subpix,
+                                         int row, int col) {
   assert(d1->predictor - d0->predictor == block_size);
   assert(d1->pre == d0->pre + block_size);
 
@@ -327,11 +451,11 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
 
     vp9_build_inter_predictor(*base_pre + d0->pre,
                               d0->pre_stride,
-                              d0->predictor, stride,
+                              predictor, stride,
                               &d0->bmi.as_mv[which_mv],
                               &scale[which_mv],
-                              2 * block_size, block_size, which_mv,
-                              subpix);
+                              width, height,
+                              weight, subpix);
 
   } else {
     uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre;
@@ -339,116 +463,69 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
 
     vp9_build_inter_predictor(*base_pre0 + d0->pre,
                               d0->pre_stride,
-                              d0->predictor, stride,
+                              predictor, stride,
                               &d0->bmi.as_mv[which_mv],
                               &scale[which_mv],
-                              block_size, block_size, which_mv,
-                              subpix);
+                              width > block_size ? block_size : width, height,
+                              weight, subpix);
+
+    if (width <= block_size) return;
 
     set_scaled_offsets(&scale[which_mv], row, col + block_size);
 
     vp9_build_inter_predictor(*base_pre1 + d1->pre,
                               d1->pre_stride,
-                              d1->predictor, stride,
+                              predictor + block_size, stride,
                               &d1->bmi.as_mv[which_mv],
                               &scale[which_mv],
-                              block_size, block_size, which_mv,
-                              subpix);
+                              width - block_size, height,
+                              weight, subpix);
   }
 }
 
-/*encoder only*/
-void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
-                                        int mb_row,
-                                        int mb_col) {
-  int i, j;
-  BLOCKD *blockd = xd->block;
-
-  /* build uv mvs */
-  for (i = 0; i < 2; i++) {
-    for (j = 0; j < 2; j++) {
-      int yoffset = i * 8 + j * 2;
-      int uoffset = 16 + i * 2 + j;
-      int voffset = 20 + i * 2 + j;
-      int temp;
-
-      temp = blockd[yoffset  ].bmi.as_mv[0].as_mv.row
-             + blockd[yoffset + 1].bmi.as_mv[0].as_mv.row
-             + blockd[yoffset + 4].bmi.as_mv[0].as_mv.row
-             + blockd[yoffset + 5].bmi.as_mv[0].as_mv.row;
-
-      if (temp < 0) temp -= 4;
-      else temp += 4;
-
-      xd->block[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) &
-        xd->fullpixel_mask;
-
-      temp = blockd[yoffset  ].bmi.as_mv[0].as_mv.col
-             + blockd[yoffset + 1].bmi.as_mv[0].as_mv.col
-             + blockd[yoffset + 4].bmi.as_mv[0].as_mv.col
-             + blockd[yoffset + 5].bmi.as_mv[0].as_mv.col;
-
-      if (temp < 0) temp -= 4;
-      else temp += 4;
-
-      blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) &
-        xd->fullpixel_mask;
-
-      blockd[voffset].bmi.as_mv[0].as_mv.row =
-        blockd[uoffset].bmi.as_mv[0].as_mv.row;
-      blockd[voffset].bmi.as_mv[0].as_mv.col =
-        blockd[uoffset].bmi.as_mv[0].as_mv.col;
-
-      if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
-        temp = blockd[yoffset  ].bmi.as_mv[1].as_mv.row
-               + blockd[yoffset + 1].bmi.as_mv[1].as_mv.row
-               + blockd[yoffset + 4].bmi.as_mv[1].as_mv.row
-               + blockd[yoffset + 5].bmi.as_mv[1].as_mv.row;
-
-        if (temp < 0) {
-          temp -= 4;
-        } else {
-          temp += 4;
-        }
-
-        blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) &
-          xd->fullpixel_mask;
+static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
+                                      struct scale_factors *scale,
+                                      int block_size, int stride,
+                                      int which_mv, int weight,
+                                      const struct subpix_fn_table *subpix,
+                                      int row, int col) {
+  assert(d1->predictor - d0->predictor == block_size);
+  assert(d1->pre == d0->pre + block_size);
 
-        temp = blockd[yoffset  ].bmi.as_mv[1].as_mv.col
-               + blockd[yoffset + 1].bmi.as_mv[1].as_mv.col
-               + blockd[yoffset + 4].bmi.as_mv[1].as_mv.col
-               + blockd[yoffset + 5].bmi.as_mv[1].as_mv.col;
+  set_scaled_offsets(&scale[which_mv], row, col);
 
-        if (temp < 0) {
-          temp -= 4;
-        } else {
-          temp += 4;
-        }
+  if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) {
+    uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre;
 
-        blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) &
-          xd->fullpixel_mask;
+    vp9_build_inter_predictor(*base_pre + d0->pre,
+                              d0->pre_stride,
+                              d0->predictor, stride,
+                              &d0->bmi.as_mv[which_mv],
+                              &scale[which_mv],
+                              2 * block_size, block_size,
+                              weight, subpix);
 
-        blockd[voffset].bmi.as_mv[1].as_mv.row =
-          blockd[uoffset].bmi.as_mv[1].as_mv.row;
-        blockd[voffset].bmi.as_mv[1].as_mv.col =
-          blockd[uoffset].bmi.as_mv[1].as_mv.col;
-      }
-    }
-  }
+  } else {
+    uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre;
+    uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre;
 
-  for (i = 16; i < 24; i += 2) {
-    const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
-    const int x = 4 * (i & 1);
-    const int y = ((i - 16) >> 1) * 4;
+    vp9_build_inter_predictor(*base_pre0 + d0->pre,
+                              d0->pre_stride,
+                              d0->predictor, stride,
+                              &d0->bmi.as_mv[which_mv],
+                              &scale[which_mv],
+                              block_size, block_size,
+                              weight, subpix);
 
-    int which_mv;
-    BLOCKD *d0 = &blockd[i];
-    BLOCKD *d1 = &blockd[i + 1];
+    set_scaled_offsets(&scale[which_mv], row, col + block_size);
 
-    for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
-      build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
-                                &xd->subpix, mb_row * 8 + y, mb_col * 8 + x);
-    }
+    vp9_build_inter_predictor(*base_pre1 + d1->pre,
+                              d1->pre_stride,
+                              d1->predictor, stride,
+                              &d1->bmi.as_mv[which_mv],
+                              &scale[which_mv],
+                              block_size, block_size,
+                              weight, subpix);
   }
 }
 
@@ -488,18 +565,326 @@ static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
             (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
 }
 
-/*encoder only*/
-void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
-                                         uint8_t *dst_y,
-                                         int dst_ystride,
-                                         int mb_row,
-                                         int mb_col) {
+#define AVERAGE_WEIGHT  (1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT))
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+
+// Whether to use implicit weighting for UV
+#define USE_IMPLICIT_WEIGHT_UV
+
+// Whether to use implicit weighting for SplitMV
+// #define USE_IMPLICIT_WEIGHT_SPLITMV
+
+// #define SEARCH_MIN3
+static int64_t get_consistency_metric(MACROBLOCKD *xd,
+                                      uint8_t *tmp_y, int tmp_ystride) {
+  int block_size = 16 <<  xd->mode_info_context->mbmi.sb_type;
+  uint8_t *rec_y = xd->dst.y_buffer;
+  int rec_ystride = xd->dst.y_stride;
+  int64_t metric = 0;
+  int i;
+  if (xd->up_available) {
+    for (i = 0; i < block_size; ++i) {
+      int diff = abs(*(rec_y - rec_ystride + i) -
+                     *(tmp_y + i));
+#ifdef SEARCH_MIN3
+      // Searches for the min abs diff among 3 pixel neighbors in the border
+      int diff1 = xd->left_available ?
+          abs(*(rec_y - rec_ystride + i - 1) - *(tmp_y + i)) : diff;
+      int diff2 = i < block_size - 1 ?
+          abs(*(rec_y - rec_ystride + i + 1) - *(tmp_y + i)) : diff;
+      diff = diff <= diff1 ? diff : diff1;
+      diff = diff <= diff2 ? diff : diff2;
+#endif
+      metric += diff;
+    }
+  }
+  if (xd->left_available) {
+    for (i = 0; i < block_size; ++i) {
+      int diff = abs(*(rec_y - 1 + i * rec_ystride) -
+                     *(tmp_y + i * tmp_ystride));
+#ifdef SEARCH_MIN3
+      // Searches for the min abs diff among 3 pixel neighbors in the border
+      int diff1 = xd->up_available ?
+          abs(*(rec_y - 1 + (i - 1) * rec_ystride) -
+                      *(tmp_y + i * tmp_ystride)) : diff;
+      int diff2 = i < block_size - 1 ?
+          abs(*(rec_y - 1 + (i + 1) * rec_ystride) -
+              *(tmp_y + i * tmp_ystride)) : diff;
+      diff = diff <= diff1 ? diff : diff1;
+      diff = diff <= diff2 ? diff : diff2;
+#endif
+      metric += diff;
+    }
+  }
+  return metric;
+}
+
+static int get_weight(MACROBLOCKD *xd, int64_t metric_1, int64_t metric_2) {
+  int weight = AVERAGE_WEIGHT;
+  if (2 * metric_1 < metric_2)
+    weight = 6;
+  else if (4 * metric_1 < 3 * metric_2)
+    weight = 5;
+  else if (2 * metric_2 < metric_1)
+    weight = 2;
+  else if (4 * metric_2 < 3 * metric_1)
+    weight = 3;
+  return weight;
+}
+
+#ifdef USE_IMPLICIT_WEIGHT_SPLITMV
+static int get_implicit_compoundinter_weight_splitmv(
+    MACROBLOCKD *xd, int mb_row, int mb_col) {
+  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+  BLOCKD *blockd = xd->block;
+  const int use_second_ref = mbmi->second_ref_frame > 0;
+  int64_t metric_2 = 0, metric_1 = 0;
+  int i, which_mv, weight;
+  uint8_t tmp_y[256];
+  const int tmp_ystride = 16;
+
+  if (!use_second_ref) return 0;
+  if (!(xd->up_available || xd->left_available))
+    return AVERAGE_WEIGHT;
+
+  assert(xd->mode_info_context->mbmi.mode == SPLITMV);
+
+  which_mv = 1;  // second predictor
+  if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {
+    for (i = 0; i < 16; i += 8) {
+      BLOCKD *d0 = &blockd[i];
+      BLOCKD *d1 = &blockd[i + 2];
+      const int y = i & 8;
+
+      blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+      blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2];
+
+      if (mbmi->need_to_clamp_mvs) {
+        clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd);
+        clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
+      }
+      if (i == 0) {
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
+                                     which_mv, 0, 16, 1,
+                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
+                                     which_mv, 0, 1, 8,
+                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
+      } else {
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + 8 * 16,
+                                     8, 16, which_mv, 0, 1, 8,
+                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
+      }
+    }
+  } else {
+    for (i = 0; i < 16; i += 2) {
+      BLOCKD *d0 = &blockd[i];
+      BLOCKD *d1 = &blockd[i + 1];
+      const int x = (i & 3) * 4;
+      const int y = (i >> 2) * 4;
+
+      blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+      blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
+
+      if (i >= 4 && (i & 3) != 0) continue;
+
+      if (i == 0) {
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
+                                     which_mv, 0, 8, 1, &xd->subpix,
+                                     mb_row * 16 + y, mb_col * 16 + x);
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
+                                     which_mv, 0, 1, 4, &xd->subpix,
+                                     mb_row * 16 + y, mb_col * 16 + x);
+      } else if (i < 4) {
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + x, 4, 16,
+                                     which_mv, 0, 8, 1, &xd->subpix,
+                                     mb_row * 16 + y, mb_col * 16 + x);
+      } else {
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + y * 16,
+                                     4, 16, which_mv, 0, 1, 4, &xd->subpix,
+                                     mb_row * 16 + y, mb_col * 16 + x);
+      }
+    }
+  }
+  metric_2 = get_consistency_metric(xd, tmp_y, tmp_ystride);
+
+  which_mv = 0;  // first predictor
+  if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {
+    for (i = 0; i < 16; i += 8) {
+      BLOCKD *d0 = &blockd[i];
+      BLOCKD *d1 = &blockd[i + 2];
+      const int y = i & 8;
+
+      blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+      blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2];
+
+      if (mbmi->need_to_clamp_mvs) {
+        clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd);
+        clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
+      }
+      if (i == 0) {
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
+                                     which_mv, 0, 16, 1,
+                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
+                                     which_mv, 0, 1, 8,
+                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
+      } else {
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + 8 * 16,
+                                     8, 16, which_mv, 0, 1, 8,
+                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
+      }
+    }
+  } else {
+    for (i = 0; i < 16; i += 2) {
+      BLOCKD *d0 = &blockd[i];
+      BLOCKD *d1 = &blockd[i + 1];
+      const int x = (i & 3) * 4;
+      const int y = (i >> 2) * 4;
+
+      blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+      blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
+
+      if (i >= 4 && (i & 3) != 0) continue;
+
+      if (i == 0) {
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
+                                     which_mv, 0, 8, 1, &xd->subpix,
+                                     mb_row * 16 + y, mb_col * 16 + x);
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
+                                     which_mv, 0, 1, 4, &xd->subpix,
+                                     mb_row * 16 + y, mb_col * 16 + x);
+      } else if (i < 4) {
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + x, 4, 16,
+                                     which_mv, 0, 8, 1, &xd->subpix,
+                                     mb_row * 16 + y, mb_col * 16 + x);
+      } else {
+        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + y * 16,
+                                     4, 16, which_mv, 0, 1, 4, &xd->subpix,
+                                     mb_row * 16 + y, mb_col * 16 + x);
+      }
+    }
+  }
+  metric_1 = get_consistency_metric(xd, tmp_y, tmp_ystride);
+
+  // Choose final weight for averaging
+  weight = get_weight(xd, metric_1, metric_2);
+  return weight;
+}
+#endif
+
+static int get_implicit_compoundinter_weight(MACROBLOCKD *xd,
+                                             int mb_row,
+                                             int mb_col) {
+  const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+  int64_t metric_2 = 0, metric_1 = 0;
+  int n, clamp_mvs, pre_stride;
+  uint8_t *base_pre;
+  int_mv ymv;
+  uint8_t tmp_y[4096];
+  const int tmp_ystride = 64;
+  int weight;
+  int edge[4];
+  int block_size = 16 <<  xd->mode_info_context->mbmi.sb_type;
+
+  if (!use_second_ref) return 0;
+  if (!(xd->up_available || xd->left_available))
+    return AVERAGE_WEIGHT;
+
+  edge[0] = xd->mb_to_top_edge;
+  edge[1] = xd->mb_to_bottom_edge;
+  edge[2] = xd->mb_to_left_edge;
+  edge[3] = xd->mb_to_right_edge;
+
+  clamp_mvs = xd->mode_info_context->mbmi.need_to_clamp_secondmv;
+  base_pre = xd->second_pre.y_buffer;
+  pre_stride = xd->second_pre.y_stride;
+  ymv.as_int = xd->mode_info_context->mbmi.mv[1].as_int;
+  // First generate the second predictor
+  for (n = 0; n < block_size; n += 16) {
+    xd->mb_to_left_edge   = edge[2] - (n << 3);
+    xd->mb_to_right_edge  = edge[3] + ((16 - n) << 3);
+    if (clamp_mvs)
+      clamp_mv_to_umv_border(&ymv.as_mv, xd);
+    set_scaled_offsets(&xd->scale_factor[1], mb_row * 16, mb_col * 16 + n);
+    // predict a single row of pixels
+    vp9_build_inter_predictor(
+        base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[1]),
+        pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[1],
+        16, 1, 0, &xd->subpix);
+  }
+  xd->mb_to_left_edge = edge[2];
+  xd->mb_to_right_edge = edge[3];
+  for (n = 0; n < block_size; n += 16) {
+    xd->mb_to_top_edge    = edge[0] - (n << 3);
+    xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3);
+    if (clamp_mvs)
+      clamp_mv_to_umv_border(&ymv.as_mv, xd);
+    set_scaled_offsets(&xd->scale_factor[1], mb_row * 16 + n, mb_col * 16);
+    // predict a single col of pixels
+    vp9_build_inter_predictor(
+        base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[1]),
+        pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv,
+        &xd->scale_factor[1], 1, 16, 0, &xd->subpix);
+  }
+  xd->mb_to_top_edge = edge[0];
+  xd->mb_to_bottom_edge = edge[1];
+  // Compute consistency metric
+  metric_2 = get_consistency_metric(xd, tmp_y, tmp_ystride);
+
+  clamp_mvs = xd->mode_info_context->mbmi.need_to_clamp_mvs;
+  base_pre = xd->pre.y_buffer;
+  pre_stride = xd->pre.y_stride;
+  ymv.as_int = xd->mode_info_context->mbmi.mv[0].as_int;
+  // Now generate the first predictor
+  for (n = 0; n < block_size; n += 16) {
+    xd->mb_to_left_edge   = edge[2] - (n << 3);
+    xd->mb_to_right_edge  = edge[3] + ((16 - n) << 3);
+    if (clamp_mvs)
+      clamp_mv_to_umv_border(&ymv.as_mv, xd);
+    set_scaled_offsets(&xd->scale_factor[0], mb_row * 16, mb_col * 16 + n);
+    // predict a single row of pixels
+    vp9_build_inter_predictor(
+        base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[0]),
+        pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[0],
+        16, 1, 0, &xd->subpix);
+  }
+  xd->mb_to_left_edge = edge[2];
+  xd->mb_to_right_edge = edge[3];
+  for (n = 0; n < block_size; n += 16) {
+    xd->mb_to_top_edge    = edge[0] - (n << 3);
+    xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3);
+    if (clamp_mvs)
+      clamp_mv_to_umv_border(&ymv.as_mv, xd);
+    set_scaled_offsets(&xd->scale_factor[0], mb_row * 16 + n, mb_col * 16);
+    // predict a single col of pixels
+    vp9_build_inter_predictor(
+        base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[0]),
+        pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv,
+        &xd->scale_factor[0], 1, 16, 0, &xd->subpix);
+  }
+  xd->mb_to_top_edge = edge[0];
+  xd->mb_to_bottom_edge = edge[1];
+  metric_1 = get_consistency_metric(xd, tmp_y, tmp_ystride);
+
+  // Choose final weight for averaging
+  weight = get_weight(xd, metric_1, metric_2);
+  return weight;
+}
+
+static void build_inter16x16_predictors_mby_w(MACROBLOCKD *xd,
+                                              uint8_t *dst_y,
+                                              int dst_ystride,
+                                              int weight,
+                                              int mb_row,
+                                              int mb_col) {
   const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
   int which_mv;
 
   for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
     const int clamp_mvs = which_mv ?
-         xd->mode_info_context->mbmi.need_to_clamp_secondmv :
+        xd->mode_info_context->mbmi.need_to_clamp_secondmv :
          xd->mode_info_context->mbmi.need_to_clamp_mvs;
 
     uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer;
@@ -515,19 +900,65 @@ void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
     vp9_build_inter_predictor(base_pre, pre_stride,
                               dst_y, dst_ystride,
                               &ymv, &xd->scale_factor[which_mv],
-                              16, 16, which_mv, &xd->subpix);
+                              16, 16, which_mv ? weight : 0, &xd->subpix);
   }
 }
 
-void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
-                                          uint8_t *dst_u,
-                                          uint8_t *dst_v,
-                                          int dst_uvstride,
-                                          int mb_row,
-                                          int mb_col) {
-  const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
-  int which_mv;
-
+void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
+                                         uint8_t *dst_y,
+                                         int dst_ystride,
+                                         int mb_row,
+                                         int mb_col) {
+  int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col);
+
+  build_inter16x16_predictors_mby_w(xd, dst_y, dst_ystride, weight,
+                                    mb_row, mb_col);
+}
+
+#else
+
+void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
+                                         uint8_t *dst_y,
+                                         int dst_ystride,
+                                         int mb_row,
+                                         int mb_col) {
+  const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+  int which_mv;
+
+  for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+    const int clamp_mvs = which_mv ?
+         xd->mode_info_context->mbmi.need_to_clamp_secondmv :
+         xd->mode_info_context->mbmi.need_to_clamp_mvs;
+
+    uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer;
+    int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride;
+    int_mv ymv;
+    ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
+
+    if (clamp_mvs)
+      clamp_mv_to_umv_border(&ymv.as_mv, xd);
+
+    set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16);
+
+    vp9_build_inter_predictor(base_pre, pre_stride,
+                              dst_y, dst_ystride,
+                              &ymv, &xd->scale_factor[which_mv],
+                              16, 16, which_mv, &xd->subpix);
+  }
+}
+#endif
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd,
+                                               uint8_t *dst_u,
+                                               uint8_t *dst_v,
+                                               int dst_uvstride,
+                                               int weight,
+                                               int mb_row,
+                                               int mb_col) {
+  const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+  int which_mv;
+
   for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
     const int clamp_mvs =
         which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv
@@ -567,19 +998,347 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
     set_scaled_offsets(&xd->scale_factor_uv[which_mv],
                        mb_row * 16, mb_col * 16);
 
-    vp9_build_inter_predictor_q4(uptr, pre_stride,
-                                 dst_u, dst_uvstride,
-                                 &_16x16mv, &_o16x16mv,
-                                 &xd->scale_factor_uv[which_mv],
-                                 8, 8, which_mv, &xd->subpix);
+    vp9_build_inter_predictor_q4(
+        uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv,
+        &xd->scale_factor_uv[which_mv], 8, 8,
+        which_mv ? weight : 0, &xd->subpix);
+
+    vp9_build_inter_predictor_q4(
+        vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv,
+        &xd->scale_factor_uv[which_mv], 8, 8,
+        which_mv ? weight : 0, &xd->subpix);
+  }
+}
+
+void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
+                                          uint8_t *dst_u,
+                                          uint8_t *dst_v,
+                                          int dst_uvstride,
+                                          int mb_row,
+                                          int mb_col) {
+#ifdef USE_IMPLICIT_WEIGHT_UV
+  int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col);
+#else
+  int weight = AVERAGE_WEIGHT;
+#endif
+  build_inter16x16_predictors_mbuv_w(xd, dst_u, dst_v, dst_uvstride,
+                                     weight, mb_row, mb_col);
+}
+
+#else
+
+void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
+                                          uint8_t *dst_u,
+                                          uint8_t *dst_v,
+                                          int dst_uvstride,
+                                          int mb_row,
+                                          int mb_col) {
+  const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+  int which_mv;
+
+  for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+    const int clamp_mvs =
+        which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv
+                 : xd->mode_info_context->mbmi.need_to_clamp_mvs;
+    uint8_t *uptr, *vptr;
+    int pre_stride = which_mv ? xd->second_pre.uv_stride
+                              : xd->pre.uv_stride;
+    int_mv _o16x16mv;
+    int_mv _16x16mv;
+
+    _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
+
+    if (clamp_mvs)
+      clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
+
+    _o16x16mv = _16x16mv;
+    /* calc uv motion vectors */
+    if (_16x16mv.as_mv.row < 0)
+      _16x16mv.as_mv.row -= 1;
+    else
+      _16x16mv.as_mv.row += 1;
+
+    if (_16x16mv.as_mv.col < 0)
+      _16x16mv.as_mv.col -= 1;
+    else
+      _16x16mv.as_mv.col += 1;
+
+    _16x16mv.as_mv.row /= 2;
+    _16x16mv.as_mv.col /= 2;
+
+    _16x16mv.as_mv.row &= xd->fullpixel_mask;
+    _16x16mv.as_mv.col &= xd->fullpixel_mask;
+
+    uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
+    vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);
+
+    set_scaled_offsets(&xd->scale_factor_uv[which_mv],
+                       mb_row * 16, mb_col * 16);
+
+    vp9_build_inter_predictor_q4(
+        uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv,
+        &xd->scale_factor_uv[which_mv], 8, 8,
+        which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
+
+    vp9_build_inter_predictor_q4(
+        vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv,
+        &xd->scale_factor_uv[which_mv], 8, 8,
+        which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
+  }
+}
+#endif
+
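
The chroma motion vectors above halve the luma vector with rounding away
from zero: the +/-1 bias is applied before the divide because C integer
division truncates toward zero. A worked sketch (illustration only):

    static int16_t half_round_away(int16_t v) {
      return (v < 0 ? v - 1 : v + 1) / 2;  /* +5 -> 3, -5 -> -3 */
    }
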
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void build_inter32x32_predictors_sby_w(MACROBLOCKD *x,
+                                              uint8_t *dst_y,
+                                              int dst_ystride,
+                                              int weight,
+                                              int mb_row,
+                                              int mb_col) {
+  uint8_t *y1 = x->pre.y_buffer;
+  uint8_t *y2 = x->second_pre.y_buffer;
+  int edge[4], n;
+
+  edge[0] = x->mb_to_top_edge;
+  edge[1] = x->mb_to_bottom_edge;
+  edge[2] = x->mb_to_left_edge;
+  edge[3] = x->mb_to_right_edge;
+
+  for (n = 0; n < 4; n++) {
+    const int x_idx = n & 1, y_idx = n >> 1;
+
+    x->mb_to_top_edge    = edge[0] -      ((y_idx  * 16) << 3);
+    x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
+    x->mb_to_left_edge   = edge[2] -      ((x_idx  * 16) << 3);
+    x->mb_to_right_edge  = edge[3] + (((1 - x_idx) * 16) << 3);
+
+    x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16,
+                                                y_idx * 16,
+                                                x->pre.y_stride,
+                                                &x->scale_factor[0]);
+    if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+      x->second_pre.y_buffer = y2 +
+          scaled_buffer_offset(x_idx * 16,
+                               y_idx * 16,
+                               x->second_pre.y_stride,
+                               &x->scale_factor[1]);
+    }
+    build_inter16x16_predictors_mby_w(x,
+        dst_y + y_idx * 16 * dst_ystride  + x_idx * 16,
+        dst_ystride, weight, mb_row + y_idx, mb_col + x_idx);
+  }
+  x->mb_to_top_edge    = edge[0];
+  x->mb_to_bottom_edge = edge[1];
+  x->mb_to_left_edge   = edge[2];
+  x->mb_to_right_edge  = edge[3];
+
+  x->pre.y_buffer = y1;
+  if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+    x->second_pre.y_buffer = y2;
+  }
+}
+
+void vp9_build_inter32x32_predictors_sby(MACROBLOCKD *x,
+                                         uint8_t *dst_y,
+                                         int dst_ystride,
+                                         int mb_row,
+                                         int mb_col) {
+  int weight = get_implicit_compoundinter_weight(x, mb_row, mb_col);
+  build_inter32x32_predictors_sby_w(x, dst_y, dst_ystride, weight,
+                                    mb_row, mb_col);
+}
+
+#else
+
+// TODO(all): Can we use 32x32-specific implementations of this rather than
+// using 16x16 implementations?
+void vp9_build_inter32x32_predictors_sby(MACROBLOCKD *x,
+                                         uint8_t *dst_y,
+                                         int dst_ystride,
+                                         int mb_row,
+                                         int mb_col) {
+  uint8_t *y1 = x->pre.y_buffer;
+  uint8_t *y2 = x->second_pre.y_buffer;
+  int edge[4], n;
+
+  edge[0] = x->mb_to_top_edge;
+  edge[1] = x->mb_to_bottom_edge;
+  edge[2] = x->mb_to_left_edge;
+  edge[3] = x->mb_to_right_edge;
+
+  for (n = 0; n < 4; n++) {
+    const int x_idx = n & 1, y_idx = n >> 1;
+
+    x->mb_to_top_edge    = edge[0] -      ((y_idx  * 16) << 3);
+    x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
+    x->mb_to_left_edge   = edge[2] -      ((x_idx  * 16) << 3);
+    x->mb_to_right_edge  = edge[3] + (((1 - x_idx) * 16) << 3);
+
+    x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16,
+                                                y_idx * 16,
+                                                x->pre.y_stride,
+                                                &x->scale_factor[0]);
+    if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+      x->second_pre.y_buffer = y2 +
+          scaled_buffer_offset(x_idx * 16,
+                               y_idx * 16,
+                               x->second_pre.y_stride,
+                               &x->scale_factor[1]);
+    }
+    vp9_build_inter16x16_predictors_mby(x,
+        dst_y + y_idx * 16 * dst_ystride  + x_idx * 16,
+        dst_ystride, mb_row + y_idx, mb_col + x_idx);
+  }
+  x->mb_to_top_edge    = edge[0];
+  x->mb_to_bottom_edge = edge[1];
+  x->mb_to_left_edge   = edge[2];
+  x->mb_to_right_edge  = edge[3];
+
+  x->pre.y_buffer = y1;
+  if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+    x->second_pre.y_buffer = y2;
+  }
+}
+
+#endif
+
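
Both 32x32 builders adjust the mb_to_*_edge clamp bounds in the 1/8-pel
units the motion vectors use, so a 16-pixel quadrant step becomes
(16 << 3) == 128. A sketch of the per-quadrant bounds (hypothetical helper
restating the edge[] arithmetic above):

    static void quadrant_clamp_bounds(const int edge[4],
                                      int x_idx, int y_idx, int out[4]) {
      out[0] = edge[0] - ((y_idx * 16) << 3);        /* top    */
      out[1] = edge[1] + (((1 - y_idx) * 16) << 3);  /* bottom */
      out[2] = edge[2] - ((x_idx * 16) << 3);        /* left   */
      out[3] = edge[3] + (((1 - x_idx) * 16) << 3);  /* right  */
    }
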
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void build_inter32x32_predictors_sbuv_w(MACROBLOCKD *x,
+                                               uint8_t *dst_u,
+                                               uint8_t *dst_v,
+                                               int dst_uvstride,
+                                               int weight,
+                                               int mb_row,
+                                               int mb_col) {
+  uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
+  uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer;
+  int edge[4], n;
+
+  edge[0] = x->mb_to_top_edge;
+  edge[1] = x->mb_to_bottom_edge;
+  edge[2] = x->mb_to_left_edge;
+  edge[3] = x->mb_to_right_edge;
+
+  for (n = 0; n < 4; n++) {
+    int scaled_uv_offset;
+    const int x_idx = n & 1, y_idx = n >> 1;
+
+    x->mb_to_top_edge    = edge[0] -      ((y_idx  * 16) << 3);
+    x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
+    x->mb_to_left_edge   = edge[2] -      ((x_idx  * 16) << 3);
+    x->mb_to_right_edge  = edge[3] + (((1 - x_idx) * 16) << 3);
+
+    scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+                                            y_idx * 8,
+                                            x->pre.uv_stride,
+                                            &x->scale_factor_uv[0]);
+    x->pre.u_buffer = u1 + scaled_uv_offset;
+    x->pre.v_buffer = v1 + scaled_uv_offset;
+
+    if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+      scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+                                              y_idx * 8,
+                                              x->second_pre.uv_stride,
+                                              &x->scale_factor_uv[1]);
+      x->second_pre.u_buffer = u2 + scaled_uv_offset;
+      x->second_pre.v_buffer = v2 + scaled_uv_offset;
+    }
+
+    build_inter16x16_predictors_mbuv_w(x,
+        dst_u + y_idx *  8 * dst_uvstride + x_idx *  8,
+        dst_v + y_idx *  8 * dst_uvstride + x_idx *  8,
+        dst_uvstride, weight, mb_row + y_idx, mb_col + x_idx);
+  }
+  x->mb_to_top_edge    = edge[0];
+  x->mb_to_bottom_edge = edge[1];
+  x->mb_to_left_edge   = edge[2];
+  x->mb_to_right_edge  = edge[3];
+
+  x->pre.u_buffer = u1;
+  x->pre.v_buffer = v1;
+
+  if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+    x->second_pre.u_buffer = u2;
+    x->second_pre.v_buffer = v2;
+  }
+}
+
+void vp9_build_inter32x32_predictors_sbuv(MACROBLOCKD *xd,
+                                          uint8_t *dst_u,
+                                          uint8_t *dst_v,
+                                          int dst_uvstride,
+                                          int mb_row,
+                                          int mb_col) {
+#ifdef USE_IMPLICIT_WEIGHT_UV
+  int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col);
+#else
+  int weight = AVERAGE_WEIGHT;
+#endif
+  build_inter32x32_predictors_sbuv_w(xd, dst_u, dst_v, dst_uvstride,
+                                     weight, mb_row, mb_col);
+}
+
+#else
+
+void vp9_build_inter32x32_predictors_sbuv(MACROBLOCKD *x,
+                                          uint8_t *dst_u,
+                                          uint8_t *dst_v,
+                                          int dst_uvstride,
+                                          int mb_row,
+                                          int mb_col) {
+  uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
+  uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer;
+  int edge[4], n;
+
+  edge[0] = x->mb_to_top_edge;
+  edge[1] = x->mb_to_bottom_edge;
+  edge[2] = x->mb_to_left_edge;
+  edge[3] = x->mb_to_right_edge;
+
+  for (n = 0; n < 4; n++) {
+    int scaled_uv_offset;
+    const int x_idx = n & 1, y_idx = n >> 1;
+
+    x->mb_to_top_edge    = edge[0] -      ((y_idx  * 16) << 3);
+    x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
+    x->mb_to_left_edge   = edge[2] -      ((x_idx  * 16) << 3);
+    x->mb_to_right_edge  = edge[3] + (((1 - x_idx) * 16) << 3);
+
+    scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+                                            y_idx * 8,
+                                            x->pre.uv_stride,
+                                            &x->scale_factor_uv[0]);
+    x->pre.u_buffer = u1 + scaled_uv_offset;
+    x->pre.v_buffer = v1 + scaled_uv_offset;
+
+    if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+      scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+                                              y_idx * 8,
+                                              x->second_pre.uv_stride,
+                                              &x->scale_factor_uv[1]);
+      x->second_pre.u_buffer = u2 + scaled_uv_offset;
+      x->second_pre.v_buffer = v2 + scaled_uv_offset;
+    }
+
+    vp9_build_inter16x16_predictors_mbuv(x,
+        dst_u + y_idx *  8 * dst_uvstride + x_idx *  8,
+        dst_v + y_idx *  8 * dst_uvstride + x_idx *  8,
+        dst_uvstride, mb_row + y_idx, mb_col + x_idx);
+  }
+  x->mb_to_top_edge    = edge[0];
+  x->mb_to_bottom_edge = edge[1];
+  x->mb_to_left_edge   = edge[2];
+  x->mb_to_right_edge  = edge[3];
+
+  x->pre.u_buffer = u1;
+  x->pre.v_buffer = v1;
 
-    vp9_build_inter_predictor_q4(vptr, pre_stride,
-                                 dst_v, dst_uvstride,
-                                 &_16x16mv, &_o16x16mv,
-                                 &xd->scale_factor_uv[which_mv],
-                                 8, 8, which_mv, &xd->subpix);
+  if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+    x->second_pre.u_buffer = u2;
+    x->second_pre.v_buffer = v2;
   }
 }
+#endif
 
 void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
                                         uint8_t *dst_y,
@@ -589,9 +1348,27 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
                                         int dst_uvstride,
                                         int mb_row,
                                         int mb_col) {
-  uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
-  uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer,
-          *v2 = x->second_pre.v_buffer;
+  vp9_build_inter32x32_predictors_sby(x, dst_y, dst_ystride,
+                                      mb_row, mb_col);
+  vp9_build_inter32x32_predictors_sbuv(x, dst_u, dst_v, dst_uvstride,
+                                      mb_row, mb_col);
+#if CONFIG_COMP_INTERINTRA_PRED
+  if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
+    vp9_build_interintra_32x32_predictors_sb(
+        x, dst_y, dst_u, dst_v, dst_ystride, dst_uvstride);
+  }
+#endif
+}
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void build_inter64x64_predictors_sby_w(MACROBLOCKD *x,
+                                              uint8_t *dst_y,
+                                              int dst_ystride,
+                                              int weight,
+                                              int mb_row,
+                                              int mb_col) {
+  uint8_t *y1 = x->pre.y_buffer;
+  uint8_t *y2 = x->second_pre.y_buffer;
   int edge[4], n;
 
   edge[0] = x->mb_to_top_edge;
@@ -601,43 +1378,28 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
 
   for (n = 0; n < 4; n++) {
     const int x_idx = n & 1, y_idx = n >> 1;
-    int scaled_uv_offset;
 
-    x->mb_to_top_edge    = edge[0] -      ((y_idx  * 16) << 3);
-    x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
-    x->mb_to_left_edge   = edge[2] -      ((x_idx  * 16) << 3);
-    x->mb_to_right_edge  = edge[3] + (((1 - x_idx) * 16) << 3);
+    x->mb_to_top_edge    = edge[0] -      ((y_idx  * 32) << 3);
+    x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3);
+    x->mb_to_left_edge   = edge[2] -      ((x_idx  * 32) << 3);
+    x->mb_to_right_edge  = edge[3] + (((1 - x_idx) * 32) << 3);
 
-    x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16,
-                                                y_idx * 16,
+    x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 32,
+                                                y_idx * 32,
                                                 x->pre.y_stride,
                                                 &x->scale_factor[0]);
-    scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
-                                            y_idx * 8,
-                                            x->pre.uv_stride,
-                                            &x->scale_factor_uv[0]);
-    x->pre.u_buffer = u1 + scaled_uv_offset;
-    x->pre.v_buffer = v1 + scaled_uv_offset;
 
     if (x->mode_info_context->mbmi.second_ref_frame > 0) {
       x->second_pre.y_buffer = y2 +
-          scaled_buffer_offset(x_idx * 16,
-                               y_idx * 16,
+          scaled_buffer_offset(x_idx * 32,
+                               y_idx * 32,
                                x->second_pre.y_stride,
                                &x->scale_factor[1]);
-      scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
-                                              y_idx * 8,
-                                              x->second_pre.uv_stride,
-                                              &x->scale_factor_uv[1]);
-      x->second_pre.u_buffer = u2 + scaled_uv_offset;
-      x->second_pre.v_buffer = v2 + scaled_uv_offset;
     }
 
-    vp9_build_inter16x16_predictors_mb(x,
-        dst_y + y_idx * 16 * dst_ystride  + x_idx * 16,
-        dst_u + y_idx *  8 * dst_uvstride + x_idx *  8,
-        dst_v + y_idx *  8 * dst_uvstride + x_idx *  8,
-        dst_ystride, dst_uvstride, mb_row + y_idx, mb_col + x_idx);
+    build_inter32x32_predictors_sby_w(x,
+        dst_y + y_idx * 32 * dst_ystride  + x_idx * 32,
+        dst_ystride, weight, mb_row + y_idx * 2, mb_col + x_idx * 2);
   }
 
   x->mb_to_top_edge    = edge[0];
@@ -646,34 +1408,31 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
   x->mb_to_right_edge  = edge[3];
 
   x->pre.y_buffer = y1;
-  x->pre.u_buffer = u1;
-  x->pre.v_buffer = v1;
 
   if (x->mode_info_context->mbmi.second_ref_frame > 0) {
     x->second_pre.y_buffer = y2;
-    x->second_pre.u_buffer = u2;
-    x->second_pre.v_buffer = v2;
   }
+}
 
-#if CONFIG_COMP_INTERINTRA_PRED
-  if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
-    vp9_build_interintra_32x32_predictors_sb(
-        x, dst_y, dst_u, dst_v, dst_ystride, dst_uvstride);
-  }
-#endif
+void vp9_build_inter64x64_predictors_sby(MACROBLOCKD *x,
+                                         uint8_t *dst_y,
+                                         int dst_ystride,
+                                         int mb_row,
+                                         int mb_col) {
+  int weight = get_implicit_compoundinter_weight(x, mb_row, mb_col);
+  build_inter64x64_predictors_sby_w(x, dst_y, dst_ystride, weight,
+                                    mb_row, mb_col);
 }
 
-void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
-                                        uint8_t *dst_y,
-                                        uint8_t *dst_u,
-                                        uint8_t *dst_v,
-                                        int dst_ystride,
-                                        int dst_uvstride,
-                                        int mb_row,
-                                        int mb_col) {
-  uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
-  uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer,
-          *v2 = x->second_pre.v_buffer;
+#else
+
+void vp9_build_inter64x64_predictors_sby(MACROBLOCKD *x,
+                                         uint8_t *dst_y,
+                                         int dst_ystride,
+                                         int mb_row,
+                                         int mb_col) {
+  uint8_t *y1 = x->pre.y_buffer;
+  uint8_t *y2 = x->second_pre.y_buffer;
   int edge[4], n;
 
   edge[0] = x->mb_to_top_edge;
@@ -683,7 +1442,6 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
 
   for (n = 0; n < 4; n++) {
     const int x_idx = n & 1, y_idx = n >> 1;
-    int scaled_uv_offset;
 
     x->mb_to_top_edge    = edge[0] -      ((y_idx  * 32) << 3);
     x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3);
@@ -694,6 +1452,57 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
                                                 y_idx * 32,
                                                 x->pre.y_stride,
                                                 &x->scale_factor[0]);
+
+    if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+      x->second_pre.y_buffer = y2 +
+          scaled_buffer_offset(x_idx * 32,
+                               y_idx * 32,
+                               x->second_pre.y_stride,
+                               &x->scale_factor[1]);
+    }
+
+    vp9_build_inter32x32_predictors_sby(x,
+        dst_y + y_idx * 32 * dst_ystride  + x_idx * 32,
+        dst_ystride, mb_row + y_idx * 2, mb_col + x_idx * 2);
+  }
+
+  x->mb_to_top_edge    = edge[0];
+  x->mb_to_bottom_edge = edge[1];
+  x->mb_to_left_edge   = edge[2];
+  x->mb_to_right_edge  = edge[3];
+
+  x->pre.y_buffer = y1;
+
+  if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+    x->second_pre.y_buffer = y2;
+  }
+}
+#endif
+
+void vp9_build_inter64x64_predictors_sbuv(MACROBLOCKD *x,
+                                          uint8_t *dst_u,
+                                          uint8_t *dst_v,
+                                          int dst_uvstride,
+                                          int mb_row,
+                                          int mb_col) {
+  uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
+  uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer;
+  int edge[4], n;
+
+  edge[0] = x->mb_to_top_edge;
+  edge[1] = x->mb_to_bottom_edge;
+  edge[2] = x->mb_to_left_edge;
+  edge[3] = x->mb_to_right_edge;
+
+  for (n = 0; n < 4; n++) {
+    const int x_idx = n & 1, y_idx = n >> 1;
+    int scaled_uv_offset;
+
+    x->mb_to_top_edge    = edge[0] -      ((y_idx  * 32) << 3);
+    x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3);
+    x->mb_to_left_edge   = edge[2] -      ((x_idx  * 32) << 3);
+    x->mb_to_right_edge  = edge[3] + (((1 - x_idx) * 32) << 3);
+
     scaled_uv_offset = scaled_buffer_offset(x_idx * 16,
                                             y_idx * 16,
                                             x->pre.uv_stride,
@@ -702,11 +1511,6 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
     x->pre.v_buffer = v1 + scaled_uv_offset;
 
     if (x->mode_info_context->mbmi.second_ref_frame > 0) {
-      x->second_pre.y_buffer = y2 +
-          scaled_buffer_offset(x_idx * 32,
-                               y_idx * 32,
-                               x->second_pre.y_stride,
-                               &x->scale_factor[1]);
       scaled_uv_offset = scaled_buffer_offset(x_idx * 16,
                                               y_idx * 16,
                                               x->second_pre.uv_stride,
@@ -715,11 +1519,10 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
       x->second_pre.v_buffer = v2 + scaled_uv_offset;
     }
 
-    vp9_build_inter32x32_predictors_sb(x,
-        dst_y + y_idx * 32 * dst_ystride  + x_idx * 32,
+    vp9_build_inter32x32_predictors_sbuv(x,
         dst_u + y_idx * 16 * dst_uvstride + x_idx * 16,
         dst_v + y_idx * 16 * dst_uvstride + x_idx * 16,
-        dst_ystride, dst_uvstride, mb_row + y_idx * 2, mb_col + x_idx * 2);
+        dst_uvstride, mb_row + y_idx * 2, mb_col + x_idx * 2);
   }
 
   x->mb_to_top_edge    = edge[0];
@@ -727,16 +1530,27 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
   x->mb_to_left_edge   = edge[2];
   x->mb_to_right_edge  = edge[3];
 
-  x->pre.y_buffer = y1;
   x->pre.u_buffer = u1;
   x->pre.v_buffer = v1;
 
   if (x->mode_info_context->mbmi.second_ref_frame > 0) {
-    x->second_pre.y_buffer = y2;
     x->second_pre.u_buffer = u2;
     x->second_pre.v_buffer = v2;
   }
+}
 
+void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
+                                        uint8_t *dst_y,
+                                        uint8_t *dst_u,
+                                        uint8_t *dst_v,
+                                        int dst_ystride,
+                                        int dst_uvstride,
+                                        int mb_row,
+                                        int mb_col) {
+  vp9_build_inter64x64_predictors_sby(x, dst_y, dst_ystride,
+                                      mb_row, mb_col);
+  vp9_build_inter64x64_predictors_sbuv(x, dst_u, dst_v, dst_uvstride,
+                                       mb_row, mb_col);
 #if CONFIG_COMP_INTERINTRA_PRED
   if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
     vp9_build_interintra_64x64_predictors_sb(x, dst_y, dst_u, dst_v,
@@ -752,6 +1566,11 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
   BLOCKD *blockd = xd->block;
   int which_mv = 0;
   const int use_second_ref = mbmi->second_ref_frame > 0;
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && defined(USE_IMPLICIT_WEIGHT_SPLITMV)
+  int weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col);
+#else
+  int weight = AVERAGE_WEIGHT;
+#endif
 
   if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {
     for (i = 0; i < 16; i += 8) {
@@ -768,9 +1587,9 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
           clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
         }
 
-        build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16,
-                                  which_mv, &xd->subpix,
-                                  mb_row * 16 + y, mb_col * 16);
+        build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, which_mv,
+                                  which_mv ? weight : 0,
+                                  &xd->subpix, mb_row * 16 + y, mb_col * 16);
       }
     }
   } else {
@@ -784,13 +1603,18 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
       blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
 
       for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
-        build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16,
-                                  which_mv, &xd->subpix,
+        build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, which_mv,
+                                  which_mv ? weight : 0,
+                                  &xd->subpix,
                                   mb_row * 16 + y, mb_col * 16 + x);
       }
     }
   }
-
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+#if !defined(USE_IMPLICIT_WEIGHT_UV)
+  weight = AVERAGE_WEIGHT;
+#endif
+#endif
   for (i = 16; i < 24; i += 2) {
     BLOCKD *d0 = &blockd[i];
     BLOCKD *d1 = &blockd[i + 1];
@@ -798,8 +1622,8 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
     const int y = ((i - 16) >> 1) * 4;
 
     for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
-      build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8,
-                                which_mv, &xd->subpix,
+      build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
+                                which_mv ? weight : 0, &xd->subpix,
                                 mb_row * 8 + y, mb_col * 8 + x);
     }
   }
@@ -876,9 +1700,14 @@ void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd,
   vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride, mb_row, mb_col);
   vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride,
                                        mb_row, mb_col);
+#if CONFIG_COMP_INTERINTRA_PRED
+  if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
+    vp9_build_interintra_16x16_predictors_mb(xd, dst_y, dst_u, dst_v,
+                                             dst_ystride, dst_uvstride);
+  }
+#endif
 }
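
With the inter-intra pass folded into the mb-level builder here, callers no
longer need separate mby/mbuv/interintra calls; the vp9_rdopt.c hunks below
collapse to a single call of the form:

    vp9_build_inter16x16_predictors_mb(xd, xd->predictor,
                                       xd->predictor + 256,
                                       xd->predictor + 320,
                                       16, 8, mb_row, mb_col);
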
 
-
 void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
                                    int mb_row,
                                    int mb_col) {
@@ -888,15 +1717,116 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
                                        &xd->predictor[320], 16, 8,
                                        mb_row, mb_col);
 
-#if CONFIG_COMP_INTERINTRA_PRED
-    if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
-      vp9_build_interintra_16x16_predictors_mb(xd, xd->predictor,
-                                               &xd->predictor[256],
-                                               &xd->predictor[320], 16, 8);
-    }
-#endif
   } else {
     build_4x4uvmvs(xd);
     build_inter4x4_predictors_mb(xd, mb_row, mb_col);
   }
 }
+
+/* encoder only */
+void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
+                                        int mb_row,
+                                        int mb_col) {
+  int i, j;
+  int weight;
+  BLOCKD *blockd = xd->block;
+
+  /* build uv mvs */
+  for (i = 0; i < 2; i++) {
+    for (j = 0; j < 2; j++) {
+      int yoffset = i * 8 + j * 2;
+      int uoffset = 16 + i * 2 + j;
+      int voffset = 20 + i * 2 + j;
+      int temp;
+
+      temp = blockd[yoffset  ].bmi.as_mv[0].as_mv.row
+             + blockd[yoffset + 1].bmi.as_mv[0].as_mv.row
+             + blockd[yoffset + 4].bmi.as_mv[0].as_mv.row
+             + blockd[yoffset + 5].bmi.as_mv[0].as_mv.row;
+
+      if (temp < 0)
+        temp -= 4;
+      else
+        temp += 4;
+
+      blockd[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) &
+        xd->fullpixel_mask;
+
+      temp = blockd[yoffset  ].bmi.as_mv[0].as_mv.col
+             + blockd[yoffset + 1].bmi.as_mv[0].as_mv.col
+             + blockd[yoffset + 4].bmi.as_mv[0].as_mv.col
+             + blockd[yoffset + 5].bmi.as_mv[0].as_mv.col;
+
+      if (temp < 0)
+        temp -= 4;
+      else
+        temp += 4;
+
+      blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) &
+        xd->fullpixel_mask;
+
+      blockd[voffset].bmi.as_mv[0].as_mv.row =
+        blockd[uoffset].bmi.as_mv[0].as_mv.row;
+      blockd[voffset].bmi.as_mv[0].as_mv.col =
+        blockd[uoffset].bmi.as_mv[0].as_mv.col;
+
+      if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+        temp = blockd[yoffset  ].bmi.as_mv[1].as_mv.row
+               + blockd[yoffset + 1].bmi.as_mv[1].as_mv.row
+               + blockd[yoffset + 4].bmi.as_mv[1].as_mv.row
+               + blockd[yoffset + 5].bmi.as_mv[1].as_mv.row;
+
+        if (temp < 0) {
+          temp -= 4;
+        } else {
+          temp += 4;
+        }
+
+        blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) &
+          xd->fullpixel_mask;
+
+        temp = blockd[yoffset  ].bmi.as_mv[1].as_mv.col
+               + blockd[yoffset + 1].bmi.as_mv[1].as_mv.col
+               + blockd[yoffset + 4].bmi.as_mv[1].as_mv.col
+               + blockd[yoffset + 5].bmi.as_mv[1].as_mv.col;
+
+        if (temp < 0) {
+          temp -= 4;
+        } else {
+          temp += 4;
+        }
+
+        blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) &
+          xd->fullpixel_mask;
+
+        blockd[voffset].bmi.as_mv[1].as_mv.row =
+          blockd[uoffset].bmi.as_mv[1].as_mv.row;
+        blockd[voffset].bmi.as_mv[1].as_mv.col =
+          blockd[uoffset].bmi.as_mv[1].as_mv.col;
+      }
+    }
+  }
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && \
+  defined(USE_IMPLICIT_WEIGHT_SPLITMV) && \
+  defined(USE_IMPLICIT_WEIGHT_UV)
+  weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col);
+#else
+  weight = AVERAGE_WEIGHT;
+#endif
+  for (i = 16; i < 24; i += 2) {
+    const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+    const int x = 4 * (i & 1);
+    const int y = ((i - 16) >> 1) * 4;
+
+    int which_mv;
+    BLOCKD *d0 = &blockd[i];
+    BLOCKD *d1 = &blockd[i + 1];
+
+    for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+      build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
+                                which_mv ? weight : 0,
+                                &xd->subpix, mb_row * 8 + y, mb_col * 8 + x);
+    }
+  }
+}
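
The chroma vector above is the sum of four luma vectors, biased by +/-4 and
divided by 8: the average of four vectors, then halved for the
half-resolution chroma plane, rounded away from zero. A self-contained
sketch with a worked value (the real code also applies xd->fullpixel_mask):

    static int uv_mv_component(int y0, int y1, int y2, int y3) {
      int sum = y0 + y1 + y2 + y3;  /* e.g. {3, 3, 4, 4} -> 14          */
      sum += (sum < 0) ? -4 : 4;    /* 14 + 4 == 18                     */
      return sum / 8;               /* 18 / 8 == 2: 3.5 average, halved */
    }
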
index c1153ed9a562e71729b3002f6a39d0997548a2cc..8de68505a0ff49420402ca0e247db1a828fbf89a 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -276,6 +276,62 @@ specialize vp9_convolve8_avg_horiz ssse3
 prototype void vp9_convolve8_avg_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
 specialize vp9_convolve8_avg_vert ssse3
 
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+prototype void vp9_convolve8_1by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_1by8
+
+prototype void vp9_convolve8_qtr "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_qtr
+
+prototype void vp9_convolve8_3by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3by8
+
+prototype void vp9_convolve8_5by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_5by8
+
+prototype void vp9_convolve8_3qtr "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3qtr
+
+prototype void vp9_convolve8_7by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_7by8
+
+prototype void vp9_convolve8_1by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_1by8_horiz
+
+prototype void vp9_convolve8_qtr_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_qtr_horiz
+
+prototype void vp9_convolve8_3by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3by8_horiz
+
+prototype void vp9_convolve8_5by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_5by8_horiz
+
+prototype void vp9_convolve8_3qtr_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3qtr_horiz
+
+prototype void vp9_convolve8_7by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_7by8_horiz
+
+prototype void vp9_convolve8_1by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_1by8_vert
+
+prototype void vp9_convolve8_qtr_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_qtr_vert
+
+prototype void vp9_convolve8_3by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3by8_vert
+
+prototype void vp9_convolve8_5by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_5by8_vert
+
+prototype void vp9_convolve8_3qtr_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3qtr_vert
+
+prototype void vp9_convolve8_7by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_7by8_vert
+#endif
+
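
These prototypes pair with the 8-deep weight dimension added to the
predict[2][2][8] convolve table. A sketch of how one horizontal-only slice
of that table could be wired, assuming the weight indices run (copy, 1/8,
1/4, 3/8, 1/2, 5/8, 3/4, 7/8); the actual wiring lives in the C sources and
is not shown in this hunk:

    static void wire_horiz_weights(struct scale_factors *sf) {
      sf->predict[1][0][0] = vp9_convolve8_horiz;
      sf->predict[1][0][1] = vp9_convolve8_1by8_horiz;
      sf->predict[1][0][2] = vp9_convolve8_qtr_horiz;
      sf->predict[1][0][3] = vp9_convolve8_3by8_horiz;
      sf->predict[1][0][4] = vp9_convolve8_avg_horiz;   /* (1/2, 1/2) */
      sf->predict[1][0][5] = vp9_convolve8_5by8_horiz;
      sf->predict[1][0][6] = vp9_convolve8_3qtr_horiz;
      sf->predict[1][0][7] = vp9_convolve8_7by8_horiz;
    }
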
 #
 # dct
 #
index ddfdaba4fb572640cab8d02a3e1cb4a6dec02c4b..9cb18143f5d9f80da11473c4c54fb7e7b3d6bdf7 100644 (file)
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -815,7 +815,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
         second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1];
 
         setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx],
-            mb_row, mb_col, &xd->scale_factor[1], &xd->scale_factor_uv[1]);
+             mb_row, mb_col, &xd->scale_factor[1], &xd->scale_factor_uv[1]);
 
         vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL,
                          mbmi->second_ref_frame,
index 802374048b6936e89704aa1bfdb53586ed7a24a8..246d4ac33d1bd7bc010d135d9e72c3c62b5dff88 100644 (file)
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -195,16 +195,6 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                          xd->dst.y_stride,
                                          xd->dst.uv_stride,
                                          mb_row, mb_col);
-#if CONFIG_COMP_INTERINTRA_PRED
-      if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
-        vp9_build_interintra_16x16_predictors_mb(xd,
-                                                 xd->dst.y_buffer,
-                                                 xd->dst.u_buffer,
-                                                 xd->dst.v_buffer,
-                                                 xd->dst.y_stride,
-                                                 xd->dst.uv_stride);
-      }
-#endif
     }
   }
 }
@@ -212,7 +202,7 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
 static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
                          BOOL_DECODER* const bc) {
   TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
-#ifdef DEC_DEBUG
+#if 0  // def DEC_DEBUG
   if (dec_debug) {
     int i;
     printf("\n");
@@ -250,7 +240,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
   // First do Y
   // if the first one is DCT_DCT assume all the rest are as well
   TX_TYPE tx_type = get_tx_type_8x8(xd, 0);
-#ifdef DEC_DEBUG
+#if 0  // def DEC_DEBUG
   if (dec_debug) {
     int i;
     printf("\n");
@@ -322,7 +312,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
          xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
          xd->dst.uv_stride, xd);
   }
-#ifdef DEC_DEBUG
+#if 0  // def DEC_DEBUG
   if (dec_debug) {
     int i;
     printf("\n");
@@ -340,6 +330,17 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
   TX_TYPE tx_type;
   int i, eobtotal = 0;
   MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
+#if 0  // def DEC_DEBUG
+  if (dec_debug) {
+    int i;
+    printf("\n");
+    printf("predictor\n");
+    for (i = 0; i < 384; i++) {
+      printf("%3d ", xd->predictor[i]);
+      if (i % 16 == 15) printf("\n");
+    }
+  }
+#endif
   if (mode == I8X8_PRED) {
     for (i = 0; i < 4; i++) {
       int ib = vp9_i8x8_block[i];
@@ -420,7 +421,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                            xd->dst.uv_stride,
                            xd);
   } else {
-#ifdef DEC_DEBUG
+#if 0  // def DEC_DEBUG
     if (dec_debug) {
       int i;
       printf("\n");
@@ -834,14 +835,14 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
     skip_recon_mb(pbi, xd, mb_row, mb_col);
     return;
   }
-#ifdef DEC_DEBUG
+#if 0  // def DEC_DEBUG
   if (dec_debug)
     printf("Decoding mb:  %d %d\n", xd->mode_info_context->mbmi.mode, tx_size);
 #endif
 
   // moved to be performed before detokenization
-//  if (xd->segmentation_enabled)
-//    mb_init_dequantizer(pbi, xd);
+  //  if (xd->segmentation_enabled)
+  //    mb_init_dequantizer(pbi, xd);
 
   /* do prediction */
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
@@ -852,7 +853,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
       }
     }
   } else {
-#ifdef DEC_DEBUG
+#if 0  // def DEC_DEBUG
   if (dec_debug)
     printf("Decoding mb:  %d %d interp %d\n",
            xd->mode_info_context->mbmi.mode, tx_size,
@@ -872,6 +873,13 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
   if (dec_debug) {
     int i, j;
     printf("\n");
+    printf("predictor y\n");
+    for (i = 0; i < 16; i++) {
+      for (j = 0; j < 16; j++)
+        printf("%3d ", xd->predictor[i * 16 + j]);
+      printf("\n");
+    }
+    printf("\n");
     printf("final y\n");
     for (i = 0; i < 16; i++) {
       for (j = 0; j < 16; j++)
@@ -994,9 +1002,10 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc,
        mb_col < pc->cur_tile_mb_col_end; mb_col += 4) {
     if (vp9_read(bc, pc->sb64_coded)) {
 #ifdef DEC_DEBUG
-      dec_debug = (pc->current_video_frame == 1 && mb_row == 0 && mb_col == 0);
+      dec_debug = (pc->current_video_frame == 11 && pc->show_frame &&
+                   mb_row == 8 && mb_col == 0);
       if (dec_debug)
-        printf("Debug\n");
+        printf("Debug Decode SB64\n");
 #endif
       set_offsets(pbi, 64, mb_row, mb_col);
       vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc);
@@ -1019,8 +1028,10 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc,
 
         if (vp9_read(bc, pc->sb32_coded)) {
 #ifdef DEC_DEBUG
-          dec_debug = (pc->current_video_frame == 1 &&
-                       mb_row + y_idx_sb == 0 && mb_col + x_idx_sb == 0);
+          dec_debug = (pc->current_video_frame == 11 && pc->show_frame &&
+                       mb_row + y_idx_sb == 8 && mb_col + x_idx_sb == 0);
+          if (dec_debug)
+            printf("Debug Decode SB32\n");
 #endif
           set_offsets(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb);
           vp9_decode_mb_mode_mv(pbi,
@@ -1043,8 +1054,10 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc,
               continue;
             }
 #ifdef DEC_DEBUG
-            dec_debug = (pc->current_video_frame == 1 &&
-                         mb_row + y_idx == 0 && mb_col + x_idx == 0);
+            dec_debug = (pc->current_video_frame == 11 && pc->show_frame &&
+                         mb_row + y_idx == 8 && mb_col + x_idx == 0);
+            if (dec_debug)
+              printf("Debug Decode MB\n");
 #endif
 
             set_offsets(pbi, 16, mb_row + y_idx, mb_col + x_idx);
index 5e17c552d2386e8415bea3f0b9ec8003d02bf612..e4d097d5139b62f15cfe42480b15f62fcf37c62c 100644 (file)
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -757,7 +757,7 @@ static int pick_mb_modes(VP9_COMP *cpi,
     // as a predictor for MBs that follow in the SB
     if (cm->frame_type == KEY_FRAME) {
       int r, d;
-#ifdef ENC_DEBUG
+#if 0  // ENC_DEBUG
       if (enc_debug)
         printf("intra pick_mb_modes %d %d\n", mb_row, mb_col);
 #endif
@@ -776,7 +776,7 @@ static int pick_mb_modes(VP9_COMP *cpi,
     } else {
       int seg_id, r, d;
 
-#ifdef ENC_DEBUG
+#if 0  // ENC_DEBUG
       if (enc_debug)
         printf("inter pick_mb_modes %d %d\n", mb_row, mb_col);
 #endif
@@ -2057,8 +2057,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
   assert(!xd->mode_info_context->mbmi.sb_type);
 
 #ifdef ENC_DEBUG
-  enc_debug = (cpi->common.current_video_frame == 1 &&
-               mb_row == 0 && mb_col == 0 && output_enabled);
+  enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame &&
+               mb_row == 8 && mb_col == 0 && output_enabled);
   if (enc_debug)
     printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled);
 #endif
@@ -2105,7 +2105,7 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
   }
 
   if (mbmi->ref_frame == INTRA_FRAME) {
-#ifdef ENC_DEBUG
+#if 0  // def ENC_DEBUG
     if (enc_debug) {
       printf("Mode %d skip %d tx_size %d\n", mbmi->mode, x->skip,
              mbmi->txfm_size);
@@ -2316,10 +2316,16 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
   const int mis = cm->mode_info_stride;
 
 #ifdef ENC_DEBUG
-  enc_debug = (cpi->common.current_video_frame == 1 &&
-               mb_row == 0 && mb_col == 0 && output_enabled);
-  if (enc_debug)
+  enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame &&
+               mb_row == 8 && mb_col == 0 && output_enabled);
+  if (enc_debug) {
     printf("Encode SB32 %d %d output %d\n", mb_row, mb_col, output_enabled);
+    printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d interp %d\n",
+           mi->mbmi.mode, x->skip, mi->mbmi.txfm_size,
+           mi->mbmi.ref_frame, mi->mbmi.second_ref_frame,
+           mi->mbmi.mv[0].as_mv.row, mi->mbmi.mv[0].as_mv.col,
+           mi->mbmi.interp_filter);
+  }
 #endif
   if (cm->frame_type == KEY_FRAME) {
     if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
@@ -2537,8 +2543,8 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
   const int mis = cm->mode_info_stride;
 
 #ifdef ENC_DEBUG
-  enc_debug = (cpi->common.current_video_frame == 1 &&
-               mb_row == 0 && mb_col == 0 && output_enabled);
+  enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame &&
+               mb_row == 8 && mb_col == 0 && output_enabled);
   if (enc_debug)
     printf("Encode SB64 %d %d output %d\n", mb_row, mb_col, output_enabled);
 #endif
index 04b732a451962efdc69b62b69d8c1a0f823129e7..a765782946d1e5fc7dff85c9af805367c6fcfc4b 100644 (file)
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2424,13 +2424,15 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
                                 &xd->scale_factor[0],
                                 4, 4, 0 /* no avg */, &xd->subpix);
 
+      // TODO(debargha): Make this work properly with the
+      // implicit-compoundinter-weight experiment when implicit
+      // weighting for splitmv modes is turned on.
       if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
-        vp9_build_inter_predictor(*(bd->base_second_pre) + bd->pre,
-                                  bd->pre_stride,
-                                  bd->predictor, 16,
-                                  &bd->bmi.as_mv[1],
-                                  &xd->scale_factor[1],
-                                  4, 4, 1 /* avg */, &xd->subpix);
+        vp9_build_inter_predictor(
+            *(bd->base_second_pre) + bd->pre, bd->pre_stride, bd->predictor, 16,
+            &bd->bmi.as_mv[1], &xd->scale_factor[1], 4, 4,
+            1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT) /* avg */,
+            &xd->subpix);
       }
 
       vp9_subtract_b(be, bd, 16);
@@ -2486,12 +2488,14 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
       for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
         uint8_t **base_pre = which_mv ? bd->base_second_pre : bd->base_pre;
 
-        vp9_build_inter_predictor(*base_pre + bd->pre,
-                                  bd->pre_stride,
-                                  bd->predictor, 16,
-                                  &bd->bmi.as_mv[which_mv],
-                                  &xd->scale_factor[which_mv],
-                                  8, 8, which_mv, &xd->subpix);
+        // TODO(debargha): Make this work properly with the
+        // implicit-compoundinter-weight experiment when implicit
+        // weighting for splitmv modes is turned on.
+        vp9_build_inter_predictor(
+            *base_pre + bd->pre, bd->pre_stride, bd->predictor, 16,
+            &bd->bmi.as_mv[which_mv], &xd->scale_factor[which_mv], 8, 8,
+            which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT),
+            &xd->subpix);
       }
 
       vp9_subtract_4b_c(be, bd, 16);
@@ -3866,27 +3870,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         unsigned int sse, var;
         int tmp_rate_y, tmp_rate_u, tmp_rate_v;
         int tmp_dist_y, tmp_dist_u, tmp_dist_v;
-        // TODO(jkoleszar): these 2 y/uv should be replaced with one call to
-        // vp9_build_interintra_16x16_predictors_mb().
-        vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16,
-                                            mb_row, mb_col);
-
-#if CONFIG_COMP_INTERINTRA_PRED
-        if (is_comp_interintra_pred) {
-          vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
-        }
-#endif
-
-        vp9_build_inter16x16_predictors_mbuv(xd, xd->predictor + 256,
-                                             xd->predictor + 320, 8,
-                                             mb_row, mb_col);
-
-#if CONFIG_COMP_INTERINTRA_PRED
-        if (is_comp_interintra_pred) {
-          vp9_build_interintra_16x16_predictors_mbuv(xd, xd->predictor + 256,
-                                                     xd->predictor + 320, 8);
-        }
-#endif
+        vp9_build_inter16x16_predictors_mb(xd, xd->predictor,
+                                           xd->predictor + 256,
+                                           xd->predictor + 320,
+                                           16, 8, mb_row, mb_col);
         var = vp9_variance16x16(*(b->base_src), b->src_stride,
                                 xd->predictor, 16, &sse);
         // Note our transform coeffs are 8 times an orthogonal transform.
@@ -3986,24 +3973,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                          xd->dst.uv_stride,
                                          mb_row, mb_col);
     } else {
-      // TODO(jkoleszar): These y/uv fns can be replaced with their mb
-      // equivalent
-      vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16,
-                                          mb_row, mb_col);
-#if CONFIG_COMP_INTERINTRA_PRED
-      if (is_comp_interintra_pred) {
-        vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
-      }
-#endif
-      vp9_build_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
-                                           &xd->predictor[320], 8,
-                                           mb_row, mb_col);
-#if CONFIG_COMP_INTERINTRA_PRED
-      if (is_comp_interintra_pred) {
-        vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
-                                                   &xd->predictor[320], 8);
-      }
-#endif
+      vp9_build_inter16x16_predictors_mb(xd, xd->predictor,
+                                         xd->predictor + 256,
+                                         xd->predictor + 320,
+                                         16, 8, mb_row, mb_col);
     }
   }
 
@@ -4586,7 +4559,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         vpx_memcpy(x->partition_info, &tmp_best_partition,
                    sizeof(PARTITION_INFO));
         for (i = 0; i < 16; i++) {
-          xd->block[i].bmi = tmp_best_bmodes[i];
+          xd->block[i].bmi = xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
         }
       }
 
index dc1d3d48dbea595b96144cbcc8eee2db8f8b345d..22a12f4a8558978014a7aaaf1e8cbb44f6f957c4 100644 (file)
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -54,7 +54,10 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
                             &pred[0], 16,
                             &subpel_mv,
                             &xd->scale_factor[which_mv],
-                            16, 16, which_mv, &xd->subpix);
+                            16, 16,
+                            which_mv <<
+                            (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT),
+                            &xd->subpix);
 
   stride = (stride + 1) >> 1;
 
@@ -62,13 +65,19 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
                                &pred[256], 8,
                                &fullpel_mv, &subpel_mv,
                                &xd->scale_factor_uv[which_mv],
-                               8, 8, which_mv, &xd->subpix);
+                               8, 8,
+                               which_mv <<
+                               (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT),
+                               &xd->subpix);
 
   vp9_build_inter_predictor_q4(v_mb_ptr, stride,
                                &pred[320], 8,
                                &fullpel_mv, &subpel_mv,
                                &xd->scale_factor_uv[which_mv],
-                               8, 8, which_mv, &xd->subpix);
+                               8, 8,
+                               which_mv <<
+                               (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT),
+                               &xd->subpix);
 }
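
The shift idiom above keeps one call site correct for both builds, because
the config macro evaluates to 0 or 1:

    /* Experiment off: which_mv << 0 gives 0 (copy) or 1 (plain average),
     * matching the old 2-entry predict table.
     * Experiment on:  which_mv << 2 gives 0 (copy) or 4, the (1/2, 1/2)
     * entry of the 8-entry weight table. */
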
 
 void vp9_temporal_filter_apply_c(uint8_t *frame1,
index e915efdfac622124499a0545e17a6dc4dc619ea3..87205e636732669a541f8b733bb015a20134214f 100644 (file)
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1488,7 +1488,7 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
   const unsigned int bsize2 = bsize >> 1;
   unsigned int match = 1;
   unsigned int i, j;
-  yloc[0] = yloc[1] = -1;
+  yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1;
   for (i = 0, match = 1; match && i < img1->d_h; i += bsize) {
     for (j = 0; match && j < img1->d_w; j += bsize) {
       int k, l;
@@ -1502,13 +1502,17 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
                 (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) {
             yloc[0] = i + k;
             yloc[1] = j + l;
+            yloc[2] = *(img1->planes[VPX_PLANE_Y] +
+                        (i + k) * img1->stride[VPX_PLANE_Y] + j + l);
+            yloc[3] = *(img2->planes[VPX_PLANE_Y] +
+                        (i + k) * img2->stride[VPX_PLANE_Y] + j + l);
             match = 0;
             break;
           }
         }
     }
   }
-  uloc[0] = uloc[1] = -1;
+  uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1;
   for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) {
     for (j = 0; match && j < (img1->d_w + 1) / 2; j += bsize2) {
       int k, l;
@@ -1522,13 +1526,17 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
                 (i + k) * img2->stride[VPX_PLANE_U] + j + l)) {
             uloc[0] = i + k;
             uloc[1] = j + l;
+            uloc[2] = *(img1->planes[VPX_PLANE_U] +
+                        (i + k) * img1->stride[VPX_PLANE_U] + j + l);
+            uloc[3] = *(img2->planes[VPX_PLANE_U] +
+                        (i + k) * img2->stride[VPX_PLANE_U] + j + l);
             match = 0;
             break;
           }
         }
     }
   }
-  vloc[0] = vloc[1] = -1;
+  vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1;
   for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) {
     for (j = 0; match && j < (img1->d_w + 1) / 2; j += bsize2) {
       int k, l;
@@ -1542,6 +1550,10 @@ static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
                 (i + k) * img2->stride[VPX_PLANE_V] + j + l)) {
             vloc[0] = i + k;
             vloc[1] = j + l;
+            vloc[2] = *(img1->planes[VPX_PLANE_V] +
+                        (i + k) * img1->stride[VPX_PLANE_V] + j + l);
+            vloc[3] = *(img2->planes[VPX_PLANE_V] +
+                        (i + k) * img2->stride[VPX_PLANE_V] + j + l);
             match = 0;
             break;
           }
@@ -2454,14 +2466,18 @@ static void test_decode(struct stream_state  *stream,
   ctx_exit_on_error(&stream->decoder, "Failed to get decoder reference frame");
 
   if (!compare_img(&enc_img, &dec_img)) {
-    int y[2], u[2], v[2];
+    int y[4], u[4], v[4];
     find_mismatch(&enc_img, &dec_img, y, u, v);
     stream->decoder.err = 1;
     warn_or_exit_on_error(&stream->decoder, fatal == TEST_DECODE_FATAL,
-                          "Stream %d: Encode/decode mismatch on frame %d"
-                          " at Y[%d, %d], U[%d, %d], V[%d, %d]",
+                          "Stream %d: Encode/decode mismatch on frame %d at"
+                          " Y[%d, %d] {%d/%d},"
+                          " U[%d, %d] {%d/%d},"
+                          " V[%d, %d] {%d/%d}",
                           stream->index, stream->frames_out,
-                          y[0], y[1], u[0], u[1], v[0], v[1]);
+                          y[0], y[1], y[2], y[3],
+                          u[0], u[1], u[2], u[3],
+                          v[0], v[1], v[2], v[3]);
     stream->mismatch_seen = stream->frames_out;
   }
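
After this change each of the y/u/v arrays carries four values, {row, col,
encoder pixel, decoder pixel}, instead of two. A caller sketch (printf
wording is illustrative, not from the patch):

    int y[4], u[4], v[4];
    find_mismatch(&enc_img, &dec_img, y, u, v);
    if (y[0] != -1)  /* row stays -1 when the luma planes match */
      printf("first luma mismatch at (%d, %d): enc %d vs dec %d\n",
             y[0], y[1], y[2], y[3]);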