From: Debargha Mukherjee
Date: Tue, 10 May 2016 16:30:30 +0000 (-0700)
Subject: Various wedge enhancements
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fb8ea1736b41ae9067901e7d8ebcfa15bc9b2fa9;p=libvpx

Various wedge enhancements

Increases the number of wedges for smaller blocks and removes the wedge
coding mode for blocks larger than 32x32. Also adds various other
enhancements for subsequent experimentation: provision for multiple
smoothing functions (though only one is currently used), a speed feature
that decides the sign for interinter wedges using a fast mechanism, and a
refactoring of the wedge representations.

lowres: -2.651% BDRATE

Most of the gain is due to the increase in codebook size for 8x8 - 16x16.

Change-Id: I50669f558c8d0d45e5a6f70aca4385a185b58b5b
---

diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 21147afa1..071e89f64 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -53,43 +53,6 @@ static INLINE int is_inter_mode(PREDICTION_MODE mode) {
 }
 
 #if CONFIG_EXT_INTER
-#define WEDGE_BITS_SML 2
-#define WEDGE_BITS_MED 3
-#define WEDGE_BITS_BIG 4
-#define WEDGE_NONE -1
-#define WEDGE_WEIGHT_BITS 6
-
-static const int get_wedge_bits_lookup[BLOCK_SIZES] = {
-  0,
-  0,
-  0,
-  WEDGE_BITS_SML,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_MED,
-  WEDGE_BITS_BIG,
-  WEDGE_BITS_BIG,
-  WEDGE_BITS_BIG,
-#if CONFIG_EXT_PARTITION
-  WEDGE_BITS_BIG,
-  WEDGE_BITS_BIG,
-  WEDGE_BITS_BIG,
-#endif  // CONFIG_EXT_PARTITION
-};
-
-static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) {
-  (void) sb_type;
-  return get_wedge_bits_lookup[sb_type] > 0;
-}
-
-static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) {
-  (void) sb_type;
-  return get_wedge_bits_lookup[sb_type] > 0;
-}
-
 static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
   return mode >= NEARESTMV && mode <= NEWFROMNEARMV;
 }
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 0ae257234..a47b2fee1 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -281,16 +281,16 @@ static const vpx_prob
 };
 
 static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = {
-  208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208,
+  208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
 #if CONFIG_EXT_PARTITION
   208, 208, 208
 #endif  // CONFIG_EXT_PARTITION
 };
 
 static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = {
-  208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208,
+  208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
 #if CONFIG_EXT_PARTITION
-  208, 208, 208
+  255, 255, 255
 #endif  // CONFIG_EXT_PARTITION
 };
 #endif  // CONFIG_EXT_INTER
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index ecfb54cd7..d6ac4bb8d 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -23,206 +23,352 @@
 #endif  // CONFIG_OBMC
 
 #if CONFIG_EXT_INTER
-static int get_masked_weight(int m) {
+#define NSMOOTHERS 2
+static int get_masked_weight(int m, int smoothness) {
 #define SMOOTHER_LEN 32
-  static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = {
-      0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0,
-      1, 1, 2, 4, 6, 10, 16, 23,
-      32,
-      41, 48, 54, 58, 60, 62, 63, 63,
-      64, 64, 64, 64, 64, 64, 64, 64,
-      64, 64, 64, 64, 64, 64, 64, 64,
-      64, 64, 64, 64, 64, 64, 64, 64,
+  static const uint8_t smoothfn[NSMOOTHERS][2 * SMOOTHER_LEN + 1] = {
+    {
+      0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0,
0, 0, + 0, 0, 1, 2, 4, 7, 13, 21, + 32, + 43, 51, 57, 60, 62, 63, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + }, { + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 2, 2, + 3, 3, 4, 4, 5, 6, 8, 9, + 11, 13, 15, 17, 20, 23, 26, 29, + 32, + 35, 38, 41, 44, 47, 49, 51, 53, + 55, 56, 58, 59, 60, 60, 61, 61, + 62, 62, 63, 63, 63, 63, 63, 63, + 64, 64, 64, 64, 64, 64, 64, 64, + } }; if (m < -SMOOTHER_LEN) return 0; else if (m > SMOOTHER_LEN) return (1 << WEDGE_WEIGHT_BITS); else - return smoothfn[m + SMOOTHER_LEN]; + return smoothfn[smoothness][m + SMOOTHER_LEN]; } -#define WEDGE_OBLIQUE 1 -#define WEDGE_STRAIGHT 0 - -#define WEDGE_PARMS 5 - -// [negative][transpose][reverse] -DECLARE_ALIGNED(16, static uint8_t, - wedge_mask_obl[2][2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]); -// [negative][transpose] -DECLARE_ALIGNED(16, static uint8_t, - wedge_mask_str[2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]); +// Angles are with respect to horizontal anti-clockwise +typedef enum { + WEDGE_HORIZONTAL = 0, + WEDGE_VERTICAL = 1, + WEDGE_OBLIQUE27 = 2, + WEDGE_OBLIQUE63 = 3, + WEDGE_OBLIQUE117 = 4, + WEDGE_OBLIQUE153 = 5, + WEDGE_DIRECTIONS +} WedgeDirectionType; + +#define WEDGE_PARMS 4 + +// [smoother][negative][direction] +DECLARE_ALIGNED( + 16, static uint8_t, + wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS] + [MASK_MASTER_SIZE * MASK_MASTER_SIZE]); // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0 void vp10_init_wedge_masks() { - int i, j; + int i, j, s; const int w = MASK_MASTER_SIZE; const int h = MASK_MASTER_SIZE; const int stride = MASK_MASTER_STRIDE; - const int a[4] = {2, 1, 4, 4}; - for (i = 0; i < h; ++i) - for (j = 0; j < w; ++j) { - int x = (2 * j + 1 - (a[2] * w) / 4); - int y = (2 * i + 1 - (a[3] * h) / 4); - int m = (a[0] * x + a[1] * y) / 2; - wedge_mask_obl[1][0][0][i * stride + j] = - wedge_mask_obl[1][1][0][j * stride + i] = - get_masked_weight(m); - wedge_mask_obl[1][0][1][i * stride + w - 1 - j] = - wedge_mask_obl[1][1][1][(w - 1 - j) * stride + i] = - (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m); - wedge_mask_obl[0][0][0][i * stride + j] = - wedge_mask_obl[0][1][0][j * stride + i] = - (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m); - wedge_mask_obl[0][0][1][i * stride + w - 1 - j] = - wedge_mask_obl[0][1][1][(w - 1 - j) * stride + i] = - get_masked_weight(m); - wedge_mask_str[1][0][i * stride + j] = - wedge_mask_str[1][1][j * stride + i] = - get_masked_weight(x); - wedge_mask_str[0][0][i * stride + j] = - wedge_mask_str[0][1][j * stride + i] = - (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x); - } + const int a[2] = {2, 1}; + const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]); + for (s = 0; s < NSMOOTHERS; s++) { + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int x = (2 * j + 1 - w); + int y = (2 * i + 1 - h); + int m = (int)rint((a[0] * x + a[1] * y) / asqrt); + wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] = + wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] = + get_masked_weight(m, s); + wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = + wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = + (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s); + wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] = + wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = + (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s); + wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = + wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride 
+ i] = + get_masked_weight(m, s); + wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] = + wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] = + get_masked_weight(x, s); + wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] = + wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = + (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x, s); + } + } } -static const int wedge_params_sml[1 << WEDGE_BITS_SML] - [WEDGE_PARMS] = { - {WEDGE_OBLIQUE, 1, 1, 4, 4}, - {WEDGE_OBLIQUE, 1, 0, 4, 4}, - {WEDGE_OBLIQUE, 0, 1, 4, 4}, - {WEDGE_OBLIQUE, 0, 0, 4, 4}, +static const int wedge_params_4[1 << WEDGE_BITS_2] + [WEDGE_PARMS] = { + {WEDGE_OBLIQUE27, 4, 4, 1}, + {WEDGE_OBLIQUE63, 4, 4, 1}, + {WEDGE_OBLIQUE117, 4, 4, 1}, + {WEDGE_OBLIQUE153, 4, 4, 1}, }; -static const int wedge_params_med_hgtw[1 << WEDGE_BITS_MED] - [WEDGE_PARMS] = { - {WEDGE_OBLIQUE, 1, 1, 4, 4}, - {WEDGE_OBLIQUE, 1, 0, 4, 4}, - {WEDGE_OBLIQUE, 0, 1, 4, 4}, - {WEDGE_OBLIQUE, 0, 0, 4, 4}, - - {WEDGE_OBLIQUE, 1, 1, 4, 2}, - {WEDGE_OBLIQUE, 1, 1, 4, 6}, - {WEDGE_OBLIQUE, 1, 0, 4, 2}, - {WEDGE_OBLIQUE, 1, 0, 4, 6}, +static const int wedge_params_8_hgtw[1 << WEDGE_BITS_3] + [WEDGE_PARMS] = { + {WEDGE_OBLIQUE27, 4, 4, 1}, + {WEDGE_OBLIQUE63, 4, 4, 1}, + {WEDGE_OBLIQUE117, 4, 4, 1}, + {WEDGE_OBLIQUE153, 4, 4, 1}, + + {WEDGE_OBLIQUE27, 4, 2, 1}, + {WEDGE_OBLIQUE27, 4, 6, 1}, + {WEDGE_OBLIQUE153, 4, 2, 1}, + {WEDGE_OBLIQUE153, 4, 6, 1}, }; -static const int wedge_params_med_hltw[1 << WEDGE_BITS_MED] - [WEDGE_PARMS] = { - {WEDGE_OBLIQUE, 1, 1, 4, 4}, - {WEDGE_OBLIQUE, 1, 0, 4, 4}, - {WEDGE_OBLIQUE, 0, 1, 4, 4}, - {WEDGE_OBLIQUE, 0, 0, 4, 4}, - - {WEDGE_OBLIQUE, 0, 1, 2, 4}, - {WEDGE_OBLIQUE, 0, 1, 6, 4}, - {WEDGE_OBLIQUE, 0, 0, 2, 4}, - {WEDGE_OBLIQUE, 0, 0, 6, 4}, +static const int wedge_params_8_hltw[1 << WEDGE_BITS_3] + [WEDGE_PARMS] = { + {WEDGE_OBLIQUE27, 4, 4, 1}, + {WEDGE_OBLIQUE63, 4, 4, 1}, + {WEDGE_OBLIQUE117, 4, 4, 1}, + {WEDGE_OBLIQUE153, 4, 4, 1}, + + {WEDGE_OBLIQUE63, 2, 4, 1}, + {WEDGE_OBLIQUE63, 6, 4, 1}, + {WEDGE_OBLIQUE117, 2, 4, 1}, + {WEDGE_OBLIQUE117, 6, 4, 1}, }; -static const int wedge_params_med_heqw[1 << WEDGE_BITS_MED] - [WEDGE_PARMS] = { - {WEDGE_OBLIQUE, 1, 1, 4, 4}, - {WEDGE_OBLIQUE, 1, 0, 4, 4}, - {WEDGE_OBLIQUE, 0, 1, 4, 4}, - {WEDGE_OBLIQUE, 0, 0, 4, 4}, - - {WEDGE_STRAIGHT, 1, 0, 4, 2}, - {WEDGE_STRAIGHT, 1, 0, 4, 6}, - {WEDGE_STRAIGHT, 0, 0, 2, 4}, - {WEDGE_STRAIGHT, 0, 0, 6, 4}, +static const int wedge_params_8_heqw[1 << WEDGE_BITS_3] + [WEDGE_PARMS] = { + {WEDGE_OBLIQUE27, 4, 4, 1}, + {WEDGE_OBLIQUE63, 4, 4, 1}, + {WEDGE_OBLIQUE117, 4, 4, 1}, + {WEDGE_OBLIQUE153, 4, 4, 1}, + + {WEDGE_HORIZONTAL, 4, 2, 1}, + {WEDGE_HORIZONTAL, 4, 6, 1}, + {WEDGE_VERTICAL, 2, 4, 1}, + {WEDGE_VERTICAL, 6, 4, 1}, }; -static const int wedge_params_big_hgtw[1 << WEDGE_BITS_BIG] - [WEDGE_PARMS] = { - {WEDGE_OBLIQUE, 1, 1, 4, 4}, - {WEDGE_OBLIQUE, 1, 0, 4, 4}, - {WEDGE_OBLIQUE, 0, 1, 4, 4}, - {WEDGE_OBLIQUE, 0, 0, 4, 4}, - - {WEDGE_OBLIQUE, 1, 1, 4, 2}, - {WEDGE_OBLIQUE, 1, 1, 4, 6}, - {WEDGE_OBLIQUE, 1, 0, 4, 2}, - {WEDGE_OBLIQUE, 1, 0, 4, 6}, - - {WEDGE_OBLIQUE, 0, 1, 2, 4}, - {WEDGE_OBLIQUE, 0, 1, 6, 4}, - {WEDGE_OBLIQUE, 0, 0, 2, 4}, - {WEDGE_OBLIQUE, 0, 0, 6, 4}, - - {WEDGE_STRAIGHT, 1, 0, 4, 2}, - {WEDGE_STRAIGHT, 1, 0, 4, 4}, - {WEDGE_STRAIGHT, 1, 0, 4, 6}, - {WEDGE_STRAIGHT, 0, 0, 4, 4}, +static const int wedge_params_16_hgtw[1 << WEDGE_BITS_4] + [WEDGE_PARMS] = { + {WEDGE_OBLIQUE27, 4, 4, 0}, + {WEDGE_OBLIQUE63, 4, 4, 0}, + {WEDGE_OBLIQUE117, 4, 4, 0}, + {WEDGE_OBLIQUE153, 4, 4, 0}, + + {WEDGE_HORIZONTAL, 4, 2, 0}, + 
{WEDGE_HORIZONTAL, 4, 4, 0}, + {WEDGE_HORIZONTAL, 4, 6, 0}, + {WEDGE_VERTICAL, 4, 4, 0}, + + {WEDGE_OBLIQUE27, 4, 2, 0}, + {WEDGE_OBLIQUE27, 4, 6, 0}, + {WEDGE_OBLIQUE153, 4, 2, 0}, + {WEDGE_OBLIQUE153, 4, 6, 0}, + + {WEDGE_OBLIQUE63, 2, 4, 0}, + {WEDGE_OBLIQUE63, 6, 4, 0}, + {WEDGE_OBLIQUE117, 2, 4, 0}, + {WEDGE_OBLIQUE117, 6, 4, 0}, }; -static const int wedge_params_big_hltw[1 << WEDGE_BITS_BIG] - [WEDGE_PARMS] = { - {WEDGE_OBLIQUE, 1, 1, 4, 4}, - {WEDGE_OBLIQUE, 1, 0, 4, 4}, - {WEDGE_OBLIQUE, 0, 1, 4, 4}, - {WEDGE_OBLIQUE, 0, 0, 4, 4}, - - {WEDGE_OBLIQUE, 1, 1, 4, 2}, - {WEDGE_OBLIQUE, 1, 1, 4, 6}, - {WEDGE_OBLIQUE, 1, 0, 4, 2}, - {WEDGE_OBLIQUE, 1, 0, 4, 6}, - - {WEDGE_OBLIQUE, 0, 1, 2, 4}, - {WEDGE_OBLIQUE, 0, 1, 6, 4}, - {WEDGE_OBLIQUE, 0, 0, 2, 4}, - {WEDGE_OBLIQUE, 0, 0, 6, 4}, - - {WEDGE_STRAIGHT, 0, 0, 2, 4}, - {WEDGE_STRAIGHT, 0, 0, 4, 4}, - {WEDGE_STRAIGHT, 0, 0, 6, 4}, - {WEDGE_STRAIGHT, 1, 0, 4, 4}, +static const int wedge_params_16_hltw[1 << WEDGE_BITS_4] + [WEDGE_PARMS] = { + {WEDGE_OBLIQUE27, 4, 4, 0}, + {WEDGE_OBLIQUE63, 4, 4, 0}, + {WEDGE_OBLIQUE117, 4, 4, 0}, + {WEDGE_OBLIQUE153, 4, 4, 0}, + + {WEDGE_VERTICAL, 2, 4, 0}, + {WEDGE_VERTICAL, 4, 4, 0}, + {WEDGE_VERTICAL, 6, 4, 0}, + {WEDGE_HORIZONTAL, 4, 4, 0}, + + {WEDGE_OBLIQUE27, 4, 2, 0}, + {WEDGE_OBLIQUE27, 4, 6, 0}, + {WEDGE_OBLIQUE153, 4, 2, 0}, + {WEDGE_OBLIQUE153, 4, 6, 0}, + + {WEDGE_OBLIQUE63, 2, 4, 0}, + {WEDGE_OBLIQUE63, 6, 4, 0}, + {WEDGE_OBLIQUE117, 2, 4, 0}, + {WEDGE_OBLIQUE117, 6, 4, 0}, }; -static const int wedge_params_big_heqw[1 << WEDGE_BITS_BIG] - [WEDGE_PARMS] = { - {WEDGE_OBLIQUE, 1, 1, 4, 4}, - {WEDGE_OBLIQUE, 1, 0, 4, 4}, - {WEDGE_OBLIQUE, 0, 1, 4, 4}, - {WEDGE_OBLIQUE, 0, 0, 4, 4}, - - {WEDGE_OBLIQUE, 1, 1, 4, 2}, - {WEDGE_OBLIQUE, 1, 1, 4, 6}, - {WEDGE_OBLIQUE, 1, 0, 4, 2}, - {WEDGE_OBLIQUE, 1, 0, 4, 6}, - - {WEDGE_OBLIQUE, 0, 1, 2, 4}, - {WEDGE_OBLIQUE, 0, 1, 6, 4}, - {WEDGE_OBLIQUE, 0, 0, 2, 4}, - {WEDGE_OBLIQUE, 0, 0, 6, 4}, - - {WEDGE_STRAIGHT, 1, 0, 4, 2}, - {WEDGE_STRAIGHT, 1, 0, 4, 6}, - {WEDGE_STRAIGHT, 0, 0, 2, 4}, - {WEDGE_STRAIGHT, 0, 0, 6, 4}, +static const int wedge_params_16_heqw[1 << WEDGE_BITS_4] + [WEDGE_PARMS] = { + {WEDGE_OBLIQUE27, 4, 4, 0}, + {WEDGE_OBLIQUE63, 4, 4, 0}, + {WEDGE_OBLIQUE117, 4, 4, 0}, + {WEDGE_OBLIQUE153, 4, 4, 0}, + + {WEDGE_HORIZONTAL, 4, 2, 0}, + {WEDGE_HORIZONTAL, 4, 6, 0}, + {WEDGE_VERTICAL, 2, 4, 0}, + {WEDGE_VERTICAL, 6, 4, 0}, + + {WEDGE_OBLIQUE27, 4, 2, 0}, + {WEDGE_OBLIQUE27, 4, 6, 0}, + {WEDGE_OBLIQUE153, 4, 2, 0}, + {WEDGE_OBLIQUE153, 4, 6, 0}, + + {WEDGE_OBLIQUE63, 2, 4, 0}, + {WEDGE_OBLIQUE63, 6, 4, 0}, + {WEDGE_OBLIQUE117, 2, 4, 0}, + {WEDGE_OBLIQUE117, 6, 4, 0}, +}; + +static const int wedge_params_32_hgtw[1 << WEDGE_BITS_5] + [WEDGE_PARMS] = { + {WEDGE_OBLIQUE27, 4, 4, 0}, + {WEDGE_OBLIQUE63, 4, 4, 0}, + {WEDGE_OBLIQUE117, 4, 4, 0}, + {WEDGE_OBLIQUE153, 4, 4, 0}, + + {WEDGE_HORIZONTAL, 4, 2, 0}, + {WEDGE_HORIZONTAL, 4, 4, 0}, + {WEDGE_HORIZONTAL, 4, 6, 0}, + {WEDGE_VERTICAL, 4, 4, 0}, + + {WEDGE_OBLIQUE27, 4, 1, 0}, + {WEDGE_OBLIQUE27, 4, 2, 0}, + {WEDGE_OBLIQUE27, 4, 3, 0}, + {WEDGE_OBLIQUE27, 4, 5, 0}, + {WEDGE_OBLIQUE27, 4, 6, 0}, + {WEDGE_OBLIQUE27, 4, 7, 0}, + + {WEDGE_OBLIQUE153, 4, 1, 0}, + {WEDGE_OBLIQUE153, 4, 2, 0}, + {WEDGE_OBLIQUE153, 4, 3, 0}, + {WEDGE_OBLIQUE153, 4, 5, 0}, + {WEDGE_OBLIQUE153, 4, 6, 0}, + {WEDGE_OBLIQUE153, 4, 7, 0}, + + {WEDGE_OBLIQUE63, 1, 4, 0}, + {WEDGE_OBLIQUE63, 2, 4, 0}, + {WEDGE_OBLIQUE63, 3, 4, 0}, + {WEDGE_OBLIQUE63, 5, 4, 0}, + {WEDGE_OBLIQUE63, 6, 4, 0}, + {WEDGE_OBLIQUE63, 7, 4, 0}, + + 
{WEDGE_OBLIQUE117, 1, 4, 0}, + {WEDGE_OBLIQUE117, 2, 4, 0}, + {WEDGE_OBLIQUE117, 3, 4, 0}, + {WEDGE_OBLIQUE117, 5, 4, 0}, + {WEDGE_OBLIQUE117, 6, 4, 0}, + {WEDGE_OBLIQUE117, 7, 4, 0}, +}; + +static const int wedge_params_32_hltw[1 << WEDGE_BITS_5] + [WEDGE_PARMS] = { + {WEDGE_OBLIQUE27, 4, 4, 0}, + {WEDGE_OBLIQUE63, 4, 4, 0}, + {WEDGE_OBLIQUE117, 4, 4, 0}, + {WEDGE_OBLIQUE153, 4, 4, 0}, + + {WEDGE_VERTICAL, 2, 4, 0}, + {WEDGE_VERTICAL, 4, 4, 0}, + {WEDGE_VERTICAL, 6, 4, 0}, + {WEDGE_HORIZONTAL, 4, 4, 0}, + + {WEDGE_OBLIQUE27, 4, 1, 0}, + {WEDGE_OBLIQUE27, 4, 2, 0}, + {WEDGE_OBLIQUE27, 4, 3, 0}, + {WEDGE_OBLIQUE27, 4, 5, 0}, + {WEDGE_OBLIQUE27, 4, 6, 0}, + {WEDGE_OBLIQUE27, 4, 7, 0}, + + {WEDGE_OBLIQUE153, 4, 1, 0}, + {WEDGE_OBLIQUE153, 4, 2, 0}, + {WEDGE_OBLIQUE153, 4, 3, 0}, + {WEDGE_OBLIQUE153, 4, 5, 0}, + {WEDGE_OBLIQUE153, 4, 6, 0}, + {WEDGE_OBLIQUE153, 4, 7, 0}, + + {WEDGE_OBLIQUE63, 1, 4, 0}, + {WEDGE_OBLIQUE63, 2, 4, 0}, + {WEDGE_OBLIQUE63, 3, 4, 0}, + {WEDGE_OBLIQUE63, 5, 4, 0}, + {WEDGE_OBLIQUE63, 6, 4, 0}, + {WEDGE_OBLIQUE63, 7, 4, 0}, + + {WEDGE_OBLIQUE117, 1, 4, 0}, + {WEDGE_OBLIQUE117, 2, 4, 0}, + {WEDGE_OBLIQUE117, 3, 4, 0}, + {WEDGE_OBLIQUE117, 5, 4, 0}, + {WEDGE_OBLIQUE117, 6, 4, 0}, + {WEDGE_OBLIQUE117, 7, 4, 0}, +}; + +static const int wedge_params_32_heqw[1 << WEDGE_BITS_5] + [WEDGE_PARMS] = { + {WEDGE_OBLIQUE27, 4, 4, 0}, + {WEDGE_OBLIQUE63, 4, 4, 0}, + {WEDGE_OBLIQUE117, 4, 4, 0}, + {WEDGE_OBLIQUE153, 4, 4, 0}, + + {WEDGE_HORIZONTAL, 4, 2, 0}, + {WEDGE_HORIZONTAL, 4, 6, 0}, + {WEDGE_VERTICAL, 2, 4, 0}, + {WEDGE_VERTICAL, 6, 4, 0}, + + {WEDGE_OBLIQUE27, 4, 1, 0}, + {WEDGE_OBLIQUE27, 4, 2, 0}, + {WEDGE_OBLIQUE27, 4, 3, 0}, + {WEDGE_OBLIQUE27, 4, 5, 0}, + {WEDGE_OBLIQUE27, 4, 6, 0}, + {WEDGE_OBLIQUE27, 4, 7, 0}, + + {WEDGE_OBLIQUE153, 4, 1, 0}, + {WEDGE_OBLIQUE153, 4, 2, 0}, + {WEDGE_OBLIQUE153, 4, 3, 0}, + {WEDGE_OBLIQUE153, 4, 5, 0}, + {WEDGE_OBLIQUE153, 4, 6, 0}, + {WEDGE_OBLIQUE153, 4, 7, 0}, + + {WEDGE_OBLIQUE63, 1, 4, 0}, + {WEDGE_OBLIQUE63, 2, 4, 0}, + {WEDGE_OBLIQUE63, 3, 4, 0}, + {WEDGE_OBLIQUE63, 5, 4, 0}, + {WEDGE_OBLIQUE63, 6, 4, 0}, + {WEDGE_OBLIQUE63, 7, 4, 0}, + + {WEDGE_OBLIQUE117, 1, 4, 0}, + {WEDGE_OBLIQUE117, 2, 4, 0}, + {WEDGE_OBLIQUE117, 3, 4, 0}, + {WEDGE_OBLIQUE117, 5, 4, 0}, + {WEDGE_OBLIQUE117, 6, 4, 0}, + {WEDGE_OBLIQUE117, 7, 4, 0}, }; static const int *get_wedge_params_lookup[BLOCK_SIZES] = { NULL, NULL, NULL, - &wedge_params_sml[0][0], - &wedge_params_med_hgtw[0][0], - &wedge_params_med_hltw[0][0], - &wedge_params_med_heqw[0][0], - &wedge_params_med_hgtw[0][0], - &wedge_params_med_hltw[0][0], - &wedge_params_med_heqw[0][0], - &wedge_params_big_hgtw[0][0], - &wedge_params_big_hltw[0][0], - &wedge_params_big_heqw[0][0], + &wedge_params_16_heqw[0][0], + &wedge_params_16_hgtw[0][0], + &wedge_params_16_hltw[0][0], + &wedge_params_16_heqw[0][0], + &wedge_params_16_hgtw[0][0], + &wedge_params_16_hltw[0][0], + &wedge_params_16_heqw[0][0], + NULL, + NULL, + NULL, #if CONFIG_EXT_PARTITION - &wedge_params_big_hgtw[0][0], - &wedge_params_big_hltw[0][0], - &wedge_params_big_heqw[0][0], + NULL, + NULL, + NULL, #endif // CONFIG_EXT_PARTITION }; @@ -244,11 +390,9 @@ static const uint8_t *get_wedge_mask_inplace(int wedge_index, const int *a = get_wedge_params(wedge_index, sb_type); int woff, hoff; if (!a) return NULL; - woff = (a[3] * bw) >> 3; - hoff = (a[4] * bh) >> 3; - master = (a[0] ? 
- wedge_mask_obl[neg][a[1]][a[2]] : - wedge_mask_str[neg][a[1]]) + + woff = (a[1] * bw) >> 3; + hoff = (a[2] * bh) >> 3; + master = wedge_mask_obl[a[3]][neg][a[0]] + MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) + MASK_MASTER_SIZE / 2 - woff; return master; diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h index 1e8679b87..28a5ae9da 100644 --- a/vp10/common/reconinter.h +++ b/vp10/common/reconinter.h @@ -146,6 +146,55 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, } #endif // CONFIG_VP9_HIGHBITDEPTH +#if CONFIG_EXT_INTER +#define WEDGE_BITS_2 2 +#define WEDGE_BITS_3 3 +#define WEDGE_BITS_4 4 +#define WEDGE_BITS_5 5 +#define WEDGE_NONE -1 +#define WEDGE_WEIGHT_BITS 6 + +static const int get_wedge_bits_lookup[BLOCK_SIZES] = { + 0, + 0, + 0, + WEDGE_BITS_4, + WEDGE_BITS_4, + WEDGE_BITS_4, + WEDGE_BITS_4, + WEDGE_BITS_4, + WEDGE_BITS_4, + WEDGE_BITS_4, + 0, + 0, + 0, +#if CONFIG_EXT_PARTITION + 0, + 0, + 0, +#endif // CONFIG_EXT_PARTITION +}; + +static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) { + (void) sb_type; + return get_wedge_bits_lookup[sb_type] > 0; +} + +static INLINE int get_interinter_wedge_bits(BLOCK_SIZE sb_type) { + const int wbits = get_wedge_bits_lookup[sb_type]; + return (wbits > 0) ? wbits + 1 : 0; +} + +static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) { + (void) sb_type; + return get_wedge_bits_lookup[sb_type] > 0; +} + +static INLINE int get_interintra_wedge_bits(BLOCK_SIZE sb_type) { + return get_wedge_bits_lookup[sb_type]; +} +#endif // CONFIG_EXT_INTER + void build_inter_predictors(MACROBLOCKD *xd, int plane, #if CONFIG_OBMC int mi_col_offset, int mi_row_offset, diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index c111b56bc..6af0eb872 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -540,8 +540,6 @@ int adst_vs_flipadst(const VP10_COMP *cpi, get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride, hdist, vdist); - - svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] + vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3]; @@ -717,7 +715,7 @@ static int do_tx_type_search(TX_TYPE tx_type, static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int num_planes, + int plane_from, int plane_to, int *out_rate_sum, int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb) { // Note our transform coeffs are 8 times an orthogonal transform. 
@@ -744,7 +742,7 @@ static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
 
   x->pred_sse[ref] = 0;
 
-  for (i = 0; i < num_planes; ++i) {
+  for (i = plane_from; i <= plane_to; ++i) {
     struct macroblock_plane *const p = &x->plane[i];
     struct macroblockd_plane *const pd = &xd->plane[i];
     const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
@@ -6384,6 +6382,55 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
                xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
 }
 
+#if CONFIG_EXT_INTER
+static int estimate_wedge_sign(const VP10_COMP *cpi,
+                               const MACROBLOCK *x,
+                               const BLOCK_SIZE bsize,
+                               uint8_t *pred0, int stride0,
+                               uint8_t *pred1, int stride1) {
+  const struct macroblock_plane *const p = &x->plane[0];
+  const uint8_t *src = p->src.buf;
+  int src_stride = p->src.stride;
+  const int f_index = bsize - BLOCK_8X8;
+  const int bw = 4 << (b_width_log2_lookup[bsize]);
+  const int bh = 4 << (b_height_log2_lookup[bsize]);
+  uint32_t esq[2][4], var;
+  int64_t tl, br;
+
+  var = cpi->fn_ptr[f_index].vf(
+      src, src_stride,
+      pred0, stride0, &esq[0][0]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bw / 2, src_stride,
+      pred0 + bw / 2, stride0, &esq[0][1]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bh / 2 * src_stride, src_stride,
+      pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bh / 2 * src_stride + bw / 2, src_stride,
+      pred0 + bh / 2 * stride0 + bw / 2, stride0, &esq[0][3]);
+  var = cpi->fn_ptr[f_index].vf(
+      src, src_stride,
+      pred1, stride1, &esq[1][0]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bw / 2, src_stride,
+      pred1 + bw / 2, stride1, &esq[1][1]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bh / 2 * src_stride, src_stride,
+      pred1 + bh / 2 * stride1, stride1, &esq[1][2]);
+  var = cpi->fn_ptr[f_index].vf(
+      src + bh / 2 * src_stride + bw / 2, src_stride,
+      pred1 + bh / 2 * stride1 + bw / 2, stride1, &esq[1][3]);
+  (void) var;
+
+  tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
+       (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
+  br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
+       (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
+  return (tl + br > 0);
+}
+#endif  // CONFIG_EXT_INTER
+
 static INTERP_FILTER predict_interp_filter(const VP10_COMP *cpi,
                                            const MACROBLOCK *x,
                                            const BLOCK_SIZE bsize,
@@ -6946,8 +6993,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
       }
     }
     vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
-    model_rd_for_sb(cpi, bsize, x, xd, MAX_MB_PLANE, &rate_sum, &dist_sum,
-                    &tmp_skip_sb, &tmp_skip_sse);
+    model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1,
+                    &rate_sum, &dist_sum, &tmp_skip_sb, &tmp_skip_sse);
     rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
 
     if (cm->interp_filter == SWITCHABLE)
@@ -7024,8 +7071,9 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
 #endif  // CONFIG_OBMC
 
   if (is_comp_pred && is_interinter_wedge_used(bsize)) {
-    int wedge_index, best_wedge_index = WEDGE_NONE, rs;
-    int rate_sum;
+    int wedge_index, best_wedge_index = WEDGE_NONE;
+    int wedge_sign, best_wedge_sign = 0;
+    int rate_sum, rs;
     int64_t dist_sum;
     int64_t best_rd_nowedge = INT64_MAX;
     int64_t best_rd_wedge = INT64_MAX;
@@ -7034,6 +7082,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
     int64_t tmp_skip_sse_sb;
 
     rs = vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
+    mbmi->use_wedge_interinter = 0;
     vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
     vp10_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
@@ -7042,41 +7091,43 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
       if (rd != INT64_MAX)
         rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
       best_rd_nowedge = rd;
-      mbmi->use_wedge_interinter = 0;
 
       // Disable wedge search if source variance is small
       if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
-          best_rd_nowedge < 3 * ref_best_rd) {
+          best_rd_nowedge / 3 < ref_best_rd) {
+        uint8_t pred0[2 * MAX_SB_SQUARE * 3];
+        uint8_t pred1[2 * MAX_SB_SQUARE * 3];
+        uint8_t *preds0[3] = {pred0,
+                              pred0 + 2 * MAX_SB_SQUARE,
+                              pred0 + 4 * MAX_SB_SQUARE};
+        uint8_t *preds1[3] = {pred1,
+                              pred1 + 2 * MAX_SB_SQUARE,
+                              pred1 + 4 * MAX_SB_SQUARE};
+        int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
+        int est_wedge_sign;
         mbmi->use_wedge_interinter = 1;
-        rs = vp10_cost_literal(1 + get_wedge_bits_lookup[bsize]) +
+        rs = vp10_cost_literal(get_interinter_wedge_bits(bsize)) +
             vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
         wedge_types = (1 << get_wedge_bits_lookup[bsize]);
-        if (have_newmv_in_inter_mode(this_mode)) {
-          int_mv tmp_mv[2];
-          int rate_mvs[2], tmp_rate_mv = 0;
-          uint8_t pred0[2 * MAX_SB_SQUARE * 3];
-          uint8_t pred1[2 * MAX_SB_SQUARE * 3];
-          uint8_t *preds0[3] = {pred0,
-                                pred0 + 2 * MAX_SB_SQUARE,
-                                pred0 + 4 * MAX_SB_SQUARE};
-          uint8_t *preds1[3] = {pred1,
-                                pred1 + 2 * MAX_SB_SQUARE,
-                                pred1 + 4 * MAX_SB_SQUARE};
-          int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
-          vp10_build_inter_predictors_for_planes_single_buf(
-              xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
-          vp10_build_inter_predictors_for_planes_single_buf(
-              xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
-
-          for (wedge_index = 0; wedge_index < 2 * wedge_types; ++wedge_index) {
-            mbmi->interinter_wedge_index = wedge_index >> 1;
-            mbmi->interinter_wedge_sign = wedge_index & 1;
-            vp10_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
-                                                      mi_row, mi_col,
+
+        vp10_build_inter_predictors_for_planes_single_buf(
+            xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
+        vp10_build_inter_predictors_for_planes_single_buf(
+            xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
+
+        // Choose the best wedge
+        if (cpi->sf.fast_wedge_sign_estimate) {
+          est_wedge_sign = estimate_wedge_sign(
+              cpi, x, bsize, pred0, MAX_SB_SIZE, pred1, MAX_SB_SIZE);
+          best_wedge_sign = mbmi->interinter_wedge_sign = est_wedge_sign;
+          for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+            mbmi->interinter_wedge_index = wedge_index;
+            vp10_build_wedge_inter_predictor_from_buf(xd, bsize,
+                                                      0, 0, mi_row, mi_col,
                                                       preds0, strides,
                                                       preds1, strides);
-            model_rd_for_sb(cpi, bsize, x, xd, 1,
+            model_rd_for_sb(cpi, bsize, x, xd, 0, 0,
                             &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
             rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
@@ -7085,8 +7136,34 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
               best_rd_wedge = rd;
             }
           }
-          mbmi->interinter_wedge_index = best_wedge_index >> 1;
-          mbmi->interinter_wedge_sign = best_wedge_index & 1;
+        } else {
+          for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+            for (wedge_sign = 0; wedge_sign < 2; ++wedge_sign) {
+              mbmi->interinter_wedge_index = wedge_index;
+              mbmi->interinter_wedge_sign = wedge_sign;
+              vp10_build_wedge_inter_predictor_from_buf(xd, bsize,
+                                                        0, 0, mi_row, mi_col,
+                                                        preds0, strides,
+                                                        preds1, strides);
+              model_rd_for_sb(cpi, bsize, x, xd, 0, 0,
+                              &rate_sum, &dist_sum,
+                              &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+              rd = RDCOST(x->rdmult, x->rddiv,
+                          rs + rate_mv + rate_sum, dist_sum);
+              if (rd <
best_rd_wedge) { + best_wedge_index = wedge_index; + best_wedge_sign = wedge_sign; + best_rd_wedge = rd; + } + } + } + } + mbmi->interinter_wedge_index = best_wedge_index; + mbmi->interinter_wedge_sign = best_wedge_sign; + + if (have_newmv_in_inter_mode(this_mode)) { + int_mv tmp_mv[2]; + int rate_mvs[2], tmp_rate_mv = 0; if (this_mode == NEW_NEWMV) { int mv_idxs[2] = {0, 0}; do_masked_motion_search_indexed(cpi, x, @@ -7117,7 +7194,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, mbmi->mv[1].as_int = tmp_mv[1].as_int; } vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb(cpi, bsize, x, xd, 1, &rate_sum, &dist_sum, + model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum); if (rd < best_rd_wedge) { @@ -7131,7 +7208,6 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, preds0, strides, preds1, strides); } - vp10_subtract_plane(x, bsize, 0); rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, @@ -7143,8 +7219,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, if (best_rd_wedge < best_rd_nowedge) { mbmi->use_wedge_interinter = 1; - mbmi->interinter_wedge_index = best_wedge_index >> 1; - mbmi->interinter_wedge_sign = best_wedge_index & 1; + mbmi->interinter_wedge_index = best_wedge_index; + mbmi->interinter_wedge_sign = best_wedge_sign; xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int; *rate2 += tmp_rate_mv - rate_mv; @@ -7157,37 +7233,6 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int; } } else { - uint8_t pred0[2 * MAX_SB_SQUARE * 3]; - uint8_t pred1[2 * MAX_SB_SQUARE * 3]; - uint8_t *preds0[3] = {pred0, - pred0 + 2 * MAX_SB_SQUARE, - pred0 + 4 * MAX_SB_SQUARE}; - uint8_t *preds1[3] = {pred1, - pred1 + 2 * MAX_SB_SQUARE, - pred1 + 4 * MAX_SB_SQUARE}; - int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; - vp10_build_inter_predictors_for_planes_single_buf( - xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides); - vp10_build_inter_predictors_for_planes_single_buf( - xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides); - for (wedge_index = 0; wedge_index < 2 * wedge_types; ++wedge_index) { - mbmi->interinter_wedge_index = wedge_index >> 1; - mbmi->interinter_wedge_sign = wedge_index & 1; - vp10_build_wedge_inter_predictor_from_buf(xd, bsize, - 0, 0, mi_row, mi_col, - preds0, strides, - preds1, strides); - model_rd_for_sb(cpi, bsize, x, xd, 1, - &rate_sum, &dist_sum, - &tmp_skip_txfm_sb, &tmp_skip_sse_sb); - rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum); - if (rd < best_rd_wedge) { - best_wedge_index = wedge_index; - best_rd_wedge = rd; - } - } - mbmi->interinter_wedge_sign = best_wedge_index & 1; - mbmi->interinter_wedge_index = best_wedge_index >> 1; vp10_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, mi_row, mi_col, preds0, strides, @@ -7197,12 +7242,12 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum); - best_rd_wedge = rd; + rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum); + best_rd_wedge = rd; if (best_rd_wedge < best_rd_nowedge) { mbmi->use_wedge_interinter = 1; - mbmi->interinter_wedge_index = 
best_wedge_index >> 1; - mbmi->interinter_wedge_sign = best_wedge_index & 1; + mbmi->interinter_wedge_index = best_wedge_index; + mbmi->interinter_wedge_sign = best_wedge_sign; } else { mbmi->use_wedge_interinter = 0; } @@ -7217,7 +7262,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, if (mbmi->use_wedge_interinter) *compmode_wedge_cost = - vp10_cost_literal(1 + get_wedge_bits_lookup[bsize]) + + vp10_cost_literal(get_interinter_wedge_bits(bsize)) + vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1); else *compmode_wedge_cost = @@ -7303,7 +7348,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, mbmi->use_wedge_interintra = 1; wedge_types = (1 << get_wedge_bits_lookup[bsize]); - rwedge = vp10_cost_literal(get_wedge_bits_lookup[bsize]) + + rwedge = vp10_cost_literal(get_interintra_wedge_bits(bsize)) + vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1); for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { mbmi->interintra_wedge_index = wedge_index; @@ -7311,7 +7356,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, vp10_combine_interintra(xd, bsize, 0, tmp_buf, MAX_SB_SIZE, intrapred, MAX_SB_SIZE); - model_rd_for_sb(cpi, bsize, x, xd, 1, + model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, @@ -7333,7 +7378,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, 0, mv_idx); mbmi->mv[0].as_int = tmp_mv.as_int; vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb(cpi, bsize, x, xd, 1, &rate_sum, &dist_sum, + model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum); @@ -7390,7 +7435,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra); if (mbmi->use_wedge_interintra) { *compmode_interintra_cost += - vp10_cost_literal(get_wedge_bits_lookup[bsize]); + vp10_cost_literal(get_interintra_wedge_bits(bsize)); } } } else if (is_interintra_allowed(mbmi)) { @@ -7428,8 +7473,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, // switchable list (ex. bilinear) is indicated at the frame level, or // skip condition holds. 
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); - model_rd_for_sb(cpi, bsize, x, xd, MAX_MB_PLANE, &tmp_rate, &tmp_dist, - &skip_txfm_sb, &skip_sse_sb); + model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, + &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); memcpy(bsse, x->bsse, sizeof(bsse)); @@ -7575,8 +7620,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, NULL, NULL, dst_buf1, dst_stride1, dst_buf2, dst_stride2); - model_rd_for_sb(cpi, bsize, x, xd, MAX_MB_PLANE, &tmp_rate, &tmp_dist, - &skip_txfm_sb, &skip_sse_sb); + model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, + &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb); } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c index 155f28e44..b766caeba 100644 --- a/vp10/encoder/speed_features.c +++ b/vp10/encoder/speed_features.c @@ -165,6 +165,7 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm, sf->use_transform_domain_distortion = 1; #if CONFIG_EXT_INTER sf->disable_wedge_search_var_thresh = 100; + sf->fast_wedge_sign_estimate = 1; #endif // CONFIG_EXT_INTER } @@ -283,6 +284,7 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf, sf->use_upsampled_references = 0; #if CONFIG_EXT_INTER sf->disable_wedge_search_var_thresh = 100; + sf->fast_wedge_sign_estimate = 1; #endif // CONFIG_EXT_INTER // Use transform domain distortion computation @@ -517,6 +519,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) { #endif // CONFIG_EXT_TILE #if CONFIG_EXT_INTER sf->disable_wedge_search_var_thresh = 0; + sf->fast_wedge_sign_estimate = 0; #endif // CONFIG_EXT_INTER for (i = 0; i < TX_SIZES; i++) { diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h index 6cee74813..ca6adbefa 100644 --- a/vp10/encoder/speed_features.h +++ b/vp10/encoder/speed_features.h @@ -402,6 +402,9 @@ typedef struct SPEED_FEATURES { #if CONFIG_EXT_INTER // A source variance threshold below which wedge search is disabled unsigned int disable_wedge_search_var_thresh; + + // Whether fast wedge sign estimate is used + int fast_wedge_sign_estimate; #endif // CONFIG_EXT_INTER // These bit masks allow you to enable or disable intra modes for each
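
The following standalone sketch (not part of the patch) mirrors the mask
construction in vp10_init_wedge_masks() above: the signed distance of each
pixel center from an oblique line through the block center is pushed through
a smoothing ramp to produce a 0..64 blend weight. MASTER_SIZE and the linear
ramp in masked_weight() are simplified stand-ins for MASK_MASTER_SIZE and
the smoothfn[] tables.

#include <math.h>
#include <stdint.h>
#include <stdio.h>

#define MASTER_SIZE 32   /* stand-in for MASK_MASTER_SIZE */
#define WEIGHT_BITS 6    /* weights span 0 .. (1 << WEIGHT_BITS) */

/* Linear ramp standing in for the smoothfn[] lookup tables. */
static int masked_weight(double m) {
  if (m < -4) return 0;
  if (m > 4) return 1 << WEIGHT_BITS;
  return (int)rint((m + 4) / 8.0 * (1 << WEIGHT_BITS));
}

int main(void) {
  /* Line direction (a[0], a[1]) = (2, 1) as in the patch; dividing by
     sqrt(a0^2 + a1^2) turns m into a true distance, so every orientation
     shares the same ramp width. */
  const int a0 = 2, a1 = 1;
  const double norm = sqrt((double)(a0 * a0 + a1 * a1));
  uint8_t mask[MASTER_SIZE][MASTER_SIZE];
  int i, j;
  for (i = 0; i < MASTER_SIZE; ++i)
    for (j = 0; j < MASTER_SIZE; ++j) {
      /* Offsets of the pixel center from the block center in half-pel
         units, matching 2*j + 1 - w in the patch. */
      const int x = 2 * j + 1 - MASTER_SIZE;
      const int y = 2 * i + 1 - MASTER_SIZE;
      mask[i][j] = (uint8_t)masked_weight((a0 * x + a1 * y) / norm);
    }
  for (i = 0; i < MASTER_SIZE; i += 4) {  /* coarse visualization */
    for (j = 0; j < MASTER_SIZE; j += 4)
      printf("%3d", mask[i][j]);
    printf("\n");
  }
  return 0;
}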
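
Each WEDGE_PARMS row above reads {direction, x-offset, y-offset, smoother},
with the offsets in eighths of the block width and height (4, 4 is the block
center). The hypothetical helper below shows only the pointer arithmetic by
which get_wedge_mask_inplace() selects a bw x bh window from the master
mask; the name wedge_mask_window and its signature are illustrative, not
the patch's API.

#include <stdint.h>

#define MASK_MASTER_SIZE 64                 /* assumed master mask size */
#define MASK_MASTER_STRIDE MASK_MASTER_SIZE

/* master points at wedge_mask_obl[a[3]][neg][a[0]], i.e. the smoother,
   sign and direction have already been selected. */
static const uint8_t *wedge_mask_window(const uint8_t *master,
                                        const int a[4], int bw, int bh) {
  /* a[1] and a[2] shift the wedge boundary away from the block center;
     with a[1] = a[2] = 4, (4 * bw) >> 3 == bw / 2 keeps it centered. */
  const int woff = (a[1] * bw) >> 3;
  const int hoff = (a[2] * bh) >> 3;
  return master +
         MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
         (MASK_MASTER_SIZE / 2 - woff);
}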
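
The fast sign decision added under cpi->sf.fast_wedge_sign_estimate can also
be read in isolation. The sketch below restates the heuristic of
estimate_wedge_sign(): measure how well each single-reference predictor fits
each quadrant of the source block, then pick the sign from the top-left
versus bottom-right comparison. Plain SSE stands in for the
cpi->fn_ptr[].vf variance calls, and estimate_sign is an illustrative name,
not the patch's function.

#include <stdint.h>

static int64_t sse(const uint8_t *a, int a_stride,
                   const uint8_t *b, int b_stride, int w, int h) {
  int64_t sum = 0;
  int i, j;
  for (i = 0; i < h; ++i)
    for (j = 0; j < w; ++j) {
      const int d = a[i * a_stride + j] - b[i * b_stride + j];
      sum += d * d;
    }
  return sum;
}

/* Returns the estimated interinter wedge sign (0 or 1). */
static int estimate_sign(const uint8_t *src, int src_stride,
                         const uint8_t *pred0, int stride0,
                         const uint8_t *pred1, int stride1,
                         int bw, int bh) {
  /* Quadrant order: top-left, top-right, bottom-left, bottom-right. */
  int64_t esq0[4], esq1[4], tl, br;
  const int hw = bw / 2, hh = bh / 2;
  const int off[4][2] = { {0, 0}, {0, hw}, {hh, 0}, {hh, hw} };
  int q;
  for (q = 0; q < 4; ++q) {
    const int r = off[q][0], c = off[q][1];
    esq0[q] = sse(src + r * src_stride + c, src_stride,
                  pred0 + r * stride0 + c, stride0, hw, hh);
    esq1[q] = sse(src + r * src_stride + c, src_stride,
                  pred1 + r * stride1 + c, stride1, hw, hh);
  }
  /* If pred1 fits the top-left region better and pred0 the bottom-right,
     tl and br are both positive and sign 1 is chosen, mirroring the
     (tl + br > 0) test in the patch. */
  tl = (esq0[0] + esq0[1] + esq0[2]) - (esq1[0] + esq1[1] + esq1[2]);
  br = (esq1[3] + esq1[1] + esq1[2]) - (esq0[3] + esq0[1] + esq0[2]);
  return (tl + br > 0);
}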