From: Debargha Mukherjee <debargha@google.com>
Date: Fri, 30 Oct 2015 16:19:36 +0000 (-0700)
Subject: New interpolation experiment
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=85514c40ae603d1a5ccc9843f49329d0123d2501;p=libvpx

New interpolation experiment

Adds a new interpolation experiment.

Improves entropy coding to send the filter type only if
the motion vectors have subpel components.
Adds one new 8-tap smooth filter, and tweaks the others.

derflr: +0.695%
hevcmr: +0.305%

About 5% encode slowdown. No visible impact for decoding.

Also makes the interpolation framework flexible enough to support
both strictly interpolating filters and non-interpolating
filters that filter integer offsets. This is mainly for
further experimentation, and if not found useful the code will
be removed.

Change-Id: I8db9cde56ca916be771fe54a130d608bf10786e6
---

diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index cd3646b48..c4dce6020 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -38,6 +38,13 @@ typedef enum {
   FRAME_TYPES,
 } FRAME_TYPE;
 
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+#define IsInterpolatingFilter(filter) \
+    (vp10_filter_kernels[filter][0][SUBPEL_TAPS / 2 - 1] == 128)
+#else
+#define IsInterpolatingFilter(filter)  (1)
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+
 static INLINE int is_inter_mode(PREDICTION_MODE mode) {
   return mode >= NEARESTMV && mode <= NEWMV;
 }
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index de3de2210..f2502b9db 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -752,6 +752,16 @@ static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = {
   192, 128, 64
 };
 
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
+                                                    [SWITCHABLE_FILTERS - 1] = {
+  { 235, 192, 128},
+  { 36, 243, 208},
+  { 34, 16, 128},
+  { 36, 243, 48},
+  { 149, 160, 128},
+};
+#else
 static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
                                                     [SWITCHABLE_FILTERS - 1] = {
   { 235, 162, },
@@ -759,6 +769,7 @@ static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
   { 34, 3, },
   { 149, 144, },
 };
+#endif
 
 #if CONFIG_EXT_TX
 const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER]
@@ -987,11 +998,20 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
 #endif  // CONFIG_EXT_INTRA
 }
 
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+const vpx_tree_index vp10_switchable_interp_tree
+[TREE_SIZE(SWITCHABLE_FILTERS)] = {
+  -EIGHTTAP, 2,
+  4, -EIGHTTAP_SHARP,
+  -EIGHTTAP_SMOOTH, -EIGHTTAP_SMOOTH2,
+};
+#else
 const vpx_tree_index vp10_switchable_interp_tree
 [TREE_SIZE(SWITCHABLE_FILTERS)] = {
   -EIGHTTAP, 2,
   -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
 };
+#endif  // CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
 
 void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) {
   int i, j;
diff --git a/vp10/common/filter.c b/vp10/common/filter.c
index dda279f13..a9225b66e 100644
--- a/vp10/common/filter.c
+++ b/vp10/common/filter.c
@@ -32,9 +32,28 @@ DECLARE_ALIGNED(256, static const InterpKernel,
   { 0, 0, 0,   8, 120, 0, 0, 0 }
 };
 
-// Lagrangian interpolation filter
 DECLARE_ALIGNED(256, static const InterpKernel,
                 sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+#if CONFIG_EXT_INTERP
+  // intfilt 0.575
+  {0,   0,   0, 128,   0,   0,   0, 0},
+  {0,   1,  -5, 126,   8,  -3,   1, 0},
+  {-1,   3, -10, 123,  18,  -6,   2, -1},
+  {-1,   4, -14, 118,  27,  -9,   3, 0},
+  {-1,   5, -16, 112,  37, -12,   4, -1},
+  {-1,   5, -18, 105,  48, -14,   4, -1},
+  {-1,   6, -19,  97,  58, -17,   5, -1},
+  {-1,   6, -20,  88,  68, -18,   6, -1},
+  {-1,   6, -19,  78,  78, -19,   6, -1},
+  {-1,   6, -18,  68,  88, -20,   6, -1},
+  {-1,   5, -17,  58,  97, -19,   6, -1},
+  {-1,   4, -14,  48, 105, -18,   5, -1},
+  {-1,   4, -12,  37, 112, -16,   5, -1},
+  {0,   3,  -9,  27, 118, -14,   4, -1},
+  {-1,   2,  -6,  18, 123, -10,   3, -1},
+  {0,   1,  -3,   8, 126,  -5,   1, 0},
+#else
+  // Lagrangian interpolation filter
   { 0,   0,   0, 128,   0,   0,   0,  0},
   { 0,   1,  -5, 126,   8,  -3,   1,  0},
   { -1,   3, -10, 122,  18,  -6,   2,  0},
@@ -51,11 +70,31 @@ DECLARE_ALIGNED(256, static const InterpKernel,
   { -1,   3,  -9,  27, 118, -13,   4, -1},
   { 0,   2,  -6,  18, 122, -10,   3, -1},
   { 0,   1,  -3,   8, 126,  -5,   1,  0}
+#endif  // CONFIG_EXT_INTERP
 };
 
-// DCT based filter
 DECLARE_ALIGNED(256, static const InterpKernel,
-                sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
+                sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
+#if CONFIG_EXT_INTERP
+  // intfilt 0.8
+  {0,   0,   0, 128,   0,   0,   0, 0},
+  {-1,   2,  -6, 127,   9,  -4,   2, -1},
+  {-2,   5, -12, 124,  18,  -7,   4, -2},
+  {-2,   7, -16, 119,  28, -11,   5, -2},
+  {-3,   8, -19, 114,  38, -14,   7, -3},
+  {-3,   9, -22, 107,  49, -17,   8, -3},
+  {-4,  10, -23,  99,  60, -20,  10, -4},
+  {-4,  11, -23,  90,  70, -22,  10, -4},
+  {-4,  11, -23,  80,  80, -23,  11, -4},
+  {-4,  10, -22,  70,  90, -23,  11, -4},
+  {-4,  10, -20,  60,  99, -23,  10, -4},
+  {-3,   8, -17,  49, 107, -22,   9, -3},
+  {-3,   7, -14,  38, 114, -19,   8, -3},
+  {-2,   5, -11,  28, 119, -16,   7, -2},
+  {-2,   4,  -7,  18, 124, -12,   5, -2},
+  {-1,   2,  -4,   9, 127,  -6,   2, -1},
+#else
+  // DCT based filter
   {0,   0,   0, 128,   0,   0,   0, 0},
   {-1,   3,  -7, 127,   8,  -3,   1, 0},
   {-2,   5, -13, 125,  17,  -6,   3, -1},
@@ -72,11 +111,58 @@ DECLARE_ALIGNED(256, static const InterpKernel,
   {-2,   5, -10,  27, 121, -17,   7, -3},
   {-1,   3,  -6,  17, 125, -13,   5, -2},
   {0,   1,  -3,   8, 127,  -7,   3, -1}
+#endif  // CONFIG_EXT_INTERP
 };
 
-// freqmultiplier = 0.5
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+
 DECLARE_ALIGNED(256, static const InterpKernel,
-                sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
+                sub_pel_filters_8smooth2[SUBPEL_SHIFTS]) = {
+// freqmultiplier = 0.35
+  {0,  0,  0, 128,  0,  0,  0,  0},
+  {-1,  8, 31, 47, 34, 10,  0, -1},
+  {-1,  7, 29, 46, 36, 12,  0, -1},
+  {-1,  6, 28, 46, 37, 13,  0, -1},
+  {-1,  5, 26, 46, 38, 14,  1, -1},
+  {-1,  4, 25, 45, 39, 16,  1, -1},
+  {-1,  4, 23, 44, 41, 17,  1, -1},
+  {-1,  3, 21, 44, 42, 18,  2, -1},
+  {-1,  2, 20, 43, 43, 20,  2, -1},
+  {-1,  2, 18, 42, 44, 21,  3, -1},
+  {-1,  1, 17, 41, 44, 23,  4, -1},
+  {-1,  1, 16, 39, 45, 25,  4, -1},
+  {-1,  1, 14, 38, 46, 26,  5, -1},
+  {-1,  0, 13, 37, 46, 28,  6, -1},
+  {-1,  0, 12, 36, 46, 29,  7, -1},
+  {-1,  0, 10, 34, 47, 31,  8, -1},
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+                sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
+// freqmultiplier = 0.75
+  {0,  0,  0, 128,  0,  0,  0,  0},
+  {2, -10,  19,  95,  31, -11,   2, 0},
+  {2,  -9,  14,  94,  37, -12,   2, 0},
+  {2,  -8,   9,  92,  43, -12,   1, 1},
+  {2,  -7,   5,  90,  49, -12,   1, 0},
+  {2,  -5,   1,  86,  55, -12,   0, 1},
+  {1,  -4,  -2,  82,  61, -11,   0, 1},
+  {1, -3, -5, 77, 67, -9, -1, 1},
+  {1, -2, -7, 72, 72, -7, -2, 1},
+  {1, -1, -9, 67, 77, -5, -3, 1},
+  {1,   0, -11,  61,  82,  -2,  -4, 1},
+  {1,   0, -12,  55,  86,   1,  -5, 2},
+  {0,   1, -12,  49,  90,   5,  -7, 2},
+  {1,   1, -12,  43,  92,   9,  -8, 2},
+  {0,   2, -12,  37,  94,  14,  -9, 2},
+  {0,   2, -11,  31,  95,  19, -10, 2},
+};
+
+#else
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+                sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
+// freqmultiplier = 0.5
   { 0,  0,  0, 128,  0,  0,  0,  0},
   {-3, -1, 32,  64, 38,  1, -3,  0},
   {-2, -2, 29,  63, 41,  2, -3,  0},
@@ -95,10 +181,14 @@ DECLARE_ALIGNED(256, static const InterpKernel,
   { 0, -3,  1,  38, 64, 32, -1, -3}
 };
 
+#endif  // CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
 
-const InterpKernel *vp10_filter_kernels[4] = {
+const InterpKernel *vp10_filter_kernels[SWITCHABLE_FILTERS + 1] = {
   sub_pel_filters_8,
-  sub_pel_filters_8lp,
-  sub_pel_filters_8s,
+  sub_pel_filters_8smooth,
+  sub_pel_filters_8sharp,
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+  sub_pel_filters_8smooth2,
+#endif
   bilinear_filters
 };
diff --git a/vp10/common/filter.h b/vp10/common/filter.h
index 826cd0386..de26b76f0 100644
--- a/vp10/common/filter.h
+++ b/vp10/common/filter.h
@@ -24,16 +24,24 @@ extern "C" {
 #define EIGHTTAP            0
 #define EIGHTTAP_SMOOTH     1
 #define EIGHTTAP_SHARP      2
+
+#if CONFIG_EXT_INTERP
+#define SUPPORT_NONINTERPOLATING_FILTERS 0  /* turn it on for experimentation */
+#define EIGHTTAP_SMOOTH2    3
+#define SWITCHABLE_FILTERS  4 /* Number of switchable filters */
+#else
 #define SWITCHABLE_FILTERS  3 /* Number of switchable filters */
-#define BILINEAR            3
+#endif  // CONFIG_EXT_INTERP
 // The codec can operate in four possible inter prediction filter mode:
 // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
+
+#define BILINEAR            (SWITCHABLE_FILTERS)
+#define SWITCHABLE          (SWITCHABLE_FILTERS + 1)  /* the last one */
 #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
-#define SWITCHABLE 4 /* should be the last one */
 
 typedef uint8_t INTERP_FILTER;
 
-extern const InterpKernel *vp10_filter_kernels[4];
+extern const InterpKernel *vp10_filter_kernels[SWITCHABLE_FILTERS + 1];
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp10/common/mv.h b/vp10/common/mv.h
index b4971a567..4cc263820 100644
--- a/vp10/common/mv.h
+++ b/vp10/common/mv.h
@@ -48,6 +48,9 @@ static INLINE void clamp_mv(MV *mv, int min_col, int max_col,
   mv->row = clamp(mv->row, min_row, max_row);
 }
 
+static INLINE int mv_has_subpel(const MV *mv) {
+  return (mv->row & 0x0F) || (mv->col & 0x0F);
+}
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index 1ef92484e..3b83c2a3f 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -20,6 +20,7 @@
 #include "vp10/common/entropymv.h"
 #include "vp10/common/entropy.h"
 #include "vp10/common/entropymode.h"
+#include "vp10/common/mv.h"
 #include "vp10/common/frame_buffers.h"
 #include "vp10/common/quant_common.h"
 #include "vp10/common/tile_common.h"
@@ -531,6 +532,54 @@ static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
 }
 #endif
 
+#if CONFIG_EXT_INTERP
+static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) {
+  MODE_INFO *const mi = xd->mi[0];
+  MB_MODE_INFO *const mbmi = &mi->mbmi;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  const int is_compound = has_second_ref(mbmi);
+  int intpel_mv;
+
+#if SUPPORT_NONINTERPOLATING_FILTERS
+  // TODO(debargha): This is currently only for experimentation
+  // with non-interpolating filters. Remove later.
+  // If any of the filters are non-interpolating, then indicate the
+  // interpolation filter always.
+  int i;
+  for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+    if (!IsInterpolatingFilter(i)) return 1;
+  }
+#endif
+
+  // For scaled references, interpolation filter is indicated all the time.
+  if (vp10_is_scaled(&xd->block_refs[0]->sf))
+    return 1;
+  if (is_compound && vp10_is_scaled(&xd->block_refs[1]->sf))
+    return 1;
+
+  if (bsize < BLOCK_8X8) {
+    intpel_mv =
+        !mv_has_subpel(&mi->bmi[0].as_mv[0].as_mv) &&
+        !mv_has_subpel(&mi->bmi[1].as_mv[0].as_mv) &&
+        !mv_has_subpel(&mi->bmi[2].as_mv[0].as_mv) &&
+        !mv_has_subpel(&mi->bmi[3].as_mv[0].as_mv);
+    if (is_compound && intpel_mv) {
+      intpel_mv &=
+          !mv_has_subpel(&mi->bmi[0].as_mv[1].as_mv) &&
+          !mv_has_subpel(&mi->bmi[1].as_mv[1].as_mv) &&
+          !mv_has_subpel(&mi->bmi[2].as_mv[1].as_mv) &&
+          !mv_has_subpel(&mi->bmi[3].as_mv[1].as_mv);
+    }
+  } else {
+    intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
+    if (is_compound && intpel_mv) {
+      intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
+    }
+  }
+  return !intpel_mv;
+}
+#endif  // CONFIG_EXT_INTERP
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index fdcb9673c..88c7569f6 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -64,9 +64,9 @@ void vp10_build_inter_predictor(const uint8_t *src, int src_stride,
 }
 
 void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
-                                   int bw, int bh,
-                                   int x, int y, int w, int h,
-                                   int mi_x, int mi_y) {
+                            int bw, int bh,
+                            int x, int y, int w, int h,
+                            int mi_x, int mi_y) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const MODE_INFO *mi = xd->mi[0];
   const int is_compound = has_second_ref(&mi->mbmi);
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index 5678f473f..bcfa3f608 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -28,9 +28,22 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
                                    int w, int h, int ref,
                                    const InterpKernel *kernel,
                                    int xs, int ys) {
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+  if (kernel[0][SUBPEL_TAPS / 2 - 1] == 128) {
+    // Interpolating filter
+    sf->predict[subpel_x != 0][subpel_y != 0][ref](
+        src, src_stride, dst, dst_stride,
+        kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+  } else {
+    sf->predict_ni[subpel_x != 0][subpel_y != 0][ref](
+        src, src_stride, dst, dst_stride,
+        kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+  }
+#else
   sf->predict[subpel_x != 0][subpel_y != 0][ref](
       src, src_stride, dst, dst_stride,
       kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -42,9 +55,22 @@ static INLINE void high_inter_predictor(const uint8_t *src, int src_stride,
                                         int w, int h, int ref,
                                         const InterpKernel *kernel,
                                         int xs, int ys, int bd) {
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+  if (kernel[0][SUBPEL_TAPS / 2 - 1] == 128) {
+    // Interpolating filter
+    sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+        src, src_stride, dst, dst_stride,
+        kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+  } else {
+    sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref](
+        src, src_stride, dst, dst_stride,
+        kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+  }
+#else
   sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
       src, src_stride, dst, dst_stride,
       kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
@@ -192,7 +218,6 @@ void vp10_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
 void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx,
                           const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                           const struct scale_factors *sf);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp10/common/scale.c b/vp10/common/scale.c
index ce6062c19..65e14a99f 100644
--- a/vp10/common/scale.c
+++ b/vp10/common/scale.c
@@ -46,15 +46,15 @@ MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) {
 
 #if CONFIG_VP9_HIGHBITDEPTH
 void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
-                                       int other_w, int other_h,
-                                       int this_w, int this_h,
-                                       int use_highbd) {
+                                        int other_w, int other_h,
+                                        int this_w, int this_h,
+                                        int use_highbd) {
 #else
 void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
-                                       int other_w, int other_h,
-                                       int this_w, int this_h) {
+                                        int other_w, int other_h,
+                                        int this_w, int this_h) {
 #endif
-  if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
+    if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
     sf->x_scale_fp = REF_INVALID_SCALE;
     sf->y_scale_fp = REF_INVALID_SCALE;
     return;
@@ -79,6 +79,16 @@ void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
   // applied in one direction only, and not at all for 0,0, seems to give the
   // best quality, but it may be worth trying an additional mode that does
   // do the filtering on full-pel.
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+  sf->predict_ni[0][0][0] = vpx_convolve8_c;
+  sf->predict_ni[0][0][1] = vpx_convolve8_avg_c;
+  sf->predict_ni[0][1][0] = vpx_convolve8_c;
+  sf->predict_ni[0][1][1] = vpx_convolve8_avg_c;
+  sf->predict_ni[1][0][0] = vpx_convolve8_c;
+  sf->predict_ni[1][0][1] = vpx_convolve8_avg_c;
+  sf->predict_ni[1][1][0] = vpx_convolve8;
+  sf->predict_ni[1][1][1] = vpx_convolve8_avg;
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
   if (sf->x_step_q4 == 16) {
     if (sf->y_step_q4 == 16) {
       // No scaling in either direction.
@@ -119,8 +129,19 @@ void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
   // 2D subpel motion always gets filtered in both directions
   sf->predict[1][1][0] = vpx_convolve8;
   sf->predict[1][1][1] = vpx_convolve8_avg;
+
 #if CONFIG_VP9_HIGHBITDEPTH
   if (use_highbd) {
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+    sf->highbd_predict_ni[0][0][0] = vpx_highbd_convolve8_c;
+    sf->highbd_predict_ni[0][0][1] = vpx_highbd_convolve8_avg_c;
+    sf->highbd_predict_ni[0][1][0] = vpx_highbd_convolve8_c;
+    sf->highbd_predict_ni[0][1][1] = vpx_highbd_convolve8_avg_c;
+    sf->highbd_predict_ni[1][0][0] = vpx_highbd_convolve8_c;
+    sf->highbd_predict_ni[1][0][1] = vpx_highbd_convolve8_avg_c;
+    sf->highbd_predict_ni[1][1][0] = vpx_highbd_convolve8;
+    sf->highbd_predict_ni[1][1][1] = vpx_highbd_convolve8_avg;
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
     if (sf->x_step_q4 == 16) {
       if (sf->y_step_q4 == 16) {
         // No scaling in either direction.
@@ -162,5 +183,5 @@ void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
     sf->highbd_predict[1][1][0] = vpx_highbd_convolve8;
     sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg;
   }
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 }
diff --git a/vp10/common/scale.h b/vp10/common/scale.h
index 833f6c411..604b9d2d5 100644
--- a/vp10/common/scale.h
+++ b/vp10/common/scale.h
@@ -34,7 +34,15 @@ struct scale_factors {
   convolve_fn_t predict[2][2][2];  // horiz, vert, avg
 #if CONFIG_VP9_HIGHBITDEPTH
   highbd_convolve_fn_t highbd_predict[2][2][2];  // horiz, vert, avg
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+// Functions for non-interpolating filters (those that filter zero offsets)
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+  convolve_fn_t predict_ni[2][2][2];  // horiz, vert, avg
+#if CONFIG_VP9_HIGHBITDEPTH
+  highbd_convolve_fn_t highbd_predict_ni[2][2][2];  // horiz, vert, avg
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
 };
 
 MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
@@ -48,7 +56,7 @@ void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
 void vp10_setup_scale_factors_for_frame(struct scale_factors *sf,
                                        int other_w, int other_h,
                                        int this_w, int this_h);
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 
 static INLINE int vp10_is_valid_scale(const struct scale_factors *sf) {
   return sf->x_scale_fp != REF_INVALID_SCALE &&
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index d807268ef..c75d94a11 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -625,6 +625,9 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
   int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height,
       buf_stride, subpel_x, subpel_y;
   uint8_t *ref_frame, *buf_ptr;
+#if CONFIG_EXT_INTERP
+  const int i_filter = IsInterpolatingFilter(xd->mi[0]->mbmi.interp_filter);
+#endif  // CONFIG_EXT_INTERP
 
   // Get reference frame pointer, width and height.
   if (plane == 0) {
@@ -694,6 +697,9 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
   // Do border extension if there is motion or the
   // width/height is not a multiple of 8 pixels.
   if (is_scaled || scaled_mv.col || scaled_mv.row ||
+#if CONFIG_EXT_INTERP
+      !i_filter ||
+#endif
       (frame_width & 0x7) || (frame_height & 0x7)) {
     int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
 
@@ -701,13 +707,21 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
     int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
     int x_pad = 0, y_pad = 0;
 
-    if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) {
+    if (subpel_x ||
+#if CONFIG_EXT_INTERP
+        !i_filter ||
+#endif
+        (sf->x_step_q4 != SUBPEL_SHIFTS)) {
       x0 -= VP9_INTERP_EXTEND - 1;
       x1 += VP9_INTERP_EXTEND;
       x_pad = 1;
     }
 
-    if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) {
+    if (subpel_y ||
+#if CONFIG_EXT_INTERP
+        !i_filter ||
+#endif
+        (sf->y_step_q4 != SUBPEL_SHIFTS)) {
       y0 -= VP9_INTERP_EXTEND - 1;
       y1 += VP9_INTERP_EXTEND;
       y_pad = 1;
@@ -1296,7 +1310,8 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
 }
 
 static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) {
-  return vpx_rb_read_bit(rb) ? SWITCHABLE : vpx_rb_read_literal(rb, 2);
+  return vpx_rb_read_bit(rb) ?
+      SWITCHABLE : vpx_rb_read_literal(rb, 2 + CONFIG_EXT_INTERP);
 }
 
 static void setup_render_size(VP10_COMMON *cm,
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index a96c51221..cc0f3f09c 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -582,12 +582,17 @@ static INLINE INTERP_FILTER read_switchable_interp_filter(
     VP10_COMMON *const cm, MACROBLOCKD *const xd,
     vpx_reader *r) {
   const int ctx = vp10_get_pred_context_switchable_interp(xd);
-  const INTERP_FILTER type =
-      (INTERP_FILTER)vpx_read_tree(r, vp10_switchable_interp_tree,
-                                   cm->fc->switchable_interp_prob[ctx]);
   FRAME_COUNTS *counts = xd->counts;
+  INTERP_FILTER type;
+#if CONFIG_EXT_INTERP
+  if (!vp10_is_interp_needed(xd)) return EIGHTTAP;
+#endif
+  type = (INTERP_FILTER)vpx_read_tree(r, vp10_switchable_interp_tree,
+                                      cm->fc->switchable_interp_prob[ctx]);
   if (counts)
     ++counts->switchable_interp[ctx][type];
+  // printf("%d/%d -> %d, %d\n", cm->current_video_frame, cm->show_frame,
+  //        xd->mi[0]->mbmi.sb_type, xd->mi[0]->mbmi.interp_filter);
   return type;
 }
 
@@ -753,9 +758,11 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi,
     }
   }
 
+#if !CONFIG_EXT_INTERP
   mbmi->interp_filter = (cm->interp_filter == SWITCHABLE)
-                      ? read_switchable_interp_filter(cm, xd, r)
-                      : cm->interp_filter;
+                        ? read_switchable_interp_filter(cm, xd, r)
+                        : cm->interp_filter;
+#endif  // !CONFIG_EXT_INTERP
 
   if (bsize < BLOCK_8X8) {
     const int num_4x4_w = 1 << xd->bmode_blocks_wl;
@@ -804,6 +811,11 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi,
     xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv, nearestmv,
                                 nearestmv, nearmv, is_compound, allow_hp, r);
   }
+#if CONFIG_EXT_INTERP
+  mbmi->interp_filter = (cm->interp_filter == SWITCHABLE)
+                        ? read_switchable_interp_filter(cm, xd, r)
+                        : cm->interp_filter;
+#endif  // CONFIG_EXT_INTERP
 }
 
 static void read_inter_frame_mode_info(VP10Decoder *const pbi,
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 69e92f9a2..beb3414bf 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -38,8 +38,13 @@
 static const struct vp10_token intra_mode_encodings[INTRA_MODES] = {
   {0, 1}, {6, 3}, {28, 5}, {30, 5}, {58, 6}, {59, 6}, {126, 7}, {127, 7},
   {62, 6}, {2, 2}};
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
+  {{0, 1}, {4, 3}, {3, 2}, {5, 3}};
+#else
 static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
   {{0, 1}, {2, 2}, {3, 2}};
+#endif  // CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
 static const struct vp10_token partition_encodings[PARTITION_TYPES] =
   {{0, 1}, {2, 2}, {6, 3}, {7, 3}};
 static const struct vp10_token inter_mode_encodings[INTER_MODES] =
@@ -548,6 +553,28 @@ static void write_ext_intra_mode_info(const VP10_COMMON *const cm,
 }
 #endif  // CONFIG_EXT_INTRA
 
+static void write_switchable_interp_filter(VP10_COMP *cpi,
+                                           const MACROBLOCKD *xd,
+                                           vpx_writer *w) {
+  VP10_COMMON *const cm = &cpi->common;
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  if (cm->interp_filter == SWITCHABLE) {
+    const int ctx = vp10_get_pred_context_switchable_interp(xd);
+#if CONFIG_EXT_INTERP
+    if (!vp10_is_interp_needed(xd)) {
+      // if (mbmi->interp_filter != EIGHTTAP)
+      //   printf("Error [%d]\n", mbmi->sb_type);
+      assert(mbmi->interp_filter == EIGHTTAP);
+      return;
+    }
+#endif
+    vp10_write_token(w, vp10_switchable_interp_tree,
+                     cm->fc->switchable_interp_prob[ctx],
+                     &switchable_interp_encodings[mbmi->interp_filter]);
+    ++cpi->interp_filter_selected[0][mbmi->interp_filter];
+  }
+}
+
 static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
                                 vpx_writer *w) {
   VP10_COMMON *const cm = &cpi->common;
@@ -645,15 +672,9 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
       }
     }
 
-    if (cm->interp_filter == SWITCHABLE) {
-      const int ctx = vp10_get_pred_context_switchable_interp(xd);
-      vp10_write_token(w, vp10_switchable_interp_tree,
-                      cm->fc->switchable_interp_prob[ctx],
-                      &switchable_interp_encodings[mbmi->interp_filter]);
-      ++cpi->interp_filter_selected[0][mbmi->interp_filter];
-    } else {
-      assert(mbmi->interp_filter == cm->interp_filter);
-    }
+#if !CONFIG_EXT_INTERP
+    write_switchable_interp_filter(cpi, xd, w);
+#endif  // !CONFIG_EXT_INTERP
 
     if (bsize < BLOCK_8X8) {
       const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
@@ -680,6 +701,9 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
                         allow_hp);
       }
     }
+#if CONFIG_EXT_INTERP
+    write_switchable_interp_filter(cpi, xd, w);
+#endif  // CONFIG_EXT_INTERP
   }
 
 #if CONFIG_EXT_TX
@@ -1051,7 +1075,6 @@ static void update_coef_probs_common(vpx_writer* const bc, VP10_COMP *cpi,
         }
       }
 
-      // printf("Update %d %d, savings %d\n", update[0], update[1], savings);
       /* Is coef updated at all */
       if (update[1] == 0 || savings < 0) {
         vpx_write_bit(bc, 0);
@@ -1377,7 +1400,7 @@ static void write_interp_filter(INTERP_FILTER filter,
                                 struct vpx_write_bit_buffer *wb) {
   vpx_wb_write_bit(wb, filter == SWITCHABLE);
   if (filter != SWITCHABLE)
-    vpx_wb_write_literal(wb, filter, 2);
+    vpx_wb_write_literal(wb, filter, 2 + CONFIG_EXT_INTERP);
 }
 
 static void fix_interp_filter(VP10_COMMON *cm, FRAME_COUNTS *counts) {
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 95cddcc30..47fe58941 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -1069,7 +1069,11 @@ static void update_state(VP10_COMP *cpi, ThreadData *td,
     if (is_inter_block(mbmi)) {
       vp10_update_mv_count(td);
 
-      if (cm->interp_filter == SWITCHABLE) {
+      if (cm->interp_filter == SWITCHABLE
+#if CONFIG_EXT_INTERP
+          && vp10_is_interp_needed(xd)
+#endif
+          ) {
         const int ctx = vp10_get_pred_context_switchable_interp(xd);
         ++td->counts->switchable_interp[ctx][mbmi->interp_filter];
       }
@@ -2954,8 +2958,9 @@ void vp10_encode_frame(VP10_COMP *cpi) {
     else
       cm->reference_mode = REFERENCE_MODE_SELECT;
 
-    if (cm->interp_filter == SWITCHABLE)
+    if (cm->interp_filter == SWITCHABLE) {
       cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);
+    }
 
     encode_frame_internal(cpi);
 
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index a98085486..dae86a585 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -3261,7 +3261,6 @@ static void encode_with_recode_loop(VP10_COMP *cpi,
     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
       save_coding_context(cpi);
       vp10_pack_bitstream(cpi, dest, size);
-
       rc->projected_frame_size = (int)(*size) << 3;
       restore_coding_context(cpi);
 
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index 817eb0aed..d46bb6351 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -589,8 +589,11 @@ int vp10_get_switchable_rate(const VP10_COMP *cpi,
                              const MACROBLOCKD *const xd) {
   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const int ctx = vp10_get_pred_context_switchable_interp(xd);
+#if CONFIG_EXT_INTERP
+  if (!vp10_is_interp_needed(xd)) return 0;
+#endif  // CONFIG_EXT_INTERP
   return SWITCHABLE_INTERP_RATE_FACTOR *
-             cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+      cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
 }
 
 void vp10_set_rd_speed_thresholds(VP10_COMP *cpi) {
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 7309bf6b5..63b2a972e 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -2945,10 +2945,6 @@ static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
     x->e_mbd.plane[0].pre[1] = orig_pre[1];
 }
 
-static INLINE int mv_has_subpel(const MV *mv) {
-  return (mv->row & 0x0F) || (mv->col & 0x0F);
-}
-
 // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
 // TODO(aconverse): Find out if this is still productive then clean up or remove
 static int check_best_zero_mv(
@@ -3041,11 +3037,11 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
   // frame we must use a unit scaling factor during mode selection.
 #if CONFIG_VP9_HIGHBITDEPTH
   vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
-                                    cm->width, cm->height,
-                                    cm->use_highbitdepth);
+                                     cm->width, cm->height,
+                                     cm->use_highbitdepth);
 #else
   vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
-                                    cm->width, cm->height);
+                                     cm->width, cm->height);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
   // Allow joint search multiple times iteratively for each reference frame
@@ -4030,6 +4026,10 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
   if (cm->interp_filter != BILINEAR) {
     if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
       best_filter = EIGHTTAP;
+#if CONFIG_EXT_INTERP
+    } else if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
+      best_filter = EIGHTTAP;
+#endif  // CONFIG_EXT_INTERP
     } else if (best_filter == SWITCHABLE) {
       int newbest;
       int tmp_rate_sum = 0;
@@ -4045,7 +4045,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
         rs = vp10_get_switchable_rate(cpi, xd);
         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
 
-        if (i > 0 && intpel_mv) {
+        if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) {
           rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
           filter_cache[i] = rd;
           filter_cache[SWITCHABLE_FILTERS] =
@@ -4067,7 +4067,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
                (!i || best_needs_copy)) ||
               (cm->interp_filter != SWITCHABLE &&
                (cm->interp_filter == mbmi->interp_filter ||
-                (i == 0 && intpel_mv)))) {
+                (i == 0 && intpel_mv && IsInterpolatingFilter(i))))) {
             restore_dst_buf(xd, orig_dst, orig_dst_stride);
           } else {
             for (j = 0; j < MAX_MB_PLANE; j++) {
@@ -4087,7 +4087,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
             rd += rs_rd;
           *mask_filter = VPXMAX(*mask_filter, rd);
 
-          if (i == 0 && intpel_mv) {
+          if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) {
             tmp_rate_sum = rate_sum;
             tmp_dist_sum = dist_sum;
           }
@@ -4104,7 +4104,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
         if (newbest) {
           best_rd = rd;
           best_filter = mbmi->interp_filter;
-          if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
+          if (cm->interp_filter == SWITCHABLE && i &&
+              !(intpel_mv && IsInterpolatingFilter(i)))
             best_needs_copy = !best_needs_copy;
         }
 
@@ -4123,6 +4124,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
       restore_dst_buf(xd, orig_dst, orig_dst_stride);
     }
   }
+
   // Set the appropriate filter
   mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
       cm->interp_filter : best_filter;
@@ -4840,6 +4842,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
                                   single_newmv, single_inter_filter,
                                   single_skippable, &total_sse, best_rd,
                                   &mask_filter, filter_cache);
+
       if (this_rd == INT64_MAX)
         continue;
 
@@ -4864,6 +4867,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
 
         // Cost the skip mb case
         rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+
       } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
@@ -5175,6 +5179,9 @@ void vp10_rd_pick_inter_mode_sb_seg_skip(VP10_COMP *cpi,
   if (cm->interp_filter != BILINEAR) {
     best_filter = EIGHTTAP;
     if (cm->interp_filter == SWITCHABLE &&
+#if CONFIG_EXT_INTERP
+        vp10_is_interp_needed(xd) &&
+#endif  // CONFIG_EXT_INTERP
         x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
       int rs;
       int best_rs = INT_MAX;
@@ -5516,7 +5523,11 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
                                               (int) this_rd_thresh, seg_mvs,
                                               bsi, switchable_filter_index,
                                               mi_row, mi_col);
-
+#if CONFIG_EXT_INTERP
+            if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
+                mbmi->interp_filter != EIGHTTAP)  // invalid configuration
+              continue;
+#endif  // CONFIG_EXT_INTERP
             if (tmp_rd == INT64_MAX)
               continue;
             rs = vp10_get_switchable_rate(cpi, xd);
@@ -5570,15 +5581,30 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
 
       mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
                              tmp_best_filter : cm->interp_filter);
+
+
       if (!pred_exists) {
         // Handles the special case when a filter that is not in the
-        // switchable list (bilinear, 6-tap) is indicated at the frame level
+        // switchable list (bilinear) is indicated at the frame level
         tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
                                           &x->mbmi_ext->ref_mvs[ref_frame][0],
                                           second_ref, best_yrd, &rate, &rate_y,
                                           &distortion, &skippable, &total_sse,
                                           (int) this_rd_thresh, seg_mvs, bsi, 0,
                                           mi_row, mi_col);
+#if CONFIG_EXT_INTERP
+        if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
+            mbmi->interp_filter != EIGHTTAP) {
+          mbmi->interp_filter = EIGHTTAP;
+          tmp_rd = rd_pick_best_sub8x8_mode(
+              cpi, x,
+              &x->mbmi_ext->ref_mvs[ref_frame][0],
+              second_ref, best_yrd, &rate, &rate_y,
+              &distortion, &skippable, &total_sse,
+              (int) this_rd_thresh, seg_mvs, bsi, 0,
+              mi_row, mi_col);
+        }
+#endif  // CONFIG_EXT_INTERP
         if (tmp_rd == INT64_MAX)
           continue;
       } else {