]> granicus.if.org Git - libvpx/commitdiff
ext-intra experiment
author: hui su <huisu@google.com>
Wed, 7 Oct 2015 16:29:02 +0000 (09:29 -0700)
committer: hui su <huisu@google.com>
Wed, 4 Nov 2015 02:46:02 +0000 (18:46 -0800)
Currently there are two parts in this experiment: extra directional intra
prediction modes and the filter intra modes migrated from the nextgen branch.

Several macros are defined in "blockd.h" to control the experiment settings.
Setting "DR_ONLY" to 1 (default is 0) means we use only the directional
modes and skip the filter-intra modes; "EXT_INTRA_ANGLES" (default is 128)
defines the number of different angles we support; setting
"ANGLE_FAST_SEARCH" to 1 (default is 1) means we use a fast sub-optimal search
for the best prediction angle instead of an exhaustive search. The fast search
is about 6 times faster than the exhaustive search, while preserving about
60% of the coding gains.

With extra directional prediction modes (fast search), we observe the following
coding gains (numbers in parentheses are for the all-key-frame setting):
derflr +0.42%  (+1.79%)
hevclr +0.78%  (+2.19%)
hevcmr +1.20%  (+3.49%)
stdhd  +0.56%
Speed-wise, about 110% slower for key frames, and 30% slower overall.

The gains of the filter intra modes are mostly additive to the gains of the
directional modes. The overall coding gain of this experiment:
derflr +0.94%
hevclr +1.46%
hevcmr +1.94%
stdhd  +1.58%

Change-Id: Ida9ad00cdb33aff422d06eb42b4f4e5f25df8a2a

vp10/common/blockd.h
vp10/common/entropymode.c
vp10/common/entropymode.h
vp10/common/enums.h
vp10/common/reconintra.c
vp10/common/thread_common.c
vp10/decoder/decodemv.c
vp10/encoder/bitstream.c
vp10/encoder/encodeframe.c
vp10/encoder/rdopt.c

index 529fcdbf1aa74e5f3ecc211b19f51cd2b864cbdd..cd3646b48357034fa333706b657fc81bbf406fb1 100644 (file)
@@ -77,6 +77,15 @@ typedef struct {
   uint8_t palette_first_color_idx[2];
 } PALETTE_MODE_INFO;
 
+#if CONFIG_EXT_INTRA
+// Per-block ext-intra state. Every array is indexed by plane type.
+typedef struct {
+  // 1: an ext intra mode is used; 0: otherwise.
+  uint8_t use_ext_intra_mode[PLANE_TYPES];
+  // The ext-intra mode in use (a filter-intra mode or EXT_DR_PRED).
+  EXT_INTRA_MODE ext_intra_mode[PLANE_TYPES];
+  // Angle index for directional prediction; prediction_angle_map() converts
+  // it to an angle in degrees.
+  uint8_t ext_intra_angle[PLANE_TYPES];
+} EXT_INTRA_MODE_INFO;
+#endif  // CONFIG_EXT_INTRA
+
 // This structure now relates to 8x8 block regions.
 typedef struct {
   // Common for both INTER and INTRA blocks
@@ -106,6 +115,10 @@ typedef struct {
   TX_TYPE tx_type;
 #endif  // CONFIG_EXT_TX
 
+#if CONFIG_EXT_INTRA
+  EXT_INTRA_MODE_INFO ext_intra_mode_info;
+#endif  // CONFIG_EXT_INTRA
+
   // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
   int_mv mv[2];
 } MB_MODE_INFO;
@@ -307,11 +320,72 @@ static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
 };
 #endif  // CONFIG_EXT_TX
 
+#if CONFIG_EXT_INTRA
+// 0: use both directional and filter modes; 1: use directional modes only.
+#define DR_ONLY 0
+// 0: use slow exhaustive search; 1: use fast sub-optimal search.
+#define ANGLE_FAST_SEARCH 1
+// A parameter to adjust early termination in the fast search of angles.
+#define RD_ADJUSTER 1.4
+// Number of different angles that are supported
+#define EXT_INTRA_ANGLES 128
+
+// Transform type returned by get_tx_type() for each filter-intra mode.
+// Indexed by EXT_INTRA_MODE for the modes up to and including FILTER_TM_PRED.
+static const TX_TYPE filter_intra_mode_to_tx_type_lookup[FILTER_INTRA_MODES] = {
+  DCT_DCT,    // FILTER_DC
+  ADST_DCT,   // FILTER_V
+  DCT_ADST,   // FILTER_H
+  DCT_DCT,    // FILTER_D45
+  ADST_ADST,  // FILTER_D135
+  ADST_DCT,   // FILTER_D117
+  DCT_ADST,   // FILTER_D153
+  DCT_ADST,   // FILTER_D207
+  ADST_DCT,   // FILTER_D63
+  ADST_ADST,  // FILTER_TM
+};
+
+// Converts an angle index into the prediction angle in degrees.
+// The index is in [0, EXT_INTRA_ANGLES); each index step is 2 degrees,
+// starting at 10. With EXT_INTRA_ANGLES == 128 the result is an even angle
+// in [10, 264], which lies strictly inside (0, 270).
+static INLINE int prediction_angle_map(int angle_in) {
+  const int angle_base = 10;
+  const int angle_step = 2;
+  return angle_base + angle_step * angle_in;
+}
+#endif  // CONFIG_EXT_INTRA
+
 static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
                                   const MACROBLOCKD *xd,
                                   int block_idx, TX_SIZE tx_size) {
   const MODE_INFO *const mi = xd->mi[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
+#if CONFIG_EXT_INTRA
+  const int use_ext_intra_mode_info =
+      mbmi->ext_intra_mode_info.use_ext_intra_mode[plane_type];
+  const EXT_INTRA_MODE ext_intra_mode =
+      mbmi->ext_intra_mode_info.ext_intra_mode[plane_type];
+
+  if (!is_inter_block(mbmi) && use_ext_intra_mode_info) {
+    if (!xd->lossless[mbmi->segment_id] && tx_size < TX_32X32
+#if CONFIG_EXT_TX
+        && !(mbmi->sb_type >= BLOCK_8X8 && plane_type == PLANE_TYPE_Y)
+#endif  // CONFIG_EXT_TX
+    ) {
+      if (ext_intra_mode > FILTER_TM_PRED) {
+        int angle = mbmi->ext_intra_mode_info.ext_intra_angle[plane_type];
+        angle = prediction_angle_map(angle);
+        assert(angle > 0 && angle < 270);
+        if (angle == 135)
+          return ADST_ADST;
+        else if (angle < 45 || angle > 225)
+          return DCT_DCT;
+        else if (angle < 135)
+          return ADST_DCT;
+        else
+          return DCT_ADST;
+      } else {
+        return filter_intra_mode_to_tx_type_lookup[ext_intra_mode];
+      }
+    }
+  }
+#endif  // CONFIG_EXT_INTRA
 
 #if CONFIG_EXT_TX
 #if USE_IDTX_FOR_32X32
@@ -328,7 +402,6 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
       // UV Inter only
       return (mbmi->tx_type == IDTX && tx_size == TX_32X32 ?
               DCT_DCT : mbmi->tx_type);
-      // return mbmi->tx_type;
   }
 
   // Sub8x8-Inter/Intra OR UV-Intra
index af15b2d2dc7905c0098080be3f6e06825bb3c1b9..de3de22106ee84cd20d1106328c5eb1986b1d4e8 100644 (file)
@@ -955,6 +955,10 @@ static const struct segmentation_probs default_seg_probs = {
 };
 #endif
 
+#if CONFIG_EXT_INTRA
+// Default values for FRAME_CONTEXT::ext_intra_probs, one entry per plane
+// type. Sized with PLANE_TYPES so it always matches the destination array
+// it is copied into by init_mode_probs().
+static const vpx_prob default_ext_intra_probs[PLANE_TYPES] = {200, 200};
+#endif  // CONFIG_EXT_INTRA
+
 static void init_mode_probs(FRAME_CONTEXT *fc) {
   vp10_copy(fc->uv_mode_prob, default_uv_probs);
   vp10_copy(fc->y_mode_prob, default_if_y_probs);
@@ -978,6 +982,9 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
   vp10_copy(fc->seg.tree_probs, default_seg_probs.tree_probs);
   vp10_copy(fc->seg.pred_probs, default_seg_probs.pred_probs);
 #endif
+#if CONFIG_EXT_INTRA
+  vp10_copy(fc->ext_intra_probs, default_ext_intra_probs);
+#endif  // CONFIG_EXT_INTRA
 }
 
 const vpx_tree_index vp10_switchable_interp_tree
@@ -1119,6 +1126,12 @@ void vp10_adapt_intra_frame_probs(VP10_COMMON *cm) {
     vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i],
                          counts->partition[i], fc->partition_prob[i]);
 #endif
+#if CONFIG_EXT_INTRA
+  for (i = 0; i < PLANE_TYPES; ++i) {
+    fc->ext_intra_probs[i] = mode_mv_merge_probs(
+              pre_fc->ext_intra_probs[i], counts->ext_intra[i]);
+  }
+#endif  // CONFIG_EXT_INTRA
 }
 
 static void set_default_lf_deltas(struct loopfilter *lf) {
index cb7807d45895ced20f54546b858bfecf6accfed1..00eacc533f567d6137482440d344fcb09bfd30a4 100644 (file)
@@ -32,6 +32,10 @@ extern "C" {
 #define PALETTE_BLOCK_SIZES (BLOCK_64X64 - BLOCK_8X8 + 1)
 #define PALETTE_Y_MODE_CONTEXTS 3
 
+#if CONFIG_EXT_INTRA
+// Probability that an ext_intra mode is a directional prediction mode
+#define DR_EXT_INTRA_PROB 144
+#endif  // CONFIG_EXT_INTRA
 
 struct VP10Common;
 
@@ -72,6 +76,7 @@ typedef struct frame_contexts {
 #endif
   vpx_prob skip_probs[SKIP_CONTEXTS];
   nmv_context nmvc;
+  int initialized;
 #if CONFIG_EXT_TX
   vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1];
   vpx_prob intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
@@ -80,7 +85,9 @@ typedef struct frame_contexts {
 #if CONFIG_MISC_FIXES
   struct segmentation_probs seg;
 #endif
-  int initialized;
+#if CONFIG_EXT_INTRA
+  vpx_prob ext_intra_probs[PLANE_TYPES];
+#endif  // CONFIG_EXT_INTRA
 } FRAME_CONTEXT;
 
 typedef struct FRAME_COUNTS {
@@ -112,6 +119,9 @@ typedef struct FRAME_COUNTS {
 #if CONFIG_MISC_FIXES
   struct seg_counts seg;
 #endif
+#if CONFIG_EXT_INTRA
+  unsigned int ext_intra[PLANE_TYPES][2];
+#endif  // CONFIG_EXT_INTRA
 } FRAME_COUNTS;
 
 extern const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
index 53db356bd89ccd582e50251fb72c385707e15ef7..56e9c90a5de0a4a9d5135d0c20b8fcc054184ca2 100644 (file)
@@ -156,6 +156,25 @@ typedef enum {
   PALETTE_COLORS
 } PALETTE_COLOR;
 
+#if CONFIG_EXT_INTRA
+// Ext-intra prediction modes: the filter-intra modes first, then the
+// directional mode. Values up to FILTER_TM_PRED index the filter-intra tables.
+typedef enum {
+  FILTER_DC_PRED,
+  FILTER_V_PRED,
+  FILTER_H_PRED,
+  FILTER_D45_PRED,
+  FILTER_D135_PRED,
+  FILTER_D117_PRED,
+  FILTER_D153_PRED,
+  FILTER_D207_PRED,
+  FILTER_D63_PRED,
+  FILTER_TM_PRED,   // Last filter-intra mode (see FILTER_INTRA_MODES).
+  EXT_DR_PRED,      // Directional prediction; angle kept in ext_intra_angle.
+  EXT_INTRA_MODES,  // Number of ext-intra modes.
+} EXT_INTRA_MODE;
+
+#define FILTER_INTRA_MODES (FILTER_TM_PRED + 1)
+#endif  // CONFIG_EXT_INTRA
+
 #define DC_PRED    0       // Average of above and left pixels
 #define V_PRED     1       // Vertical
 #define H_PRED     2       // Horizontal
index 28665c61ae589c21781bcf1a52b2b1aa2e6e5320..09aee1ae9516dd2a23fc678c3b1ee63e5f2ebc7c 100644 (file)
@@ -8,6 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <math.h>
+
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 
@@ -268,6 +270,630 @@ static inline void memset16(uint16_t *dst, int val, int n) {
 }
 #endif
 
+#if CONFIG_EXT_INTRA
+#define PI 3.14159265
+#define FILTER_INTRA_PREC_BITS 10
+#define FILTER_INTRA_ROUND_VAL 511
+
+// Neighbouring edges (NEED_* flags) required by each filter-intra mode.
+// Indexed by EXT_INTRA_MODE for the modes up to and including FILTER_TM_PRED.
+// The two variants differ only in NEED_ABOVE versus NEED_ABOVERIGHT.
+#if CONFIG_MISC_FIXES
+static const uint8_t ext_intra_extend_modes[FILTER_INTRA_MODES] = {
+  NEED_LEFT | NEED_ABOVE,      // FILTER_DC
+  NEED_LEFT | NEED_ABOVE,      // FILTER_V
+  NEED_LEFT | NEED_ABOVE,      // FILTER_H
+  NEED_LEFT | NEED_ABOVE,      // FILTER_D45
+  NEED_LEFT | NEED_ABOVE,      // FILTER_D135
+  NEED_LEFT | NEED_ABOVE,      // FILTER_D117
+  NEED_LEFT | NEED_ABOVE,      // FILTER_D153
+  NEED_LEFT | NEED_ABOVE,      // FILTER_D207
+  NEED_LEFT | NEED_ABOVE,      // FILTER_D63
+  NEED_LEFT | NEED_ABOVE,      // FILTER_TM
+};
+#else
+static const uint8_t ext_intra_extend_modes[FILTER_INTRA_MODES] = {
+  NEED_LEFT | NEED_ABOVERIGHT,  // FILTER_DC
+  NEED_LEFT | NEED_ABOVERIGHT,  // FILTER_V
+  NEED_LEFT | NEED_ABOVERIGHT,  // FILTER_H
+  NEED_LEFT | NEED_ABOVERIGHT,  // FILTER_D45
+  NEED_LEFT | NEED_ABOVERIGHT,  // FILTER_D135
+  NEED_LEFT | NEED_ABOVERIGHT,  // FILTER_D117
+  NEED_LEFT | NEED_ABOVERIGHT,  // FILTER_D153
+  NEED_LEFT | NEED_ABOVERIGHT,  // FILTER_D207
+  NEED_LEFT | NEED_ABOVERIGHT,  // FILTER_D63
+  NEED_LEFT | NEED_ABOVERIGHT,  // FILTER_TM
+};
+#endif  // CONFIG_MISC_FIXES
+
+// Directional prediction, zone 1: 0 < angle < 90.
+// Only the above row is read, up to index 2 * bs - 1. dx is negative; -dx is
+// the horizontal shift per row in 1/256-sample units.
+static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs,
+                             const uint8_t *above, const uint8_t *left,
+                             int dx, int dy) {
+  const int last = 2 * bs - 1;  // Index of the last above sample used.
+  int row, col;
+
+  (void)left;
+  (void)dy;
+  assert(dy == 1);
+  assert(dx < 0);
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    const int offset = -(row + 1) * dx;
+    for (col = 0; col < bs; ++col) {
+      const int pos = col * 256 + offset;
+      const int idx = pos >> 8;
+      const int frac = pos - idx * 256;
+      if (idx >= last) {
+        // Past the end of the above row: replicate its last sample.
+        dst[col] = above[last];
+        continue;
+      }
+      dst[col] = clip_pixel(
+          (above[idx] * (256 - frac) + above[idx + 1] * frac + 128) >> 8);
+    }
+  }
+}
+
+// Directional prediction, zone 2: 90 < angle < 180
+// Each pixel is first projected onto the above row. If that projection lands
+// more than one sample to the left of above[0], the pixel is projected onto
+// the left column instead. dx and dy are positive steps in 1/256-sample
+// units. The result is a linear blend of the two nearest edge samples.
+static void dr_prediction_z2(uint8_t *dst, ptrdiff_t stride, int bs,
+                             const uint8_t *above, const uint8_t *left,
+                             int dx, int dy) {
+  int r, c, x, y, val1, val2, shift, val, base;
+
+  assert(dx > 0);
+  assert(dy > 0);
+
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      y = r + 1;
+      // Position along the above row, in 1/256-sample units.
+      x = c * 256 - y * dx;
+      if (x >= -256) {
+        if (x <= 0) {
+          // Between the top-left sample above[-1] and above[0].
+          val1 = above[-1];
+          val2 = above[0];
+          shift = x + 256;
+        } else {
+          base = x >> 8;
+          val1 = above[base];
+          val2 = above[base + 1];
+          shift = x - base * 256;
+        }
+      } else {
+        // Use the left column; x and y are reused for this projection.
+        x = c + 1;
+        y = r * 256 - x * dy;
+        base = y >> 8;
+        if (base >= 0) {
+          val1 = left[base];
+          val2 = left[base + 1];
+          shift = y - base * 256;
+        } else {
+          // Projection lies above left[0]: use left[0] without blending.
+          val1 = val2 = left[0];
+          shift = 0;
+        }
+      }
+      val = (val1 * (256 - shift) + val2 * shift + 128) >> 8;
+      dst[c] = clip_pixel(val);
+    }
+    dst += stride;
+  }
+}
+
+// Directional prediction, zone 3: 180 < angle < 270.
+// Only the left column is read, up to index bs - 1. dy is negative; -dy is
+// the vertical shift per column in 1/256-sample units.
+static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs,
+                             const uint8_t *above, const uint8_t *left,
+                             int dx, int dy) {
+  const int last = bs - 1;  // Index of the last left sample used.
+  int row, col;
+
+  (void)above;
+  (void)dx;
+  assert(dx == 1);
+  assert(dy < 0);
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    for (col = 0; col < bs; ++col) {
+      const int pos = row * 256 - (col + 1) * dy;
+      const int idx = pos >> 8;
+      const int frac = pos - idx * 256;
+      if (idx >= last) {
+        // Past the end of the left column: replicate its last sample.
+        dst[col] = left[last];
+        continue;
+      }
+      dst[col] = clip_pixel(
+          (left[idx] * (256 - frac) + left[idx + 1] * frac + 128) >> 8);
+    }
+  }
+}
+
+// Vertical prediction: every row of the bs x bs block is a copy of the first
+// bs samples of the above row.
+static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                               const uint8_t *above, const uint8_t *left) {
+  int rows_left = bs;
+  (void) left;
+
+  while (rows_left-- > 0) {
+    memcpy(dst, above, bs);
+    dst += stride;
+  }
+}
+
+// Horizontal prediction: row r of the bs x bs block is filled with left[r].
+static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                               const uint8_t *above, const uint8_t *left) {
+  int row = 0;
+  (void) above;
+
+  while (row < bs) {
+    memset(dst, left[row], bs);
+    dst += stride;
+    ++row;
+  }
+}
+
+// Dispatches directional prediction for an angle given in degrees.
+// 90 and 180 map to the plain vertical / horizontal predictors. Any other
+// angle in (0, 270) goes to the matching zone predictor, with steps derived
+// from tan(angle) in 1/256-sample units. Angles outside (0, 270) leave dst
+// untouched.
+static void dr_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                         const uint8_t *above, const uint8_t *left, int angle) {
+  double slope;
+
+  if (angle == 90) {
+    v_predictor(dst, stride, bs, above, left);
+    return;
+  }
+  if (angle == 180) {
+    h_predictor(dst, stride, bs, above, left);
+    return;
+  }
+
+  slope = tan(angle * PI / 180.0);
+  if (angle > 0 && angle < 90) {
+    dr_prediction_z1(dst, stride, bs, above, left, -((int)(256 / slope)), 1);
+  } else if (angle > 90 && angle < 180) {
+    // tan() is negative in this zone; the zone 2 predictor wants positive
+    // steps.
+    slope = -slope;
+    dr_prediction_z2(dst, stride, bs, above, left, (int)(256 / slope),
+                     (int)(256 * slope));
+  } else if (angle > 180 && angle < 270) {
+    dr_prediction_z3(dst, stride, bs, above, left, 1, -((int)(256 * slope)));
+  }
+}
+
+// Filter-intra coefficients, indexed [tx_size][mode][tap].
+// Taps 0..3 weight the above, left, above-left and above-right entries
+// respectively; results are scaled down by FILTER_INTRA_PREC_BITS.
+// The table is only ever read, so it is declared const.
+static const int filter_intra_taps_4[TX_SIZES][INTRA_MODES][4] = {
+    {
+        {735, 881, -537, -54},
+        {1005, 519, -488, -11},
+        {383, 990, -343, -6},
+        {442, 805, -542, 319},
+        {658, 616, -133, -116},
+        {875, 442, -141, -151},
+        {386, 741, -23, -80},
+        {390, 1027, -446, 51},
+        {679, 606, -523, 262},
+        {903, 922, -778, -23},
+    },
+    {
+        {648, 803, -444, 16},
+        {972, 620, -576, 7},
+        {561, 967, -499, -5},
+        {585, 762, -468, 144},
+        {596, 619, -182, -9},
+        {895, 459, -176, -153},
+        {557, 722, -126, -129},
+        {601, 839, -523, 105},
+        {562, 709, -499, 251},
+        {803, 872, -695, 43},
+    },
+    {
+        {423, 728, -347, 111},
+        {963, 685, -665, 23},
+        {281, 1024, -480, 216},
+        {640, 596, -437, 78},
+        {429, 669, -259, 99},
+        {740, 646, -415, 23},
+        {568, 771, -346, 40},
+        {404, 833, -486, 209},
+        {398, 712, -423, 307},
+        {939, 935, -887, 17},
+    },
+    {
+        {477, 737, -393, 150},
+        {881, 630, -546, 67},
+        {506, 984, -443, -20},
+        {114, 459, -270, 528},
+        {433, 528, 14, 3},
+        {837, 470, -301, -30},
+        {181, 777, 89, -107},
+        {-29, 716, -232, 259},
+        {589, 646, -495, 255},
+        {740, 884, -728, 77},
+    },
+};
+
+// 4-tap recursive filter-intra predictor.
+// Works on mean-removed samples. pred[][] holds the neighbours: row 0 is the
+// above row starting at above[-1], column 0 is the left column. Each
+// predicted entry is a rounded fixed-point weighted sum of its above, left,
+// above-left and above-right entries. The mean is added back on output.
+static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs,
+                                         const uint8_t *above,
+                                         const uint8_t *left,
+                                         int mode) {
+  int i, row, col;
+  int pred[33][65];
+  int dc = 0;
+  TX_SIZE tx_size = TX_4X4;
+  const int *taps;
+
+  if (bs == 32)
+    tx_size = TX_32X32;
+  else if (bs == 16)
+    tx_size = TX_16X16;
+  else if (bs == 8)
+    tx_size = TX_8X8;
+  taps = filter_intra_taps_4[tx_size][mode];
+
+  // Rounded mean of the bs left and bs above neighbours.
+  for (i = 0; i < bs; ++i)
+    dc += (int)left[i] + (int)above[i];
+  dc = (dc + bs) / (2 * bs);
+
+  // Mean-removed borders; row 0 starts at the top-left sample above[-1].
+  for (row = 1; row <= bs; ++row)
+    pred[row][0] = (int)left[row - 1] - dc;
+  for (col = 0; col <= 2 * bs; ++col)
+    pred[0][col] = (int)above[col - 1] - dc;
+
+  // Each row is filled one column shorter than the row above it, because
+  // every entry reads the above-right entry of the previous row.
+  for (row = 1; row <= bs; ++row) {
+    for (col = 1; col <= 2 * bs - row; ++col) {
+      const int sum = taps[0] * pred[row - 1][col] +
+                      taps[1] * pred[row][col - 1] +
+                      taps[2] * pred[row - 1][col - 1] +
+                      taps[3] * pred[row - 1][col + 1];
+      // Rounding is symmetric around zero.
+      if (sum < 0)
+        pred[row][col] =
+            -((-sum + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS);
+      else
+        pred[row][col] =
+            (sum + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS;
+    }
+  }
+
+  for (row = 0; row < bs; ++row) {
+    for (col = 0; col < bs; ++col)
+      dst[col] = clip_pixel(pred[row + 1][col + 1] + dc);
+    dst += stride;
+  }
+}
+
+// FILTER_DC_PRED: runs the 4-tap filter predictor with the DC_PRED tap set.
+static void dc_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                               const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED);
+}
+
+// FILTER_V_PRED: runs the 4-tap filter predictor with the V_PRED tap set.
+static void v_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                               const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED);
+}
+
+// FILTER_H_PRED: runs the 4-tap filter predictor with the H_PRED tap set.
+static void h_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                               const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED);
+}
+
+// FILTER_D45_PRED: runs the 4-tap filter predictor with the D45_PRED tap set.
+static void d45_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                 const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED);
+}
+
+// FILTER_D135_PRED: runs the 4-tap filter predictor with the D135_PRED tap
+// set.
+static void d135_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED);
+}
+
+// FILTER_D117_PRED: runs the 4-tap filter predictor with the D117_PRED tap
+// set.
+static void d117_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED);
+}
+
+// FILTER_D153_PRED: runs the 4-tap filter predictor with the D153_PRED tap
+// set.
+static void d153_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED);
+}
+
+// FILTER_D207_PRED: runs the 4-tap filter predictor with the D207_PRED tap
+// set.
+static void d207_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED);
+}
+
+// FILTER_D63_PRED: runs the 4-tap filter predictor with the D63_PRED tap set.
+static void d63_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                 const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED);
+}
+
+// FILTER_TM_PRED: runs the 4-tap filter predictor with the TM_PRED tap set.
+static void tm_filter_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                const uint8_t *above, const uint8_t *left) {
+  filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED);
+}
+
+// Filter-intra predictors, indexed by the filter modes of EXT_INTRA_MODE
+// (FILTER_DC_PRED .. FILTER_TM_PRED). Sized with FILTER_INTRA_MODES, like the
+// other per-filter-mode tables, so there is no implicit NULL slot for
+// EXT_DR_PRED.
+static void (*filter_intra_predictors[FILTER_INTRA_MODES])(uint8_t *dst,
+    ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) = {
+        dc_filter_predictor, v_filter_predictor, h_filter_predictor,
+        d45_filter_predictor, d135_filter_predictor, d117_filter_predictor,
+        d153_filter_predictor, d207_filter_predictor, d63_filter_predictor,
+        tm_filter_predictor,
+};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Directional prediction, zone 1: 0 < angle < 90 (16-bit samples).
+// Only the above row is read, up to index 2 * bs - 1. dx is negative; -dx is
+// the horizontal shift per row in 1/256-sample units.
+static void highbd_dr_prediction_z1(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above, const uint16_t *left,
+                                    int dx, int dy, int bd) {
+  const int last = 2 * bs - 1;  // Index of the last above sample used.
+  int row, col;
+
+  (void)left;
+  (void)dy;
+  assert(dy == 1);
+  assert(dx < 0);
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    const int offset = -(row + 1) * dx;
+    for (col = 0; col < bs; ++col) {
+      const int pos = col * 256 + offset;
+      const int idx = pos >> 8;
+      const int frac = pos - idx * 256;
+      if (idx >= last) {
+        // Past the end of the above row: replicate its last sample.
+        dst[col] = above[last];
+        continue;
+      }
+      dst[col] = clip_pixel_highbd(
+          (above[idx] * (256 - frac) + above[idx + 1] * frac + 128) >> 8, bd);
+    }
+  }
+}
+
+// Directional prediction, zone 2: 90 < angle < 180
+// 16-bit sample variant. Each pixel is first projected onto the above row.
+// If that projection lands more than one sample to the left of above[0], the
+// pixel is projected onto the left column instead. dx and dy are positive
+// steps in 1/256-sample units; the result is clipped using bd.
+static void highbd_dr_prediction_z2(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above, const uint16_t *left,
+                                    int dx, int dy, int bd) {
+  int r, c, x, y, val1, val2, shift, val, base;
+
+  assert(dx > 0);
+  assert(dy > 0);
+
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      y = r + 1;
+      // Position along the above row, in 1/256-sample units.
+      x = c * 256 - y * dx;
+      if (x >= -256) {
+        if (x <= 0) {
+          // Between the top-left sample above[-1] and above[0].
+          val1 = above[-1];
+          val2 = above[0];
+          shift = x + 256;
+        } else {
+          base = x >> 8;
+          val1 = above[base];
+          val2 = above[base + 1];
+          shift = x - base * 256;
+        }
+      } else {
+        // Use the left column; x and y are reused for this projection.
+        x = c + 1;
+        y = r * 256 - x * dy;
+        base = y >> 8;
+        if (base >= 0) {
+          val1 = left[base];
+          val2 = left[base + 1];
+          shift = y - base * 256;
+        } else {
+          // Projection lies above left[0]: use left[0] without blending.
+          val1 = val2 = left[0];
+          shift = 0;
+        }
+      }
+      val = (val1 * (256 - shift) + val2 * shift + 128) >> 8;
+      dst[c] = clip_pixel_highbd(val, bd);
+    }
+    dst += stride;
+  }
+}
+
+// Directional prediction, zone 3: 180 < angle < 270 (16-bit samples).
+// Only the left column is read, up to index bs - 1. dy is negative; -dy is
+// the vertical shift per column in 1/256-sample units.
+static void highbd_dr_prediction_z3(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above, const uint16_t *left,
+                                    int dx, int dy, int bd) {
+  const int last = bs - 1;  // Index of the last left sample used.
+  int row, col;
+
+  (void)above;
+  (void)dx;
+  assert(dx == 1);
+  assert(dy < 0);
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    for (col = 0; col < bs; ++col) {
+      const int pos = row * 256 - (col + 1) * dy;
+      const int idx = pos >> 8;
+      const int frac = pos - idx * 256;
+      if (idx >= last) {
+        // Past the end of the left column: replicate its last sample.
+        dst[col] = left[last];
+        continue;
+      }
+      dst[col] = clip_pixel_highbd(
+          (left[idx] * (256 - frac) + left[idx + 1] * frac + 128) >> 8, bd);
+    }
+  }
+}
+
+// Vertical prediction for 16-bit samples: every row of the bs x bs block is
+// a copy of the first bs samples of the above row. bd is unused.
+static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride,
+                                      int bs, const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  int rows_left = bs;
+  (void) left;
+  (void) bd;
+  while (rows_left-- > 0) {
+    memcpy(dst, above, bs * sizeof(uint16_t));
+    dst += stride;
+  }
+}
+
+// Horizontal prediction for 16-bit samples: row r of the bs x bs block is
+// filled with left[r]. bd is unused.
+static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride,
+                                      int bs, const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  int row = 0;
+  (void) above;
+  (void) bd;
+  while (row < bs) {
+    vpx_memset16(dst, left[row], bs);
+    dst += stride;
+    ++row;
+  }
+}
+
+// Dispatches directional prediction (16-bit samples) for an angle in degrees.
+// 90 and 180 map to the plain vertical / horizontal predictors. Any other
+// angle in (0, 270) goes to the matching zone predictor, with steps derived
+// from tan(angle) in 1/256-sample units. Angles outside (0, 270) leave dst
+// untouched.
+static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                const uint16_t *above, const uint16_t *left,
+                                int angle, int bd) {
+  double slope;
+
+  if (angle == 90) {
+    highbd_v_predictor(dst, stride, bs, above, left, bd);
+    return;
+  }
+  if (angle == 180) {
+    highbd_h_predictor(dst, stride, bs, above, left, bd);
+    return;
+  }
+
+  slope = tan(angle * PI / 180.0);
+  if (angle > 0 && angle < 90) {
+    highbd_dr_prediction_z1(dst, stride, bs, above, left,
+                            -((int)(256 / slope)), 1, bd);
+  } else if (angle > 90 && angle < 180) {
+    // tan() is negative in this zone; the zone 2 predictor wants positive
+    // steps.
+    slope = -slope;
+    highbd_dr_prediction_z2(dst, stride, bs, above, left, (int)(256 / slope),
+                            (int)(256 * slope), bd);
+  } else if (angle > 180 && angle < 270) {
+    highbd_dr_prediction_z3(dst, stride, bs, above, left, 1,
+                            -((int)(256 * slope)), bd);
+  }
+}
+
+// 4-tap recursive filter-intra predictor for 16-bit samples.
+// Works on mean-removed samples. pred[][] holds the neighbours: row 0 is the
+// above row starting at above[-1], column 0 is the left column. Each
+// predicted entry is a rounded fixed-point weighted sum of its above, left,
+// above-left and above-right entries. The mean is added back and the result
+// is clipped using bd on output.
+static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride,
+                                                int bs, const uint16_t *above,
+                                                const uint16_t *left, int mode,
+                                                int bd) {
+  int i, row, col;
+  int pred[33][65];
+  int dc = 0;
+  TX_SIZE tx_size = TX_4X4;
+  const int *taps;
+
+  if (bs == 32)
+    tx_size = TX_32X32;
+  else if (bs == 16)
+    tx_size = TX_16X16;
+  else if (bs == 8)
+    tx_size = TX_8X8;
+  taps = filter_intra_taps_4[tx_size][mode];
+
+  // Rounded mean of the bs left and bs above neighbours.
+  for (i = 0; i < bs; ++i)
+    dc += (int)left[i] + (int)above[i];
+  dc = (dc + bs) / (2 * bs);
+
+  // Mean-removed borders; row 0 starts at the top-left sample above[-1].
+  for (row = 1; row <= bs; ++row)
+    pred[row][0] = (int)left[row - 1] - dc;
+  for (col = 0; col <= 2 * bs; ++col)
+    pred[0][col] = (int)above[col - 1] - dc;
+
+  // Each row is filled one column shorter than the row above it, because
+  // every entry reads the above-right entry of the previous row.
+  for (row = 1; row <= bs; ++row) {
+    for (col = 1; col <= 2 * bs - row; ++col) {
+      const int sum = taps[0] * pred[row - 1][col] +
+                      taps[1] * pred[row][col - 1] +
+                      taps[2] * pred[row - 1][col - 1] +
+                      taps[3] * pred[row - 1][col + 1];
+      // Rounding is symmetric around zero.
+      if (sum < 0)
+        pred[row][col] =
+            -((-sum + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS);
+      else
+        pred[row][col] =
+            (sum + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS;
+    }
+  }
+
+  for (row = 0; row < bs; ++row) {
+    for (col = 0; col < bs; ++col)
+      dst[col] = clip_pixel_highbd(pred[row + 1][col + 1] + dc, bd);
+    dst += stride;
+  }
+}
+
+// FILTER_DC_PRED (16-bit samples): runs the 4-tap filter predictor with the
+// DC_PRED tap set.
+static void highbd_dc_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+                                       int bs, const uint16_t *above,
+                                       const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED,
+                                      bd);
+}
+
+// FILTER_V_PRED (16-bit samples): runs the 4-tap filter predictor with the
+// V_PRED tap set.
+static void highbd_v_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+                                      int bs, const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED,
+                                      bd);
+}
+
+// FILTER_H_PRED (16-bit samples): runs the 4-tap filter predictor with the
+// H_PRED tap set.
+static void highbd_h_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+                                      int bs, const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED,
+                                      bd);
+}
+
+// FILTER_D45_PRED (16-bit samples): runs the 4-tap filter predictor with the
+// D45_PRED tap set.
+static void highbd_d45_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+                                        int bs, const uint16_t *above,
+                                        const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED,
+                                      bd);
+}
+
+// FILTER_D135_PRED (16-bit samples): runs the 4-tap filter predictor with the
+// D135_PRED tap set.
+static void highbd_d135_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+                                         int bs, const uint16_t *above,
+                                         const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED,
+                                      bd);
+}
+
+// FILTER_D117_PRED (16-bit samples): runs the 4-tap filter predictor with the
+// D117_PRED tap set.
+static void highbd_d117_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+                                         int bs, const uint16_t *above,
+                                         const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED,
+                                      bd);
+}
+
+// FILTER_D153_PRED (16-bit samples): runs the 4-tap filter predictor with the
+// D153_PRED tap set.
+static void highbd_d153_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+                                         int bs, const uint16_t *above,
+                                         const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED,
+                                      bd);
+}
+
+// FILTER_D207_PRED (16-bit samples): runs the 4-tap filter predictor with the
+// D207_PRED tap set.
+static void highbd_d207_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+                                         int bs, const uint16_t *above,
+                                         const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED,
+                                      bd);
+}
+
+// FILTER_D63_PRED (16-bit samples): runs the 4-tap filter predictor with the
+// D63_PRED tap set.
+static void highbd_d63_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+                                        int bs, const uint16_t *above,
+                                        const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED,
+                                      bd);
+}
+
+// FILTER_TM_PRED (16-bit samples): runs the 4-tap filter predictor with the
+// TM_PRED tap set.
+static void highbd_tm_filter_predictor(uint16_t *dst, ptrdiff_t stride,
+                                       int bs, const uint16_t *above,
+                                       const uint16_t *left, int bd) {
+  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED,
+                                      bd);
+}
+
+// Filter-intra predictors for 16-bit samples, indexed by the filter modes of
+// EXT_INTRA_MODE (FILTER_DC_PRED .. FILTER_TM_PRED). Sized with
+// FILTER_INTRA_MODES, like the other per-filter-mode tables, so there is no
+// implicit NULL slot for EXT_DR_PRED.
+static void (*highbd_filter_intra_predictors[FILTER_INTRA_MODES])(uint16_t *dst,
+    ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left,
+    int bd) = {
+        highbd_dc_filter_predictor, highbd_v_filter_predictor,
+        highbd_h_filter_predictor, highbd_d45_filter_predictor,
+        highbd_d135_filter_predictor, highbd_d117_filter_predictor,
+        highbd_d153_filter_predictor, highbd_d207_filter_predictor,
+        highbd_d63_filter_predictor, highbd_tm_filter_predictor,
+};
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_INTRA
+
 #if CONFIG_VP9_HIGHBITDEPTH
 static void build_intra_predictors_high(const MACROBLOCKD *xd,
                                         const uint8_t *ref8,
@@ -298,6 +924,9 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
   uint16_t *above_row = above_data + 16;
   const uint16_t *const_above_row = above_row;
   const int bs = 4 << tx_size;
+  int need_left = extend_modes[mode] & NEED_LEFT;
+  int need_above = extend_modes[mode] & NEED_ABOVE;
+  int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT;
 #if CONFIG_MISC_FIXES
   const uint16_t *above_ref = ref - ref_stride;
 #else
@@ -312,14 +941,65 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
   // 129  E   F  ..  U   V
   // 129  G   H  ..  S   T   T   T   T   T
 
+#if CONFIG_EXT_INTRA
+  const EXT_INTRA_MODE_INFO *ext_intra_mode_info =
+      &xd->mi[0]->mbmi.ext_intra_mode_info;
+  const EXT_INTRA_MODE ext_intra_mode =
+      ext_intra_mode_info->ext_intra_mode[plane != 0];
+  const int angle =
+      prediction_angle_map(ext_intra_mode_info->ext_intra_angle[plane != 0]);
+
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+    EXT_INTRA_MODE ext_intra_mode =
+        ext_intra_mode_info->ext_intra_mode[plane != 0];
+    if (ext_intra_mode <= FILTER_TM_PRED) {
+      need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+      need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+      need_aboveright =
+          ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVERIGHT;
+    } else {
+      assert(angle > 0 && angle < 270);
+#if CONFIG_MISC_FIXES
+      if (angle <= 90)
+        need_above = 1, need_left = 0;
+      else if (angle < 180)
+        need_above = 1, need_left = 1;
+      else
+        need_above = 0, need_left = 1;
+#else
+      if (angle < 90)
+        need_above = 0, need_aboveright = 1, need_left = 0;
+      else if (angle == 90)
+        need_above = 1, need_aboveright = 0, need_left = 0;
+      else if (angle < 180)
+        need_above = 1, need_aboveright = 0, need_left = 1;
+      else
+        need_above = 0, need_aboveright = 0, need_left = 1;
+#endif  // CONFIG_MISC_FIXES
+    }
+  }
+#endif  // CONFIG_EXT_INTRA
+
 #if CONFIG_MISC_FIXES
   (void) x;
   (void) y;
   (void) plane;
 
   // NEED_LEFT
-  if (extend_modes[mode] & NEED_LEFT) {
+  if (need_left) {
+#if CONFIG_EXT_INTRA
+    int need_bottom;
+    if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+      if (ext_intra_mode <= FILTER_TM_PRED)
+        need_bottom = 0;
+      else
+        need_bottom = angle > 180;
+    } else {
+      need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+    }
+#else
     const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+#endif  // CONFIG_EXT_INTRA
     i = 0;
     if (n_left_px > 0) {
       for (; i < n_left_px; i++)
@@ -337,8 +1017,20 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
   }
 
   // NEED_ABOVE
-  if (extend_modes[mode] & NEED_ABOVE) {
+  if (need_above) {
+#if CONFIG_EXT_INTRA
+    int need_right;
+    if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+      if (ext_intra_mode <= FILTER_TM_PRED)
+        need_right = 1;
+      else
+        need_right = angle < 90;
+    } else {
+      need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+    }
+#else
     const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+#endif  // CONFIG_EXT_INTRA
     if (n_top_px > 0) {
       memcpy(above_row, above_ref, n_top_px * 2);
       i = n_top_px;
@@ -354,10 +1046,19 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
     }
   }
 
-  if (extend_modes[mode] & NEED_ABOVELEFT) {
+  (void)need_aboveright;
+#if CONFIG_EXT_INTRA
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
+      (extend_modes[mode] & NEED_ABOVELEFT)) {
+    above_row[-1] = n_top_px > 0 ?
+        (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
+  }
+#else
+  if ((extend_modes[mode] & NEED_ABOVELEFT)) {
     above_row[-1] = n_top_px > 0 ?
         (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
   }
+#endif  // CONFIG_EXT_INTRA
 #else
   // Get current frame pointer, width and height.
   if (plane == 0) {
@@ -372,82 +1073,126 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
   x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
   y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
 
-  // left
-  if (left_available) {
-    if (xd->mb_to_bottom_edge < 0) {
-      /* slower path if the block needs border extension */
-      if (y0 + bs <= frame_height) {
-        for (i = 0; i < bs; ++i)
-          left_col[i] = ref[i * ref_stride - 1];
+  // NEED_LEFT
+  if (need_left) {
+    if (left_available) {
+      if (xd->mb_to_bottom_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (y0 + bs <= frame_height) {
+          for (i = 0; i < bs; ++i)
+            left_col[i] = ref[i * ref_stride - 1];
+        } else {
+          const int extend_bottom = frame_height - y0;
+          for (i = 0; i < extend_bottom; ++i)
+            left_col[i] = ref[i * ref_stride - 1];
+          for (; i < bs; ++i)
+            left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
+        }
       } else {
-        const int extend_bottom = frame_height - y0;
-        for (i = 0; i < extend_bottom; ++i)
+        /* faster path if the block does not need extension */
+        for (i = 0; i < bs; ++i)
           left_col[i] = ref[i * ref_stride - 1];
-        for (; i < bs; ++i)
-          left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
       }
     } else {
-      /* faster path if the block does not need extension */
-      for (i = 0; i < bs; ++i)
-        left_col[i] = ref[i * ref_stride - 1];
+      // TODO(Peter): this value should probably change for high bitdepth
+      vpx_memset16(left_col, base + 1, bs);
     }
-  } else {
-    // TODO(Peter): this value should probably change for high bitdepth
-    vpx_memset16(left_col, base + 1, bs);
-  }
-
-  // TODO(hkuang) do not extend 2*bs pixels for all modes.
-  // above
-  if (up_available) {
-    const uint16_t *above_ref = ref - ref_stride;
-    if (xd->mb_to_right_edge < 0) {
-      /* slower path if the block needs border extension */
-      if (x0 + 2 * bs <= frame_width) {
-        if (right_available && bs == 4) {
-          memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
+  }
+
+  // NEED_ABOVE
+  if (need_above) {
+    if (up_available) {
+      const uint16_t *above_ref = ref - ref_stride;
+      if (xd->mb_to_right_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (x0 + bs <= frame_width) {
+          memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+        } else if (x0 <= frame_width) {
+          const int r = frame_width - x0;
+          memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+          vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width);
+        }
+      } else {
+        /* faster path if the block does not need extension */
+        if (bs == 4 && right_available && left_available) {
+          const_above_row = above_ref;
         } else {
           memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
         }
-      } else if (x0 + bs <= frame_width) {
-        const int r = frame_width - x0;
-        if (right_available && bs == 4) {
+      }
+      above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+    } else {
+      vpx_memset16(above_row, base - 1, bs);
+      above_row[-1] = base - 1;
+    }
+  }
+
+  // NEED_ABOVERIGHT
+  if (need_aboveright) {
+    if (up_available) {
+      const uint16_t *above_ref = ref - ref_stride;
+      if (xd->mb_to_right_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (x0 + 2 * bs <= frame_width) {
+          if (right_available && bs == 4) {
+            memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
+          } else {
+            memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          }
+        } else if (x0 + bs <= frame_width) {
+          const int r = frame_width - x0;
+          if (right_available && bs == 4) {
+            memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+            vpx_memset16(above_row + r, above_row[r - 1],
+                         x0 + 2 * bs - frame_width);
+          } else {
+            memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          }
+        } else if (x0 <= frame_width) {
+          const int r = frame_width - x0;
           memcpy(above_row, above_ref, r * sizeof(above_row[0]));
           vpx_memset16(above_row + r, above_row[r - 1],
                        x0 + 2 * bs - frame_width);
+        }
+        // TODO(Peter) this value should probably change for high bitdepth
+        above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+      } else {
+        /* faster path if the block does not need extension */
+        if (bs == 4 && right_available && left_available) {
+          const_above_row = above_ref;
         } else {
           memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          if (bs == 4 && right_available)
+            memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
+          else
+            vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+          // TODO(Peter): this value should probably change for high bitdepth
+          above_row[-1] = left_available ? above_ref[-1] : (base + 1);
         }
-      } else if (x0 <= frame_width) {
-        const int r = frame_width - x0;
-        memcpy(above_row, above_ref, r * sizeof(above_row[0]));
-        vpx_memset16(above_row + r, above_row[r - 1],
-                       x0 + 2 * bs - frame_width);
       }
-      // TODO(Peter) this value should probably change for high bitdepth
-      above_row[-1] = left_available ? above_ref[-1] : (base+1);
     } else {
-      /* faster path if the block does not need extension */
-      if (bs == 4 && right_available && left_available) {
-        const_above_row = above_ref;
-      } else {
-        memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
-        if (bs == 4 && right_available)
-          memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
-        else
-          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
-        // TODO(Peter): this value should probably change for high bitdepth
-        above_row[-1] = left_available ? above_ref[-1] : (base+1);
-      }
+      vpx_memset16(above_row, base - 1, bs * 2);
+      // TODO(Peter): this value should probably change for high bitdepth
+      above_row[-1] = base - 1;
     }
-  } else {
-    vpx_memset16(above_row, base - 1, bs * 2);
-    // TODO(Peter): this value should probably change for high bitdepth
-    above_row[-1] = base - 1;
   }
 #endif
 
+#if CONFIG_EXT_INTRA
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+    if (ext_intra_mode <= FILTER_TM_PRED)
+      highbd_filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs,
+                                                     const_above_row, left_col,
+                                                     bd);
+    else
+      highbd_dr_predictor(dst, dst_stride, bs, const_above_row, left_col,
+                          angle, bd);
+    return;
+  }
+#endif  // CONFIG_EXT_INTRA
+
   // predict
   if (mode == DC_PRED) {
 #if CONFIG_MISC_FIXES
@@ -491,6 +1236,47 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
   uint8_t *above_row = above_data + 16;
   const uint8_t *const_above_row = above_row;
   const int bs = 4 << tx_size;
+  int need_left = extend_modes[mode] & NEED_LEFT;
+  int need_above = extend_modes[mode] & NEED_ABOVE;
+  int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT;
+#if CONFIG_EXT_INTRA
+  const EXT_INTRA_MODE_INFO *ext_intra_mode_info =
+      &xd->mi[0]->mbmi.ext_intra_mode_info;
+  const EXT_INTRA_MODE ext_intra_mode =
+      ext_intra_mode_info->ext_intra_mode[plane != 0];
+  const int angle =
+      prediction_angle_map(ext_intra_mode_info->ext_intra_angle[plane != 0]);
+
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+    EXT_INTRA_MODE ext_intra_mode =
+        ext_intra_mode_info->ext_intra_mode[plane != 0];
+    if (ext_intra_mode <= FILTER_TM_PRED) {
+      need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+      need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+      need_aboveright =
+          ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVERIGHT;
+    } else {
+      assert(angle > 0 && angle < 270);
+#if CONFIG_MISC_FIXES
+      if (angle <= 90)
+        need_above = 1, need_left = 0;
+      else if (angle < 180)
+        need_above = 1, need_left = 1;
+      else
+        need_above = 0, need_left = 1;
+#else
+      if (angle < 90)
+        need_above = 0, need_aboveright = 1, need_left = 0;
+      else if (angle == 90)
+        need_above = 1, need_aboveright = 0, need_left = 0;
+      else if (angle < 180)
+        need_above = 1, need_aboveright = 0, need_left = 1;
+      else
+        need_above = 0, need_aboveright = 0, need_left = 1;
+#endif  // CONFIG_MISC_FIXES
+    }
+  }
+#endif  // CONFIG_EXT_INTRA
 
   // 127 127 127 .. 127 127 127 127 127 127
   // 129  A   B  ..  Y   Z
@@ -524,9 +1310,21 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
 #endif
 
   // NEED_LEFT
-  if (extend_modes[mode] & NEED_LEFT) {
+  if (need_left) {
 #if CONFIG_MISC_FIXES
+#if CONFIG_EXT_INTRA
+    int need_bottom;
+    if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+      if (ext_intra_mode <= FILTER_TM_PRED)
+        need_bottom = 0;
+      else
+        need_bottom = angle > 180;
+    } else {
+      need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+    }
+#else
     const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+#endif  // CONFIG_EXT_INTRA
     i = 0;
     if (n_left_px > 0) {
       for (; i < n_left_px; i++)
@@ -567,9 +1365,21 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
   }
 
   // NEED_ABOVE
-  if (extend_modes[mode] & NEED_ABOVE) {
+  if (need_above) {
 #if CONFIG_MISC_FIXES
+#if CONFIG_EXT_INTRA
+    int need_right;
+    if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+      if (ext_intra_mode <= FILTER_TM_PRED)
+        need_right = 1;
+      else
+        need_right = angle < 90;
+    } else {
+      need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+    }
+#else
     const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+#endif  // CONFIG_EXT_INTRA
     if (n_top_px > 0) {
       memcpy(above_row, above_ref, n_top_px);
       i = n_top_px;
@@ -612,12 +1422,20 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
   }
 
 #if CONFIG_MISC_FIXES
-  if (extend_modes[mode] & NEED_ABOVELEFT) {
+  (void)need_aboveright;
+#if CONFIG_EXT_INTRA
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
+      (extend_modes[mode] & NEED_ABOVELEFT)) {
+    above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
+  }
+#else
+  if ((extend_modes[mode] & NEED_ABOVELEFT)) {
     above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
   }
+#endif  // CONFIG_EXT_INTRA
 #else
   // NEED_ABOVERIGHT
-  if (extend_modes[mode] & NEED_ABOVERIGHT) {
+  if (need_aboveright) {
     if (up_available) {
       const uint8_t *above_ref = ref - ref_stride;
       if (xd->mb_to_right_edge < 0) {
@@ -663,6 +1481,17 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
   }
 #endif
 
+#if CONFIG_EXT_INTRA
+  if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+    if (ext_intra_mode <= FILTER_TM_PRED)
+      filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs,
+                                              const_above_row, left_col);
+    else
+      dr_predictor(dst, dst_stride, bs, const_above_row, left_col, angle);
+    return;
+  }
+#endif  // CONFIG_EXT_INTRA
+
   // predict
   if (mode == DC_PRED) {
 #if CONFIG_MISC_FIXES
index 6395e9615e985f333073e6d0909ffc049f435506..58a7a973e7e9e393008cf0f7154527153d582f10 100644 (file)
@@ -472,4 +472,10 @@ void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts,
     cm->counts.seg.tree_mispred[i] += counts->seg.tree_mispred[i];
   }
 #endif
+
+#if CONFIG_EXT_INTRA
+  for (i = 0; i < PLANE_TYPES; ++i)
+    for (j = 0; j < 2; j++)
+      cm->counts.ext_intra[i][j] += counts->ext_intra[i][j];
+#endif  // CONFIG_EXT_INTRA
 }
index 4bbf76fe234187d6a24666d5a76951097112ff87..a96c512213e7b35e2f1de014f2b834e27982fdaf 100644 (file)
@@ -340,6 +340,47 @@ static void read_palette_mode_info(VP10_COMMON *const cm,
   }
 }
 
+#if CONFIG_EXT_INTRA
+// Reads the ext-intra syntax of the current block from |r| into
+// xd->mi[0]->mbmi.ext_intra_mode_info. Index 0 is read only when the luma
+// mode is DC_PRED, index 1 only when the chroma mode is DC_PRED. When
+// xd->counts is non-NULL, the use/no-use flag is tallied in it as well.
+static void read_ext_intra_mode_info(VP10_COMMON *const cm,
+                                     MACROBLOCKD *const xd, vpx_reader *r) {
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  EXT_INTRA_MODE_INFO *const info = &mbmi->ext_intra_mode_info;
+  FRAME_COUNTS *const counts = xd->counts;
+  int p;
+
+  // p == 0 follows mbmi->mode (luma), p == 1 follows mbmi->uv_mode (chroma).
+  for (p = 0; p < 2; ++p) {
+    const int is_dc = p ? (mbmi->uv_mode == DC_PRED) : (mbmi->mode == DC_PRED);
+    if (!is_dc)
+      continue;
+
+    info->use_ext_intra_mode[p] = vpx_read(r, cm->fc->ext_intra_probs[p]);
+    if (info->use_ext_intra_mode[p]) {
+      // With DR_ONLY set no selector bit is read; the directional mode is
+      // implied.
+      const int is_dr = DR_ONLY ? 1 : vpx_read(r, DR_EXT_INTRA_PROB);
+      if (is_dr) {
+        info->ext_intra_mode[p] = EXT_DR_PRED;
+        info->ext_intra_angle[p] = read_uniform(r, EXT_INTRA_ANGLES);
+      } else {
+        info->ext_intra_mode[p] = read_uniform(r, FILTER_INTRA_MODES);
+      }
+    }
+    if (counts)
+      ++counts->ext_intra[p][info->use_ext_intra_mode[p]];
+  }
+}
+#endif  // CONFIG_EXT_INTRA
+
 static void read_intra_frame_mode_info(VP10_COMMON *const cm,
                                        MACROBLOCKD *const xd,
                                        int mi_row, int mi_col, vpx_reader *r) {
@@ -413,6 +454,13 @@ static void read_intra_frame_mode_info(VP10_COMMON *const cm,
       mbmi->tx_type = DCT_DCT;
     }
 #endif  // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+    mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+    mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+    if (bsize >= BLOCK_8X8)
+      read_ext_intra_mode_info(cm, xd, r);
+#endif  // CONFIG_EXT_INTRA
 }
 
 static int read_mv_component(vpx_reader *r,
@@ -576,9 +624,14 @@ static void read_intra_block_mode_info(VP10_COMMON *const cm,
   }
 
   mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
-
   mbmi->palette_mode_info.palette_size[0] = 0;
   mbmi->palette_mode_info.palette_size[1] = 0;
+#if CONFIG_EXT_INTRA
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+  if (bsize >= BLOCK_8X8)
+    read_ext_intra_mode_info(cm, xd, r);
+#endif  // CONFIG_EXT_INTRA
 }
 
 static INLINE int is_mv_valid(const MV *mv) {
index a24738188d44ac67d67b67b775122de7ced3f4c6..69e92f9a2c363d8740f4309610b2e76a990dd02d 100644 (file)
@@ -511,6 +511,43 @@ static void write_ref_frames(const VP10_COMMON *cm, const MACROBLOCKD *xd,
   }
 }
 
+#if CONFIG_EXT_INTRA
+// Writes the ext-intra syntax of |mbmi| to |w|. Index 0 is written only when
+// the luma mode is DC_PRED, index 1 only when the chroma mode is DC_PRED.
+// For each: the use flag, then (unless DR_ONLY) the directional/filter
+// selector, then either the angle or the filter mode as a uniform code.
+static void write_ext_intra_mode_info(const VP10_COMMON *const cm,
+                                      const MB_MODE_INFO *const mbmi,
+                                      vpx_writer *w) {
+  const EXT_INTRA_MODE_INFO *const info = &mbmi->ext_intra_mode_info;
+  int p;
+
+  // p == 0 follows mbmi->mode (luma), p == 1 follows mbmi->uv_mode (chroma).
+  for (p = 0; p < 2; ++p) {
+    const int is_dc = p ? (mbmi->uv_mode == DC_PRED) : (mbmi->mode == DC_PRED);
+    if (!is_dc)
+      continue;
+
+    vpx_write(w, info->use_ext_intra_mode[p], cm->fc->ext_intra_probs[p]);
+    if (info->use_ext_intra_mode[p]) {
+      const EXT_INTRA_MODE mode = info->ext_intra_mode[p];
+      // Any mode past FILTER_TM_PRED is treated as the directional mode.
+      const int is_dr = mode > FILTER_TM_PRED;
+      if (!DR_ONLY)
+        vpx_write(w, is_dr, DR_EXT_INTRA_PROB);
+      if (is_dr)
+        write_uniform(w, EXT_INTRA_ANGLES, info->ext_intra_angle[p]);
+      else
+        write_uniform(w, FILTER_INTRA_MODES, mode);
+    }
+  }
+}
+#endif  // CONFIG_EXT_INTRA
+
 static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
                                 vpx_writer *w) {
   VP10_COMMON *const cm = &cpi->common;
@@ -592,6 +629,10 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
       }
     }
     write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]);
+#if CONFIG_EXT_INTRA
+    if (bsize >= BLOCK_8X8)
+      write_ext_intra_mode_info(cm, mbmi, w);
+#endif  // CONFIG_EXT_INTRA
   } else {
     const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
     const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
@@ -750,6 +791,11 @@ static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd,
           &ext_tx_intra_encodings[eset][mbmi->tx_type]);
   }
 #endif  // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA
+  if (bsize >= BLOCK_8X8)
+      write_ext_intra_mode_info(cm, mbmi, w);
+#endif  // CONFIG_EXT_INTRA
 }
 
 static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile,
@@ -1780,7 +1826,7 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) {
                         &counts->mv);
 #if CONFIG_EXT_TX
     update_ext_tx_probs(cm, &header_bc);
-#endif
+#endif  // CONFIG_EXT_TX
   }
 
   vpx_stop_encode(&header_bc);
index c3a85ef24c9775b1c37087b89fba71db426138c1..95cddcc3048c237bb8baae3a23246be27906b236 100644 (file)
@@ -3312,6 +3312,16 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
       }
     }
 #endif  // CONFIG_EXT_TX
+#if CONFIG_EXT_INTRA
+    if (bsize >= BLOCK_8X8 && !is_inter_block(mbmi)) {
+      if (mbmi->mode == DC_PRED)
+        ++td->counts->ext_intra[0]
+                              [mbmi->ext_intra_mode_info.use_ext_intra_mode[0]];
+      if (mbmi->uv_mode == DC_PRED)
+        ++td->counts->ext_intra[1]
+                              [mbmi->ext_intra_mode_info.use_ext_intra_mode[1]];
+    }
+#endif  // CONFIG_EXT_INTRA
   }
 
 #if CONFIG_VAR_TX
index f398c405762fbba6e0f80d6035a6209272986caa..772585052d4544fad9057b65eaccd93e6e5786f8 100644 (file)
@@ -1447,6 +1447,10 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP10_COMP *cpi, MACROBLOCK *mb,
   memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
   memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
 
+#if CONFIG_EXT_INTRA
+  mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+#endif  // CONFIG_EXT_INTRA
+
   // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -1491,6 +1495,172 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP10_COMP *cpi, MACROBLOCK *mb,
   return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
 }
 
+#if CONFIG_EXT_INTRA
+// Searches the luma ext-intra modes for a block of size |bsize| and keeps the
+// best candidate if its RD cost is below |*best_rd|.
+//
+// The filter-intra modes are tried first (skipped when DR_ONLY is set), then
+// the directional mode: either a three-step coarse-to-fine angle search
+// (ANGLE_FAST_SEARCH) or an exhaustive scan of all EXT_INTRA_ANGLES angles.
+// |mode_cost| is added to the rate of every candidate.
+//
+// When a candidate wins, |*rate|, |*rate_tokenonly|, |*distortion|,
+// |*skippable| and |*best_rd| describe it, and mbmi holds its mode, tx size
+// (and tx type under CONFIG_EXT_TX) and ext-intra info. When nothing wins the
+// output pointers are not written, but mbmi->mode and
+// mbmi->ext_intra_mode_info have still been overwritten by the search.
+//
+// Return 1 if an ext intra mode is selected; return 0 otherwise.
+static int rd_pick_ext_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
+                                 int *rate, int *rate_tokenonly,
+                                 int64_t *distortion, int *skippable,
+                                 BLOCK_SIZE bsize, int mode_cost,
+                                 int64_t *best_rd) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MODE_INFO *const mic = xd->mi[0];
+  MB_MODE_INFO *mbmi = &mic->mbmi;
+  int this_rate, this_rate_tokenonly, s;
+  int ext_intra_selected_flag = 0;
+  // best_angle_dir must start at 0: if no candidate of the fast search lowers
+  // best_angle_rd it is never assigned, yet it is read after each step.
+  int i, step, delta, angle, best_angle, best_angle_dir = 0;
+  // Step sizes of the fast angle search, coarse to fine.
+  int deltas[3] = {25, 5, 1};
+  // Number of offsets tried on each side of the current best angle per step.
+  int branches[3] = {2, 2, 2};
+  int64_t this_distortion, this_rd, best_angle_rd = INT64_MAX;
+  EXT_INTRA_MODE mode;
+  TX_SIZE best_tx_size = TX_4X4;
+  EXT_INTRA_MODE_INFO ext_intra_mode_info;
+#if CONFIG_EXT_TX
+  // Only consumed when a candidate was selected, which always assigns it; the
+  // initializer keeps the variable well-defined on every path.
+  TX_TYPE best_tx_type = DCT_DCT;
+#endif  // CONFIG_EXT_TX
+
+  vp10_zero(ext_intra_mode_info);
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 1;
+  mbmi->mode = DC_PRED;
+
+  // Filter-intra candidates.
+  if (!DR_ONLY) {
+    for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+      mbmi->ext_intra_mode_info.ext_intra_mode[0] = mode;
+      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+                      &s, NULL, bsize, *best_rd);
+      if (this_rate_tokenonly == INT_MAX)
+        continue;
+
+      this_rate = this_rate_tokenonly +
+          vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
+          vp10_cost_bit(DR_EXT_INTRA_PROB, 0) +
+          write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
+      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+      if (this_rd < *best_rd) {
+        *best_rd            = this_rd;
+        best_tx_size        = mic->mbmi.tx_size;
+        ext_intra_mode_info = mbmi->ext_intra_mode_info;
+#if CONFIG_EXT_TX
+        best_tx_type        = mic->mbmi.tx_type;
+#endif  // CONFIG_EXT_TX
+        *rate               = this_rate;
+        *rate_tokenonly     = this_rate_tokenonly;
+        *distortion         = this_distortion;
+        *skippable          = s;
+        ext_intra_selected_flag = 1;
+      }
+    }
+  }
+
+  // Directional candidates.
+  mbmi->ext_intra_mode_info.ext_intra_mode[0] = EXT_DR_PRED;
+  if (ANGLE_FAST_SEARCH) {
+    // Start from the middle angle and refine around the best one found so far
+    // with progressively smaller steps.
+    best_angle = EXT_INTRA_ANGLES / 2;
+    for (step = 0; step < 3; ++step) {
+      delta = deltas[step];
+      for (i = -branches[step]; i <= branches[step]; ++i) {
+        int64_t rd_thresh;
+        // The centre angle is only evaluated in the first step.
+        if (i == 0 && step != 0)
+          continue;
+        angle = best_angle + i * delta;
+        if (angle < 0)
+          angle = 0;
+        if (angle >= EXT_INTRA_ANGLES)
+          angle = EXT_INTRA_ANGLES - 1;
+        // Clamping may land back on the centre angle; skip it as well.
+        if (angle == best_angle && step != 0)
+          continue;
+        mbmi->ext_intra_mode_info.ext_intra_angle[0] = angle;
+        if (*best_rd == INT64_MAX)
+          rd_thresh = best_angle_rd;
+        else
+          rd_thresh = VPXMIN(best_angle_rd, *best_rd * RD_ADJUSTER);
+        super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+                        &s, NULL, bsize, rd_thresh);
+        if (this_rate_tokenonly == INT_MAX)
+          continue;
+        this_rate = this_rate_tokenonly +
+            vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
+            (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
+            write_uniform_cost(EXT_INTRA_ANGLES, angle) + mode_cost;
+        this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+        if (this_rd < *best_rd) {
+          *best_rd            = this_rd;
+          best_tx_size        = mic->mbmi.tx_size;
+          ext_intra_mode_info = mbmi->ext_intra_mode_info;
+#if CONFIG_EXT_TX
+          best_tx_type        = mic->mbmi.tx_type;
+#endif  // CONFIG_EXT_TX
+          *rate               = this_rate;
+          *rate_tokenonly     = this_rate_tokenonly;
+          *distortion         = this_distortion;
+          *skippable          = s;
+          ext_intra_selected_flag = 1;
+        }
+        if (this_rd < best_angle_rd) {
+          best_angle_rd = this_rd;
+          best_angle_dir = i;
+        }
+      }
+
+      // Re-centre on the best offset recorded so far and keep it in range.
+      best_angle += best_angle_dir * delta;
+      if (best_angle < 0)
+        best_angle = 0;
+      if (best_angle >= EXT_INTRA_ANGLES)
+        best_angle = EXT_INTRA_ANGLES - 1;
+      // Stop refining once the best directional cost is clearly worse than
+      // the overall best.
+      if (*best_rd < best_angle_rd / RD_ADJUSTER)
+        break;
+    }
+  } else {
+    for (angle = 0; angle < EXT_INTRA_ANGLES; ++angle) {
+      mbmi->ext_intra_mode_info.ext_intra_angle[0] = angle;
+      // Angles that map to exactly 90 or 180 degrees are not evaluated here.
+      if (prediction_angle_map(angle) == 90 ||
+          prediction_angle_map(angle) == 180)
+        continue;
+      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+                      &s, NULL, bsize, *best_rd);
+      if (this_rate_tokenonly == INT_MAX)
+        continue;
+
+      this_rate = this_rate_tokenonly +
+          vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
+          (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
+          write_uniform_cost(EXT_INTRA_ANGLES, angle) + mode_cost;
+      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+      if (this_rd < *best_rd) {
+        *best_rd            = this_rd;
+        best_tx_size        = mic->mbmi.tx_size;
+        ext_intra_mode_info = mbmi->ext_intra_mode_info;
+#if CONFIG_EXT_TX
+        best_tx_type        = mic->mbmi.tx_type;
+#endif  // CONFIG_EXT_TX
+        *rate               = this_rate;
+        *rate_tokenonly     = this_rate_tokenonly;
+        *distortion         = this_distortion;
+        *skippable          = s;
+        ext_intra_selected_flag = 1;
+      }
+    }
+  }
+
+  // Restore the winning configuration into mbmi, if there is one.
+  if (ext_intra_selected_flag) {
+    mbmi->mode = DC_PRED;
+    mbmi->tx_size = best_tx_size;
+    mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
+        ext_intra_mode_info.use_ext_intra_mode[0];
+    mbmi->ext_intra_mode_info.ext_intra_mode[0] =
+        ext_intra_mode_info.ext_intra_mode[0];
+    mbmi->ext_intra_mode_info.ext_intra_angle[0] =
+        ext_intra_mode_info.ext_intra_angle[0];
+#if CONFIG_EXT_TX
+    mbmi->tx_type = best_tx_type;
+#endif  // CONFIG_EXT_TX
+    return 1;
+  } else {
+    return 0;
+  }
+}
+#endif  // CONFIG_EXT_INTRA
+
 // This function is used only for intra_only frames
 static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
@@ -1504,6 +1674,9 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
   int this_rate, this_rate_tokenonly, s;
   int64_t this_distortion, this_rd;
   TX_SIZE best_tx = TX_4X4;
+#if CONFIG_EXT_INTRA
+  EXT_INTRA_MODE_INFO ext_intra_mode_info;
+#endif  // CONFIG_EXT_INTRA
 #if CONFIG_EXT_TX
   TX_TYPE best_tx_type = DCT_DCT;
 #endif  // CONFIG_EXT_TX
@@ -1520,6 +1693,10 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
   const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
   bmode_costs = cpi->y_mode_costs[A][L];
 
+#if CONFIG_EXT_INTRA
+  ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+  mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+#endif  // CONFIG_EXT_INTRA
   memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
   palette_mode_info.palette_size[0] = 0;
   mic->mbmi.palette_mode_info.palette_size[0] = 0;
@@ -1529,7 +1706,7 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
     palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
 
   /* Y Search for intra prediction mode */
-  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
     mic->mbmi.mode = mode;
 
     super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
@@ -1543,6 +1720,10 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
       this_rate +=
           vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                          [palette_ctx], 0);
+#if CONFIG_EXT_INTRA
+    if (mode == DC_PRED)
+      this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0);
+#endif  // CONFIG_EXT_INTRA
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 
     if (this_rd < best_rd) {
@@ -1564,6 +1745,30 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
                               &palette_mode_info, best_palette_color_map,
                               &best_tx, &mode_selected, &best_rd);
 
+#if CONFIG_EXT_INTRA
+  if (palette_mode_info.palette_size[0] == 0) {
+    if (rd_pick_ext_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
+                              skippable, bsize, bmode_costs[DC_PRED],
+                              &best_rd)) {
+      mode_selected       = mic->mbmi.mode;
+      best_tx             = mic->mbmi.tx_size;
+      ext_intra_mode_info = mic->mbmi.ext_intra_mode_info;
+#if CONFIG_EXT_TX
+      best_tx_type        = mic->mbmi.tx_type;
+#endif  // CONFIG_EXT_TX
+    }
+  }
+
+  mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] =
+      ext_intra_mode_info.use_ext_intra_mode[0];
+  if (ext_intra_mode_info.use_ext_intra_mode[0]) {
+    mic->mbmi.ext_intra_mode_info.ext_intra_mode[0] =
+        ext_intra_mode_info.ext_intra_mode[0];
+    mic->mbmi.ext_intra_mode_info.ext_intra_angle[0] =
+        ext_intra_mode_info.ext_intra_angle[0];
+  }
+#endif  // CONFIG_EXT_INTRA
+
   mic->mbmi.mode = mode_selected;
   mic->mbmi.tx_size = best_tx;
 #if CONFIG_EXT_TX
@@ -2263,30 +2468,189 @@ static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
   return is_cost_valid;
 }
 
+#if CONFIG_EXT_INTRA
+// Searches chroma ext-intra modes; returns 1 if one beats *best_rd, else 0.
+static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
+                                  PICK_MODE_CONTEXT *ctx,
+                                  int *rate, int *rate_tokenonly,
+                                  int64_t *distortion, int *skippable,
+                                  BLOCK_SIZE bsize, int64_t *best_rd) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  int ext_intra_selected_flag = 0;
+  int this_rate_tokenonly, this_rate, s;
+  int64_t this_distortion, this_sse, this_rd, best_angle_rd = INT64_MAX;
+  EXT_INTRA_MODE mode;
+  int i, step, delta, angle, best_angle, best_angle_dir = 0;
+  int deltas[3] = {25, 5, 1};    // angle step size for each refinement pass
+  int branches[3] = {2, 2, 2};   // offsets tried on each side, per pass
+  EXT_INTRA_MODE_INFO ext_intra_mode_info;
+
+  vp10_zero(ext_intra_mode_info);
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 1;
+  mbmi->uv_mode = DC_PRED;
+
+  if (!DR_ONLY) {
+    for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+      mbmi->ext_intra_mode_info.ext_intra_mode[1] = mode;
+      if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+                            &this_distortion, &s, &this_sse, bsize, *best_rd))
+        continue;
+
+      this_rate = this_rate_tokenonly +
+          vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
+          vp10_cost_bit(DR_EXT_INTRA_PROB, 0) +
+          cpi->intra_uv_mode_cost[mbmi->uv_mode] +
+          write_uniform_cost(FILTER_INTRA_MODES, mode);
+      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+      if (this_rd < *best_rd) {
+        *best_rd        = this_rd;
+        *rate           = this_rate;
+        *rate_tokenonly = this_rate_tokenonly;
+        *distortion     = this_distortion;
+        *skippable      = s;
+        ext_intra_mode_info = mbmi->ext_intra_mode_info;
+        ext_intra_selected_flag = 1;
+        if (!x->select_tx_size)
+          swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
+      }
+    }
+  }
+
+  mbmi->ext_intra_mode_info.ext_intra_mode[1] = EXT_DR_PRED;
+  if (ANGLE_FAST_SEARCH) {
+    best_angle = EXT_INTRA_ANGLES / 2;
+    for (step = 0; step < 3; ++step) {
+      delta = deltas[step];
+      for (i = -branches[step]; i <= branches[step]; ++i) {
+        int64_t rd_thresh;
+        if (i == 0 && step != 0)
+          continue;
+        angle = best_angle + i * delta;
+        if (angle < 0)
+          angle = 0;
+        if (angle >= EXT_INTRA_ANGLES)
+          angle = EXT_INTRA_ANGLES - 1;
+        if (angle == best_angle && step != 0)
+          continue;
+        mbmi->ext_intra_mode_info.ext_intra_angle[1] = angle;
+        if (*best_rd == INT64_MAX)
+          rd_thresh = best_angle_rd;
+        else
+          rd_thresh = VPXMIN(best_angle_rd, *best_rd * RD_ADJUSTER);
+        if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+                              &s, &this_sse, bsize, rd_thresh))
+          continue;
+        this_rate = this_rate_tokenonly +
+            vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
+            (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
+            cpi->intra_uv_mode_cost[mbmi->uv_mode] +
+            write_uniform_cost(EXT_INTRA_ANGLES, angle);
+        this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+        if (this_rd < *best_rd) {
+          *best_rd        = this_rd;
+          *rate           = this_rate;
+          *rate_tokenonly = this_rate_tokenonly;
+          *distortion     = this_distortion;
+          *skippable      = s;
+          ext_intra_mode_info = mbmi->ext_intra_mode_info;
+          ext_intra_selected_flag = 1;
+          if (!x->select_tx_size)
+            swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
+        }
+        if (this_rd < best_angle_rd) {
+          best_angle_rd = this_rd;
+          best_angle_dir = i;
+        }
+      }
+      best_angle += best_angle_dir * delta;  // no move while dir is still 0
+      if (best_angle < 0)
+        best_angle = 0;
+      if (best_angle >= EXT_INTRA_ANGLES)
+        best_angle = EXT_INTRA_ANGLES - 1;
+      if (*best_rd < best_angle_rd / RD_ADJUSTER)
+        break;
+    }
+  } else {
+    for (angle = 0; angle < EXT_INTRA_ANGLES; ++angle) {
+      mbmi->ext_intra_mode_info.ext_intra_angle[1] = angle;
+      if (prediction_angle_map(angle) == 90 ||
+          prediction_angle_map(angle) == 180)
+        continue;
+      if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+                            &this_distortion, &s, &this_sse, bsize, *best_rd))
+        continue;
+
+      this_rate = this_rate_tokenonly +
+          vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
+          (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
+          cpi->intra_uv_mode_cost[mbmi->uv_mode] +
+          write_uniform_cost(EXT_INTRA_ANGLES, angle);
+      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+      if (this_rd < *best_rd) {
+        *best_rd        = this_rd;
+        *rate           = this_rate;
+        *rate_tokenonly = this_rate_tokenonly;
+        *distortion     = this_distortion;
+        *skippable      = s;
+        ext_intra_mode_info = mbmi->ext_intra_mode_info;
+        ext_intra_selected_flag = 1;
+        if (!x->select_tx_size)
+          swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
+      }
+    }
+  }
+
+  if (ext_intra_selected_flag) {
+    mbmi->uv_mode = DC_PRED;
+    mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+        ext_intra_mode_info.use_ext_intra_mode[1];
+    mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+        ext_intra_mode_info.ext_intra_mode[1];
+    mbmi->ext_intra_mode_info.ext_intra_angle[1] =
+        ext_intra_mode_info.ext_intra_angle[1];
+
+    return 1;
+  } else {
+    return 0;
+  }
+}
+#endif  // CONFIG_EXT_INTRA
+
 static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                        PICK_MODE_CONTEXT *ctx,
                                        int *rate, int *rate_tokenonly,
                                        int64_t *distortion, int *skippable,
                                        BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
   MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   PREDICTION_MODE mode;
   PREDICTION_MODE mode_selected = DC_PRED;
   int64_t best_rd = INT64_MAX, this_rd;
   int this_rate_tokenonly, this_rate, s;
   int64_t this_distortion, this_sse;
+#if CONFIG_EXT_INTRA
+  EXT_INTRA_MODE_INFO ext_intra_mode_info;
 
+  ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif  // CONFIG_EXT_INTRA
   memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
   xd->mi[0]->mbmi.palette_mode_info.palette_size[1] = 0;
   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
     if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
       continue;
 
-    xd->mi[0]->mbmi.uv_mode = mode;
+    mbmi->uv_mode = mode;
 
     if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
                           &this_distortion, &s, &this_sse, bsize, best_rd))
       continue;
     this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mode];
+#if CONFIG_EXT_INTRA
+    if (mode == DC_PRED)
+      this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 0);
+#endif  // CONFIG_EXT_INTRA
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 
     if (this_rd < best_rd) {
@@ -2301,7 +2665,22 @@ static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
     }
   }
 
-  xd->mi[0]->mbmi.uv_mode = mode_selected;
+#if CONFIG_EXT_INTRA
+  if (mbmi->sb_type >= BLOCK_8X8) {
+    if (rd_pick_ext_intra_sbuv(cpi, x, ctx, rate, rate_tokenonly, distortion,
+                               skippable, bsize, &best_rd)) {
+      mode_selected   = mbmi->uv_mode;
+      ext_intra_mode_info = mbmi->ext_intra_mode_info;
+    }
+  }
+
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+      ext_intra_mode_info.use_ext_intra_mode[1];
+  if (ext_intra_mode_info.use_ext_intra_mode[1])
+    mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+        ext_intra_mode_info.ext_intra_mode[1];
+#endif  // CONFIG_EXT_INTRA
+  mbmi->uv_mode = mode_selected;
   return best_rd;
 }
 
@@ -4104,6 +4483,9 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
   int64_t dist_uv[TX_SIZES];
   int skip_uv[TX_SIZES];
   PREDICTION_MODE mode_uv[TX_SIZES];
+#if CONFIG_EXT_INTRA
+  EXT_INTRA_MODE_INFO ext_intra_mode_info_uv[TX_SIZES];
+#endif  // CONFIG_EXT_INTRA
   const int intra_cost_penalty = vp10_get_intra_cost_penalty(
       cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
   int best_skip2 = 0;
@@ -4363,6 +4745,10 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
     mbmi->uv_mode = DC_PRED;
     mbmi->ref_frame[0] = ref_frame;
     mbmi->ref_frame[1] = second_ref_frame;
+#if CONFIG_EXT_INTRA
+    mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+    mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif  // CONFIG_EXT_INTRA
     // Evaluate all sub-pel filters irrespective of whether we can use
     // them for this frame.
     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
@@ -4385,6 +4771,27 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
       memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
                       NULL, bsize, best_rd);
+#if CONFIG_EXT_INTRA
+      // TODO(huisu): ext-intra is turned off in lossless mode for now to
+      // avoid a unit test failure
+      if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id]) {
+        MB_MODE_INFO mbmi_copy = *mbmi;
+        int rate_dummy;
+
+        if (rate_y != INT_MAX) {
+          int this_rate = rate_y + cpi->mbmode_cost[mbmi->mode] +
+              vp10_cost_bit(cm->fc->ext_intra_probs[0], 0);
+          this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, distortion_y);
+        } else {
+          this_rd = best_rd;
+        }
+
+        if (!rd_pick_ext_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
+                                   &skippable, bsize,
+                                   cpi->mbmode_cost[mbmi->mode], &this_rd))
+          *mbmi = mbmi_copy;
+      }
+#endif  // CONFIG_EXT_INTRA
       if (rate_y == INT_MAX)
         continue;
       uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
@@ -4393,14 +4800,45 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
         choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
                              &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
                              &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
+#if CONFIG_EXT_INTRA
+        ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
+#endif  // CONFIG_EXT_INTRA
       }
 
       rate_uv = rate_uv_tokenonly[uv_tx];
       distortion_uv = dist_uv[uv_tx];
       skippable = skippable && skip_uv[uv_tx];
       mbmi->uv_mode = mode_uv[uv_tx];
+#if CONFIG_EXT_INTRA
+      mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+          ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
+      if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
+        mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+            ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
+        mbmi->ext_intra_mode_info.ext_intra_angle[1] =
+            ext_intra_mode_info_uv[uv_tx].ext_intra_angle[1];
+      }
+#endif  // CONFIG_EXT_INTRA
 
       rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
+#if CONFIG_EXT_INTRA
+      if (mbmi->mode == DC_PRED) {
+        rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
+                               mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
+        if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+          EXT_INTRA_MODE ext_intra_mode =
+              mbmi->ext_intra_mode_info.ext_intra_mode[0];
+          int angle = mbmi->ext_intra_mode_info.ext_intra_angle[0];
+          if (!DR_ONLY)
+              rate2 += vp10_cost_bit(DR_EXT_INTRA_PROB,
+                                     ext_intra_mode > FILTER_TM_PRED);
+          if (ext_intra_mode > FILTER_TM_PRED)
+            rate2 += write_uniform_cost(EXT_INTRA_ANGLES, angle);
+          else
+            rate2 += write_uniform_cost(FILTER_INTRA_MODES, ext_intra_mode);
+        }
+      }
+#endif  // CONFIG_EXT_INTRA
       if (this_mode != DC_PRED && this_mode != TM_PRED)
         rate2 += intra_cost_penalty;
       distortion2 = distortion_y + distortion_uv;
@@ -4734,6 +5172,10 @@ void vp10_rd_pick_inter_mode_sb_seg_skip(VP10_COMP *cpi,
 
   mbmi->palette_mode_info.palette_size[0] = 0;
   mbmi->palette_mode_info.palette_size[1] = 0;
+#if CONFIG_EXT_INTRA
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif  // CONFIG_EXT_INTRA
   mbmi->mode = ZEROMV;
   mbmi->uv_mode = DC_PRED;
   mbmi->ref_frame[0] = LAST_FRAME;
@@ -4848,6 +5290,11 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
   memset(x->zcoeff_blk[TX_4X4], 0, 4);
   vp10_zero(best_mbmode);
 
+#if CONFIG_EXT_INTRA
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif  // CONFIG_EXT_INTRA
+
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
     filter_cache[i] = INT64_MAX;