From: Debargha Mukherjee Date: Thu, 23 Apr 2015 00:33:25 +0000 (-0700) Subject: Global motion continued X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=caae13d54fba59b4ffbd88dcea7f2496bfa9a133;p=libvpx Global motion continued Implements a first version of global motion where the existing ZEROMV mode is converted to a translation only global motion mode. A lot of the code for supporting a rotation-zoom affine model is also incorporated. WIP. Change-Id: Ia1288a8dfe82f89484d4e291780288388e56d91b --- diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h index 6801dd3a2..cedce068a 100644 --- a/vp9/common/vp9_common.h +++ b/vp9/common/vp9_common.h @@ -118,6 +118,17 @@ typedef int16_t tran_low_t; #define VP9_FRAME_MARKER 0x2 +static INLINE int get_unsigned_bits_gen(unsigned int num_values) { + int cat = 0; + if (num_values <= 1) + return 0; + num_values--; + while (num_values > 0) { + cat++; + num_values >>= 1; + } + return cat; +} #ifdef __cplusplus } // extern "C" diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index 5cef4f1de..3af6b2340 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -100,6 +100,9 @@ typedef struct frame_contexts { #if CONFIG_WEDGE_PARTITION vp9_prob wedge_interinter_prob[BLOCK_SIZES]; #endif // CONFIG_WEDGE_PARTITION +#if CONFIG_GLOBAL_MOTION + vp9_prob global_motion_types_prob[GLOBAL_MOTION_TYPES - 1]; +#endif // CONFIG_GLOBAL_MOTION } FRAME_CONTEXT; typedef struct { @@ -159,6 +162,9 @@ typedef struct { unsigned int y_palette_size[10][PALETTE_SIZES]; unsigned int uv_palette_size[10][PALETTE_SIZES]; #endif // CONFIG_PALETTE +#if CONFIG_GLOBAL_MOTION + unsigned int global_motion_types[GLOBAL_MOTION_TYPES]; +#endif // CONFIG_GLOBAL_MOTION } FRAME_COUNTS; extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c index 47669f96a..d58a36ebb 100644 --- a/vp9/common/vp9_entropymv.c +++ b/vp9/common/vp9_entropymv.c @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include + #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymv.h" @@ -118,6 +120,61 @@ static const uint8_t log_in_base_2[] = { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 }; +#if CONFIG_GLOBAL_MOTION +const vp9_tree_index vp9_global_motion_types_tree + [TREE_SIZE(GLOBAL_MOTION_TYPES)] = { + -GLOBAL_ZERO, 2, + -GLOBAL_TRANSLATION, -GLOBAL_ROTZOOM +}; + +static const vp9_prob default_global_motion_types_prob + [GLOBAL_MOTION_TYPES - 1] = { + // Currently only translation is used, so make the second prob very high. 
+ 240, 255 +}; + +static void convert_params_to_rotzoom(double *H, Global_Motion_Params *model) { + double z = 1.0 + (double) model->zoom / (1 << ZOOM_PRECISION_BITS); + double r = (double) model->rotation / (1 << ROTATION_PRECISION_BITS); + H[0] = (1 + z) * cos(r * M_PI / 180.0); + H[1] = -(1 + z) * sin(r * M_PI / 180.0); + H[2] = (double) model->mv.as_mv.col / 8.0; + H[3] = (double) model->mv.as_mv.row / 8.0; +} + +static int_mv get_global_mv(int col, int row, Global_Motion_Params *model) { + int_mv mv; + double H[4]; + double x, y; + convert_params_to_rotzoom(H, model); + x = H[0] * col + H[1] * row + H[2]; + y = -H[1] * col + H[0] * row + H[3]; + mv.as_mv.col = (int)floor(x * 8 + 0.5) - col; + mv.as_mv.row = (int)floor(y * 8 + 0.5) - row; + return mv; +} + +int_mv vp9_get_global_sb_center_mv(int col, int row, BLOCK_SIZE bsize, + Global_Motion_Params *model) { + col += num_4x4_blocks_wide_lookup[bsize] * 2; + row += num_4x4_blocks_high_lookup[bsize] * 2; + return get_global_mv(col, row, model); +} + +int_mv vp9_get_global_sub8x8_center_mv(int col, int row, int block, + Global_Motion_Params *model) { + if (block == 0 || block == 2) + col += 2; + else + col += 6; + if (block == 0 || block == 1) + row += 2; + else + row += 6; + return get_global_mv(col, row, model); +} +#endif // CONFIG_GLOBAL_MOTION + static INLINE int mv_class_base(MV_CLASS_TYPE c) { return c ? CLASS0_SIZE << (c + 2) : 0; } @@ -142,7 +199,7 @@ int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset) { static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr, int usehp) { int s, z, c, o, d, e, f; - assert(v != 0); /* should not be zero */ + assert(v != 0); /* should not be zero */ s = v < 0; comp_counts->sign[s] += incr; z = (s ? -v : v) - 1; /* magnitude - 1 */ @@ -233,4 +290,7 @@ void vp9_init_mv_probs(VP9_COMMON *cm) { #if CONFIG_INTRABC cm->fc.ndvc = default_nmv_context; #endif // CONFIG_INTRABC +#if CONFIG_GLOBAL_MOTION + vp9_copy(cm->fc.global_motion_types_prob, default_global_motion_types_prob); +#endif // CONFIG_GLOBAL_MOTION } diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h index 75e6861f4..58ad7f267 100644 --- a/vp9/common/vp9_entropymv.h +++ b/vp9/common/vp9_entropymv.h @@ -14,6 +14,7 @@ #include "./vpx_config.h" +#include "vp9/common/vp9_enums.h" #include "vp9/common/vp9_mv.h" #include "vp9/common/vp9_prob.h" @@ -127,6 +128,39 @@ typedef struct { void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx); +#if CONFIG_GLOBAL_MOTION +#define MAX_GLOBAL_MOTION_MODELS 1 + +#define ZOOM_PRECISION_BITS 6 +#define ROTATION_PRECISION_BITS 4 + +#define ABS_ZOOM_BITS 3 +#define ABS_ROTATION_BITS 4 +#define ABS_TRANSLATION_BITS 7 + +typedef enum { + GLOBAL_ZERO = 0, + GLOBAL_TRANSLATION = 1, + GLOBAL_ROTZOOM = 2, + GLOBAL_MOTION_TYPES +} GLOBAL_MOTION_TYPE; + +// Currently this is specialized for rotzoom model only +typedef struct { + int rotation; // positive or negative rotation angle in degrees + int zoom; // this is actually the zoom multiplier minus 1 + int_mv mv; +} Global_Motion_Params; + +extern const vp9_tree_index vp9_global_motion_types_tree + [TREE_SIZE(GLOBAL_MOTION_TYPES)]; + +int_mv vp9_get_global_sb_center_mv(int col, int row, BLOCK_SIZE bsize, + Global_Motion_Params *model); +int_mv vp9_get_global_sub8x8_center_mv(int col, int row, int block, + Global_Motion_Params *model); +#endif // CONFIG_GLOBAL_MOTION + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index adbc232c0..6e2275af0 100644 --- 
a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -1,4 +1,3 @@ - /* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 0b36df4b5..f71aabe9a 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -228,6 +228,10 @@ typedef struct VP9Common { int palette_counter; int block_counter; #endif // CONFIG_PALETTE +#if CONFIG_GLOBAL_MOTION + int num_global_motion[MAX_REF_FRAMES]; + Global_Motion_Params global_motion[MAX_REF_FRAMES][MAX_GLOBAL_MOTION_MODELS]; +#endif } VP9_COMMON; static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP9_COMMON *cm, int index) { diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 82ca30011..c2568a1be 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -1381,7 +1381,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, // Do border extension if there is motion or the // width/height is not a multiple of 8 pixels. if (is_scaled || scaled_mv.col || scaled_mv.row || - (frame_width & 0x7) || (frame_height & 0x7)) { + (frame_width & 0x7) || (frame_height & 0x7)) { // Get reference block bottom right coordinate. int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index b70840b68..38a8003e0 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -2472,6 +2472,50 @@ static void read_inter_compound_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) { } #endif // CONFIG_COMPOUND_MODES +#if CONFIG_GLOBAL_MOTION +static void read_global_motion_params(Global_Motion_Params *params, + vp9_prob *probs, + vp9_reader *r) { + GLOBAL_MOTION_TYPE gmtype = vp9_read_tree(r, vp9_global_motion_types_tree, + probs); + switch (gmtype) { + case GLOBAL_ZERO: + break; + case GLOBAL_TRANSLATION: + params->mv.as_mv.col = + vp9_read_primitive_symmetric(r, ABS_TRANSLATION_BITS); + params->mv.as_mv.row = + vp9_read_primitive_symmetric(r, ABS_TRANSLATION_BITS); + break; + case GLOBAL_ROTZOOM: + params->mv.as_mv.col = + vp9_read_primitive_symmetric(r, ABS_TRANSLATION_BITS); + params->mv.as_mv.row = + vp9_read_primitive_symmetric(r, ABS_TRANSLATION_BITS); + params->zoom = + vp9_read_primitive_symmetric(r, ABS_ZOOM_BITS); + params->rotation = + vp9_read_primitive_symmetric(r, ABS_ROTATION_BITS); + break; + default: + assert(0); + } +} + +static void read_global_motion(VP9_COMMON *cm, vp9_reader *r) { + int frame, i; + vpx_memset(cm->num_global_motion, 0, sizeof(cm->num_global_motion)); + vpx_memset(cm->global_motion, 0, sizeof(cm->global_motion)); + for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) { + cm->num_global_motion[frame] = 1; + for (i = 0; i < cm->num_global_motion[frame]; ++i) { + read_global_motion_params( + cm->global_motion[frame], cm->fc.global_motion_types_prob, r); + } + } +} +#endif // CONFIG_GLOBAL_MOTION + static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, size_t partition_size) { VP9_COMMON *const cm = &pbi->common; @@ -2572,6 +2616,9 @@ static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, } } #endif // CONFIG_WEDGE_PARTITION +#if CONFIG_GLOBAL_MOTION + read_global_motion(cm, &r); +#endif // CONFIG_GLOBAL_MOTION } #if CONFIG_PALETTE if (frame_is_intra_only(cm)) diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 0d08fc447..ae9ce68e5 100644 --- 
a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -858,11 +858,13 @@ static void read_intra_block_mode_info(VP9_COMMON *const cm, MODE_INFO *mi, } static INLINE int assign_mv(VP9_COMMON *cm, PREDICTION_MODE mode, + MV_REFERENCE_FRAME ref_frame[2], int_mv mv[2], int_mv ref_mv[2], int_mv nearest_mv[2], int_mv near_mv[2], int is_compound, int allow_hp, vp9_reader *r) { int i; int ret = 1; + (void) ref_frame; #if CONFIG_COMPOUND_MODES assert(is_inter_mode(mode) || is_inter_compound_mode(mode)); #else @@ -899,9 +901,15 @@ static INLINE int assign_mv(VP9_COMMON *cm, PREDICTION_MODE mode, break; } case ZEROMV: { +#if CONFIG_GLOBAL_MOTION + mv[0].as_int = cm->global_motion[ref_frame[0]][0].mv.as_int; + if (is_compound) + mv[1].as_int = cm->global_motion[ref_frame[1]][0].mv.as_int; +#else mv[0].as_int = 0; if (is_compound) mv[1].as_int = 0; +#endif break; } #if CONFIG_COMPOUND_MODES @@ -991,8 +999,13 @@ static INLINE int assign_mv(VP9_COMMON *cm, PREDICTION_MODE mode, } case ZERO_ZEROMV: { assert(is_compound); +#if CONFIG_GLOBAL_MOTION + mv[0].as_int = cm->global_motion[ref_frame[0]][0].mv.as_int; + mv[1].as_int = cm->global_motion[ref_frame[1]][0].mv.as_int; +#else mv[0].as_int = 0; mv[1].as_int = 0; +#endif // CONFIG_GLOBAL_MOTION break; } #endif // CONFIG_COMPOUND_MODES @@ -1207,7 +1220,7 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, } } - if (!assign_mv(cm, b_mode, block, + if (!assign_mv(cm, b_mode, mbmi->ref_frame, block, #if CONFIG_NEWMVREF (b_mode == NEAR_FORNEWMV) ? ref_mvs[1] : ref_mvs[0], #else @@ -1236,12 +1249,14 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, } else { #if CONFIG_NEWMVREF if (mbmi->mode == NEAR_FORNEWMV) - xd->corrupted |= !assign_mv(cm, mbmi->mode, mbmi->mv, nearmv, - nearestmv, nearmv, is_compound, allow_hp, r); + xd->corrupted |= !assign_mv(cm, mbmi->mode, mbmi->ref_frame, mbmi->mv, + nearmv, nearestmv, nearmv, is_compound, + allow_hp, r); else #endif // CONFIG_NEWMVREF - xd->corrupted |= !assign_mv(cm, mbmi->mode, mbmi->mv, nearestmv, - nearestmv, nearmv, is_compound, allow_hp, r); + xd->corrupted |= !assign_mv(cm, mbmi->mode, mbmi->ref_frame, mbmi->mv, + nearestmv, nearestmv, nearmv, is_compound, + allow_hp, r); } #if CONFIG_TX_SKIP mbmi->uv_mode = mbmi->mode; diff --git a/vp9/decoder/vp9_dsubexp.c b/vp9/decoder/vp9_dsubexp.c index c22617edb..1c309a262 100644 --- a/vp9/decoder/vp9_dsubexp.c +++ b/vp9/decoder/vp9_dsubexp.c @@ -74,3 +74,42 @@ void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p) { *p = (vp9_prob)inv_remap_prob(delp, *p); } } + +int vp9_read_primitive_uniform(vp9_reader *r, unsigned int num_values) { + const int l = get_unsigned_bits_gen(num_values); + int m, v; + if (l == 0) + return 0; + m = (1 << l) - num_values; + v = vp9_read_literal(r, l - 1); + if (v < m) + return v; + else + return (v << 1) + vp9_read_bit(r) - m; +} + +int vp9_read_primitive_subexp(vp9_reader *r, unsigned int k) { + int mk = (1 << k); + int i = 0; + int word; + while (vp9_read_bit(r)) { + mk <<= 1; + ++i; + } + if (i == 0) { + word = vp9_read_literal(r, k); + } else { + word = vp9_read_literal(r, k + i - 1) + (mk >> 1); + } + return word; +} + +int vp9_read_primitive_symmetric(vp9_reader *r, unsigned int mag_bits) { + if (vp9_read_bit(r)) { + int s = vp9_read_bit(r); + int x = vp9_read_literal(r, mag_bits) + 1; + return (s > 0 ? 
-x : x); + } else { + return 0; + } +} diff --git a/vp9/decoder/vp9_dsubexp.h b/vp9/decoder/vp9_dsubexp.h index 436f434fb..abab1a8f8 100644 --- a/vp9/decoder/vp9_dsubexp.h +++ b/vp9/decoder/vp9_dsubexp.h @@ -20,6 +20,18 @@ extern "C" { void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p); +// num_values is the number of values the symbol can take +int vp9_read_primitive_uniform(vp9_reader *r, unsigned int num_values); + +// k is the parameter of the subexponential code +int vp9_read_primitive_subexp(vp9_reader *r, unsigned int k); + +// mag_bits is number of bits for magnitude. The alphabet is of size +// 2 * 2^mag_bits + 1, symmetric around 0, where one bit is used to +// indicate 0 or non-zero, mag_bits bits are used to indicate magnitide +// and 1 more bit for the sign if non-zero. +int vp9_read_primitive_symmetric(vp9_reader *r, unsigned int mag_bits); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index fd1e71eab..c629d236b 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -55,6 +55,9 @@ static struct vp9_token copy_mode_encodings[COPY_MODE_COUNT - 1]; #if CONFIG_COMPOUND_MODES static struct vp9_token inter_compound_mode_encodings[INTER_COMPOUND_MODES]; #endif // CONFIG_COMPOUND_MODES +#if CONFIG_GLOBAL_MOTION +static struct vp9_token global_motion_types_encodings[GLOBAL_MOTION_TYPES]; +#endif // CONFIG_GLOBAL_MOTION #if CONFIG_SUPERTX static int vp9_check_supertx(VP9_COMMON *cm, int mi_row, int mi_col, @@ -88,6 +91,10 @@ void vp9_entropy_mode_init() { vp9_tokens_from_tree(copy_mode_encodings_l2, vp9_copy_mode_tree_l2); vp9_tokens_from_tree(copy_mode_encodings, vp9_copy_mode_tree); #endif // CONFIG_COPY_MODE +#if CONFIG_GLOBAL_MOTION + vp9_tokens_from_tree(global_motion_types_encodings, + vp9_global_motion_types_tree); +#endif // CONFIG_GLOBAL_MOTION } static void write_intra_mode(vp9_writer *w, PREDICTION_MODE mode, @@ -1985,6 +1992,66 @@ static void write_uncompressed_header(VP9_COMP *cpi, write_tile_info(cm, wb); } +#if CONFIG_GLOBAL_MOTION +static void write_global_motion_params(Global_Motion_Params *params, + vp9_prob *probs, + vp9_writer *w) { + GLOBAL_MOTION_TYPE gmtype; + if (params->zoom == 0 && params->rotation == 0) { + if (params->mv.as_int == 0) + gmtype = GLOBAL_ZERO; + else + gmtype = GLOBAL_TRANSLATION; + } else { + gmtype = GLOBAL_ROTZOOM; + } + vp9_write_token(w, vp9_global_motion_types_tree, probs, + &global_motion_types_encodings[gmtype]); + switch (gmtype) { + case GLOBAL_ZERO: + break; + case GLOBAL_TRANSLATION: + vp9_write_primitive_symmetric(w, params->mv.as_mv.col, + ABS_TRANSLATION_BITS); + vp9_write_primitive_symmetric(w, params->mv.as_mv.row, + ABS_TRANSLATION_BITS); + break; + case GLOBAL_ROTZOOM: + vp9_write_primitive_symmetric(w, params->mv.as_mv.col, + ABS_TRANSLATION_BITS); + vp9_write_primitive_symmetric(w, params->mv.as_mv.row, + ABS_TRANSLATION_BITS); + vp9_write_primitive_symmetric(w, params->zoom, ABS_ZOOM_BITS); + vp9_write_primitive_symmetric(w, params->rotation, ABS_ROTATION_BITS); + break; + default: + assert(0); + } +} + +static void write_global_motion(VP9_COMP *cpi, vp9_writer *w) { + VP9_COMMON *const cm = &cpi->common; + int frame, i; + for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) { + for (i = 0; i < cm->num_global_motion[frame]; ++i) { + if (!cpi->global_motion_used[frame]) { + vpx_memset( + cm->global_motion[frame], 0, + MAX_GLOBAL_MOTION_MODELS * sizeof(*cm->global_motion[frame])); + } + write_global_motion_params( + 
cm->global_motion[frame], cm->fc.global_motion_types_prob, w); + printf("Ref %d [%d] (used %d): %d %d %d %d\n", + frame, cm->current_video_frame, cpi->global_motion_used[frame], + cm->global_motion[frame][i].zoom, + cm->global_motion[frame][i].rotation, + cm->global_motion[frame][i].mv.as_mv.col, + cm->global_motion[frame][i].mv.as_mv.row); + } + } +} +#endif + static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { VP9_COMMON *const cm = &cpi->common; #if !CONFIG_TX_SKIP || CONFIG_SUPERTX @@ -2116,6 +2183,9 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { cm->counts.wedge_interinter[i]); } #endif // CONFIG_WEDGE_PARTITION +#if CONFIG_GLOBAL_MOTION + write_global_motion(cpi, &header_bc); +#endif // CONFIG_GLOBAL_MOTION } #if CONFIG_PALETTE diff --git a/vp9/encoder/vp9_corner_detect.c b/vp9/encoder/vp9_corner_detect.c index dd7ab0ae0..0ea768bd3 100644 --- a/vp9/encoder/vp9_corner_detect.c +++ b/vp9/encoder/vp9_corner_detect.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "vp9_corner_detect.h" @@ -2877,10 +2878,15 @@ int NonmaxSuppression(unsigned char *frmbuf, int width, int height, int stride, frm_corners_nonmax_xy = fast_nonmax(frmbuf, width, height, stride, (xy *)frm_corners, num_frm_corners, NONMAX_BARRIER, &num_frm_corners_nonmax); - memcpy(frm_corners, frm_corners_nonmax_xy, - sizeof(int) * 2 * num_frm_corners_nonmax); - free(frm_corners_nonmax_xy); - return num_frm_corners_nonmax; + if (frm_corners_nonmax_xy && + num_frm_corners_nonmax <= num_frm_corners) { + memcpy(frm_corners, frm_corners_nonmax_xy, + sizeof(xy) * num_frm_corners_nonmax); + free(frm_corners_nonmax_xy); + return num_frm_corners_nonmax; + } else { + return num_frm_corners; + } } ////////////////////////////////////////////////////////////////////////////// @@ -2895,9 +2901,13 @@ int FastCornerDetect(unsigned char *buf, int width, int height, int stride, FAST_BARRIER, &num_points); num_points = (num_points <= max_points ? 
num_points : max_points); - memcpy(points, frm_corners_xy, sizeof(int) * num_points * 2); - free(frm_corners_xy); - return NonmaxSuppression(buf, width, height, stride, points, num_points); + if (num_points > 0 && frm_corners_xy) { + memcpy(points, frm_corners_xy, sizeof(xy) * num_points); + free(frm_corners_xy); + return NonmaxSuppression(buf, width, height, stride, points, num_points); + } else { + return 0; + } } ////////////////////////////////////////////////////////////////////////////////// diff --git a/vp9/encoder/vp9_corner_match.c b/vp9/encoder/vp9_corner_match.c index 5c4fd1354..1d85dd155 100644 --- a/vp9/encoder/vp9_corner_match.c +++ b/vp9/encoder/vp9_corner_match.c @@ -16,7 +16,7 @@ #include "vp9_corner_match.h" -#define MATCH_SZ 21 +#define MATCH_SZ 15 #define MATCH_SZ_BY2 ((MATCH_SZ - 1)/2) #define MATCH_SZ_SQ (MATCH_SZ * MATCH_SZ) #define SEARCH_SZ 9 diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 3576fc68e..8fa4e0157 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -39,6 +39,9 @@ #if CONFIG_SUPERTX #include "vp9/encoder/vp9_cost.h" #endif +#if CONFIG_GLOBAL_MOTION +#include "vp9/encoder/vp9_global_motion.h" +#endif // CONFIG_GLOBAL_MOTION #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" @@ -155,17 +158,24 @@ static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { }; #endif // CONFIG_VP9_HIGHBITDEPTH -static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, - const struct buf_2d *ref, - BLOCK_SIZE bs) { +unsigned int get_sby_perpixel_ssd(VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs) { unsigned int sse; const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse); + return var; +} + +unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs) { + const unsigned int var = get_sby_perpixel_ssd(cpi, ref, bs); return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); } #if CONFIG_VP9_HIGHBITDEPTH -static unsigned int high_get_sby_perpixel_variance( +unsigned int high_get_sby_perpixel_ssd( VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) { unsigned int var, sse; switch (bd) { @@ -186,6 +196,12 @@ static unsigned int high_get_sby_perpixel_variance( 0, &sse); break; } + return var; +} + +unsigned int high_get_sby_perpixel_variance( + VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) { + const unsigned int var = high_get_sby_perpixel_ssd(cpi, ref, bs, bd); return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -867,6 +883,53 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } #endif if (!frame_is_intra_only(cm)) { +#if CONFIG_COPY_MODE + if (is_inter_block(mbmi) && mbmi->copy_mode == NOREF) { +#else + if (is_inter_block(mbmi)) { +#endif // CONFIG_COPY_MODE +#if CONFIG_GLOBAL_MOTION + if (bsize >= BLOCK_8X8) { +#if CONFIG_COMPOUND_MODES + if (mbmi->mode == ZEROMV || mbmi->mode == ZERO_ZEROMV) { + ++cpi->global_motion_used[mbmi->ref_frame[0]]; + if (mbmi->mode == ZERO_ZEROMV) + ++cpi->global_motion_used[mbmi->ref_frame[1]]; + } +#else + if (mbmi->mode == ZEROMV) { + ++cpi->global_motion_used[mbmi->ref_frame[0]]; + if (has_second_ref(mbmi)) + ++cpi->global_motion_used[mbmi->ref_frame[1]]; + } +#endif // CONFIG_COMPOUND_MODES + } else { + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; + int idx, idy; + for (idy = 0; idy 
< 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { + const int j = idy * 2 + idx; + const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; +#if CONFIG_COMPOUND_MODES + if (b_mode == ZEROMV || b_mode == ZERO_ZEROMV) { + ++cpi->global_motion_used[mbmi->ref_frame[0]]; + if (b_mode == ZERO_ZEROMV) + ++cpi->global_motion_used[mbmi->ref_frame[1]]; + } +#else + if (b_mode == ZEROMV) { + ++cpi->global_motion_used[mbmi->ref_frame[0]]; + if (has_second_ref(mbmi)) + ++cpi->global_motion_used[mbmi->ref_frame[1]]; + } +#endif // CONFIG_COMPOUND_MODES + } + } + } +#endif // CONFIG_GLOBAL_MOTION + } + rd_opt->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; rd_opt->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; rd_opt->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; @@ -1142,8 +1205,9 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, x->e_mbd.plane[i].subsampling_y); } -static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, +static void set_mode_info_seg_skip(VP9_COMMON *cm, MACROBLOCK *x, RD_COST *rd_cost, BLOCK_SIZE bsize) { + TX_MODE tx_mode = cm->tx_mode; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; INTERP_FILTER filter_ref; @@ -1163,10 +1227,16 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, mbmi->uv_mode = DC_PRED; mbmi->ref_frame[0] = LAST_FRAME; mbmi->ref_frame[1] = NONE; - mbmi->mv[0].as_int = 0; mbmi->interp_filter = filter_ref; +#if CONFIG_GLOBAL_MOTION + mbmi->mv[0].as_int = cm->global_motion[mbmi->ref_frame[0]][0].mv.as_int; + xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = + cm->global_motion[mbmi->ref_frame[0]][0].mv.as_int; +#else + mbmi->mv[0].as_int = 0; xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = 0; +#endif // CONFIG_GLOBAL_MOTION x->skip = 1; vp9_rd_cost_init(rd_cost); @@ -3754,7 +3824,7 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) - set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize); + set_mode_info_seg_skip(cm, x, rd_cost, bsize); else vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rd_cost, bsize, ctx); @@ -4517,6 +4587,54 @@ static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats, } #endif +#if CONFIG_GLOBAL_MOTION +#define MIN_TRANSLATION_THRESH 4 +static void convert_translation_to_params( + double *H, Global_Motion_Params *model) { + model->mv.as_mv.col = (int) floor(H[0] * 8 + 0.5); + model->mv.as_mv.row = (int) floor(H[1] * 8 + 0.5); + if (abs(model->mv.as_mv.col) < MIN_TRANSLATION_THRESH && + abs(model->mv.as_mv.row) < MIN_TRANSLATION_THRESH) { + model->mv.as_int = 0; + } + model->mv.as_mv.col = + clamp(model->mv.as_mv.col, + -(1 << ABS_TRANSLATION_BITS), (1 << ABS_TRANSLATION_BITS)); + model->mv.as_mv.row = + clamp(model->mv.as_mv.row, + -(1 << ABS_TRANSLATION_BITS), (1 << ABS_TRANSLATION_BITS)); +} + +static void convert_rotzoom_to_params(double *H, Global_Motion_Params *model) { + double z = sqrt(H[0] * H[0] + H[1] * H[1]) - 1.0; + double r = atan2(-H[1], H[0]) * 180.0 / M_PI; + assert(abs(H[0] - (1 + z) * cos(r * M_PI / 180.0)) < 1e-10); + assert(abs(H[1] + (1 + z) * sin(r * M_PI / 180.0)) < 1e-10); + model->zoom = (int) floor(z * (1 << ZOOM_PRECISION_BITS) + 0.5); + model->rotation = (int) floor(r * (1 << ROTATION_PRECISION_BITS) + 0.5); + model->zoom = clamp( + model->zoom, -(1 << ABS_ZOOM_BITS), (1 << ABS_ZOOM_BITS)); + model->rotation = 
clamp( + model->rotation, -(1 << ABS_ROTATION_BITS), (1 << ABS_ROTATION_BITS)); + + convert_translation_to_params(H + 2, model); +} + +static void convert_model_to_params(double *H, TransformationType type, + Global_Motion_Params *model) { + switch (type) { + case ROTZOOM: + convert_rotzoom_to_params(H, model); + break; + case TRANSLATION: + convert_translation_to_params(H, model); + break; + default: + break; + } +} +#endif // CONFIG_GLOBAL_MOTION + static void encode_frame_internal(VP9_COMP *cpi) { SPEED_FEATURES *const sf = &cpi->sf; RD_OPT *const rd_opt = &cpi->rd; @@ -4541,13 +4659,58 @@ static void encode_frame_internal(VP9_COMP *cpi) { cm->tx_mode = select_tx_mode(cpi); +#if CONFIG_GLOBAL_MOTION +#define GLOBAL_MOTION_MODEL TRANSLATION + vp9_zero(cpi->global_motion_used); + vpx_memset(cm->num_global_motion, 0, sizeof(cm->num_global_motion)); + cm->num_global_motion[LAST_FRAME] = 1; + cm->num_global_motion[GOLDEN_FRAME] = 1; + cm->num_global_motion[ALTREF_FRAME] = 1; + if (cpi->common.frame_type == INTER_FRAME && cpi->Source) { + YV12_BUFFER_CONFIG *ref_buf; + int num, frame; + double global_motion[9 * MAX_GLOBAL_MOTION_MODELS]; + for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) { + ref_buf = get_ref_frame_buffer(cpi, frame); + if (ref_buf) { + if ((num = + vp9_compute_global_motion_multiple_block_based( + cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf, + BLOCK_16X16, MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) { + /* + vp9_compute_global_motion_multiple_feature_based( + cpi, GLOBAL_MOTION_MODEL, cpi->Source, ref_buf, + MAX_GLOBAL_MOTION_MODELS, 0.5, global_motion))) { + */ + int i; + for (i = 0; i < num; i++) { + convert_model_to_params( + global_motion + i * get_numparams(GLOBAL_MOTION_MODEL), + GLOBAL_MOTION_MODEL, + &cm->global_motion[frame][i]); + /* + printf("Ref %d [%d]: %d %d %d %d\n", + frame, cm->current_video_frame, + cm->global_motion[frame][i].zoom, + cm->global_motion[frame][i].rotation, + cm->global_motion[frame][i].mv.as_mv.col, + cm->global_motion[frame][i].mv.as_mv.row); + */ + } + cm->num_global_motion[frame] = num; + } + } + } + } +#endif // CONFIG_GLOBAL_MOTION + #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) x->fwd_txm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vp9_highbd_fdct4x4; else x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4; x->highbd_itxm_add = xd->lossless ? vp9_highbd_iwht4x4_add : - vp9_highbd_idct4x4_add; + vp9_highbd_idct4x4_add; #else x->fwd_txm4x4 = xd->lossless ? 
vp9_fwht4x4 : vp9_fdct4x4; #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 8c5ad02e3..f4ae974ce 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -429,6 +429,9 @@ typedef struct VP9_COMP { #if CONFIG_VP9_TEMPORAL_DENOISING VP9_DENOISER denoiser; #endif +#if CONFIG_GLOBAL_MOTION + int global_motion_used[MAX_REF_FRAMES]; +#endif } VP9_COMP; void vp9_initialize_enc(); diff --git a/vp9/encoder/vp9_global_motion.c b/vp9/encoder/vp9_global_motion.c index 92fd490ca..40567314d 100644 --- a/vp9/encoder/vp9_global_motion.c +++ b/vp9/encoder/vp9_global_motion.c @@ -37,23 +37,7 @@ #define MAX_CORNERS 4096 -inline int get_numparams(TransformationType type) { - switch (type) { - case HOMOGRAPHY: - return 9; - case AFFINE: - return 6; - case ROTZOOM: - return 4; - case TRANSLATION: - return 2; - default: - assert(0); - return 0; - } -} - -inline ransacType get_ransacType(TransformationType type) { +INLINE ransacType get_ransacType(TransformationType type) { switch (type) { case HOMOGRAPHY: return ransacHomography; @@ -69,7 +53,7 @@ inline ransacType get_ransacType(TransformationType type) { } } -inline projectPointsType get_projectPointsType(TransformationType type) { +INLINE projectPointsType get_projectPointsType(TransformationType type) { switch (type) { case HOMOGRAPHY: return projectPointsHomography; @@ -365,31 +349,34 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi, TransformationType type, YV12_BUFFER_CONFIG *frm, YV12_BUFFER_CONFIG *ref, - int blocksize, + BLOCK_SIZE bsize, double *H) { VP9_COMMON *const cm = &cpi->common; int num_correspondences = 0; int *correspondences; int num_inliers; int *inlier_map = NULL; - + int bwidth = num_4x4_blocks_wide_lookup[bsize] << 2; + int bheight = num_4x4_blocks_high_lookup[bsize] << 2; int i; MV motionfield[4096]; double confidence[4096]; - get_frame_motionfield(cpi, frm, ref, blocksize, motionfield, confidence); + vp9_get_frame_motionfield(cpi, frm, ref, bsize, motionfield, confidence); correspondences = (int *)malloc(4 * cm->mb_rows * cm->mb_cols * sizeof(*correspondences)); for (i = 0; i < cm->mb_rows * cm->mb_cols; i ++) { - int x = (i % cm->mb_cols) * blocksize + blocksize/2; - int y = (i / cm->mb_cols) * blocksize + blocksize/2; + int x = (i % cm->mb_cols) * bwidth + bwidth / 2; + int y = (i / cm->mb_cols) * bheight + bheight / 2; if (confidence[i] > CONFIDENCE_THRESHOLD) { - correspondences[num_correspondences*4] = x; - correspondences[num_correspondences*4+1] = y; - correspondences[num_correspondences*4+2] = motionfield[i].col + x; - correspondences[num_correspondences*4+3] = motionfield[i].row + y; + correspondences[num_correspondences * 4] = x; + correspondences[num_correspondences * 4 + 1] = y; + correspondences[num_correspondences * 4 + 2] = + (double)motionfield[i].col / 8 + x; + correspondences[num_correspondences * 4 + 3] = + (double)motionfield[i].row / 8 + y; num_correspondences++; } } @@ -415,7 +402,7 @@ int vp9_compute_global_motion_multiple_block_based(struct VP9_COMP *cpi, TransformationType type, YV12_BUFFER_CONFIG *frm, YV12_BUFFER_CONFIG *ref, - int blocksize, + BLOCK_SIZE bsize, int max_models, double inlier_prob, double *H) { @@ -425,23 +412,27 @@ int vp9_compute_global_motion_multiple_block_based(struct VP9_COMP *cpi, int num_inliers; int num_models = 0; int *inlier_map = NULL; + int bwidth = num_4x4_blocks_wide_lookup[bsize] << 2; + int bheight = num_4x4_blocks_high_lookup[bsize] << 2; int i; MV motionfield[4096]; double 
confidence[4096]; - get_frame_motionfield(cpi, frm, ref, blocksize, motionfield, confidence); + vp9_get_frame_motionfield(cpi, frm, ref, bsize, motionfield, confidence); correspondences = (int *)malloc(4 * cm->mb_rows * cm->mb_cols * sizeof(*correspondences)); for (i = 0; i < cm->mb_rows * cm->mb_cols; i ++) { - int x = (i % cm->mb_cols) * blocksize + blocksize/2; - int y = (i / cm->mb_cols) * blocksize + blocksize/2; + int x = (i % cm->mb_cols) * bwidth + bwidth / 2; + int y = (i / cm->mb_cols) * bheight + bheight / 2; if (confidence[i] > CONFIDENCE_THRESHOLD) { - correspondences[num_correspondences*4] = x; - correspondences[num_correspondences*4+1] = y; - correspondences[num_correspondences*4+2] = motionfield[i].col + x; - correspondences[num_correspondences*4+3] = motionfield[i].row + y; + correspondences[num_correspondences * 4] = x; + correspondences[num_correspondences * 4 + 1] = y; + correspondences[num_correspondences * 4 + 2] = + (double)motionfield[i].col / 8 + x; + correspondences[num_correspondences * 4 + 3] = + (double)motionfield[i].row / 8 + y; num_correspondences++; } } diff --git a/vp9/encoder/vp9_global_motion.h b/vp9/encoder/vp9_global_motion.h index 654bb58ce..ba51b0a7f 100644 --- a/vp9/encoder/vp9_global_motion.h +++ b/vp9/encoder/vp9_global_motion.h @@ -23,7 +23,7 @@ struct VP9_COMP; -static const int CONFIDENCE_THRESHOLD = 10; +static const int CONFIDENCE_THRESHOLD = 1.0; typedef enum { UNKNOWN_TRANSFORM = -1, @@ -33,11 +33,25 @@ typedef enum { TRANSLATION // translational motion 2-parameter } TransformationType; -inline int get_numparams(TransformationType type); +static INLINE int get_numparams(TransformationType type) { + switch (type) { + case HOMOGRAPHY: + return 9; + case AFFINE: + return 6; + case ROTZOOM: + return 4; + case TRANSLATION: + return 2; + default: + assert(0); + return 0; + } +} -inline ransacType get_ransacType(TransformationType type); +INLINE ransacType get_ransacType(TransformationType type); -inline projectPointsType get_projectPointsType(TransformationType type); +INLINE projectPointsType get_projectPointsType(TransformationType type); // Returns number of models actually returned: 1 - if success, 0 - if failure int vp9_compute_global_motion_single_feature_based(struct VP9_COMP *cpi, @@ -66,7 +80,7 @@ int vp9_compute_global_motion_single_block_based(struct VP9_COMP *cpi, TransformationType type, YV12_BUFFER_CONFIG *frm, YV12_BUFFER_CONFIG *ref, - int blocksize, + BLOCK_SIZE bsize, double *H); // Returns number of models actually returned: 1+ - #models, 0 - if failure @@ -80,7 +94,7 @@ int vp9_compute_global_motion_multiple_block_based(struct VP9_COMP *cpi, TransformationType type, YV12_BUFFER_CONFIG *frm, YV12_BUFFER_CONFIG *ref, - int blocksize, + BLOCK_SIZE bsize, int max_models, double inlier_prob, double *H); diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 5ce45870c..c926b5320 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -93,8 +93,8 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, if (x->nmvsadcost) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; - return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost, - x->nmvsadcost) * error_per_bit, 8); + const int cost = mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost); + return ROUND_POWER_OF_TWO(cost * error_per_bit, 8); } return 0; } @@ -694,6 +694,7 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x, int sadpb, const vp9_variance_fn_ptr_t *fn_ptr, const MV *best_mv, + int use_mvcost, int 
*cost_list) { static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}}; const struct buf_2d *const what = &x->plane[0].src; @@ -710,7 +711,7 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x, cost_list[0] = fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride, &sse) + - mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); + (use_mvcost ? mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb) : 0); if (check_bounds(x, br, bc, 1)) { for (i = 0; i < 4; i++) { const MV this_mv = {br + neighbors[i].row, @@ -718,9 +719,9 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x, cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride, &sse) + - // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); - mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost, - x->errorperbit); + (use_mvcost ? mv_err_cost(&this_mv, &fcenter_mv, + x->nmvjointcost, x->mvcost, + x->errorperbit) : 0); } } else { for (i = 0; i < 4; i++) { @@ -732,9 +733,9 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x, cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride, &sse) + - // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); - mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost, - x->errorperbit); + (use_mvcost ? mv_err_cost(&this_mv, &fcenter_mv, + x->nmvjointcost, x->mvcost, + x->errorperbit) : 0); } } } @@ -777,8 +778,9 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Work out the start point for the search bestsad = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride) + - mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); + get_buf_from_mv(in_what, ref_mv), in_what->stride); + if (use_mvcost) + bestsad += mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -908,7 +910,8 @@ static int vp9_pattern_search(const MACROBLOCK *x, // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel if (cost_list) { const MV best_mv = { br, bc }; - calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list); + calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, + use_mvcost, cost_list); } best_mv->row = br; best_mv->col = bc; @@ -956,8 +959,9 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, // Work out the start point for the search bestsad = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride) + - mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); + get_buf_from_mv(in_what, ref_mv), in_what->stride); + if (use_mvcost) + bestsad += mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -1172,8 +1176,10 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; cost_list[i + 1] = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), - in_what->stride); + get_buf_from_mv(in_what, &this_mv), + in_what->stride) + + (use_mvcost ? 
+ mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit) : 0); } } else { for (i = 0; i < 4; i++) { @@ -1184,7 +1190,9 @@ static int vp9_pattern_search_sad(const MACROBLOCK *x, else cost_list[i + 1] = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride); + in_what->stride) + + (use_mvcost ? + mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit) : 0); } } } else { @@ -1668,7 +1676,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, // Return cost list. if (cost_list) { - calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); + calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, 1, cost_list); } return bestsme; } diff --git a/vp9/encoder/vp9_motionmodel.c b/vp9/encoder/vp9_motionmodel.c index 89e20458c..be9aa7831 100644 --- a/vp9/encoder/vp9_motionmodel.c +++ b/vp9/encoder/vp9_motionmodel.c @@ -19,42 +19,33 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/encoder/vp9_global_motion.h" +unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs); +unsigned int get_sby_perpixel_ssd(VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs); +#if CONFIG_VP9_HIGHBITDEPTH +unsigned int high_get_sby_perpixel_ssd( + VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd); +unsigned int high_get_sby_perpixel_variance( + VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd); +#endif -static unsigned int do_motion_iteration(VP9_COMP *cpi, - const MV *ref_mv, - MV *dst_mv, - int bsize, - int mb_row, - int mb_col, - unsigned int *sse) { - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - - PREDICTION_MODE tmp_mode = xd->mi[0].src_mi->mbmi.mode; - MV tmp_mv = xd->mi[0].src_mi->mbmi.mv[0].as_mv; - int tmp_frame = xd->mi[0].src_mi->mbmi.ref_frame[1]; - struct macroblockd_plane *const tmp_pd = &xd->plane[0]; - struct macroblockd_plane otherpd; - - const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; - - const int tmp_col_min = x->mv_col_min; - const int tmp_col_max = x->mv_col_max; - const int tmp_row_min = x->mv_row_min; - const int tmp_row_max = x->mv_row_max; +static int do_motion_iteration(MACROBLOCK *const x, + const vp9_variance_fn_ptr_t *v_fn_ptr, + const MV *ref_mv, + MV *dst_mv) { MV ref_full; - int cost_list[5]; - int sad = INT32_MAX; - uint8_t tmpbuf[4096]; - BLOCK_SIZE block = bsize == 16 ? 
BLOCK_16X16 : BLOCK_8X8; - const vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[block]; + unsigned int sse; + int besterr, distortion; // Further step/diamond searches as necessary - int step_param = mv_sf->reduce_first_step_size; - step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2); - - otherpd.dst.buf = tmpbuf; - xd->plane[0] = otherpd; + // int step_param = mv_sf->reduce_first_step_size; + // step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2); + int step_param = 0; + int subpel_iters_per_step = 2; + int allow_high_precision_mv = 1; vp9_set_mv_search_range(x, ref_mv); @@ -62,195 +53,71 @@ static unsigned int do_motion_iteration(VP9_COMP *cpi, ref_full.row = ref_mv->row >> 3; /*cpi->sf.search_method == HEX*/ - vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0, - cond_cost_list(cpi, cost_list), - &v_fn_ptr, 0, ref_mv, dst_mv); - + vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0, NULL, + v_fn_ptr, 0, ref_mv, dst_mv); - // Try sub-pixel MC - // if (bestsme > error_thresh && bestsme < INT_MAX) - { - int distortion; - unsigned int sse; - cpi->find_fractional_mv_step( - x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, - &v_fn_ptr, 0, mv_sf->subpel_iters_per_step, - cond_cost_list(cpi, cost_list), - NULL, NULL, - &distortion, &sse, NULL, 0, 0); - } + besterr = vp9_find_best_sub_pixel_tree( + x, dst_mv, ref_mv, allow_high_precision_mv, x->errorperbit, + v_fn_ptr, 0, subpel_iters_per_step, + NULL, NULL, NULL, &distortion, &sse, NULL, 0, 0); -#if CONFIG_COMPOUND_MODES - if (has_second_ref(&xd->mi[0].src_mi->mbmi)) { - xd->mi[0].src_mi->mbmi.mode = NEW_NEWMV; - } else { -#endif - xd->mi[0].src_mi->mbmi.mode = NEWMV; -#if CONFIG_COMPOUND_MODES - } -#endif - xd->mi[0].src_mi->mbmi.mv[0].as_mv = *dst_mv; -#if CONFIG_INTERINTRA - xd->mi[0].src_mi->mbmi.ref_frame[1] = NONE; -#endif - - vp9_build_inter_predictors_sby(xd, mb_row, mb_col, block); - - /* restore UMV window */ - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; - - if (bsize == 16) { - sad = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride); - vp9_variance16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, sse); - } else if (bsize == 8) { - sad = vp9_sad8x8(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride); - vp9_variance8x8(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, sse); - } - xd->mi[0].src_mi->mbmi.mode = tmp_mode; - xd->mi[0].src_mi->mbmi.mv[0].as_mv = tmp_mv; - xd->mi[0].src_mi->mbmi.ref_frame[1] = tmp_frame; - - xd->plane[0] = *tmp_pd; - - return sad; + return besterr; } -static int do_motion_search(VP9_COMP *cpi, const MV *ref_mv, int bsize, - int_mv *dst_mv, int mb_row, int mb_col, - unsigned int *sse) { - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - unsigned int err, tmp_err; - MV tmp_mv; - - // Try zero MV first - // FIXME should really use something like near/nearest MV and/or MV prediction - if (bsize == 16) { - err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); - } else { - err = vp9_sad8x8(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); - } - dst_mv->as_int = 0; - - // Test last reference frame using the previous best mv as the - // starting point (best reference) for the 
search - tmp_err = do_motion_iteration(cpi, ref_mv, &tmp_mv, - bsize, mb_row, mb_col, sse); - if (tmp_err < err) { - err = tmp_err; - dst_mv->as_mv = tmp_mv; - } +static int do_motion_search(MACROBLOCK *const x, + const vp9_variance_fn_ptr_t *v_fn_ptr, + const MV *ref_mv, + int_mv *dst_mv) { + int err = do_motion_iteration(x, v_fn_ptr, + ref_mv, &dst_mv->as_mv); // If the current best reference mv is not centered on 0,0 then do a 0,0 // based search as well. if (ref_mv->row != 0 || ref_mv->col != 0) { - unsigned int tmp_err; + int tmp_err; MV zero_ref_mv = {0, 0}, tmp_mv; - tmp_err = do_motion_iteration(cpi, &zero_ref_mv, &tmp_mv, bsize, - mb_row, mb_col, sse); + tmp_err = do_motion_iteration(x, v_fn_ptr, + &zero_ref_mv, &tmp_mv); if (tmp_err < err) { dst_mv->as_mv = tmp_mv; err = tmp_err; } } - return err; } -static void get_mb_motionfield(VP9_COMP *cpi, +void vp9_get_frame_motionfield(struct VP9_COMP *cpi, YV12_BUFFER_CONFIG *buf, - int mb_y_offset, YV12_BUFFER_CONFIG *ref, - const MV *prev_ref_mv, - int bsize, - int mb_row, - int mb_col, - MV *mv, + BLOCK_SIZE bsize, + MV *motionfield, double *confidence) { - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - VP9_COMMON *cm = &cpi->common; - uint8_t *tmp_buf = x->plane[0].src.buf; - int tmp_stride = x->plane[0].src.stride; - uint8_t *tmp_dst_buf = xd->plane[0].dst.buf; - int tmp_dst_stride = xd->plane[0].dst.stride; - - // FIXME in practice we're completely ignoring chroma here - x->plane[0].src.buf = buf->y_buffer + mb_y_offset; - x->plane[0].src.stride = buf->y_stride; - - xd->plane[0].dst.buf = get_frame_new_buffer(cm)->y_buffer + mb_y_offset; - xd->plane[0].dst.stride = get_frame_new_buffer(cm)->y_stride; - - // Golden frame MV search, if it exists and is different than last frame - if (ref) { - int_mv intmv; - unsigned int sse, sad; - xd->plane[0].pre[0].buf = ref->y_buffer + mb_y_offset; - xd->plane[0].pre[0].stride = ref->y_stride; - sad = do_motion_search(cpi, - prev_ref_mv, - bsize, - &intmv, - mb_row, mb_col, &sse); - *confidence = (sse)/(sad+1); - *mv = intmv.as_mv; - } - - x->plane[0].src.buf = tmp_buf; - x->plane[0].src.stride = tmp_stride; - - xd->plane[0].dst.buf = tmp_dst_buf; - xd->plane[0].dst.stride = tmp_dst_stride; -} - -void get_frame_motionfield(struct VP9_COMP *cpi, - YV12_BUFFER_CONFIG *buf, - YV12_BUFFER_CONFIG *ref, - int blocksize, - MV *motionfield, - double *confidence) { - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK mx; + MACROBLOCK *const x = &mx; MACROBLOCKD *const xd = &x->e_mbd; VP9_COMMON *const cm = &cpi->common; - int mb_col, mb_row, offset = 0; + int mb_col, mb_row; int mb_y_offset = 0, ref_y_offset = 0; - int tmp_mv_row_min = x->mv_row_min, tmp_mv_row_max = x->mv_row_max; - int tmp_up_available = xd->up_available; - int tmp_left_available = xd->left_available; - int tmp_y_dst_stride = xd->plane[0].dst.stride; - int tmp_y_pre_stride = xd->plane[0].pre[0].stride; - int tmp_uv_dst_stride = xd->plane[1].dst.stride; - int bsize = blocksize; int border = BORDER_MV_PIXELS_B16; + int bwidth = num_4x4_blocks_wide_lookup[bsize] << 2; + int bheight = num_4x4_blocks_high_lookup[bsize] << 2; MV ref_top_mv = {0, 0}; - MODE_INFO mi_local; - MODE_INFO *tmp_mi = xd->mi[0].src_mi; - vp9_zero(mi_local); - // Set up limit values for motion vectors to prevent them extending outside -// // the UMV borders. + + x->errorperbit = + vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q) / + 64; + x->errorperbit += (x->errorperbit == 0); + + // the UMV borders. 
x->mv_row_min = -border; - x->mv_row_max = (cm->mb_rows - 1) * (bsize/2) + border; + x->mv_row_max = cm->mi_rows * 8 + border; xd->up_available = 0; xd->plane[0].dst.stride = buf->y_stride; - xd->plane[0].pre[0].stride = buf->y_stride; xd->plane[1].dst.stride = buf->uv_stride; - xd->mi[0].src_mi = &mi_local; - mi_local.mbmi.sb_type = bsize == 16 ? BLOCK_16X16 : BLOCK_8X8; - mi_local.mbmi.ref_frame[0] = LAST_FRAME; - mi_local.mbmi.ref_frame[1] = NONE; + xd->plane[0].pre[0].stride = buf->y_stride; for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { MV ref_left_mv = ref_top_mv; int mb_y_in_offset = mb_y_offset; @@ -259,50 +126,48 @@ void get_frame_motionfield(struct VP9_COMP *cpi, // Set up limit values for motion vectors to prevent them extending outside // the UMV borders. x->mv_col_min = -border; - x->mv_col_max = (cm->mb_cols - 1) * (bsize/2) + border; + x->mv_col_max = cm->mi_cols * 8 + border; xd->left_available = 0; for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - MV mv; - get_mb_motionfield(cpi, buf, mb_y_in_offset, - ref, &ref_left_mv, - blocksize, - mb_row, mb_col, &mv, - &confidence[mb_row*cm->mb_cols + mb_col]); - motionfield[mb_row*cm->mb_cols + mb_col] = mv; + int_mv intmv; + unsigned int ssd, err; + + x->plane[0].src.buf = buf->y_buffer + mb_y_in_offset; + x->plane[0].src.stride = buf->y_stride; + xd->plane[0].pre[0].buf = ref->y_buffer + ref_y_in_offset; + xd->plane[0].pre[0].stride = ref->y_stride; + ssd = get_sby_perpixel_ssd(cpi, &x->plane[0].src, bsize); + err = + do_motion_search(x, + &cpi->fn_ptr[bsize], + &ref_left_mv, + &intmv); + confidence[mb_row * cm->mb_cols + mb_col] = (double)ssd / (err + 1); + motionfield[mb_row * cm->mb_cols + mb_col] = intmv.as_mv; if (mb_col == 0) { ref_top_mv = ref_left_mv; } xd->left_available = 1; - mb_y_in_offset += bsize; - ref_y_in_offset += bsize; - x->mv_col_min -= bsize; - x->mv_col_max -= bsize; + mb_y_in_offset += bwidth; + ref_y_in_offset += bwidth; + x->mv_col_min -= bwidth; + x->mv_col_max -= bwidth; } xd->up_available = 1; - mb_y_offset += buf->y_stride * bsize; - ref_y_offset += ref->y_stride * bsize; - x->mv_row_min -= bsize; - x->mv_row_max -= bsize; - offset += cm->mb_cols; + mb_y_offset += buf->y_stride * bheight; + ref_y_offset += ref->y_stride * bheight; + x->mv_row_min -= bheight; + x->mv_row_max -= bheight; } - xd->mi[0].src_mi = tmp_mi; - x->mv_row_min = tmp_mv_row_min; - x->mv_row_max = tmp_mv_row_max; - xd->up_available = tmp_up_available; - xd->left_available = tmp_left_available; - xd->plane[0].dst.stride = tmp_y_dst_stride; - xd->plane[0].pre[0].stride = tmp_y_pre_stride; - xd->plane[1].dst.stride = tmp_uv_dst_stride; } -void vp9_get_motionfield(VP9_COMP *cpi, int ref, int blocksize, - MV *motionfield, double *confidence) { +void vp9_get_ref_motionfield(VP9_COMP *cpi, int ref, BLOCK_SIZE bsize, + MV *motionfield, double *confidence) { YV12_BUFFER_CONFIG *ref_buf = get_ref_frame_buffer(cpi, ref); - struct lookahead_entry *q_cur = vp9_lookahead_peek(cpi->lookahead, 0); - if (q_cur) { - get_frame_motionfield(cpi, &q_cur->img, ref_buf, - blocksize, motionfield, confidence); + if (cpi->Source && ref_buf) { + vp9_get_frame_motionfield(cpi, cpi->Source, ref_buf, + bsize, motionfield, confidence); } } diff --git a/vp9/encoder/vp9_motionmodel.h b/vp9/encoder/vp9_motionmodel.h index 5f0db6dde..abf2a08ca 100644 --- a/vp9/encoder/vp9_motionmodel.h +++ b/vp9/encoder/vp9_motionmodel.h @@ -17,15 +17,18 @@ extern "C" { struct VP9_COMP; -void get_frame_motionfield(struct VP9_COMP *cpi, - YV12_BUFFER_CONFIG *buf, - 
YV12_BUFFER_CONFIG *ref, - int blocksize, - MV *motionfield, - double *confidence); +void vp9_get_frame_motionfield(struct VP9_COMP *cpi, + YV12_BUFFER_CONFIG *buf, + YV12_BUFFER_CONFIG *ref, + BLOCK_SIZE bsize, + MV *motionfield, + double *confidence); -void vp9_get_motionfield(struct VP9_COMP *cpi, int ref, - int blocksize, MV *motionfield, double *confidence); +void vp9_get_ref_motionfield(struct VP9_COMP *cpi, + int ref, + BLOCK_SIZE bsize, + MV *motionfield, + double *confidence); #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index daa834693..0a29ae38d 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -580,7 +580,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, PREDICTION_MODE this_mode; x->pred_mv_sad[ref_frame] = INT_MAX; frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; +#if CONFIG_GLOBAL_MOTION + frame_mv[ZEROMV][ref_frame].as_int = + cm->global_motion[ref_frame][0].mv.as_int; +#else frame_mv[ZEROMV][ref_frame].as_int = 0; +#endif // CONFIG_GLOBAL_MOTION if (xd->up_available) filter_ref = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter; @@ -628,7 +633,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #endif int rate_mv = 0; int mode_rd_thresh; - +#if CONFIG_GLOBAL_MOTION + if (const_motion[ref_frame] && this_mode == NEARMV) +#else // CONFIG_GLOBAL_MOTION #if CONFIG_COMPOUND_MODES if (const_motion[ref_frame] && (this_mode == NEARMV || this_mode == ZEROMV || @@ -637,6 +644,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (const_motion[ref_frame] && (this_mode == NEARMV || this_mode == ZEROMV)) #endif +#endif // CONFIG_GLOBAL_MOTION continue; if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) diff --git a/vp9/encoder/vp9_ransac.c b/vp9/encoder/vp9_ransac.c index 40282f086..8e3643763 100644 --- a/vp9/encoder/vp9_ransac.c +++ b/vp9/encoder/vp9_ransac.c @@ -20,6 +20,9 @@ #define MAX_PARAMDIM 9 #define MAX_MINPTS 4 +#define MAX_DEGENERATE_ITER 10 +#define MINPTS_MULTIPLIER 5 + // svdcmp // Adopted from Numerical Recipes in C @@ -364,6 +367,30 @@ typedef int (*findTransformationType)(int points, double *points2, double *H); +static int get_rand_indices(int npoints, int minpts, int *indices) { + int i, j; + int ptr = rand() % npoints; + if (minpts > npoints) + return 0; + indices[0] = ptr; + ptr = (ptr == npoints - 1 ? 0 : ptr + 1); + i = 1; + while (i < minpts) { + int index = rand() % npoints; + while (index) { + ptr = (ptr == npoints - 1 ? 
0 : ptr + 1); + for (j = 0; j < i; ++j) { + if (indices[j] == ptr) + break; + } + if (j == i) + index--; + } + indices[i++] = ptr; + } + return 1; +} + int ransac_(double *matched_points, int npoints, int *number_of_inliers, @@ -378,7 +405,7 @@ int ransac_(double *matched_points, projectPointsType projectPoints) { static const double INLIER_THRESHOLD_NORMALIZED = 0.1; - static const double INLIER_THRESHOLD_UNNORMALIZED = 1.5; + static const double INLIER_THRESHOLD_UNNORMALIZED = 0.5; static const double PROBABILITY_REQUIRED = 0.9; static const double EPS = 1e-12; static const int MIN_TRIALS = 20; @@ -387,13 +414,15 @@ int ransac_(double *matched_points, INLIER_THRESHOLD_NORMALIZED : INLIER_THRESHOLD_UNNORMALIZED); int N = 10000, trial_count = 0; - int i, j; + int i; + int ret_val = 0; int max_inliers = 0; double best_variance = 0.0; double H[MAX_PARAMDIM]; double points1[2 * MAX_MINPTS]; double points2[2 * MAX_MINPTS]; + int indices[MAX_MINPTS]; double *best_inlier_set1; double *best_inlier_set2; @@ -408,11 +437,12 @@ int ransac_(double *matched_points, double *cnp1, *cnp2; double T1[9], T2[9]; - srand((unsigned)time(NULL)) ; - // srand( 12345 ) ; - // + // srand((unsigned)time(NULL)) ; + // better to make this deterministic for a given sequence for ease of testing + srand(npoints); + *number_of_inliers = 0; - if (npoints < minpts) { + if (npoints < minpts * MINPTS_MULTIPLIER) { printf("Cannot find motion with %d matches\n", npoints); return 1; } @@ -446,19 +476,16 @@ int ransac_(double *matched_points, double sum_distance_squared = 0.0; int degenerate = 1; + int num_degenerate_iter = 0; while (degenerate) { + num_degenerate_iter++; + if (!get_rand_indices(npoints, minpts, indices)) { + ret_val = 1; + goto finish_ransac; + } i = 0; while (i < minpts) { - int index = rand() % npoints; - int duplicate = 0; - for (j = 0; j < i; ++j) { - if (points1[j*2] == corners1[index*2] && - points1[j*2+1] == corners1[index*2+1]) { - duplicate = 1; - break; - } - } - if(duplicate) continue; + int index = indices[i]; // add to list points1[i*2] = corners1[index*2]; points1[i*2+1] = corners1[index*2+1]; @@ -467,6 +494,10 @@ int ransac_(double *matched_points, i++; } degenerate = isDegenerate(points1); + if (num_degenerate_iter > MAX_DEGENERATE_ITER) { + ret_val = 1; + goto finish_ransac; + } } if (findTransformation(minpts, points1, points2, H)) { @@ -539,6 +570,7 @@ int ransac_(double *matched_points, npoints, bestH, best_inlier_mask)); */ +finish_ransac: free(best_inlier_set1); free(best_inlier_set2); free(inlier_set1); @@ -548,7 +580,7 @@ int ransac_(double *matched_points, free(image1_coord); free(image2_coord); free(inlier_mask); - return 0; + return ret_val; } /////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 8dce72690..f3c732832 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2721,11 +2721,21 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i, #endif break; case ZEROMV: +#if CONFIG_GLOBAL_MOTION + this_mv[0].as_int = + cpi->common.global_motion[mbmi->ref_frame[0]][0].mv.as_int; +#if !CONFIG_COMPOUND_MODES + if (is_compound) + this_mv[1].as_int = + cpi->common.global_motion[mbmi->ref_frame[1]][0].mv.as_int; +#endif +#else // CONFIG_GLOBAL_MOTION this_mv[0].as_int = 0; #if !CONFIG_COMPOUND_MODES if (is_compound) this_mv[1].as_int = 0; #endif +#endif // CONFIG_GLOBAL_MOTION break; #if CONFIG_COMPOUND_MODES case NEW_NEWMV: @@ -2771,8 
+2781,15 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i, this_mv[1].as_int = frame_mv[mbmi->ref_frame[1]].as_int; break; case ZERO_ZEROMV: +#if CONFIG_GLOBAL_MOTION + this_mv[0].as_int = + cpi->common.global_motion[mbmi->ref_frame[0]][0].mv.as_int; + this_mv[1].as_int = + cpi->common.global_motion[mbmi->ref_frame[1]][0].mv.as_int; +#else this_mv[0].as_int = 0; this_mv[1].as_int = 0; +#endif // CONFIG_GLOBAL_MOTION break; #endif default: @@ -2827,23 +2844,23 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset( BLOCK_8X8, i, pd->pre[ref].stride)]; #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride, - dst, pd->dst.stride, - &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, width, height, - ref, kernel, MV_PRECISION_Q3, - mi_col * MI_SIZE + 4 * (i % 2), - mi_row * MI_SIZE + 4 * (i / 2), xd->bd); - } else { - vp9_build_inter_predictor(pre, pd->pre[ref].stride, - dst, pd->dst.stride, - &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, width, height, ref, - kernel, MV_PRECISION_Q3, - mi_col * MI_SIZE + 4 * (i % 2), - mi_row * MI_SIZE + 4 * (i / 2)); - } + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride, + dst, pd->dst.stride, + &mi->bmi[i].as_mv[ref].as_mv, + &xd->block_refs[ref]->sf, width, height, + ref, kernel, MV_PRECISION_Q3, + mi_col * MI_SIZE + 4 * (i % 2), + mi_row * MI_SIZE + 4 * (i / 2), xd->bd); + } else { + vp9_build_inter_predictor(pre, pd->pre[ref].stride, + dst, pd->dst.stride, + &mi->bmi[i].as_mv[ref].as_mv, + &xd->block_refs[ref]->sf, width, height, ref, + kernel, MV_PRECISION_Q3, + mi_col * MI_SIZE + 4 * (i % 2), + mi_row * MI_SIZE + 4 * (i / 2)); + } #else vp9_build_inter_predictor(pre, pd->pre[ref].stride, dst, pd->dst.stride, @@ -3266,7 +3283,11 @@ static int64_t rd_pick_best_sub8x8_mode( vp9_update_mv_context(cm, xd, tile, mi, frame, mv_ref_list, i, mi_row, mi_col); #endif // CONFIG_NEWMVREF +#if CONFIG_GLOBAL_MOTION + frame_mv[ZEROMV][frame].as_int = cm->global_motion[frame][0].mv.as_int; +#else frame_mv[ZEROMV][frame].as_int = 0; +#endif // CONFIG_GLOBAL_MOTION vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col, #if CONFIG_NEWMVREF mv_ref_list, @@ -3280,7 +3301,12 @@ static int64_t rd_pick_best_sub8x8_mode( &ref_mvs_sub8x8[0][ref], &ref_mvs_sub8x8[1][ref]); #endif // CONFIG_NEWMVREF #if CONFIG_COMPOUND_MODES +#if CONFIG_GLOBAL_MOTION + frame_mv[ZERO_ZEROMV][frame].as_int = + cm->global_motion[frame][0].mv.as_int; +#else frame_mv[ZERO_ZEROMV][frame].as_int = 0; +#endif // CONFIG_GLOBAL_MOTION frame_mv[NEAREST_NEARESTMV][frame].as_int = frame_mv[NEARESTMV][frame].as_int; if (ref == 0) { @@ -3306,7 +3332,7 @@ static int64_t rd_pick_best_sub8x8_mode( frame_mv[NEW_NEARMV][frame].as_int = frame_mv[NEARMV][frame].as_int; } -#endif +#endif // CONFIG_COMPOUND_MODES } // search for the best motion vector on this segment @@ -3349,9 +3375,11 @@ static int64_t rd_pick_best_sub8x8_mode( if (!(inter_mode_mask & (1 << this_mode))) continue; +#if !CONFIG_GLOBAL_MOTION if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, this_mode, mbmi->ref_frame)) continue; +#endif vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre)); vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above, @@ -5839,6 +5867,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1].src_mi : NULL; #endif // CONFIG_PALETTE + vp9_zero(best_mbmode); x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -5883,10 +5912,20 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_NEWMVREF frame_mv[NEAR_FORNEWMV][ref_frame].as_int = INVALID_MV; #endif // CONFIG_NEWMVREF +#if CONFIG_GLOBAL_MOTION + frame_mv[ZEROMV][ref_frame].as_int = + cm->global_motion[ref_frame][0].mv.as_int; +#else frame_mv[ZEROMV][ref_frame].as_int = 0; +#endif // CONFIG_GLOBAL_MOTION #if CONFIG_COMPOUND_MODES frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV; +#if CONFIG_GLOBAL_MOTION + frame_mv[ZERO_ZEROMV][ref_frame].as_int = + cm->global_motion[ref_frame][0].mv.as_int; +#else frame_mv[ZERO_ZEROMV][ref_frame].as_int = 0; +#endif // CONFIG_GLOBAL_MOTION #endif // CONFIG_COMPOUND_MODES } @@ -5924,19 +5963,25 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // an unfiltered alternative. We allow near/nearest as well // because they may result in zero-zero MVs but be cheaper. if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { + int_mv zmv; ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME); ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO; - if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0) +#if CONFIG_GLOBAL_MOTION + zmv.as_int = cm->global_motion[ALTREF_FRAME][0].mv.as_int; +#else + zmv.as_int = 0; +#endif + if (frame_mv[NEARMV][ALTREF_FRAME].as_int != zmv.as_int) mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV); - if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0) + if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != zmv.as_int) mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV); #if CONFIG_COMPOUND_MODES - if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != 0) + if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zmv.as_int) mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV); - if (frame_mv[NEAREST_NEARMV][ALTREF_FRAME].as_int != 0) + if (frame_mv[NEAREST_NEARMV][ALTREF_FRAME].as_int != zmv.as_int) mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARMV); - if (frame_mv[NEAR_NEARESTMV][ALTREF_FRAME].as_int != 0) + if (frame_mv[NEAR_NEARESTMV][ALTREF_FRAME].as_int != zmv.as_int) mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARESTMV); #endif // CONFIG_COMPOUND_MODES } @@ -6161,12 +6206,16 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; if (const_motion) +#if CONFIG_GLOBAL_MOTION + if (this_mode == NEARMV) +#else // CONFIG_GLOBAL_MOTION #if CONFIG_COMPOUND_MODES if (this_mode == NEARMV || this_mode == ZEROMV || this_mode == ZERO_ZEROMV) -#else +#else // CONFIG_COMPOUND_MODES if (this_mode == NEARMV || this_mode == ZEROMV) #endif // CONFIG_COMPOUND_MODES +#endif // CONFIG_GLOBAL_MOTION continue; } @@ -6220,11 +6269,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; } } +#if !CONFIG_GLOBAL_MOTION } else { const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame}; if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, this_mode, ref_frames)) continue; +#endif // !CONFIG_GLOBAL_MOTION } #if CONFIG_INTERINTRA if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME && @@ -6667,6 +6718,14 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0], best_mbmode.ref_frame[1]}; int comp_pred_mode = refs[1] > INTRA_FRAME; + int_mv zmv[2]; +#if CONFIG_GLOBAL_MOTION + zmv[0].as_int = cm->global_motion[refs[0]][0].mv.as_int; + 
zmv[1].as_int = cm->global_motion[refs[1]][0].mv.as_int; +#else + zmv[0].as_int = 0; + zmv[1].as_int = 0; +#endif if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int && ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int == @@ -6676,8 +6735,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int == best_mbmode.mv[1].as_int) || !comp_pred_mode)) best_mbmode.mode = NEARMV; - else if (best_mbmode.mv[0].as_int == 0 && - ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || + else if (best_mbmode.mv[0].as_int == zmv[0].as_int && + ((comp_pred_mode && best_mbmode.mv[1].as_int == zmv[1].as_int) || !comp_pred_mode)) best_mbmode.mode = ZEROMV; } @@ -6687,6 +6746,14 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0], best_mbmode.ref_frame[1]}; int comp_pred_mode = refs[1] > INTRA_FRAME; + int_mv zmv[2]; +#if CONFIG_GLOBAL_MOTION + zmv[0].as_int = cm->global_motion[refs[0]][0].mv.as_int; + zmv[1].as_int = cm->global_motion[refs[1]][0].mv.as_int; +#else + zmv[0].as_int = 0; + zmv[1].as_int = 0; +#endif if (frame_mv[NEAREST_NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int && ((comp_pred_mode && frame_mv[NEAREST_NEARESTMV][refs[1]].as_int == @@ -6702,8 +6769,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ((comp_pred_mode && frame_mv[NEAR_NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) || !comp_pred_mode)) best_mbmode.mode = NEAR_NEARESTMV; - else if (best_mbmode.mv[0].as_int == 0 && - ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode)) + else if (best_mbmode.mv[0].as_int == zmv[0].as_int && + ((comp_pred_mode && best_mbmode.mv[1].as_int == zmv[1].as_int) || + !comp_pred_mode)) best_mbmode.mode = ZERO_ZEROMV; } #endif @@ -7272,7 +7340,11 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x, mbmi->uv_mode = DC_PRED; mbmi->ref_frame[0] = LAST_FRAME; mbmi->ref_frame[1] = NONE; +#if CONFIG_GLOBAL_MOTION + mbmi->mv[0].as_int = cm->global_motion[mbmi->ref_frame[0]][0].mv.as_int; +#else mbmi->mv[0].as_int = 0; +#endif x->skip = 1; // Search for best switchable filter by checking the variance of @@ -7457,10 +7529,20 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; +#if CONFIG_GLOBAL_MOTION + frame_mv[ZEROMV][ref_frame].as_int = + cm->global_motion[ref_frame][0].mv.as_int; +#else frame_mv[ZEROMV][ref_frame].as_int = 0; +#endif // CONFIG_GLOBAL_MOTION #if CONFIG_COMPOUND_MODES frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV; +#if CONFIG_GLOBAL_MOTION + frame_mv[ZERO_ZEROMV][ref_frame].as_int = + cm->global_motion[ref_frame][0].mv.as_int; +#else frame_mv[ZERO_ZEROMV][ref_frame].as_int = 0; +#endif // CONFIG_GLOBAL_MOTION #endif } diff --git a/vp9/encoder/vp9_subexp.c b/vp9/encoder/vp9_subexp.c index 98bd71a7d..bea2bbe3f 100644 --- a/vp9/encoder/vp9_subexp.c +++ b/vp9/encoder/vp9_subexp.c @@ -84,6 +84,7 @@ static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) { } static void encode_uniform(vp9_writer *w, int v) { + // NOTE: this is equivalent to vp9_write_primitive_uniform(w, v, 191); const int l = 8; const int m = (1 << l) - 191; if (v < m) { @@ -199,3 +200,55 @@ int vp9_cond_prob_diff_update_savings(vp9_prob *oldp, upd); return savings; } + +void vp9_write_primitive_uniform(vp9_writer *w, int word, + unsigned int num_values) { + const int l = 
get_unsigned_bits_gen(num_values); + int m; + if (l == 0) + return; + m = (1 << l) - num_values; + if (word < m) { + vp9_write_literal(w, word, l - 1); + } else { + vp9_write_literal(w, m + ((word - m) >> 1), l - 1); + vp9_write_bit(w, (word - m) & 1); + } +} + +void vp9_write_primitive_subexp(vp9_writer *w, int word, + unsigned int k) { + int mk = (1 << k); + int i = 0, j; + int tail; + while (word >= mk) { + mk <<= 1; + ++i; + } + if (i == 0) { + tail = word; + vp9_write_bit(w, 0); + if (k > 0) + vp9_write_literal(w, tail, k); + } else { + tail = word - (mk >> 1); + for (j = 0; j < i; j++) + vp9_write_bit(w, 1); + vp9_write_bit(w, 0); + if (k + i - 1 > 0) + vp9_write_literal(w, tail, k + i - 1); + } +} + +void vp9_write_primitive_symmetric(vp9_writer *w, int word, + unsigned int mag_bits) { + if (word == 0) { + vp9_write_bit(w, 0); + } else { + const int x = abs(word); + const int s = word < 0; + vp9_write_bit(w, 1); + vp9_write_bit(w, s); + vp9_write_literal(w, x - 1, mag_bits); + } +} diff --git a/vp9/encoder/vp9_subexp.h b/vp9/encoder/vp9_subexp.h index f4e63be08..1dbd06e69 100644 --- a/vp9/encoder/vp9_subexp.h +++ b/vp9/encoder/vp9_subexp.h @@ -34,6 +34,20 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct, vp9_prob *bestp, vp9_prob upd); +// num_values is the number of values word can take +void vp9_write_primitive_uniform(vp9_writer *w, int word, + unsigned int num_values); + +// k is the parameter of the subexponential code +void vp9_write_primitive_subexp(vp9_writer *w, int word, + unsigned int k); +// +// mag_bits is the number of bits for the magnitude. The alphabet is of size +// 2 * 2^mag_bits + 1, symmetric around 0, where one bit is used to +// indicate 0 or non-zero, mag_bits bits are used to indicate the magnitude +// and 1 more bit for the sign if non-zero. +void vp9_write_primitive_symmetric(vp9_writer *w, int word, + unsigned int mag_bits); #ifdef __cplusplus } // extern "C" #endif
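The three writer primitives added to vp9_subexp.c share a simple bit layout: a near-uniform code for bounded values, a subexponential code for unbounded non-negative values, and a sign/magnitude code for small signed values. The standalone sketch below is illustrative only and not part of the patch; it mirrors the logic of vp9_write_primitive_subexp() and vp9_write_primitive_symmetric(), but prints the emitted bits to stdout instead of driving a vp9_writer, and the emit_bit()/emit_literal() helpers and the main() driver are hypothetical stand-ins.

/*
 * Illustrative sketch, not part of the patch: reproduces the bit layout of
 * vp9_write_primitive_subexp() and vp9_write_primitive_symmetric() with a
 * plain stdout bit printer so the codes can be inspected in isolation.
 */
#include <stdio.h>
#include <stdlib.h>

static void emit_bit(int b) { putchar(b ? '1' : '0'); }

/* MSB-first raw literal, matching the bit order of vp9_write_literal(). */
static void emit_literal(int value, int bits) {
  int i;
  for (i = bits - 1; i >= 0; --i)
    emit_bit((value >> i) & 1);
}

/* Same structure as vp9_write_primitive_subexp(): a unary prefix selects a
 * bucket of size 2^k, 2^k, 2^(k+1), 2^(k+2), ..., then the offset within
 * the bucket is sent as a raw literal. */
static void emit_subexp(int word, int k) {
  int mk = 1 << k, i = 0;
  while (word >= mk) {
    mk <<= 1;
    ++i;
  }
  if (i == 0) {
    emit_bit(0);
    if (k > 0) emit_literal(word, k);
  } else {
    int j;
    for (j = 0; j < i; ++j) emit_bit(1);
    emit_bit(0);
    emit_literal(word - (mk >> 1), k + i - 1);
  }
}

/* Same structure as vp9_write_primitive_symmetric(): a zero flag, then the
 * sign and (magnitude - 1) in mag_bits bits for non-zero values. */
static void emit_symmetric(int word, int mag_bits) {
  if (word == 0) {
    emit_bit(0);
  } else {
    emit_bit(1);
    emit_bit(word < 0);
    emit_literal(abs(word) - 1, mag_bits);
  }
}

int main(void) {
  int v;
  for (v = 0; v < 10; ++v) {
    printf("subexp(k=2)       %3d -> ", v);
    emit_subexp(v, 2);
    printf("\n");
  }
  for (v = -2; v <= 2; ++v) {
    printf("symmetric(bits=3) %3d -> ", v);
    emit_symmetric(v, 3);
    printf("\n");
  }
  return 0;
}

With k = 2, for example, values 0-3 cost 3 bits (a 0 flag plus a 2-bit offset), 4-7 cost 4 bits, 8-15 cost 6 bits, and so on, so small values stay cheap while large ones remain representable; the symmetric code spends 1 bit on zero and 2 + mag_bits bits on any non-zero value.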