Added tests for high bitdepth variance sse2 functions
author    Peter de Rivaz <peter.derivaz@gmail.com>
          Mon, 10 Nov 2014 13:59:28 +0000 (13:59 +0000)
committer Peter de Rivaz <peter.derivaz@gmail.com>
          Mon, 10 Nov 2014 20:42:24 +0000 (20:42 +0000)
Change-Id: I72735e2e07464a0f7e44968fb14a195c84a58992
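
This change parameterizes the variance, sub-pixel variance, and sub-pixel
average variance tests on bit depth: every test tuple gains a fourth element
that is 0 for the legacy 8-bit functions and otherwise names the bit depth
(8, 10, or 12) of a high-bitdepth variant. High-bitdepth buffers hold
uint16_t samples but travel through the existing uint8_t* interfaces; as a
point of reference, the pointer-packing convention this relies on was
defined in the vp9 common headers at the time roughly as follows (reproduced
for orientation, not part of this patch):

    // A uint16_t* is carried through uint8_t* APIs by halving the address,
    // so CONVERT_TO_SHORTPTR(CONVERT_TO_BYTEPTR(p)) recovers p exactly.
    #define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
    #define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))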

test/variance_test.cc

index a438d1721f7d6931ea79a4609ab8a67e39223b07..a3ff430ca87105ebb47ebc8da868610077020732 100644
@@ -18,6 +18,7 @@
 #include "vpx/vpx_integer.h"
 #include "./vpx_config.h"
 #include "vpx_mem/vpx_mem.h"
+#include "vpx/vpx_codec.h"
 #if CONFIG_VP8_ENCODER
 # include "./vp8_rtcd.h"
 # include "vp8/common/variance.h"
@@ -43,25 +44,97 @@ static unsigned int mb_ss_ref(const int16_t *src) {
   return res;
 }
 
-static unsigned int variance_ref(const uint8_t *ref, const uint8_t *src,
-                                 int l2w, int l2h, unsigned int *sse_ptr) {
+static unsigned int variance_ref(const uint8_t *src, const uint8_t *ref,
+                                 int l2w, int l2h, int src_stride_coeff,
+                                 int ref_stride_coeff, unsigned int *sse_ptr,
+                                 bool use_high_bit_depth_,
+                                 vpx_bit_depth_t bit_depth) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w, h = 1 << l2h;
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      int diff;
+      if (!use_high_bit_depth_) {
+        diff = ref[w * y * ref_stride_coeff + x] -
+               src[w * y * src_stride_coeff + x];
+        se += diff;
+        sse += diff * diff;
+      } else {
+        diff = CONVERT_TO_SHORTPTR(ref)[w * y * ref_stride_coeff + x] -
+               CONVERT_TO_SHORTPTR(src)[w * y * src_stride_coeff + x];
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  if (bit_depth > VPX_BITS_8) {
+    sse = ROUND_POWER_OF_TWO(sse, 2 * (bit_depth - 8));
+    se = ROUND_POWER_OF_TWO(se, bit_depth - 8);
+  }
+#else
   int se = 0;
   unsigned int sse = 0;
   const int w = 1 << l2w, h = 1 << l2h;
   for (int y = 0; y < h; y++) {
     for (int x = 0; x < w; x++) {
-      int diff = ref[w * y + x] - src[w * y + x];
+      int diff = ref[w * y * ref_stride_coeff + x] -
+                 src[w * y * src_stride_coeff + x];
       se += diff;
       sse += diff * diff;
     }
   }
+#endif
   *sse_ptr = sse;
   return sse - (((int64_t) se * se) >> (l2w + l2h));
 }
 
 static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
                                         int l2w, int l2h, int xoff, int yoff,
-                                        unsigned int *sse_ptr) {
+                                        unsigned int *sse_ptr,
+                                        bool use_high_bit_depth_,
+                                        vpx_bit_depth_t bit_depth) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w, h = 1 << l2h;
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // bilinear interpolation at a 16th pel step
+      if (!use_high_bit_depth_) {
+        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        int diff = r - src[w * y + x];
+        se += diff;
+        sse += diff * diff;
+      } else {
+        uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
+        uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+        const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        int diff = r - src16[w * y + x];
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  if (bit_depth > VPX_BITS_8) {
+    sse = ROUND_POWER_OF_TWO(sse, 2 * (bit_depth - 8));
+    se = ROUND_POWER_OF_TWO(se, bit_depth - 8);
+  }
+#else
   int se = 0;
   unsigned int sse = 0;
   const int w = 1 << l2w, h = 1 << l2h;
@@ -80,6 +153,7 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
       sse += diff * diff;
     }
   }
+#endif
   *sse_ptr = sse;
   return sse - (((int64_t) se * se) >> (l2w + l2h));
 }
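
Both reference implementations above compute the textbook identity
variance = sse - se^2 / (w * h), with the division realized as a shift by
l2w + l2h. On the high-bitdepth paths the accumulators are first scaled back
to an 8-bit-equivalent range: the squared-error sum is rounded by
2 * (bit_depth - 8) bits and the signed sum by (bit_depth - 8) bits. A
standalone sketch of that normalization, assuming libvpx's
ROUND_POWER_OF_TWO is a round-to-nearest right shift:

    #include <stdint.h>

    // Round-to-nearest right shift by n (n > 0), as in ROUND_POWER_OF_TWO.
    static uint64_t round_pow2(uint64_t v, int n) {
      return (v + (UINT64_C(1) << (n - 1))) >> n;
    }

    // Example at 12 bits: a pixel pair differing by 16 (one 8-bit step)
    // gives sse = 256 and se = 16; round_pow2(256, 8) == 1 and
    // round_pow2(16, 4) == 1, matching the same content at 8 bits.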
@@ -130,40 +204,75 @@ void SumOfSquaresTest::RefTest() {
 
 template<typename VarianceFunctionType>
 class VarianceTest
-    : public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
+    : public ::testing::TestWithParam<tuple<int, int,
+                                            VarianceFunctionType, int> > {
  public:
   virtual void SetUp() {
-    const tuple<int, int, VarianceFunctionType>& params = this->GetParam();
+    const tuple<int, int, VarianceFunctionType, int>& params =
+        this->GetParam();
     log2width_  = get<0>(params);
     width_ = 1 << log2width_;
     log2height_ = get<1>(params);
     height_ = 1 << log2height_;
     variance_ = get<2>(params);
+    if (get<3>(params)) {
+      bit_depth_ = (vpx_bit_depth_t) get<3>(params);
+      use_high_bit_depth_ = true;
+    } else {
+      bit_depth_ = VPX_BITS_8;
+      use_high_bit_depth_ = false;
+    }
+    mask_ = (1 << bit_depth_) - 1;
 
     rnd_.Reset(ACMRandom::DeterministicSeed());
     block_size_ = width_ * height_;
-    src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
-    ref_ = new uint8_t[block_size_];
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (!use_high_bit_depth_) {
+      src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_ * 2));
+      ref_ = new uint8_t[block_size_ * 2];
+    } else {
+      src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
+          vpx_memalign(16, block_size_ * 2 * sizeof(uint16_t))));
+      ref_ = CONVERT_TO_BYTEPTR(new uint16_t[block_size_ * 2]);
+    }
+#else
+    src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_ * 2));
+    ref_ = new uint8_t[block_size_ * 2];
+#endif
     ASSERT_TRUE(src_ != NULL);
     ASSERT_TRUE(ref_ != NULL);
   }
 
   virtual void TearDown() {
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (!use_high_bit_depth_) {
+      vpx_free(src_);
+      delete[] ref_;
+    } else {
+      vpx_free(CONVERT_TO_SHORTPTR(src_));
+      delete[] CONVERT_TO_SHORTPTR(ref_);
+    }
+#else
     vpx_free(src_);
     delete[] ref_;
+#endif
     libvpx_test::ClearSystemState();
   }
 
  protected:
   void ZeroTest();
   void RefTest();
+  void RefStrideTest();
   void OneQuarterTest();
 
   ACMRandom rnd_;
-  uint8_t *src_;
-  uint8_t *ref_;
+  uint8_t *src_;
+  uint8_t *ref_;
   int width_, log2width_;
   int height_, log2height_;
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
+  bool use_high_bit_depth_;
   int block_size_;
   VarianceFunctionType variance_;
 };
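
The fourth tuple element is deliberately overloaded: 0 selects the legacy
8-bit path, while any nonzero value both sets use_high_bit_depth_ and
becomes bit_depth_, so an 8-bit high-bitdepth function (8-bit values in
uint16_t buffers) stays distinguishable from the plain 8-bit one. The
instantiations later in this patch use it as follows (8x8 variants shown
for illustration):

    make_tuple(3, 3, variance8x8_c, 0)             // legacy 8-bit path
    make_tuple(3, 3, highbd_variance8x8_c, 8)      // 8-bit data, 16-bit buffers
    make_tuple(3, 3, highbd_10_variance8x8_c, 10)  // 10-bit data
    make_tuple(3, 3, highbd_12_variance8x8_c, 12)  // 12-bit data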
@@ -171,14 +280,32 @@ class VarianceTest
 template<typename VarianceFunctionType>
 void VarianceTest<VarianceFunctionType>::ZeroTest() {
   for (int i = 0; i <= 255; ++i) {
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (!use_high_bit_depth_) {
+      memset(src_, i, block_size_);
+    } else {
+      vpx_memset16(CONVERT_TO_SHORTPTR(src_), i << (bit_depth_ - 8),
+                   block_size_);
+    }
+#else
     memset(src_, i, block_size_);
+#endif
     for (int j = 0; j <= 255; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (!use_high_bit_depth_) {
+        memset(ref_, j, block_size_);
+      } else {
+        vpx_memset16(CONVERT_TO_SHORTPTR(ref_), j << (bit_depth_ - 8),
+                     block_size_);
+      }
+#else
       memset(ref_, j, block_size_);
+#endif
       unsigned int sse;
       unsigned int var;
       ASM_REGISTER_STATE_CHECK(
           var = variance_(src_, width_, ref_, width_, &sse));
-      EXPECT_EQ(0u, var) << "src values: " << i << "ref values: " << j;
+      EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;
     }
   }
 }
@@ -187,26 +314,87 @@ template<typename VarianceFunctionType>
 void VarianceTest<VarianceFunctionType>::RefTest() {
   for (int i = 0; i < 10; ++i) {
     for (int j = 0; j < block_size_; j++) {
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (!use_high_bit_depth_) {
       src_[j] = rnd_.Rand8();
       ref_[j] = rnd_.Rand8();
+    } else {
+      CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_;
+      CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_;
+    }
+#else
+      src_[j] = rnd_.Rand8();
+      ref_[j] = rnd_.Rand8();
+#endif
     }
     unsigned int sse1, sse2;
     unsigned int var1;
+    const int stride_coeff = 1;
     ASM_REGISTER_STATE_CHECK(
         var1 = variance_(src_, width_, ref_, width_, &sse1));
     const unsigned int var2 = variance_ref(src_, ref_, log2width_,
-                                           log2height_, &sse2);
+                                           log2height_, stride_coeff,
+                                           stride_coeff, &sse2,
+                                           use_high_bit_depth_, bit_depth_);
     EXPECT_EQ(sse1, sse2);
     EXPECT_EQ(var1, var2);
   }
 }
 
+template<typename VarianceFunctionType>
+void VarianceTest<VarianceFunctionType>::RefStrideTest() {
+  for (int i = 0; i < 10; ++i) {
+    int ref_stride_coeff = i % 2;
+    int src_stride_coeff = (i >> 1) % 2;
+    for (int j = 0; j < block_size_; j++) {
+      int ref_ind = (j / width_) * ref_stride_coeff * width_ + j % width_;
+      int src_ind = (j / width_) * src_stride_coeff * width_ + j % width_;
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (!use_high_bit_depth_) {
+        src_[src_ind] = rnd_.Rand8();
+        ref_[ref_ind] = rnd_.Rand8();
+      } else {
+        CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask_;
+        CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask_;
+      }
+#else
+      src_[src_ind] = rnd_.Rand8();
+      ref_[ref_ind] = rnd_.Rand8();
+#endif
+    }
+    unsigned int sse1, sse2;
+    unsigned int var1;
+
+    ASM_REGISTER_STATE_CHECK(
+        var1 = variance_(src_, width_ * src_stride_coeff,
+                         ref_, width_ * ref_stride_coeff, &sse1));
+    const unsigned int var2 = variance_ref(src_, ref_, log2width_,
+                                           log2height_, src_stride_coeff,
+                                           ref_stride_coeff, &sse2,
+                                           use_high_bit_depth_, bit_depth_);
+    EXPECT_EQ(sse1, sse2);
+    EXPECT_EQ(var1, var2);
+  }
+}
+
 template<typename VarianceFunctionType>
 void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
-  memset(src_, 255, block_size_);
   const int half = block_size_ / 2;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (!use_high_bit_depth_) {
+    memset(src_, 255, block_size_);
+    memset(ref_, 255, half);
+    memset(ref_ + half, 0, half);
+  } else {
+    vpx_memset16(CONVERT_TO_SHORTPTR(src_), 255 << (bit_depth_ - 8),
+                 block_size_);
+    vpx_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << (bit_depth_ - 8), half);
+    vpx_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
+  }
+#else
+  memset(src_, 255, block_size_);
   memset(ref_, 255, half);
   memset(ref_ + half, 0, half);
+#endif
   unsigned int sse;
   unsigned int var;
   ASM_REGISTER_STATE_CHECK(var = variance_(src_, width_, ref_, width_, &sse));
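
The RefStrideTest added above exercises degenerate strides: each stride
coefficient cycles through {0, 1}, and the index expression
(j / width_) * stride_coeff * width_ + j % width_ places row r, column c of
the block at r * stride + c with stride = stride_coeff * width_, so a
coefficient of 0 makes every row alias the first width_ samples. A minimal
sketch of the mapping (illustrative only):

    // Index of (row r, col c) in a w-wide block whose stride is
    // stride_coeff * w:
    //   stride_coeff == 1: rows laid out back to back (packed layout);
    //   stride_coeff == 0: all rows overlap the first w samples.
    int block_index(int r, int c, int w, int stride_coeff) {
      return r * stride_coeff * w + c;
    }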
@@ -264,8 +452,10 @@ void MseTest<MseFunctionType>::RefTest_mse() {
       ref_[j] = rnd.Rand8();
     }
     unsigned int sse1, sse2;
+    const int stride_coeff = 1;
     ASM_REGISTER_STATE_CHECK(mse_(src_, width_, ref_, width_, &sse1));
-    variance_ref(src_, ref_, log2width_, log2height_, &sse2);
+    variance_ref(src_, ref_, log2width_, log2height_, stride_coeff,
+                 stride_coeff, &sse2, false, VPX_BITS_8);
     EXPECT_EQ(sse1, sse2);
   }
 }
@@ -279,9 +469,11 @@ void MseTest<MseFunctionType>::RefTest_sse() {
     }
     unsigned int sse2;
     unsigned int var1;
+    const int stride_coeff = 1;
     ASM_REGISTER_STATE_CHECK(
         var1 = mse_(src_, width_, ref_, width_));
-    variance_ref(src_, ref_, log2width_, log2height_, &sse2);
+    variance_ref(src_, ref_, log2width_, log2height_, stride_coeff,
+                 stride_coeff, &sse2, false, VPX_BITS_8);
     EXPECT_EQ(var1, sse2);
   }
 }
@@ -314,7 +506,49 @@ unsigned int subpel_avg_variance_ref(const uint8_t *ref,
                                      const uint8_t *second_pred,
                                      int l2w, int l2h,
                                      int xoff, int yoff,
-                                     unsigned int *sse_ptr) {
+                                     unsigned int *sse_ptr,
+                                     bool use_high_bit_depth,
+                                     vpx_bit_depth_t bit_depth) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w, h = 1 << l2h;
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // bilinear interpolation at a 16th pel step
+      if (!use_high_bit_depth) {
+        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+        se += diff;
+        sse += diff * diff;
+      } else {
+        uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
+        uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+        uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
+        const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  if (bit_depth > VPX_BITS_8) {
+    sse = ROUND_POWER_OF_TWO(sse, 2 * (bit_depth - 8));
+    se = ROUND_POWER_OF_TWO(se, bit_depth - 8);
+  }
+#else
   int se = 0;
   unsigned int sse = 0;
   const int w = 1 << l2w, h = 1 << l2h;
@@ -333,6 +567,7 @@ unsigned int subpel_avg_variance_ref(const uint8_t *ref,
       sse += diff * diff;
     }
   }
+#endif
   *sse_ptr = sse;
   return sse - (((int64_t) se * se) >> (l2w + l2h));
 }
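
Each sub-pixel reference above filters the reference block bilinearly at
sixteenth-pel precision before differencing: the two horizontal neighbours
are blended by xoff/16, the two resulting rows by yoff/16, and the "+ 8"
makes each ">> 4" round to nearest. A standalone sketch of one filtered
sample (the helper name is illustrative):

    // Bilinear interpolation at 1/16-pel offsets; xoff, yoff in [0, 15].
    // a1, a2 are horizontally adjacent samples; b1, b2 sit one row below.
    int bilinear_16th_pel(int a1, int a2, int b1, int b2,
                          int xoff, int yoff) {
      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);  // top row blend
      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);  // bottom row blend
      return a + (((b - a) * yoff + 8) >> 4);            // vertical blend
    }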
@@ -340,44 +575,82 @@ unsigned int subpel_avg_variance_ref(const uint8_t *ref,
 template<typename SubpelVarianceFunctionType>
 class SubpelVarianceTest
     : public ::testing::TestWithParam<tuple<int, int,
-                                            SubpelVarianceFunctionType> > {
+                                            SubpelVarianceFunctionType, int> > {
  public:
   virtual void SetUp() {
-    const tuple<int, int, SubpelVarianceFunctionType>& params =
+    const tuple<int, int, SubpelVarianceFunctionType, int>& params =
         this->GetParam();
     log2width_  = get<0>(params);
     width_ = 1 << log2width_;
     log2height_ = get<1>(params);
     height_ = 1 << log2height_;
     subpel_variance_ = get<2>(params);
+    if (get<3>(params)) {
+      bit_depth_ = (vpx_bit_depth_t) get<3>(params);
+      use_high_bit_depth_ = true;
+    } else {
+      bit_depth_ = VPX_BITS_8;
+      use_high_bit_depth_ = false;
+    }
+  mask_ = (1 << bit_depth_) - 1;
 
     rnd_.Reset(ACMRandom::DeterministicSeed());
     block_size_ = width_ * height_;
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (!use_high_bit_depth_) {
+      src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
+      sec_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
+      ref_ = new uint8_t[block_size_ + width_ + height_ + 1];
+    } else {
+      src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
+          vpx_memalign(16, block_size_ * sizeof(uint16_t))));
+      sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
+          vpx_memalign(16, block_size_ * sizeof(uint16_t))));
+      ref_ = CONVERT_TO_BYTEPTR(new uint16_t[block_size_ + width_ + height_
+                                             + 1]);
+    }
+#else
     src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
     sec_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
     ref_ = new uint8_t[block_size_ + width_ + height_ + 1];
+#endif
     ASSERT_TRUE(src_ != NULL);
     ASSERT_TRUE(sec_ != NULL);
     ASSERT_TRUE(ref_ != NULL);
   }
 
   virtual void TearDown() {
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (!use_high_bit_depth_) {
+      vpx_free(src_);
+      delete[] ref_;
+      vpx_free(sec_);
+    } else {
+      vpx_free(CONVERT_TO_SHORTPTR(src_));
+      delete[] CONVERT_TO_SHORTPTR(ref_);
+      vpx_free(CONVERT_TO_SHORTPTR(sec_));
+    }
+#else
     vpx_free(src_);
     delete[] ref_;
     vpx_free(sec_);
+#endif
     libvpx_test::ClearSystemState();
   }
 
  protected:
   void RefTest();
+  void ExtremeRefTest();
 
   ACMRandom rnd_;
   uint8_t *src_;
   uint8_t *ref_;
   uint8_t *sec_;
+  bool use_high_bit_depth_;
+  vpx_bit_depth_t bit_depth_;
   int width_, log2width_;
   int height_, log2height_;
-  int block_size_;
+  int block_size_, mask_;
   SubpelVarianceFunctionType subpel_variance_;
 };
 
@@ -385,18 +658,79 @@ template<typename SubpelVarianceFunctionType>
 void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
   for (int x = 0; x < 16; ++x) {
     for (int y = 0; y < 16; ++y) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (!use_high_bit_depth_) {
+        for (int j = 0; j < block_size_; j++) {
+          src_[j] = rnd_.Rand8();
+        }
+        for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+          ref_[j] = rnd_.Rand8();
+        }
+      } else {
+        for (int j = 0; j < block_size_; j++) {
+          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_;
+        }
+        for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_;
+        }
+      }
+#else
       for (int j = 0; j < block_size_; j++) {
         src_[j] = rnd_.Rand8();
       }
       for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
         ref_[j] = rnd_.Rand8();
       }
+#endif
       unsigned int sse1, sse2;
       unsigned int var1;
       ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
                                                        src_, width_, &sse1));
       const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
-                                                    log2height_, x, y, &sse2);
+                                                    log2height_, x, y, &sse2,
+                                                    use_high_bit_depth_,
+                                                    bit_depth_);
+      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+    }
+  }
+}
+
+template<typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
+  // Compare against reference
+  // Src: Set the first half of values to 0, the second half to the maximum
+  // Ref: Set the first half of values to the maximum, the second half to 0
+  for (int x = 0; x < 16; ++x) {
+    for (int y = 0; y < 16; ++y) {
+      const int half = block_size_ / 2;
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (!use_high_bit_depth_) {
+        memset(src_, 0, half);
+        memset(src_ + half, 255, half);
+        memset(ref_, 255, half);
+        memset(ref_ + half, 0, half + width_ + height_ + 1);
+      } else {
+        vpx_memset16(CONVERT_TO_SHORTPTR(src_), 0, half);
+        vpx_memset16(CONVERT_TO_SHORTPTR(src_) + half, mask_, half);
+        vpx_memset16(CONVERT_TO_SHORTPTR(ref_), mask_, half);
+        vpx_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half + width_ +
+                     height_ + 1);
+      }
+#else
+      memset(src_, 0, half);
+      memset(src_ + half, 255, half);
+      memset(ref_, 255, half);
+      memset(ref_ + half, 0, half + width_ + height_ + 1);
+#endif
+      unsigned int sse1, sse2;
+      unsigned int var1;
+      ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
+                                                       src_, width_, &sse1));
+      const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
+                                                    log2height_, x, y, &sse2,
+                                                    use_high_bit_depth_,
+                                                    bit_depth_);
       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
       EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
     }
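
ExtremeRefTest pushes the accumulators to their worst case, which is why the
high-bitdepth reference code sums into 64-bit variables: with 12-bit samples
on a 64x64 block every difference can be +/-4095, so the raw SSE can reach
4096 * 4095 * 4095 = 68685926400, far beyond UINT32_MAX. A quick check of
that bound (illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t max_diff = 4095;    // largest 12-bit difference
      const uint64_t samples = 64 * 64;  // largest block under test
      assert(samples * max_diff * max_diff > UINT32_MAX);  // needs 64 bits
      return 0;
    }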
@@ -407,6 +741,25 @@ template<>
 void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
   for (int x = 0; x < 16; ++x) {
     for (int y = 0; y < 16; ++y) {
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (!use_high_bit_depth_) {
+        for (int j = 0; j < block_size_; j++) {
+          src_[j] = rnd_.Rand8();
+          sec_[j] = rnd_.Rand8();
+        }
+        for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+          ref_[j] = rnd_.Rand8();
+        }
+      } else {
+        for (int j = 0; j < block_size_; j++) {
+          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_;
+          CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask_;
+        }
+        for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_;
+        }
+      }
+#else
       for (int j = 0; j < block_size_; j++) {
         src_[j] = rnd_.Rand8();
         sec_[j] = rnd_.Rand8();
@@ -414,6 +767,7 @@ void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
       for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
         ref_[j] = rnd_.Rand8();
       }
+#endif
       unsigned int sse1, sse2;
       unsigned int var1;
       ASM_REGISTER_STATE_CHECK(
@@ -421,7 +775,9 @@ void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
                                   src_, width_, &sse1, sec_));
       const unsigned int var2 = subpel_avg_variance_ref(ref_, src_, sec_,
                                                         log2width_, log2height_,
-                                                        x, y, &sse2);
+                                                        x, y, &sse2,
+                                                        use_high_bit_depth_,
+                                                        bit_depth_);
       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
       EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
     }
@@ -468,11 +824,11 @@ const vp8_variance_fn_t variance16x8_c = vp8_variance16x8_c;
 const vp8_variance_fn_t variance16x16_c = vp8_variance16x16_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP8VarianceTest,
-    ::testing::Values(make_tuple(2, 2, variance4x4_c),
-                      make_tuple(3, 3, variance8x8_c),
-                      make_tuple(3, 4, variance8x16_c),
-                      make_tuple(4, 3, variance16x8_c),
-                      make_tuple(4, 4, variance16x16_c)));
+    ::testing::Values(make_tuple(2, 2, variance4x4_c, 0),
+                      make_tuple(3, 3, variance8x8_c, 0),
+                      make_tuple(3, 4, variance8x16_c, 0),
+                      make_tuple(4, 3, variance16x8_c, 0),
+                      make_tuple(4, 4, variance16x16_c, 0)));
 
 #if HAVE_NEON
 const vp8_sse_fn_t get4x4sse_cs_neon = vp8_get4x4sse_cs_neon;
@@ -491,13 +847,12 @@ const vp8_variance_fn_t variance16x8_neon = vp8_variance16x8_neon;
 const vp8_variance_fn_t variance16x16_neon = vp8_variance16x16_neon;
 INSTANTIATE_TEST_CASE_P(
     NEON, VP8VarianceTest,
-    ::testing::Values(make_tuple(3, 3, variance8x8_neon),
-                      make_tuple(3, 4, variance8x16_neon),
-                      make_tuple(4, 3, variance16x8_neon),
-                      make_tuple(4, 4, variance16x16_neon)));
+    ::testing::Values(make_tuple(3, 3, variance8x8_neon, 0),
+                      make_tuple(3, 4, variance8x16_neon, 0),
+                      make_tuple(4, 3, variance16x8_neon, 0),
+                      make_tuple(4, 4, variance16x16_neon, 0)));
 #endif
 
-
 #if HAVE_MMX
 const vp8_variance_fn_t variance4x4_mmx = vp8_variance4x4_mmx;
 const vp8_variance_fn_t variance8x8_mmx = vp8_variance8x8_mmx;
@@ -506,11 +861,11 @@ const vp8_variance_fn_t variance16x8_mmx = vp8_variance16x8_mmx;
 const vp8_variance_fn_t variance16x16_mmx = vp8_variance16x16_mmx;
 INSTANTIATE_TEST_CASE_P(
     MMX, VP8VarianceTest,
-    ::testing::Values(make_tuple(2, 2, variance4x4_mmx),
-                      make_tuple(3, 3, variance8x8_mmx),
-                      make_tuple(3, 4, variance8x16_mmx),
-                      make_tuple(4, 3, variance16x8_mmx),
-                      make_tuple(4, 4, variance16x16_mmx)));
+    ::testing::Values(make_tuple(2, 2, variance4x4_mmx, 0),
+                      make_tuple(3, 3, variance8x8_mmx, 0),
+                      make_tuple(3, 4, variance8x16_mmx, 0),
+                      make_tuple(4, 3, variance16x8_mmx, 0),
+                      make_tuple(4, 4, variance16x16_mmx, 0)));
 #endif
 
 #if HAVE_SSE2
@@ -521,11 +876,11 @@ const vp8_variance_fn_t variance16x8_wmt = vp8_variance16x8_wmt;
 const vp8_variance_fn_t variance16x16_wmt = vp8_variance16x16_wmt;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP8VarianceTest,
-    ::testing::Values(make_tuple(2, 2, variance4x4_wmt),
-                      make_tuple(3, 3, variance8x8_wmt),
-                      make_tuple(3, 4, variance8x16_wmt),
-                      make_tuple(4, 3, variance16x8_wmt),
-                      make_tuple(4, 4, variance16x16_wmt)));
+    ::testing::Values(make_tuple(2, 2, variance4x4_wmt, 0),
+                      make_tuple(3, 3, variance8x8_wmt, 0),
+                      make_tuple(3, 4, variance8x16_wmt, 0),
+                      make_tuple(4, 3, variance16x8_wmt, 0),
+                      make_tuple(4, 4, variance16x16_wmt, 0)));
 #endif
 #endif  // CONFIG_VP8_ENCODER
 
@@ -537,7 +892,6 @@ INSTANTIATE_TEST_CASE_P(
 namespace vp9 {
 
 #if CONFIG_VP9_ENCODER
-
 TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
 TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
 
@@ -550,10 +904,27 @@ typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t> VP9SubpelAvgVarianceTest;
 
 TEST_P(VP9VarianceTest, Zero) { ZeroTest(); }
 TEST_P(VP9VarianceTest, Ref) { RefTest(); }
+TEST_P(VP9VarianceTest, RefStride) { RefStrideTest(); }
 TEST_P(VP9SubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(VP9SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
 TEST_P(VP9SubpelAvgVarianceTest, Ref) { RefTest(); }
 TEST_P(VP9VarianceTest, OneQuarter) { OneQuarterTest(); }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef VarianceTest<vp9_variance_fn_t> VP9VarianceHighTest;
+typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceHighTest;
+typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t>
+    VP9SubpelAvgVarianceHighTest;
+
+TEST_P(VP9VarianceHighTest, Zero) { ZeroTest(); }
+TEST_P(VP9VarianceHighTest, Ref) { RefTest(); }
+TEST_P(VP9VarianceHighTest, RefStride) { RefStrideTest(); }
+TEST_P(VP9SubpelVarianceHighTest, Ref) { RefTest(); }
+TEST_P(VP9SubpelVarianceHighTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(VP9SubpelAvgVarianceHighTest, Ref) { RefTest(); }
+TEST_P(VP9VarianceHighTest, OneQuarter) { OneQuarterTest(); }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 const vp9_variance_fn_t variance4x4_c = vp9_variance4x4_c;
 const vp9_variance_fn_t variance4x8_c = vp9_variance4x8_c;
 const vp9_variance_fn_t variance8x4_c = vp9_variance8x4_c;
@@ -569,20 +940,115 @@ const vp9_variance_fn_t variance64x32_c = vp9_variance64x32_c;
 const vp9_variance_fn_t variance64x64_c = vp9_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9VarianceTest,
-    ::testing::Values(make_tuple(2, 2, variance4x4_c),
-                      make_tuple(2, 3, variance4x8_c),
-                      make_tuple(3, 2, variance8x4_c),
-                      make_tuple(3, 3, variance8x8_c),
-                      make_tuple(3, 4, variance8x16_c),
-                      make_tuple(4, 3, variance16x8_c),
-                      make_tuple(4, 4, variance16x16_c),
-                      make_tuple(4, 5, variance16x32_c),
-                      make_tuple(5, 4, variance32x16_c),
-                      make_tuple(5, 5, variance32x32_c),
-                      make_tuple(5, 6, variance32x64_c),
-                      make_tuple(6, 5, variance64x32_c),
-                      make_tuple(6, 6, variance64x64_c)));
-
+    ::testing::Values(make_tuple(2, 2, variance4x4_c, 0),
+                      make_tuple(2, 3, variance4x8_c, 0),
+                      make_tuple(3, 2, variance8x4_c, 0),
+                      make_tuple(3, 3, variance8x8_c, 0),
+                      make_tuple(3, 4, variance8x16_c, 0),
+                      make_tuple(4, 3, variance16x8_c, 0),
+                      make_tuple(4, 4, variance16x16_c, 0),
+                      make_tuple(4, 5, variance16x32_c, 0),
+                      make_tuple(5, 4, variance32x16_c, 0),
+                      make_tuple(5, 5, variance32x32_c, 0),
+                      make_tuple(5, 6, variance32x64_c, 0),
+                      make_tuple(6, 5, variance64x32_c, 0),
+                      make_tuple(6, 6, variance64x64_c, 0)));
+#if CONFIG_VP9_HIGHBITDEPTH
+const vp9_variance_fn_t highbd_10_variance4x4_c = vp9_highbd_10_variance4x4_c;
+const vp9_variance_fn_t highbd_10_variance4x8_c = vp9_highbd_10_variance4x8_c;
+const vp9_variance_fn_t highbd_10_variance8x4_c = vp9_highbd_10_variance8x4_c;
+const vp9_variance_fn_t highbd_10_variance8x8_c = vp9_highbd_10_variance8x8_c;
+const vp9_variance_fn_t highbd_10_variance8x16_c = vp9_highbd_10_variance8x16_c;
+const vp9_variance_fn_t highbd_10_variance16x8_c = vp9_highbd_10_variance16x8_c;
+const vp9_variance_fn_t highbd_10_variance16x16_c =
+    vp9_highbd_10_variance16x16_c;
+const vp9_variance_fn_t highbd_10_variance16x32_c =
+    vp9_highbd_10_variance16x32_c;
+const vp9_variance_fn_t highbd_10_variance32x16_c =
+    vp9_highbd_10_variance32x16_c;
+const vp9_variance_fn_t highbd_10_variance32x32_c =
+    vp9_highbd_10_variance32x32_c;
+const vp9_variance_fn_t highbd_10_variance32x64_c =
+    vp9_highbd_10_variance32x64_c;
+const vp9_variance_fn_t highbd_10_variance64x32_c =
+    vp9_highbd_10_variance64x32_c;
+const vp9_variance_fn_t highbd_10_variance64x64_c =
+    vp9_highbd_10_variance64x64_c;
+const vp9_variance_fn_t highbd_12_variance4x4_c = vp9_highbd_12_variance4x4_c;
+const vp9_variance_fn_t highbd_12_variance4x8_c = vp9_highbd_12_variance4x8_c;
+const vp9_variance_fn_t highbd_12_variance8x4_c = vp9_highbd_12_variance8x4_c;
+const vp9_variance_fn_t highbd_12_variance8x8_c = vp9_highbd_12_variance8x8_c;
+const vp9_variance_fn_t highbd_12_variance8x16_c = vp9_highbd_12_variance8x16_c;
+const vp9_variance_fn_t highbd_12_variance16x8_c = vp9_highbd_12_variance16x8_c;
+const vp9_variance_fn_t highbd_12_variance16x16_c =
+    vp9_highbd_12_variance16x16_c;
+const vp9_variance_fn_t highbd_12_variance16x32_c =
+    vp9_highbd_12_variance16x32_c;
+const vp9_variance_fn_t highbd_12_variance32x16_c =
+    vp9_highbd_12_variance32x16_c;
+const vp9_variance_fn_t highbd_12_variance32x32_c =
+    vp9_highbd_12_variance32x32_c;
+const vp9_variance_fn_t highbd_12_variance32x64_c =
+    vp9_highbd_12_variance32x64_c;
+const vp9_variance_fn_t highbd_12_variance64x32_c =
+    vp9_highbd_12_variance64x32_c;
+const vp9_variance_fn_t highbd_12_variance64x64_c =
+    vp9_highbd_12_variance64x64_c;
+const vp9_variance_fn_t highbd_variance4x4_c = vp9_highbd_variance4x4_c;
+const vp9_variance_fn_t highbd_variance4x8_c = vp9_highbd_variance4x8_c;
+const vp9_variance_fn_t highbd_variance8x4_c = vp9_highbd_variance8x4_c;
+const vp9_variance_fn_t highbd_variance8x8_c = vp9_highbd_variance8x8_c;
+const vp9_variance_fn_t highbd_variance8x16_c = vp9_highbd_variance8x16_c;
+const vp9_variance_fn_t highbd_variance16x8_c = vp9_highbd_variance16x8_c;
+const vp9_variance_fn_t highbd_variance16x16_c = vp9_highbd_variance16x16_c;
+const vp9_variance_fn_t highbd_variance16x32_c = vp9_highbd_variance16x32_c;
+const vp9_variance_fn_t highbd_variance32x16_c = vp9_highbd_variance32x16_c;
+const vp9_variance_fn_t highbd_variance32x32_c = vp9_highbd_variance32x32_c;
+const vp9_variance_fn_t highbd_variance32x64_c = vp9_highbd_variance32x64_c;
+const vp9_variance_fn_t highbd_variance64x32_c = vp9_highbd_variance64x32_c;
+const vp9_variance_fn_t highbd_variance64x64_c = vp9_highbd_variance64x64_c;
+INSTANTIATE_TEST_CASE_P(
+    C, VP9VarianceHighTest,
+    ::testing::Values(make_tuple(2, 2, highbd_10_variance4x4_c,   10),
+                      make_tuple(2, 3, highbd_10_variance4x8_c,   10),
+                      make_tuple(3, 2, highbd_10_variance8x4_c,   10),
+                      make_tuple(3, 3, highbd_10_variance8x8_c,   10),
+                      make_tuple(3, 4, highbd_10_variance8x16_c,  10),
+                      make_tuple(4, 3, highbd_10_variance16x8_c,  10),
+                      make_tuple(4, 4, highbd_10_variance16x16_c, 10),
+                      make_tuple(4, 5, highbd_10_variance16x32_c, 10),
+                      make_tuple(5, 4, highbd_10_variance32x16_c, 10),
+                      make_tuple(5, 5, highbd_10_variance32x32_c, 10),
+                      make_tuple(5, 6, highbd_10_variance32x64_c, 10),
+                      make_tuple(6, 5, highbd_10_variance64x32_c, 10),
+                      make_tuple(6, 6, highbd_10_variance64x64_c, 10),
+                      make_tuple(2, 2, highbd_12_variance4x4_c,   12),
+                      make_tuple(2, 3, highbd_12_variance4x8_c,   12),
+                      make_tuple(3, 2, highbd_12_variance8x4_c,   12),
+                      make_tuple(3, 3, highbd_12_variance8x8_c,   12),
+                      make_tuple(3, 4, highbd_12_variance8x16_c,  12),
+                      make_tuple(4, 3, highbd_12_variance16x8_c,  12),
+                      make_tuple(4, 4, highbd_12_variance16x16_c, 12),
+                      make_tuple(4, 5, highbd_12_variance16x32_c, 12),
+                      make_tuple(5, 4, highbd_12_variance32x16_c, 12),
+                      make_tuple(5, 5, highbd_12_variance32x32_c, 12),
+                      make_tuple(5, 6, highbd_12_variance32x64_c, 12),
+                      make_tuple(6, 5, highbd_12_variance64x32_c, 12),
+                      make_tuple(6, 6, highbd_12_variance64x64_c, 12),
+                      make_tuple(2, 2, highbd_variance4x4_c,   8),
+                      make_tuple(2, 3, highbd_variance4x8_c,   8),
+                      make_tuple(3, 2, highbd_variance8x4_c,   8),
+                      make_tuple(3, 3, highbd_variance8x8_c,   8),
+                      make_tuple(3, 4, highbd_variance8x16_c,  8),
+                      make_tuple(4, 3, highbd_variance16x8_c,  8),
+                      make_tuple(4, 4, highbd_variance16x16_c, 8),
+                      make_tuple(4, 5, highbd_variance16x32_c, 8),
+                      make_tuple(5, 4, highbd_variance32x16_c, 8),
+                      make_tuple(5, 5, highbd_variance32x32_c, 8),
+                      make_tuple(5, 6, highbd_variance32x64_c, 8),
+                      make_tuple(6, 5, highbd_variance64x32_c, 8),
+                      make_tuple(6, 6, highbd_variance64x64_c, 8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 const vp9_subpixvariance_fn_t subpel_variance4x4_c =
     vp9_sub_pixel_variance4x4_c;
 const vp9_subpixvariance_fn_t subpel_variance4x8_c =
@@ -611,20 +1077,19 @@ const vp9_subpixvariance_fn_t subpel_variance64x64_c =
     vp9_sub_pixel_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9SubpelVarianceTest,
-    ::testing::Values(make_tuple(2, 2, subpel_variance4x4_c),
-                      make_tuple(2, 3, subpel_variance4x8_c),
-                      make_tuple(3, 2, subpel_variance8x4_c),
-                      make_tuple(3, 3, subpel_variance8x8_c),
-                      make_tuple(3, 4, subpel_variance8x16_c),
-                      make_tuple(4, 3, subpel_variance16x8_c),
-                      make_tuple(4, 4, subpel_variance16x16_c),
-                      make_tuple(4, 5, subpel_variance16x32_c),
-                      make_tuple(5, 4, subpel_variance32x16_c),
-                      make_tuple(5, 5, subpel_variance32x32_c),
-                      make_tuple(5, 6, subpel_variance32x64_c),
-                      make_tuple(6, 5, subpel_variance64x32_c),
-                      make_tuple(6, 6, subpel_variance64x64_c)));
-
+    ::testing::Values(make_tuple(2, 2, subpel_variance4x4_c, 0),
+                      make_tuple(2, 3, subpel_variance4x8_c, 0),
+                      make_tuple(3, 2, subpel_variance8x4_c, 0),
+                      make_tuple(3, 3, subpel_variance8x8_c, 0),
+                      make_tuple(3, 4, subpel_variance8x16_c, 0),
+                      make_tuple(4, 3, subpel_variance16x8_c, 0),
+                      make_tuple(4, 4, subpel_variance16x16_c, 0),
+                      make_tuple(4, 5, subpel_variance16x32_c, 0),
+                      make_tuple(5, 4, subpel_variance32x16_c, 0),
+                      make_tuple(5, 5, subpel_variance32x32_c, 0),
+                      make_tuple(5, 6, subpel_variance32x64_c, 0),
+                      make_tuple(6, 5, subpel_variance64x32_c, 0),
+                      make_tuple(6, 6, subpel_variance64x64_c, 0)));
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_c =
     vp9_sub_pixel_avg_variance4x4_c;
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_c =
@@ -653,23 +1118,288 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_c =
     vp9_sub_pixel_avg_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9SubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_c),
-                      make_tuple(2, 3, subpel_avg_variance4x8_c),
-                      make_tuple(3, 2, subpel_avg_variance8x4_c),
-                      make_tuple(3, 3, subpel_avg_variance8x8_c),
-                      make_tuple(3, 4, subpel_avg_variance8x16_c),
-                      make_tuple(4, 3, subpel_avg_variance16x8_c),
-                      make_tuple(4, 4, subpel_avg_variance16x16_c),
-                      make_tuple(4, 5, subpel_avg_variance16x32_c),
-                      make_tuple(5, 4, subpel_avg_variance32x16_c),
-                      make_tuple(5, 5, subpel_avg_variance32x32_c),
-                      make_tuple(5, 6, subpel_avg_variance32x64_c),
-                      make_tuple(6, 5, subpel_avg_variance64x32_c),
-                      make_tuple(6, 6, subpel_avg_variance64x64_c)));
+    ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_c, 0),
+                      make_tuple(2, 3, subpel_avg_variance4x8_c, 0),
+                      make_tuple(3, 2, subpel_avg_variance8x4_c, 0),
+                      make_tuple(3, 3, subpel_avg_variance8x8_c, 0),
+                      make_tuple(3, 4, subpel_avg_variance8x16_c, 0),
+                      make_tuple(4, 3, subpel_avg_variance16x8_c, 0),
+                      make_tuple(4, 4, subpel_avg_variance16x16_c, 0),
+                      make_tuple(4, 5, subpel_avg_variance16x32_c, 0),
+                      make_tuple(5, 4, subpel_avg_variance32x16_c, 0),
+                      make_tuple(5, 5, subpel_avg_variance32x32_c, 0),
+                      make_tuple(5, 6, subpel_avg_variance32x64_c, 0),
+                      make_tuple(6, 5, subpel_avg_variance64x32_c, 0),
+                      make_tuple(6, 6, subpel_avg_variance64x64_c, 0)));
+#if CONFIG_VP9_HIGHBITDEPTH
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x4_c =
+    vp9_highbd_10_sub_pixel_variance4x4_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x8_c =
+    vp9_highbd_10_sub_pixel_variance4x8_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_c =
+    vp9_highbd_10_sub_pixel_variance8x4_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_c =
+    vp9_highbd_10_sub_pixel_variance8x8_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_c =
+    vp9_highbd_10_sub_pixel_variance8x16_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_c =
+    vp9_highbd_10_sub_pixel_variance16x8_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_c =
+    vp9_highbd_10_sub_pixel_variance16x16_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_c =
+    vp9_highbd_10_sub_pixel_variance16x32_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_c =
+    vp9_highbd_10_sub_pixel_variance32x16_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_c =
+    vp9_highbd_10_sub_pixel_variance32x32_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_c =
+    vp9_highbd_10_sub_pixel_variance32x64_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_c =
+    vp9_highbd_10_sub_pixel_variance64x32_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_c =
+    vp9_highbd_10_sub_pixel_variance64x64_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x4_c =
+    vp9_highbd_12_sub_pixel_variance4x4_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x8_c =
+    vp9_highbd_12_sub_pixel_variance4x8_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_c =
+    vp9_highbd_12_sub_pixel_variance8x4_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_c =
+    vp9_highbd_12_sub_pixel_variance8x8_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_c =
+    vp9_highbd_12_sub_pixel_variance8x16_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_c =
+    vp9_highbd_12_sub_pixel_variance16x8_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_c =
+    vp9_highbd_12_sub_pixel_variance16x16_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_c =
+    vp9_highbd_12_sub_pixel_variance16x32_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_c =
+    vp9_highbd_12_sub_pixel_variance32x16_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_c =
+    vp9_highbd_12_sub_pixel_variance32x32_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_c =
+    vp9_highbd_12_sub_pixel_variance32x64_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_c =
+    vp9_highbd_12_sub_pixel_variance64x32_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_c =
+    vp9_highbd_12_sub_pixel_variance64x64_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance4x4_c =
+    vp9_highbd_sub_pixel_variance4x4_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance4x8_c =
+    vp9_highbd_sub_pixel_variance4x8_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_c =
+    vp9_highbd_sub_pixel_variance8x4_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_c =
+    vp9_highbd_sub_pixel_variance8x8_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_c =
+    vp9_highbd_sub_pixel_variance8x16_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_c =
+    vp9_highbd_sub_pixel_variance16x8_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_c =
+    vp9_highbd_sub_pixel_variance16x16_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_c =
+    vp9_highbd_sub_pixel_variance16x32_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_c =
+    vp9_highbd_sub_pixel_variance32x16_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_c =
+    vp9_highbd_sub_pixel_variance32x32_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_c =
+    vp9_highbd_sub_pixel_variance32x64_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_c =
+    vp9_highbd_sub_pixel_variance64x32_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_c =
+    vp9_highbd_sub_pixel_variance64x64_c;
+INSTANTIATE_TEST_CASE_P(
+    C, VP9SubpelVarianceHighTest,
+    ::testing::Values(make_tuple(2, 2, highbd_10_subpel_variance4x4_c,   10),
+                      make_tuple(2, 3, highbd_10_subpel_variance4x8_c,   10),
+                      make_tuple(3, 2, highbd_10_subpel_variance8x4_c,   10),
+                      make_tuple(3, 3, highbd_10_subpel_variance8x8_c,   10),
+                      make_tuple(3, 4, highbd_10_subpel_variance8x16_c,  10),
+                      make_tuple(4, 3, highbd_10_subpel_variance16x8_c,  10),
+                      make_tuple(4, 4, highbd_10_subpel_variance16x16_c, 10),
+                      make_tuple(4, 5, highbd_10_subpel_variance16x32_c, 10),
+                      make_tuple(5, 4, highbd_10_subpel_variance32x16_c, 10),
+                      make_tuple(5, 5, highbd_10_subpel_variance32x32_c, 10),
+                      make_tuple(5, 6, highbd_10_subpel_variance32x64_c, 10),
+                      make_tuple(6, 5, highbd_10_subpel_variance64x32_c, 10),
+                      make_tuple(6, 6, highbd_10_subpel_variance64x64_c, 10),
+                      make_tuple(2, 2, highbd_12_subpel_variance4x4_c,   12),
+                      make_tuple(2, 3, highbd_12_subpel_variance4x8_c,   12),
+                      make_tuple(3, 2, highbd_12_subpel_variance8x4_c,   12),
+                      make_tuple(3, 3, highbd_12_subpel_variance8x8_c,   12),
+                      make_tuple(3, 4, highbd_12_subpel_variance8x16_c,  12),
+                      make_tuple(4, 3, highbd_12_subpel_variance16x8_c,  12),
+                      make_tuple(4, 4, highbd_12_subpel_variance16x16_c, 12),
+                      make_tuple(4, 5, highbd_12_subpel_variance16x32_c, 12),
+                      make_tuple(5, 4, highbd_12_subpel_variance32x16_c, 12),
+                      make_tuple(5, 5, highbd_12_subpel_variance32x32_c, 12),
+                      make_tuple(5, 6, highbd_12_subpel_variance32x64_c, 12),
+                      make_tuple(6, 5, highbd_12_subpel_variance64x32_c, 12),
+                      make_tuple(6, 6, highbd_12_subpel_variance64x64_c, 12),
+                      make_tuple(2, 2, highbd_subpel_variance4x4_c,   8),
+                      make_tuple(2, 3, highbd_subpel_variance4x8_c,   8),
+                      make_tuple(3, 2, highbd_subpel_variance8x4_c,   8),
+                      make_tuple(3, 3, highbd_subpel_variance8x8_c,   8),
+                      make_tuple(3, 4, highbd_subpel_variance8x16_c,  8),
+                      make_tuple(4, 3, highbd_subpel_variance16x8_c,  8),
+                      make_tuple(4, 4, highbd_subpel_variance16x16_c, 8),
+                      make_tuple(4, 5, highbd_subpel_variance16x32_c, 8),
+                      make_tuple(5, 4, highbd_subpel_variance32x16_c, 8),
+                      make_tuple(5, 5, highbd_subpel_variance32x32_c, 8),
+                      make_tuple(5, 6, highbd_subpel_variance32x64_c, 8),
+                      make_tuple(6, 5, highbd_subpel_variance64x32_c, 8),
+                      make_tuple(6, 6, highbd_subpel_variance64x64_c, 8)));
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance4x4_c =
+    vp9_highbd_10_sub_pixel_avg_variance4x4_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance4x8_c =
+    vp9_highbd_10_sub_pixel_avg_variance4x8_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x4_c =
+    vp9_highbd_10_sub_pixel_avg_variance8x4_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x8_c =
+    vp9_highbd_10_sub_pixel_avg_variance8x8_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x16_c =
+    vp9_highbd_10_sub_pixel_avg_variance8x16_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x8_c =
+    vp9_highbd_10_sub_pixel_avg_variance16x8_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x16_c =
+    vp9_highbd_10_sub_pixel_avg_variance16x16_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x32_c =
+    vp9_highbd_10_sub_pixel_avg_variance16x32_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x16_c =
+    vp9_highbd_10_sub_pixel_avg_variance32x16_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x32_c =
+    vp9_highbd_10_sub_pixel_avg_variance32x32_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x64_c =
+    vp9_highbd_10_sub_pixel_avg_variance32x64_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance64x32_c =
+    vp9_highbd_10_sub_pixel_avg_variance64x32_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance64x64_c =
+    vp9_highbd_10_sub_pixel_avg_variance64x64_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance4x4_c =
+    vp9_highbd_12_sub_pixel_avg_variance4x4_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance4x8_c =
+    vp9_highbd_12_sub_pixel_avg_variance4x8_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x4_c =
+    vp9_highbd_12_sub_pixel_avg_variance8x4_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x8_c =
+    vp9_highbd_12_sub_pixel_avg_variance8x8_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x16_c =
+    vp9_highbd_12_sub_pixel_avg_variance8x16_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x8_c =
+    vp9_highbd_12_sub_pixel_avg_variance16x8_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x16_c =
+    vp9_highbd_12_sub_pixel_avg_variance16x16_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x32_c =
+    vp9_highbd_12_sub_pixel_avg_variance16x32_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x16_c =
+    vp9_highbd_12_sub_pixel_avg_variance32x16_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x32_c =
+    vp9_highbd_12_sub_pixel_avg_variance32x32_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x64_c =
+    vp9_highbd_12_sub_pixel_avg_variance32x64_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance64x32_c =
+    vp9_highbd_12_sub_pixel_avg_variance64x32_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance64x64_c =
+    vp9_highbd_12_sub_pixel_avg_variance64x64_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance4x4_c =
+    vp9_highbd_sub_pixel_avg_variance4x4_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance4x8_c =
+    vp9_highbd_sub_pixel_avg_variance4x8_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x4_c =
+    vp9_highbd_sub_pixel_avg_variance8x4_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x8_c =
+    vp9_highbd_sub_pixel_avg_variance8x8_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x16_c =
+    vp9_highbd_sub_pixel_avg_variance8x16_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x8_c =
+    vp9_highbd_sub_pixel_avg_variance16x8_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x16_c =
+    vp9_highbd_sub_pixel_avg_variance16x16_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x32_c =
+    vp9_highbd_sub_pixel_avg_variance16x32_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x16_c =
+    vp9_highbd_sub_pixel_avg_variance32x16_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x32_c =
+    vp9_highbd_sub_pixel_avg_variance32x32_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x64_c =
+    vp9_highbd_sub_pixel_avg_variance32x64_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance64x32_c =
+    vp9_highbd_sub_pixel_avg_variance64x32_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance64x64_c =
+    vp9_highbd_sub_pixel_avg_variance64x64_c;
+INSTANTIATE_TEST_CASE_P(
+    C, VP9SubpelAvgVarianceHighTest,
+    ::testing::Values(make_tuple(2, 2, highbd_10_subpel_avg_variance4x4_c,
+                                 10),
+                      make_tuple(2, 3, highbd_10_subpel_avg_variance4x8_c,
+                                 10),
+                      make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_c,
+                                 10),
+                      make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_c,
+                                 10),
+                      make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_c,
+                                 10),
+                      make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_c,
+                                 10),
+                      make_tuple(4, 4, highbd_10_subpel_avg_variance16x16_c,
+                                 10),
+                      make_tuple(4, 5, highbd_10_subpel_avg_variance16x32_c,
+                                 10),
+                      make_tuple(5, 4, highbd_10_subpel_avg_variance32x16_c,
+                                 10),
+                      make_tuple(5, 5, highbd_10_subpel_avg_variance32x32_c,
+                                 10),
+                      make_tuple(5, 6, highbd_10_subpel_avg_variance32x64_c,
+                                 10),
+                      make_tuple(6, 5, highbd_10_subpel_avg_variance64x32_c,
+                                 10),
+                      make_tuple(6, 6, highbd_10_subpel_avg_variance64x64_c,
+                                 10),
+                      make_tuple(2, 2, highbd_12_subpel_avg_variance4x4_c,
+                                 12),
+                      make_tuple(2, 3, highbd_12_subpel_avg_variance4x8_c,
+                                 12),
+                      make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_c,
+                                 12),
+                      make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_c,
+                                 12),
+                      make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_c,
+                                 12),
+                      make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_c,
+                                 12),
+                      make_tuple(4, 4, highbd_12_subpel_avg_variance16x16_c,
+                                 12),
+                      make_tuple(4, 5, highbd_12_subpel_avg_variance16x32_c,
+                                 12),
+                      make_tuple(5, 4, highbd_12_subpel_avg_variance32x16_c,
+                                 12),
+                      make_tuple(5, 5, highbd_12_subpel_avg_variance32x32_c,
+                                 12),
+                      make_tuple(5, 6, highbd_12_subpel_avg_variance32x64_c,
+                                 12),
+                      make_tuple(6, 5, highbd_12_subpel_avg_variance64x32_c,
+                                 12),
+                      make_tuple(6, 6, highbd_12_subpel_avg_variance64x64_c,
+                                 12),
+                      make_tuple(2, 2, highbd_subpel_avg_variance4x4_c,   8),
+                      make_tuple(2, 3, highbd_subpel_avg_variance4x8_c,   8),
+                      make_tuple(3, 2, highbd_subpel_avg_variance8x4_c,   8),
+                      make_tuple(3, 3, highbd_subpel_avg_variance8x8_c,   8),
+                      make_tuple(3, 4, highbd_subpel_avg_variance8x16_c,  8),
+                      make_tuple(4, 3, highbd_subpel_avg_variance16x8_c,  8),
+                      make_tuple(4, 4, highbd_subpel_avg_variance16x16_c, 8),
+                      make_tuple(4, 5, highbd_subpel_avg_variance16x32_c, 8),
+                      make_tuple(5, 4, highbd_subpel_avg_variance32x16_c, 8),
+                      make_tuple(5, 5, highbd_subpel_avg_variance32x32_c, 8),
+                      make_tuple(5, 6, highbd_subpel_avg_variance32x64_c, 8),
+                      make_tuple(6, 5, highbd_subpel_avg_variance64x32_c, 8),
+                      make_tuple(6, 6, highbd_subpel_avg_variance64x64_c, 8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 
 #if HAVE_SSE2
 #if CONFIG_USE_X86INC
-
 INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
                         ::testing::Values(vp9_get_mb_ss_sse2));
 
@@ -688,20 +1418,19 @@ const vp9_variance_fn_t variance64x32_sse2 = vp9_variance64x32_sse2;
 const vp9_variance_fn_t variance64x64_sse2 = vp9_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9VarianceTest,
-    ::testing::Values(make_tuple(2, 2, variance4x4_sse2),
-                      make_tuple(2, 3, variance4x8_sse2),
-                      make_tuple(3, 2, variance8x4_sse2),
-                      make_tuple(3, 3, variance8x8_sse2),
-                      make_tuple(3, 4, variance8x16_sse2),
-                      make_tuple(4, 3, variance16x8_sse2),
-                      make_tuple(4, 4, variance16x16_sse2),
-                      make_tuple(4, 5, variance16x32_sse2),
-                      make_tuple(5, 4, variance32x16_sse2),
-                      make_tuple(5, 5, variance32x32_sse2),
-                      make_tuple(5, 6, variance32x64_sse2),
-                      make_tuple(6, 5, variance64x32_sse2),
-                      make_tuple(6, 6, variance64x64_sse2)));
-
+    ::testing::Values(make_tuple(2, 2, variance4x4_sse2, 0),
+                      make_tuple(2, 3, variance4x8_sse2, 0),
+                      make_tuple(3, 2, variance8x4_sse2, 0),
+                      make_tuple(3, 3, variance8x8_sse2, 0),
+                      make_tuple(3, 4, variance8x16_sse2, 0),
+                      make_tuple(4, 3, variance16x8_sse2, 0),
+                      make_tuple(4, 4, variance16x16_sse2, 0),
+                      make_tuple(4, 5, variance16x32_sse2, 0),
+                      make_tuple(5, 4, variance32x16_sse2, 0),
+                      make_tuple(5, 5, variance32x32_sse2, 0),
+                      make_tuple(5, 6, variance32x64_sse2, 0),
+                      make_tuple(6, 5, variance64x32_sse2, 0),
+                      make_tuple(6, 6, variance64x64_sse2, 0)));
 const vp9_subpixvariance_fn_t subpel_variance4x4_sse =
     vp9_sub_pixel_variance4x4_sse;
 const vp9_subpixvariance_fn_t subpel_variance4x8_sse =
@@ -730,20 +1459,19 @@ const vp9_subpixvariance_fn_t subpel_variance64x64_sse2 =
     vp9_sub_pixel_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9SubpelVarianceTest,
-    ::testing::Values(make_tuple(2, 2, subpel_variance4x4_sse),
-                      make_tuple(2, 3, subpel_variance4x8_sse),
-                      make_tuple(3, 2, subpel_variance8x4_sse2),
-                      make_tuple(3, 3, subpel_variance8x8_sse2),
-                      make_tuple(3, 4, subpel_variance8x16_sse2),
-                      make_tuple(4, 3, subpel_variance16x8_sse2),
-                      make_tuple(4, 4, subpel_variance16x16_sse2),
-                      make_tuple(4, 5, subpel_variance16x32_sse2),
-                      make_tuple(5, 4, subpel_variance32x16_sse2),
-                      make_tuple(5, 5, subpel_variance32x32_sse2),
-                      make_tuple(5, 6, subpel_variance32x64_sse2),
-                      make_tuple(6, 5, subpel_variance64x32_sse2),
-                      make_tuple(6, 6, subpel_variance64x64_sse2)));
-
+    ::testing::Values(make_tuple(2, 2, subpel_variance4x4_sse, 0),
+                      make_tuple(2, 3, subpel_variance4x8_sse, 0),
+                      make_tuple(3, 2, subpel_variance8x4_sse2, 0),
+                      make_tuple(3, 3, subpel_variance8x8_sse2, 0),
+                      make_tuple(3, 4, subpel_variance8x16_sse2, 0),
+                      make_tuple(4, 3, subpel_variance16x8_sse2, 0),
+                      make_tuple(4, 4, subpel_variance16x16_sse2, 0),
+                      make_tuple(4, 5, subpel_variance16x32_sse2, 0),
+                      make_tuple(5, 4, subpel_variance32x16_sse2, 0),
+                      make_tuple(5, 5, subpel_variance32x32_sse2, 0),
+                      make_tuple(5, 6, subpel_variance32x64_sse2, 0),
+                      make_tuple(6, 5, subpel_variance64x32_sse2, 0),
+                      make_tuple(6, 6, subpel_variance64x64_sse2, 0)));
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_sse =
     vp9_sub_pixel_avg_variance4x4_sse;
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_sse =
@@ -772,22 +1500,316 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_sse2 =
     vp9_sub_pixel_avg_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9SubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_sse),
-                      make_tuple(2, 3, subpel_avg_variance4x8_sse),
-                      make_tuple(3, 2, subpel_avg_variance8x4_sse2),
-                      make_tuple(3, 3, subpel_avg_variance8x8_sse2),
-                      make_tuple(3, 4, subpel_avg_variance8x16_sse2),
-                      make_tuple(4, 3, subpel_avg_variance16x8_sse2),
-                      make_tuple(4, 4, subpel_avg_variance16x16_sse2),
-                      make_tuple(4, 5, subpel_avg_variance16x32_sse2),
-                      make_tuple(5, 4, subpel_avg_variance32x16_sse2),
-                      make_tuple(5, 5, subpel_avg_variance32x32_sse2),
-                      make_tuple(5, 6, subpel_avg_variance32x64_sse2),
-                      make_tuple(6, 5, subpel_avg_variance64x32_sse2),
-                      make_tuple(6, 6, subpel_avg_variance64x64_sse2)));
-#endif
-#endif
-
+    ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_sse, 0),
+                      make_tuple(2, 3, subpel_avg_variance4x8_sse, 0),
+                      make_tuple(3, 2, subpel_avg_variance8x4_sse2, 0),
+                      make_tuple(3, 3, subpel_avg_variance8x8_sse2, 0),
+                      make_tuple(3, 4, subpel_avg_variance8x16_sse2, 0),
+                      make_tuple(4, 3, subpel_avg_variance16x8_sse2, 0),
+                      make_tuple(4, 4, subpel_avg_variance16x16_sse2, 0),
+                      make_tuple(4, 5, subpel_avg_variance16x32_sse2, 0),
+                      make_tuple(5, 4, subpel_avg_variance32x16_sse2, 0),
+                      make_tuple(5, 5, subpel_avg_variance32x32_sse2, 0),
+                      make_tuple(5, 6, subpel_avg_variance32x64_sse2, 0),
+                      make_tuple(6, 5, subpel_avg_variance64x32_sse2, 0),
+                      make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0)));
+#if CONFIG_VP9_HIGHBITDEPTH
+const vp9_variance_fn_t highbd_variance8x8_sse2 = vp9_highbd_variance8x8_sse2;
+const vp9_variance_fn_t highbd_10_variance8x8_sse2 =
+    vp9_highbd_10_variance8x8_sse2;
+const vp9_variance_fn_t highbd_12_variance8x8_sse2 =
+    vp9_highbd_12_variance8x8_sse2;
+const vp9_variance_fn_t highbd_variance8x16_sse2 = vp9_highbd_variance8x16_sse2;
+const vp9_variance_fn_t highbd_10_variance8x16_sse2 =
+    vp9_highbd_10_variance8x16_sse2;
+const vp9_variance_fn_t highbd_12_variance8x16_sse2 =
+    vp9_highbd_12_variance8x16_sse2;
+const vp9_variance_fn_t highbd_variance16x8_sse2 =
+    vp9_highbd_variance16x8_sse2;
+const vp9_variance_fn_t highbd_10_variance16x8_sse2 =
+    vp9_highbd_10_variance16x8_sse2;
+const vp9_variance_fn_t highbd_12_variance16x8_sse2 =
+    vp9_highbd_12_variance16x8_sse2;
+const vp9_variance_fn_t highbd_variance16x16_sse2 =
+    vp9_highbd_variance16x16_sse2;
+const vp9_variance_fn_t highbd_10_variance16x16_sse2 =
+    vp9_highbd_10_variance16x16_sse2;
+const vp9_variance_fn_t highbd_12_variance16x16_sse2 =
+    vp9_highbd_12_variance16x16_sse2;
+const vp9_variance_fn_t highbd_variance16x32_sse2 =
+    vp9_highbd_variance16x32_sse2;
+const vp9_variance_fn_t highbd_10_variance16x32_sse2 =
+    vp9_highbd_10_variance16x32_sse2;
+const vp9_variance_fn_t highbd_12_variance16x32_sse2 =
+    vp9_highbd_12_variance16x32_sse2;
+const vp9_variance_fn_t highbd_variance32x16_sse2 =
+    vp9_highbd_variance32x16_sse2;
+const vp9_variance_fn_t highbd_10_variance32x16_sse2 =
+    vp9_highbd_10_variance32x16_sse2;
+const vp9_variance_fn_t highbd_12_variance32x16_sse2 =
+    vp9_highbd_12_variance32x16_sse2;
+const vp9_variance_fn_t highbd_variance32x32_sse2 =
+    vp9_highbd_variance32x32_sse2;
+const vp9_variance_fn_t highbd_10_variance32x32_sse2 =
+    vp9_highbd_10_variance32x32_sse2;
+const vp9_variance_fn_t highbd_12_variance32x32_sse2 =
+    vp9_highbd_12_variance32x32_sse2;
+const vp9_variance_fn_t highbd_variance32x64_sse2 =
+    vp9_highbd_variance32x64_sse2;
+const vp9_variance_fn_t highbd_10_variance32x64_sse2 =
+    vp9_highbd_10_variance32x64_sse2;
+const vp9_variance_fn_t highbd_12_variance32x64_sse2 =
+    vp9_highbd_12_variance32x64_sse2;
+const vp9_variance_fn_t highbd_variance64x32_sse2 =
+    vp9_highbd_variance64x32_sse2;
+const vp9_variance_fn_t highbd_10_variance64x32_sse2 =
+    vp9_highbd_10_variance64x32_sse2;
+const vp9_variance_fn_t highbd_12_variance64x32_sse2 =
+    vp9_highbd_12_variance64x32_sse2;
+const vp9_variance_fn_t highbd_variance64x64_sse2 =
+    vp9_highbd_variance64x64_sse2;
+const vp9_variance_fn_t highbd_10_variance64x64_sse2 =
+    vp9_highbd_10_variance64x64_sse2;
+const vp9_variance_fn_t highbd_12_variance64x64_sse2 =
+    vp9_highbd_12_variance64x64_sse2;
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP9VarianceHighTest,
+    ::testing::Values(make_tuple(3, 3, highbd_10_variance8x8_sse2,   10),
+                      make_tuple(3, 4, highbd_10_variance8x16_sse2,  10),
+                      make_tuple(4, 3, highbd_10_variance16x8_sse2,  10),
+                      make_tuple(4, 4, highbd_10_variance16x16_sse2, 10),
+                      make_tuple(4, 5, highbd_10_variance16x32_sse2, 10),
+                      make_tuple(5, 4, highbd_10_variance32x16_sse2, 10),
+                      make_tuple(5, 5, highbd_10_variance32x32_sse2, 10),
+                      make_tuple(5, 6, highbd_10_variance32x64_sse2, 10),
+                      make_tuple(6, 5, highbd_10_variance64x32_sse2, 10),
+                      make_tuple(6, 6, highbd_10_variance64x64_sse2, 10),
+                      make_tuple(3, 3, highbd_12_variance8x8_sse2,   12),
+                      make_tuple(3, 4, highbd_12_variance8x16_sse2,  12),
+                      make_tuple(4, 3, highbd_12_variance16x8_sse2,  12),
+                      make_tuple(4, 4, highbd_12_variance16x16_sse2, 12),
+                      make_tuple(4, 5, highbd_12_variance16x32_sse2, 12),
+                      make_tuple(5, 4, highbd_12_variance32x16_sse2, 12),
+                      make_tuple(5, 5, highbd_12_variance32x32_sse2, 12),
+                      make_tuple(5, 6, highbd_12_variance32x64_sse2, 12),
+                      make_tuple(6, 5, highbd_12_variance64x32_sse2, 12),
+                      make_tuple(6, 6, highbd_12_variance64x64_sse2, 12),
+                      make_tuple(3, 3, highbd_variance8x8_sse2,   8),
+                      make_tuple(3, 4, highbd_variance8x16_sse2,  8),
+                      make_tuple(4, 3, highbd_variance16x8_sse2,  8),
+                      make_tuple(4, 4, highbd_variance16x16_sse2, 8),
+                      make_tuple(4, 5, highbd_variance16x32_sse2, 8),
+                      make_tuple(5, 4, highbd_variance32x16_sse2, 8),
+                      make_tuple(5, 5, highbd_variance32x32_sse2, 8),
+                      make_tuple(5, 6, highbd_variance32x64_sse2, 8),
+                      make_tuple(6, 5, highbd_variance64x32_sse2, 8),
+                      make_tuple(6, 6, highbd_variance64x64_sse2, 8)));
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_sse2 =
+    vp9_highbd_sub_pixel_variance8x4_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_sse2 =
+    vp9_highbd_sub_pixel_variance8x8_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_sse2 =
+    vp9_highbd_sub_pixel_variance8x16_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_sse2 =
+    vp9_highbd_sub_pixel_variance16x8_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_sse2 =
+    vp9_highbd_sub_pixel_variance16x16_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_sse2 =
+    vp9_highbd_sub_pixel_variance16x32_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_sse2 =
+    vp9_highbd_sub_pixel_variance32x16_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_sse2 =
+    vp9_highbd_sub_pixel_variance32x32_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_sse2 =
+    vp9_highbd_sub_pixel_variance32x64_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_sse2 =
+    vp9_highbd_sub_pixel_variance64x32_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_sse2 =
+    vp9_highbd_sub_pixel_variance64x64_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_sse2 =
+    vp9_highbd_10_sub_pixel_variance8x4_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_sse2 =
+    vp9_highbd_10_sub_pixel_variance8x8_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_sse2 =
+    vp9_highbd_10_sub_pixel_variance8x16_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_sse2 =
+    vp9_highbd_10_sub_pixel_variance16x8_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_sse2 =
+    vp9_highbd_10_sub_pixel_variance16x16_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_sse2 =
+    vp9_highbd_10_sub_pixel_variance16x32_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_sse2 =
+    vp9_highbd_10_sub_pixel_variance32x16_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_sse2 =
+    vp9_highbd_10_sub_pixel_variance32x32_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_sse2 =
+    vp9_highbd_10_sub_pixel_variance32x64_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_sse2 =
+    vp9_highbd_10_sub_pixel_variance64x32_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_sse2 =
+    vp9_highbd_10_sub_pixel_variance64x64_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_sse2 =
+    vp9_highbd_12_sub_pixel_variance8x4_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_sse2 =
+    vp9_highbd_12_sub_pixel_variance8x8_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_sse2 =
+    vp9_highbd_12_sub_pixel_variance8x16_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_sse2 =
+    vp9_highbd_12_sub_pixel_variance16x8_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_sse2 =
+    vp9_highbd_12_sub_pixel_variance16x16_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_sse2 =
+    vp9_highbd_12_sub_pixel_variance16x32_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_sse2 =
+    vp9_highbd_12_sub_pixel_variance32x16_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_sse2 =
+    vp9_highbd_12_sub_pixel_variance32x32_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_sse2 =
+    vp9_highbd_12_sub_pixel_variance32x64_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_sse2 =
+    vp9_highbd_12_sub_pixel_variance64x32_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_sse2 =
+    vp9_highbd_12_sub_pixel_variance64x64_sse2;
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP9SubpelVarianceHighTest,
+    ::testing::Values(make_tuple(3, 2, highbd_10_subpel_variance8x4_sse2,   10),
+                      make_tuple(3, 3, highbd_10_subpel_variance8x8_sse2,   10),
+                      make_tuple(3, 4, highbd_10_subpel_variance8x16_sse2,  10),
+                      make_tuple(4, 3, highbd_10_subpel_variance16x8_sse2,  10),
+                      make_tuple(4, 4, highbd_10_subpel_variance16x16_sse2, 10),
+                      make_tuple(4, 5, highbd_10_subpel_variance16x32_sse2, 10),
+                      make_tuple(5, 4, highbd_10_subpel_variance32x16_sse2, 10),
+                      make_tuple(5, 5, highbd_10_subpel_variance32x32_sse2, 10),
+                      make_tuple(5, 6, highbd_10_subpel_variance32x64_sse2, 10),
+                      make_tuple(6, 5, highbd_10_subpel_variance64x32_sse2, 10),
+                      make_tuple(6, 6, highbd_10_subpel_variance64x64_sse2, 10),
+                      make_tuple(3, 2, highbd_12_subpel_variance8x4_sse2,   12),
+                      make_tuple(3, 3, highbd_12_subpel_variance8x8_sse2,   12),
+                      make_tuple(3, 4, highbd_12_subpel_variance8x16_sse2,  12),
+                      make_tuple(4, 3, highbd_12_subpel_variance16x8_sse2,  12),
+                      make_tuple(4, 4, highbd_12_subpel_variance16x16_sse2, 12),
+                      make_tuple(4, 5, highbd_12_subpel_variance16x32_sse2, 12),
+                      make_tuple(5, 4, highbd_12_subpel_variance32x16_sse2, 12),
+                      make_tuple(5, 5, highbd_12_subpel_variance32x32_sse2, 12),
+                      make_tuple(5, 6, highbd_12_subpel_variance32x64_sse2, 12),
+                      make_tuple(6, 5, highbd_12_subpel_variance64x32_sse2, 12),
+                      make_tuple(6, 6, highbd_12_subpel_variance64x64_sse2, 12),
+                      make_tuple(3, 2, highbd_subpel_variance8x4_sse2,   8),
+                      make_tuple(3, 3, highbd_subpel_variance8x8_sse2,   8),
+                      make_tuple(3, 4, highbd_subpel_variance8x16_sse2,  8),
+                      make_tuple(4, 3, highbd_subpel_variance16x8_sse2,  8),
+                      make_tuple(4, 4, highbd_subpel_variance16x16_sse2, 8),
+                      make_tuple(4, 5, highbd_subpel_variance16x32_sse2, 8),
+                      make_tuple(5, 4, highbd_subpel_variance32x16_sse2, 8),
+                      make_tuple(5, 5, highbd_subpel_variance32x32_sse2, 8),
+                      make_tuple(5, 6, highbd_subpel_variance32x64_sse2, 8),
+                      make_tuple(6, 5, highbd_subpel_variance64x32_sse2, 8),
+                      make_tuple(6, 6, highbd_subpel_variance64x64_sse2, 8)));
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x4_sse2 =
+    vp9_highbd_sub_pixel_avg_variance8x4_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x8_sse2 =
+    vp9_highbd_sub_pixel_avg_variance8x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x16_sse2 =
+    vp9_highbd_sub_pixel_avg_variance8x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x8_sse2 =
+    vp9_highbd_sub_pixel_avg_variance16x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x16_sse2 =
+    vp9_highbd_sub_pixel_avg_variance16x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x32_sse2 =
+    vp9_highbd_sub_pixel_avg_variance16x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x16_sse2 =
+    vp9_highbd_sub_pixel_avg_variance32x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x32_sse2 =
+    vp9_highbd_sub_pixel_avg_variance32x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x64_sse2 =
+    vp9_highbd_sub_pixel_avg_variance32x64_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance64x32_sse2 =
+    vp9_highbd_sub_pixel_avg_variance64x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance64x64_sse2 =
+    vp9_highbd_sub_pixel_avg_variance64x64_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x4_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance8x4_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x8_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance8x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x16_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance8x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x8_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance16x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x16_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance16x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x32_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance16x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x16_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance32x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x32_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance32x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x64_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance32x64_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance64x32_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance64x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance64x64_sse2 =
+    vp9_highbd_10_sub_pixel_avg_variance64x64_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x4_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance8x4_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x8_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance8x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x16_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance8x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x8_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance16x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x16_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance16x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x32_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance16x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x16_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance32x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x32_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance32x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x64_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance32x64_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance64x32_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance64x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance64x64_sse2 =
+    vp9_highbd_12_sub_pixel_avg_variance64x64_sse2;
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP9SubpelAvgVarianceHighTest,
+    ::testing::Values(
+                  make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_sse2,   10),
+                  make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_sse2,   10),
+                  make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_sse2,  10),
+                  make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_sse2,  10),
+                  make_tuple(4, 4, highbd_10_subpel_avg_variance16x16_sse2, 10),
+                  make_tuple(4, 5, highbd_10_subpel_avg_variance16x32_sse2, 10),
+                  make_tuple(5, 4, highbd_10_subpel_avg_variance32x16_sse2, 10),
+                  make_tuple(5, 5, highbd_10_subpel_avg_variance32x32_sse2, 10),
+                  make_tuple(5, 6, highbd_10_subpel_avg_variance32x64_sse2, 10),
+                  make_tuple(6, 5, highbd_10_subpel_avg_variance64x32_sse2, 10),
+                  make_tuple(6, 6, highbd_10_subpel_avg_variance64x64_sse2, 10),
+                  make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_sse2,   12),
+                  make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_sse2,   12),
+                  make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_sse2,  12),
+                  make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_sse2,  12),
+                  make_tuple(4, 4, highbd_12_subpel_avg_variance16x16_sse2, 12),
+                  make_tuple(4, 5, highbd_12_subpel_avg_variance16x32_sse2, 12),
+                  make_tuple(5, 4, highbd_12_subpel_avg_variance32x16_sse2, 12),
+                  make_tuple(5, 5, highbd_12_subpel_avg_variance32x32_sse2, 12),
+                  make_tuple(5, 6, highbd_12_subpel_avg_variance32x64_sse2, 12),
+                  make_tuple(6, 5, highbd_12_subpel_avg_variance64x32_sse2, 12),
+                  make_tuple(6, 6, highbd_12_subpel_avg_variance64x64_sse2, 12),
+                  make_tuple(3, 2, highbd_subpel_avg_variance8x4_sse2,   8),
+                  make_tuple(3, 3, highbd_subpel_avg_variance8x8_sse2,   8),
+                  make_tuple(3, 4, highbd_subpel_avg_variance8x16_sse2,  8),
+                  make_tuple(4, 3, highbd_subpel_avg_variance16x8_sse2,  8),
+                  make_tuple(4, 4, highbd_subpel_avg_variance16x16_sse2, 8),
+                  make_tuple(4, 5, highbd_subpel_avg_variance16x32_sse2, 8),
+                  make_tuple(5, 4, highbd_subpel_avg_variance32x16_sse2, 8),
+                  make_tuple(5, 5, highbd_subpel_avg_variance32x32_sse2, 8),
+                  make_tuple(5, 6, highbd_subpel_avg_variance32x64_sse2, 8),
+                  make_tuple(6, 5, highbd_subpel_avg_variance64x32_sse2, 8),
+                  make_tuple(6, 6, highbd_subpel_avg_variance64x64_sse2, 8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_USE_X86INC
+#endif  // HAVE_SSE2
 #if HAVE_SSSE3
 #if CONFIG_USE_X86INC
 
@@ -819,20 +1841,19 @@ const vp9_subpixvariance_fn_t subpel_variance64x64_ssse3 =
     vp9_sub_pixel_variance64x64_ssse3;
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VP9SubpelVarianceTest,
-    ::testing::Values(make_tuple(2, 2, subpel_variance4x4_ssse3),
-                      make_tuple(2, 3, subpel_variance4x8_ssse3),
-                      make_tuple(3, 2, subpel_variance8x4_ssse3),
-                      make_tuple(3, 3, subpel_variance8x8_ssse3),
-                      make_tuple(3, 4, subpel_variance8x16_ssse3),
-                      make_tuple(4, 3, subpel_variance16x8_ssse3),
-                      make_tuple(4, 4, subpel_variance16x16_ssse3),
-                      make_tuple(4, 5, subpel_variance16x32_ssse3),
-                      make_tuple(5, 4, subpel_variance32x16_ssse3),
-                      make_tuple(5, 5, subpel_variance32x32_ssse3),
-                      make_tuple(5, 6, subpel_variance32x64_ssse3),
-                      make_tuple(6, 5, subpel_variance64x32_ssse3),
-                      make_tuple(6, 6, subpel_variance64x64_ssse3)));
-
+    ::testing::Values(make_tuple(2, 2, subpel_variance4x4_ssse3, 0),
+                      make_tuple(2, 3, subpel_variance4x8_ssse3, 0),
+                      make_tuple(3, 2, subpel_variance8x4_ssse3, 0),
+                      make_tuple(3, 3, subpel_variance8x8_ssse3, 0),
+                      make_tuple(3, 4, subpel_variance8x16_ssse3, 0),
+                      make_tuple(4, 3, subpel_variance16x8_ssse3, 0),
+                      make_tuple(4, 4, subpel_variance16x16_ssse3, 0),
+                      make_tuple(4, 5, subpel_variance16x32_ssse3, 0),
+                      make_tuple(5, 4, subpel_variance32x16_ssse3, 0),
+                      make_tuple(5, 5, subpel_variance32x32_ssse3, 0),
+                      make_tuple(5, 6, subpel_variance32x64_ssse3, 0),
+                      make_tuple(6, 5, subpel_variance64x32_ssse3, 0),
+                      make_tuple(6, 6, subpel_variance64x64_ssse3, 0)));
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_ssse3 =
     vp9_sub_pixel_avg_variance4x4_ssse3;
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_ssse3 =
@@ -861,21 +1882,21 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_ssse3 =
     vp9_sub_pixel_avg_variance64x64_ssse3;
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VP9SubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_ssse3),
-                      make_tuple(2, 3, subpel_avg_variance4x8_ssse3),
-                      make_tuple(3, 2, subpel_avg_variance8x4_ssse3),
-                      make_tuple(3, 3, subpel_avg_variance8x8_ssse3),
-                      make_tuple(3, 4, subpel_avg_variance8x16_ssse3),
-                      make_tuple(4, 3, subpel_avg_variance16x8_ssse3),
-                      make_tuple(4, 4, subpel_avg_variance16x16_ssse3),
-                      make_tuple(4, 5, subpel_avg_variance16x32_ssse3),
-                      make_tuple(5, 4, subpel_avg_variance32x16_ssse3),
-                      make_tuple(5, 5, subpel_avg_variance32x32_ssse3),
-                      make_tuple(5, 6, subpel_avg_variance32x64_ssse3),
-                      make_tuple(6, 5, subpel_avg_variance64x32_ssse3),
-                      make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
-#endif
-#endif
+    ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_ssse3, 0),
+                      make_tuple(2, 3, subpel_avg_variance4x8_ssse3, 0),
+                      make_tuple(3, 2, subpel_avg_variance8x4_ssse3, 0),
+                      make_tuple(3, 3, subpel_avg_variance8x8_ssse3, 0),
+                      make_tuple(3, 4, subpel_avg_variance8x16_ssse3, 0),
+                      make_tuple(4, 3, subpel_avg_variance16x8_ssse3, 0),
+                      make_tuple(4, 4, subpel_avg_variance16x16_ssse3, 0),
+                      make_tuple(4, 5, subpel_avg_variance16x32_ssse3, 0),
+                      make_tuple(5, 4, subpel_avg_variance32x16_ssse3, 0),
+                      make_tuple(5, 5, subpel_avg_variance32x32_ssse3, 0),
+                      make_tuple(5, 6, subpel_avg_variance32x64_ssse3, 0),
+                      make_tuple(6, 5, subpel_avg_variance64x32_ssse3, 0),
+                      make_tuple(6, 6, subpel_avg_variance64x64_ssse3, 0)));
+#endif  // CONFIG_USE_X86INC
+#endif  // HAVE_SSSE3
 
 #if HAVE_AVX2
 
@@ -886,11 +1907,11 @@ const vp9_variance_fn_t variance64x32_avx2 = vp9_variance64x32_avx2;
 const vp9_variance_fn_t variance64x64_avx2 = vp9_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
     AVX2, VP9VarianceTest,
-    ::testing::Values(make_tuple(4, 4, variance16x16_avx2),
-                      make_tuple(5, 4, variance32x16_avx2),
-                      make_tuple(5, 5, variance32x32_avx2),
-                      make_tuple(6, 5, variance64x32_avx2),
-                      make_tuple(6, 6, variance64x64_avx2)));
+    ::testing::Values(make_tuple(4, 4, variance16x16_avx2, 0),
+                      make_tuple(5, 4, variance32x16_avx2, 0),
+                      make_tuple(5, 5, variance32x32_avx2, 0),
+                      make_tuple(6, 5, variance64x32_avx2, 0),
+                      make_tuple(6, 6, variance64x64_avx2, 0)));
 
 const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
     vp9_sub_pixel_variance32x32_avx2;
@@ -898,8 +1919,8 @@ const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
     vp9_sub_pixel_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
     AVX2, VP9SubpelVarianceTest,
-    ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
-                      make_tuple(6, 6, subpel_variance64x64_avx2)));
+    ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2, 0),
+                      make_tuple(6, 6, subpel_variance64x64_avx2, 0)));
 
 const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 =
     vp9_sub_pixel_avg_variance32x32_avx2;
@@ -907,8 +1928,8 @@ const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
     vp9_sub_pixel_avg_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
     AVX2, VP9SubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
-                      make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
+    ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2, 0),
+                      make_tuple(6, 6, subpel_avg_variance64x64_avx2, 0)));
 #endif  // HAVE_AVX2
 #if HAVE_NEON
 const vp9_variance_fn_t variance8x8_neon = vp9_variance8x8_neon;
@@ -916,9 +1937,9 @@ const vp9_variance_fn_t variance16x16_neon = vp9_variance16x16_neon;
 const vp9_variance_fn_t variance32x32_neon = vp9_variance32x32_neon;
 INSTANTIATE_TEST_CASE_P(
     NEON, VP9VarianceTest,
-    ::testing::Values(make_tuple(3, 3, variance8x8_neon),
-                      make_tuple(4, 4, variance16x16_neon),
-                      make_tuple(5, 5, variance32x32_neon)));
+    ::testing::Values(make_tuple(3, 3, variance8x8_neon, 0),
+                      make_tuple(4, 4, variance16x16_neon, 0),
+                      make_tuple(5, 5, variance32x32_neon, 0)));
 
 const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
     vp9_sub_pixel_variance8x8_neon;
@@ -928,12 +1949,11 @@ const vp9_subpixvariance_fn_t subpel_variance32x32_neon =
     vp9_sub_pixel_variance32x32_neon;
 INSTANTIATE_TEST_CASE_P(
     NEON, VP9SubpelVarianceTest,
-    ::testing::Values(make_tuple(3, 3, subpel_variance8x8_neon),
-                      make_tuple(4, 4, subpel_variance16x16_neon),
-                      make_tuple(5, 5, subpel_variance32x32_neon)));
+    ::testing::Values(make_tuple(3, 3, subpel_variance8x8_neon, 0),
+                      make_tuple(4, 4, subpel_variance16x16_neon, 0),
+                      make_tuple(5, 5, subpel_variance32x32_neon, 0)));
 #endif  // HAVE_NEON
 #endif  // CONFIG_VP9_ENCODER
 
 }  // namespace vp9
-
 }  // namespace