* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#include <stdlib.h>
-#include <new>
-#include "third_party/googletest/src/include/gtest/gtest.h"
+#include <cstdlib>
+#include <new>
+#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vpx/vpx_integer.h"
#include "./vpx_config.h"
+#include "vpx/vpx_codec.h"
+#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
#if CONFIG_VP8_ENCODER
# include "./vp8_rtcd.h"
# include "./vp9_rtcd.h"
# include "vp9/encoder/vp9_variance.h"
#endif
-#include "test/acm_random.h"
namespace {
return res;
}
-static unsigned int variance_ref(const uint8_t *ref, const uint8_t *src,
- int l2w, int l2h, unsigned int *sse_ptr) {
+static unsigned int variance_ref(const uint8_t *src, const uint8_t *ref,  // Reference variance of ref vs. src.
+ int l2w, int l2h, int src_stride_coeff,  // Block is (1 << l2w) x (1 << l2h) samples.
+ int ref_stride_coeff, uint32_t *sse_ptr,  // Row pitch is w * stride_coeff; *sse_ptr receives the SSE.
+ bool use_high_bit_depth_,  // true: buffers are read through CONVERT_TO_SHORTPTR.
+ vpx_bit_depth_t bit_depth) {  // Depths above VPX_BITS_8 get their sums rescaled below.
+#if CONFIG_VP9_HIGHBITDEPTH
+ int64_t se = 0;  // Running sum of (ref - src).
+ uint64_t sse = 0;  // Running sum of (ref - src)^2.
+ const int w = 1 << l2w;
+ const int h = 1 << l2h;
+ for (int y = 0; y < h; y++) {
+ for (int x = 0; x < w; x++) {
+ int diff;
+ if (!use_high_bit_depth_) {
+ diff = ref[w * y * ref_stride_coeff + x] -
+ src[w * y * src_stride_coeff + x];
+ se += diff;
+ sse += diff * diff;
+ } else {
+ diff = CONVERT_TO_SHORTPTR(ref)[w * y * ref_stride_coeff + x] -
+ CONVERT_TO_SHORTPTR(src)[w * y * src_stride_coeff + x];
+ se += diff;
+ sse += diff * diff;
+ }
+ }
+ }
+ if (bit_depth > VPX_BITS_8) {  // Round both sums down by the extra bits of depth.
+ sse = ROUND_POWER_OF_TWO(sse, 2 * (bit_depth - 8));  // Squares carry twice the extra bits.
+ se = ROUND_POWER_OF_TWO(se, bit_depth - 8);
+ }
+#else
int se = 0;
unsigned int sse = 0;
- const int w = 1 << l2w, h = 1 << l2h;
+ const int w = 1 << l2w;
+ const int h = 1 << l2h;
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
- int diff = ref[w * y + x] - src[w * y + x];
+ int diff = ref[w * y * ref_stride_coeff + x] -
+ src[w * y * src_stride_coeff + x];
se += diff;
sse += diff * diff;
}
}
+#endif // CONFIG_VP9_HIGHBITDEPTH (below: variance = sse - se^2 / (w * h))
*sse_ptr = sse;
return sse - (((int64_t) se * se) >> (l2w + l2h));
}
static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
int l2w, int l2h, int xoff, int yoff,
- unsigned int *sse_ptr) {
+ unsigned int *sse_ptr,
+ bool use_high_bit_depth_,
+ vpx_bit_depth_t bit_depth) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ int64_t se = 0;
+ uint64_t sse = 0;
+ const int w = 1 << l2w;
+ const int h = 1 << l2h;
+ for (int y = 0; y < h; y++) {
+ for (int x = 0; x < w; x++) {
+ // Bilinear interpolation at a 16th pel step.
+ if (!use_high_bit_depth_) {
+ const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+ const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+ const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+ const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+ const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+ const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+ const int r = a + (((b - a) * yoff + 8) >> 4);
+ const int diff = r - src[w * y + x];
+ se += diff;
+ sse += diff * diff;
+ } else {
+ uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
+ uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+ const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
+ const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
+ const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
+ const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
+ const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+ const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+ const int r = a + (((b - a) * yoff + 8) >> 4);
+ const int diff = r - src16[w * y + x];
+ se += diff;
+ sse += diff * diff;
+ }
+ }
+ }
+ if (bit_depth > VPX_BITS_8) {
+ sse = ROUND_POWER_OF_TWO(sse, 2 * (bit_depth - 8));
+ se = ROUND_POWER_OF_TWO(se, bit_depth - 8);
+ }
+#else
int se = 0;
unsigned int sse = 0;
- const int w = 1 << l2w, h = 1 << l2h;
+ const int w = 1 << l2w;
+ const int h = 1 << l2h;
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
- // bilinear interpolation at a 16th pel step
+ // Bilinear interpolation at a 16th pel step.
const int a1 = ref[(w + 1) * (y + 0) + x + 0];
const int a2 = ref[(w + 1) * (y + 0) + x + 1];
const int b1 = ref[(w + 1) * (y + 1) + x + 0];
const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
const int r = a + (((b - a) * yoff + 8) >> 4);
- int diff = r - src[w * y + x];
+ const int diff = r - src[w * y + x];
se += diff;
sse += diff * diff;
}
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
*sse_ptr = sse;
return sse - (((int64_t) se * se) >> (l2w + l2h));
}
template<typename VarianceFunctionType>
class VarianceTest
- : public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
+ : public ::testing::TestWithParam<tuple<int, int,  // Params: log2 width, log2 height,
+ VarianceFunctionType, int> > {  // function under test, bit depth (0 selects the 8-bit path).
public:
virtual void SetUp() {
- const tuple<int, int, VarianceFunctionType>& params = this->GetParam();
+ const tuple<int, int, VarianceFunctionType, int>& params = this->GetParam();
log2width_ = get<0>(params);
width_ = 1 << log2width_;
log2height_ = get<1>(params);
height_ = 1 << log2height_;
variance_ = get<2>(params);
+ if (get<3>(params)) {  // Non-zero fourth parameter: run the high-bit-depth path at that depth.
+ bit_depth_ = static_cast<vpx_bit_depth_t>(get<3>(params));
+ use_high_bit_depth_ = true;
+ } else {
+ bit_depth_ = VPX_BITS_8;
+ use_high_bit_depth_ = false;
+ }
+ mask_ = (1 << bit_depth_) - 1;  // Largest sample value representable in bit_depth_ bits.
rnd_.Reset(ACMRandom::DeterministicSeed());
block_size_ = width_ * height_;
- src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
- ref_ = new uint8_t[block_size_];
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {  // Both buffers hold 2 * block_size_ samples.
+ src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_ * 2));
+ ref_ = new uint8_t[block_size_ * 2];
+ } else {  // uint16_t storage, kept in the uint8_t * members via CONVERT_TO_BYTEPTR.
+ src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
+ vpx_memalign(16, block_size_ * 2 * sizeof(uint16_t))));
+ ref_ = CONVERT_TO_BYTEPTR(new uint16_t[block_size_ * 2]);
+ }
+#else
+ src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_ * 2));
+ ref_ = new uint8_t[block_size_ * 2];
+#endif
ASSERT_TRUE(src_ != NULL);
ASSERT_TRUE(ref_ != NULL);
}
virtual void TearDown() {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {  // Release with the same allocator family SetUp used.
+ vpx_free(src_);
+ delete[] ref_;
+ } else {  // Convert back with CONVERT_TO_SHORTPTR before releasing the uint16_t storage.
+ vpx_free(CONVERT_TO_SHORTPTR(src_));
+ delete[] CONVERT_TO_SHORTPTR(ref_);
+ }
+#else
vpx_free(src_);
delete[] ref_;
+#endif
libvpx_test::ClearSystemState();
}
protected:
void ZeroTest();
void RefTest();
+ void RefStrideTest();  // Compares against variance_ref with per-buffer stride coefficients.
void OneQuarterTest();
ACMRandom rnd_;
- uint8_t* src_;
- uint8_t* ref_;
+ uint8_t *src_;  // vpx_memalign'd source block.
+ uint8_t *ref_;  // new[]-allocated reference block.
int width_, log2width_;
int height_, log2height_;
+ vpx_bit_depth_t bit_depth_;  // VPX_BITS_8 unless the fourth test parameter is non-zero.
+ int mask_;  // (1 << bit_depth_) - 1.
+ bool use_high_bit_depth_;  // Selects the uint16_t code paths.
int block_size_;
VarianceFunctionType variance_;
};
template<typename VarianceFunctionType>
void VarianceTest<VarianceFunctionType>::ZeroTest() {
for (int i = 0; i <= 255; ++i) {
+#if CONFIG_VP9_HIGHBITDEPTH  // Fill src_ with the constant level i.
+ if (!use_high_bit_depth_) {
+ memset(src_, i, block_size_);
+ } else {
+ vpx_memset16(CONVERT_TO_SHORTPTR(src_), i << (bit_depth_ - 8),  // Level shifted up by the extra bits of depth.
+ block_size_);
+ }
+#else
memset(src_, i, block_size_);
+#endif
for (int j = 0; j <= 255; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH  // Fill ref_ with the constant level j.
+ if (!use_high_bit_depth_) {
+ memset(ref_, j, block_size_);
+ } else {
+ vpx_memset16(CONVERT_TO_SHORTPTR(ref_), j << (bit_depth_ - 8),  // Same shift as for src_.
+ block_size_);
+ }
+#else
memset(ref_, j, block_size_);
+#endif
unsigned int sse;
unsigned int var;
ASM_REGISTER_STATE_CHECK(
var = variance_(src_, width_, ref_, width_, &sse));
- EXPECT_EQ(0u, var) << "src values: " << i << "ref values: " << j;
+ EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;  // Two flat blocks: variance must be 0.
}
}
}
void VarianceTest<VarianceFunctionType>::RefTest() {
+ // Runs 10 rounds of random src_/ref_ blocks and checks that variance_
+ // returns the same SSE and variance as variance_ref.
for (int i = 0; i < 10; ++i) {
for (int j = 0; j < block_size_; j++) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
src_[j] = rnd_.Rand8();
ref_[j] = rnd_.Rand8();
+ } else {
+ // Bitwise AND keeps each random sample within bit_depth_ bits. A logical
+ // && would reduce every sample to 0 or 1.
+ CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_;
+ CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_;
+ }
+#else
+ src_[j] = rnd_.Rand8();
+ ref_[j] = rnd_.Rand8();
+#endif
}
unsigned int sse1, sse2;
unsigned int var1;
+ // Rows are contiguous here: variance_ is called with a pitch of width_.
+ const int stride_coeff = 1;
ASM_REGISTER_STATE_CHECK(
var1 = variance_(src_, width_, ref_, width_, &sse1));
const unsigned int var2 = variance_ref(src_, ref_, log2width_,
- log2height_, &sse2);
+ log2height_, stride_coeff,
+ stride_coeff, &sse2,
+ use_high_bit_depth_, bit_depth_);
+ EXPECT_EQ(sse1, sse2);
+ EXPECT_EQ(var1, var2);
+ }
+}
+
+template<typename VarianceFunctionType>
+void VarianceTest<VarianceFunctionType>::RefStrideTest() {
+ // Same comparison as RefTest, but each buffer's pitch is width_ times a
+ // coefficient of 0 or 1. The loop cycles through the four combinations;
+ // a coefficient of 0 makes every row read the first row.
+ for (int i = 0; i < 10; ++i) {
+ int ref_stride_coeff = i % 2;
+ int src_stride_coeff = (i >> 1) % 2;
+ for (int j = 0; j < block_size_; j++) {
+ int ref_ind = (j / width_) * ref_stride_coeff * width_ + j % width_;
+ int src_ind = (j / width_) * src_stride_coeff * width_ + j % width_;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
+ src_[src_ind] = rnd_.Rand8();
+ ref_[ref_ind] = rnd_.Rand8();
+ } else {
+ // Bitwise AND keeps each random sample within bit_depth_ bits. A logical
+ // && would reduce every sample to 0 or 1.
+ CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask_;
+ CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask_;
+ }
+#else
+ src_[src_ind] = rnd_.Rand8();
+ ref_[ref_ind] = rnd_.Rand8();
+#endif
+ }
+ unsigned int sse1, sse2;
+ unsigned int var1;
+
+ ASM_REGISTER_STATE_CHECK(
+ var1 = variance_(src_, width_ * src_stride_coeff,
+ ref_, width_ * ref_stride_coeff, &sse1));
+ const unsigned int var2 = variance_ref(src_, ref_, log2width_,
+ log2height_, src_stride_coeff,
+ ref_stride_coeff, &sse2,
+ use_high_bit_depth_, bit_depth_);
EXPECT_EQ(sse1, sse2);
EXPECT_EQ(var1, var2);
}
template<typename VarianceFunctionType>
void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
- memset(src_, 255, block_size_);
const int half = block_size_ / 2;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
+ memset(src_, 255, block_size_);
+ memset(ref_, 255, half);
+ memset(ref_ + half, 0, half);
+ } else {
+ vpx_memset16(CONVERT_TO_SHORTPTR(src_), 255 << (bit_depth_ - 8),
+ block_size_);
+ vpx_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << (bit_depth_ - 8), half);
+ vpx_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
+ }
+#else
+ memset(src_, 255, block_size_);
memset(ref_, 255, half);
memset(ref_ + half, 0, half);
+#endif
unsigned int sse;
unsigned int var;
ASM_REGISTER_STATE_CHECK(var = variance_(src_, width_, ref_, width_, &sse));
ref_[j] = rnd.Rand8();
}
unsigned int sse1, sse2;
+ const int stride_coeff = 1;
ASM_REGISTER_STATE_CHECK(mse_(src_, width_, ref_, width_, &sse1));
- variance_ref(src_, ref_, log2width_, log2height_, &sse2);
+ variance_ref(src_, ref_, log2width_, log2height_, stride_coeff,
+ stride_coeff, &sse2, false, VPX_BITS_8);
EXPECT_EQ(sse1, sse2);
}
}
}
unsigned int sse2;
unsigned int var1;
- ASM_REGISTER_STATE_CHECK(
- var1 = mse_(src_, width_, ref_, width_));
- variance_ref(src_, ref_, log2width_, log2height_, &sse2);
+ const int stride_coeff = 1;
+ ASM_REGISTER_STATE_CHECK(var1 = mse_(src_, width_, ref_, width_));
+ variance_ref(src_, ref_, log2width_, log2height_, stride_coeff,
+ stride_coeff, &sse2, false, VPX_BITS_8);
EXPECT_EQ(var1, sse2);
}
}
#endif
#if CONFIG_VP9_ENCODER
-
unsigned int subpel_avg_variance_ref(const uint8_t *ref,
const uint8_t *src,
const uint8_t *second_pred,
int l2w, int l2h,
int xoff, int yoff,
- unsigned int *sse_ptr) {
+ unsigned int *sse_ptr,
+ bool use_high_bit_depth,
+ vpx_bit_depth_t bit_depth) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ int64_t se = 0;
+ uint64_t sse = 0;
+ const int w = 1 << l2w;
+ const int h = 1 << l2h;
+ for (int y = 0; y < h; y++) {
+ for (int x = 0; x < w; x++) {
+ // bilinear interpolation at a 16th pel step
+ if (!use_high_bit_depth) {
+ const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+ const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+ const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+ const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+ const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+ const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+ const int r = a + (((b - a) * yoff + 8) >> 4);
+ const int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+ se += diff;
+ sse += diff * diff;
+ } else {
+ uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
+ uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+ uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
+ const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
+ const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
+ const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
+ const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
+ const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+ const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+ const int r = a + (((b - a) * yoff + 8) >> 4);
+ const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
+ se += diff;
+ sse += diff * diff;
+ }
+ }
+ }
+ if (bit_depth > 8) {
+ sse = ROUND_POWER_OF_TWO(sse, 2*(bit_depth-8));
+ se = ROUND_POWER_OF_TWO(se, bit_depth-8);
+ }
+#else
int se = 0;
unsigned int sse = 0;
- const int w = 1 << l2w, h = 1 << l2h;
+ const int w = 1 << l2w;
+ const int h = 1 << l2h;
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
// bilinear interpolation at a 16th pel step
const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
const int r = a + (((b - a) * yoff + 8) >> 4);
- int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+ const int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
se += diff;
sse += diff * diff;
}
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
*sse_ptr = sse;
return sse - (((int64_t) se * se) >> (l2w + l2h));
}
template<typename SubpelVarianceFunctionType>
class SubpelVarianceTest
: public ::testing::TestWithParam<tuple<int, int,
- SubpelVarianceFunctionType> > {
+ SubpelVarianceFunctionType, int> > {
+ // Fixture for sub-pixel variance functions. Test parameters are: log2 width,
+ // log2 height, function under test, and bit depth (0 selects the 8-bit path).
public:
virtual void SetUp() {
- const tuple<int, int, SubpelVarianceFunctionType>& params =
+ const tuple<int, int, SubpelVarianceFunctionType, int>& params =
this->GetParam();
log2width_ = get<0>(params);
width_ = 1 << log2width_;
log2height_ = get<1>(params);
height_ = 1 << log2height_;
subpel_variance_ = get<2>(params);
+ if (get<3>(params)) {
+ // Named cast, matching VarianceTest::SetUp.
+ bit_depth_ = static_cast<vpx_bit_depth_t>(get<3>(params));
+ use_high_bit_depth_ = true;
+ } else {
+ bit_depth_ = VPX_BITS_8;
+ use_high_bit_depth_ = false;
+ }
+ // Largest sample value representable in bit_depth_ bits.
+ mask_ = (1 << bit_depth_) - 1;
rnd_.Reset(ACMRandom::DeterministicSeed());
block_size_ = width_ * height_;
+#if CONFIG_VP9_HIGHBITDEPTH
+ // ref_ holds (width_ + 1) * (height_ + 1) samples: the bilinear taps read
+ // one extra column and one extra row beyond the block.
+ if (!use_high_bit_depth_) {
+ src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
+ sec_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
+ ref_ = new uint8_t[block_size_ + width_ + height_ + 1];
+ } else {
+ src_ = CONVERT_TO_BYTEPTR(
+ reinterpret_cast<uint16_t *>(
+ vpx_memalign(16, block_size_ * sizeof(uint16_t))));
+ sec_ = CONVERT_TO_BYTEPTR(
+ reinterpret_cast<uint16_t *>(
+ vpx_memalign(16, block_size_ * sizeof(uint16_t))));
+ ref_ = CONVERT_TO_BYTEPTR(
+ new uint16_t[block_size_ + width_ + height_ + 1]);
+ }
+#else
src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
sec_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
ref_ = new uint8_t[block_size_ + width_ + height_ + 1];
+#endif // CONFIG_VP9_HIGHBITDEPTH
ASSERT_TRUE(src_ != NULL);
ASSERT_TRUE(sec_ != NULL);
ASSERT_TRUE(ref_ != NULL);
}
virtual void TearDown() {
+#if CONFIG_VP9_HIGHBITDEPTH
+ // Each buffer is released with the allocator family SetUp used for it. The
+ // high-bit-depth pointers are converted back with CONVERT_TO_SHORTPTR first.
+ if (!use_high_bit_depth_) {
+ vpx_free(src_);
+ delete[] ref_;
+ vpx_free(sec_);
+ } else {
+ vpx_free(CONVERT_TO_SHORTPTR(src_));
+ delete[] CONVERT_TO_SHORTPTR(ref_);
+ vpx_free(CONVERT_TO_SHORTPTR(sec_));
+ }
+#else
vpx_free(src_);
delete[] ref_;
vpx_free(sec_);
+#endif
libvpx_test::ClearSystemState();
}
protected:
void RefTest();
+ void ExtremeRefTest();
ACMRandom rnd_;
uint8_t *src_;
uint8_t *ref_;
uint8_t *sec_;
+ bool use_high_bit_depth_;
+ vpx_bit_depth_t bit_depth_;
int width_, log2width_;
int height_, log2height_;
- int block_size_;
+ int block_size_, mask_;
SubpelVarianceFunctionType subpel_variance_;
};
void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
for (int x = 0; x < 16; ++x) {
for (int y = 0; y < 16; ++y) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
+ for (int j = 0; j < block_size_; j++) {
+ src_[j] = rnd_.Rand8();
+ }
+ for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+ ref_[j] = rnd_.Rand8();
+ }
+ } else {
+ for (int j = 0; j < block_size_; j++) {
+ CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_;
+ }
+ for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+ CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_;
+ }
+ }
+#else
for (int j = 0; j < block_size_; j++) {
src_[j] = rnd_.Rand8();
}
for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
ref_[j] = rnd_.Rand8();
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
unsigned int sse1, sse2;
unsigned int var1;
ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
src_, width_, &sse1));
const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
- log2height_, x, y, &sse2);
+ log2height_, x, y, &sse2,
+ use_high_bit_depth_,
+ bit_depth_);
+ EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+ EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+ }
+ }
+}
+
+template<typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
+ // Compare against reference using saturated inputs, for every (x, y) offset pair in 0..15.
+ // 8-bit path: src is 0 for its first half and 255 for its second; ref is 255 for its first half and 0 for the rest.
+ // High-bit-depth path: the pattern is mirrored. src is mask_ then 0; ref is 0 then mask_ for the rest.
+ for (int x = 0; x < 16; ++x) {
+ for (int y = 0; y < 16; ++y) {
+ const int half = block_size_ / 2;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
+ memset(src_, 0, half);
+ memset(src_ + half, 255, half);
+ memset(ref_, 255, half);
+ memset(ref_ + half, 0, half + width_ + height_ + 1);  // Also covers the extra border samples of ref_.
+ } else {
+ vpx_memset16(CONVERT_TO_SHORTPTR(src_), mask_, half);
+ vpx_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
+ vpx_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
+ vpx_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask_,
+ half + width_ + height_ + 1);
+ }
+#else
+ memset(src_, 0, half);
+ memset(src_ + half, 255, half);
+ memset(ref_, 255, half);
+ memset(ref_ + half, 0, half + width_ + height_ + 1);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ unsigned int sse1, sse2;
+ unsigned int var1;
+ ASM_REGISTER_STATE_CHECK(
+ var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1));
+ const unsigned int var2 =
+ subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2,
+ use_high_bit_depth_, bit_depth_);
EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
}
void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
for (int x = 0; x < 16; ++x) {
for (int y = 0; y < 16; ++y) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!use_high_bit_depth_) {
+ for (int j = 0; j < block_size_; j++) {
+ src_[j] = rnd_.Rand8();
+ sec_[j] = rnd_.Rand8();
+ }
+ for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+ ref_[j] = rnd_.Rand8();
+ }
+ } else {
+ for (int j = 0; j < block_size_; j++) {
+ CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_;
+ CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask_;
+ }
+ for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
+ CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_;
+ }
+ }
+#else
for (int j = 0; j < block_size_; j++) {
src_[j] = rnd_.Rand8();
sec_[j] = rnd_.Rand8();
for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
ref_[j] = rnd_.Rand8();
}
+#endif
unsigned int sse1, sse2;
unsigned int var1;
ASM_REGISTER_STATE_CHECK(
src_, width_, &sse1, sec_));
const unsigned int var2 = subpel_avg_variance_ref(ref_, src_, sec_,
log2width_, log2height_,
- x, y, &sse2);
+ x, y, &sse2,
+ use_high_bit_depth_,
+ bit_depth_);
EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
}
const vp8_variance_fn_t variance16x16_c = vp8_variance16x16_c;
INSTANTIATE_TEST_CASE_P(
C, VP8VarianceTest,
- ::testing::Values(make_tuple(2, 2, variance4x4_c),
- make_tuple(3, 3, variance8x8_c),
- make_tuple(3, 4, variance8x16_c),
- make_tuple(4, 3, variance16x8_c),
- make_tuple(4, 4, variance16x16_c)));
+ ::testing::Values(make_tuple(2, 2, variance4x4_c, 0),
+ make_tuple(3, 3, variance8x8_c, 0),
+ make_tuple(3, 4, variance8x16_c, 0),
+ make_tuple(4, 3, variance16x8_c, 0),
+ make_tuple(4, 4, variance16x16_c, 0)));
#if HAVE_NEON
const vp8_sse_fn_t get4x4sse_cs_neon = vp8_get4x4sse_cs_neon;
const vp8_variance_fn_t variance16x16_neon = vp8_variance16x16_neon;
INSTANTIATE_TEST_CASE_P(
NEON, VP8VarianceTest,
- ::testing::Values(make_tuple(3, 3, variance8x8_neon),
- make_tuple(3, 4, variance8x16_neon),
- make_tuple(4, 3, variance16x8_neon),
- make_tuple(4, 4, variance16x16_neon)));
+ ::testing::Values(make_tuple(3, 3, variance8x8_neon, 0),
+ make_tuple(3, 4, variance8x16_neon, 0),
+ make_tuple(4, 3, variance16x8_neon, 0),
+ make_tuple(4, 4, variance16x16_neon, 0)));
#endif
-
#if HAVE_MMX
const vp8_variance_fn_t variance4x4_mmx = vp8_variance4x4_mmx;
const vp8_variance_fn_t variance8x8_mmx = vp8_variance8x8_mmx;
const vp8_variance_fn_t variance16x16_mmx = vp8_variance16x16_mmx;
INSTANTIATE_TEST_CASE_P(
MMX, VP8VarianceTest,
- ::testing::Values(make_tuple(2, 2, variance4x4_mmx),
- make_tuple(3, 3, variance8x8_mmx),
- make_tuple(3, 4, variance8x16_mmx),
- make_tuple(4, 3, variance16x8_mmx),
- make_tuple(4, 4, variance16x16_mmx)));
+ ::testing::Values(make_tuple(2, 2, variance4x4_mmx, 0),
+ make_tuple(3, 3, variance8x8_mmx, 0),
+ make_tuple(3, 4, variance8x16_mmx, 0),
+ make_tuple(4, 3, variance16x8_mmx, 0),
+ make_tuple(4, 4, variance16x16_mmx, 0)));
#endif
#if HAVE_SSE2
const vp8_variance_fn_t variance16x16_wmt = vp8_variance16x16_wmt;
INSTANTIATE_TEST_CASE_P(
SSE2, VP8VarianceTest,
- ::testing::Values(make_tuple(2, 2, variance4x4_wmt),
- make_tuple(3, 3, variance8x8_wmt),
- make_tuple(3, 4, variance8x16_wmt),
- make_tuple(4, 3, variance16x8_wmt),
- make_tuple(4, 4, variance16x16_wmt)));
+ ::testing::Values(make_tuple(2, 2, variance4x4_wmt, 0),
+ make_tuple(3, 3, variance8x8_wmt, 0),
+ make_tuple(3, 4, variance8x16_wmt, 0),
+ make_tuple(4, 3, variance16x8_wmt, 0),
+ make_tuple(4, 4, variance16x16_wmt, 0)));
#endif
#endif // CONFIG_VP8_ENCODER
namespace vp9 {
#if CONFIG_VP9_ENCODER
-
TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
TEST_P(VP9VarianceTest, Zero) { ZeroTest(); }
TEST_P(VP9VarianceTest, Ref) { RefTest(); }
+TEST_P(VP9VarianceTest, RefStride) { RefStrideTest(); }
TEST_P(VP9SubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(VP9SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(VP9SubpelAvgVarianceTest, Ref) { RefTest(); }
TEST_P(VP9VarianceTest, OneQuarter) { OneQuarterTest(); }
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef VarianceTest<vp9_variance_fn_t> VP9VarianceHighTest;
+typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceHighTest;
+typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t>
+ VP9SubpelAvgVarianceHighTest;
+
+TEST_P(VP9VarianceHighTest, Zero) { ZeroTest(); }
+TEST_P(VP9VarianceHighTest, Ref) { RefTest(); }
+TEST_P(VP9VarianceHighTest, RefStride) { RefStrideTest(); }
+TEST_P(VP9SubpelVarianceHighTest, Ref) { RefTest(); }
+TEST_P(VP9SubpelVarianceHighTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(VP9SubpelAvgVarianceHighTest, Ref) { RefTest(); }
+TEST_P(VP9VarianceHighTest, OneQuarter) { OneQuarterTest(); }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
const vp9_variance_fn_t variance4x4_c = vp9_variance4x4_c;
const vp9_variance_fn_t variance4x8_c = vp9_variance4x8_c;
const vp9_variance_fn_t variance8x4_c = vp9_variance8x4_c;
const vp9_variance_fn_t variance64x64_c = vp9_variance64x64_c;
INSTANTIATE_TEST_CASE_P(
C, VP9VarianceTest,
- ::testing::Values(make_tuple(2, 2, variance4x4_c),
- make_tuple(2, 3, variance4x8_c),
- make_tuple(3, 2, variance8x4_c),
- make_tuple(3, 3, variance8x8_c),
- make_tuple(3, 4, variance8x16_c),
- make_tuple(4, 3, variance16x8_c),
- make_tuple(4, 4, variance16x16_c),
- make_tuple(4, 5, variance16x32_c),
- make_tuple(5, 4, variance32x16_c),
- make_tuple(5, 5, variance32x32_c),
- make_tuple(5, 6, variance32x64_c),
- make_tuple(6, 5, variance64x32_c),
- make_tuple(6, 6, variance64x64_c)));
-
+ ::testing::Values(make_tuple(2, 2, variance4x4_c, 0),
+ make_tuple(2, 3, variance4x8_c, 0),
+ make_tuple(3, 2, variance8x4_c, 0),
+ make_tuple(3, 3, variance8x8_c, 0),
+ make_tuple(3, 4, variance8x16_c, 0),
+ make_tuple(4, 3, variance16x8_c, 0),
+ make_tuple(4, 4, variance16x16_c, 0),
+ make_tuple(4, 5, variance16x32_c, 0),
+ make_tuple(5, 4, variance32x16_c, 0),
+ make_tuple(5, 5, variance32x32_c, 0),
+ make_tuple(5, 6, variance32x64_c, 0),
+ make_tuple(6, 5, variance64x32_c, 0),
+ make_tuple(6, 6, variance64x64_c, 0)));
+#if CONFIG_VP9_HIGHBITDEPTH
+const vp9_variance_fn_t highbd_10_variance4x4_c = vp9_highbd_10_variance4x4_c;
+const vp9_variance_fn_t highbd_10_variance4x8_c = vp9_highbd_10_variance4x8_c;
+const vp9_variance_fn_t highbd_10_variance8x4_c = vp9_highbd_10_variance8x4_c;
+const vp9_variance_fn_t highbd_10_variance8x8_c = vp9_highbd_10_variance8x8_c;
+const vp9_variance_fn_t highbd_10_variance8x16_c = vp9_highbd_10_variance8x16_c;
+const vp9_variance_fn_t highbd_10_variance16x8_c = vp9_highbd_10_variance16x8_c;
+const vp9_variance_fn_t highbd_10_variance16x16_c =
+ vp9_highbd_10_variance16x16_c;
+const vp9_variance_fn_t highbd_10_variance16x32_c =
+ vp9_highbd_10_variance16x32_c;
+const vp9_variance_fn_t highbd_10_variance32x16_c =
+ vp9_highbd_10_variance32x16_c;
+const vp9_variance_fn_t highbd_10_variance32x32_c =
+ vp9_highbd_10_variance32x32_c;
+const vp9_variance_fn_t highbd_10_variance32x64_c =
+ vp9_highbd_10_variance32x64_c;
+const vp9_variance_fn_t highbd_10_variance64x32_c =
+ vp9_highbd_10_variance64x32_c;
+const vp9_variance_fn_t highbd_10_variance64x64_c =
+ vp9_highbd_10_variance64x64_c;
+const vp9_variance_fn_t highbd_12_variance4x4_c = vp9_highbd_12_variance4x4_c;
+const vp9_variance_fn_t highbd_12_variance4x8_c = vp9_highbd_12_variance4x8_c;
+const vp9_variance_fn_t highbd_12_variance8x4_c = vp9_highbd_12_variance8x4_c;
+const vp9_variance_fn_t highbd_12_variance8x8_c = vp9_highbd_12_variance8x8_c;
+const vp9_variance_fn_t highbd_12_variance8x16_c = vp9_highbd_12_variance8x16_c;
+const vp9_variance_fn_t highbd_12_variance16x8_c = vp9_highbd_12_variance16x8_c;
+const vp9_variance_fn_t highbd_12_variance16x16_c =
+ vp9_highbd_12_variance16x16_c;
+const vp9_variance_fn_t highbd_12_variance16x32_c =
+ vp9_highbd_12_variance16x32_c;
+const vp9_variance_fn_t highbd_12_variance32x16_c =
+ vp9_highbd_12_variance32x16_c;
+const vp9_variance_fn_t highbd_12_variance32x32_c =
+ vp9_highbd_12_variance32x32_c;
+const vp9_variance_fn_t highbd_12_variance32x64_c =
+ vp9_highbd_12_variance32x64_c;
+const vp9_variance_fn_t highbd_12_variance64x32_c =
+ vp9_highbd_12_variance64x32_c;
+const vp9_variance_fn_t highbd_12_variance64x64_c =
+ vp9_highbd_12_variance64x64_c;
+const vp9_variance_fn_t highbd_variance4x4_c = vp9_highbd_variance4x4_c;
+const vp9_variance_fn_t highbd_variance4x8_c = vp9_highbd_variance4x8_c;
+const vp9_variance_fn_t highbd_variance8x4_c = vp9_highbd_variance8x4_c;
+const vp9_variance_fn_t highbd_variance8x8_c = vp9_highbd_variance8x8_c;
+const vp9_variance_fn_t highbd_variance8x16_c = vp9_highbd_variance8x16_c;
+const vp9_variance_fn_t highbd_variance16x8_c = vp9_highbd_variance16x8_c;
+const vp9_variance_fn_t highbd_variance16x16_c = vp9_highbd_variance16x16_c;
+const vp9_variance_fn_t highbd_variance16x32_c = vp9_highbd_variance16x32_c;
+const vp9_variance_fn_t highbd_variance32x16_c = vp9_highbd_variance32x16_c;
+const vp9_variance_fn_t highbd_variance32x32_c = vp9_highbd_variance32x32_c;
+const vp9_variance_fn_t highbd_variance32x64_c = vp9_highbd_variance32x64_c;
+const vp9_variance_fn_t highbd_variance64x32_c = vp9_highbd_variance64x32_c;
+const vp9_variance_fn_t highbd_variance64x64_c = vp9_highbd_variance64x64_c;
+INSTANTIATE_TEST_CASE_P(
+ C, VP9VarianceHighTest,
+ ::testing::Values(make_tuple(2, 2, highbd_10_variance4x4_c, 10),
+ make_tuple(2, 3, highbd_10_variance4x8_c, 10),
+ make_tuple(3, 2, highbd_10_variance8x4_c, 10),
+ make_tuple(3, 3, highbd_10_variance8x8_c, 10),
+ make_tuple(3, 4, highbd_10_variance8x16_c, 10),
+ make_tuple(4, 3, highbd_10_variance16x8_c, 10),
+ make_tuple(4, 4, highbd_10_variance16x16_c, 10),
+ make_tuple(4, 5, highbd_10_variance16x32_c, 10),
+ make_tuple(5, 4, highbd_10_variance32x16_c, 10),
+ make_tuple(5, 5, highbd_10_variance32x32_c, 10),
+ make_tuple(5, 6, highbd_10_variance32x64_c, 10),
+ make_tuple(6, 5, highbd_10_variance64x32_c, 10),
+ make_tuple(6, 6, highbd_10_variance64x64_c, 10),
+ make_tuple(2, 2, highbd_12_variance4x4_c, 12),
+ make_tuple(2, 3, highbd_12_variance4x8_c, 12),
+ make_tuple(3, 2, highbd_12_variance8x4_c, 12),
+ make_tuple(3, 3, highbd_12_variance8x8_c, 12),
+ make_tuple(3, 4, highbd_12_variance8x16_c, 12),
+ make_tuple(4, 3, highbd_12_variance16x8_c, 12),
+ make_tuple(4, 4, highbd_12_variance16x16_c, 12),
+ make_tuple(4, 5, highbd_12_variance16x32_c, 12),
+ make_tuple(5, 4, highbd_12_variance32x16_c, 12),
+ make_tuple(5, 5, highbd_12_variance32x32_c, 12),
+ make_tuple(5, 6, highbd_12_variance32x64_c, 12),
+ make_tuple(6, 5, highbd_12_variance64x32_c, 12),
+ make_tuple(6, 6, highbd_12_variance64x64_c, 12),
+ make_tuple(2, 2, highbd_variance4x4_c, 8),
+ make_tuple(2, 3, highbd_variance4x8_c, 8),
+ make_tuple(3, 2, highbd_variance8x4_c, 8),
+ make_tuple(3, 3, highbd_variance8x8_c, 8),
+ make_tuple(3, 4, highbd_variance8x16_c, 8),
+ make_tuple(4, 3, highbd_variance16x8_c, 8),
+ make_tuple(4, 4, highbd_variance16x16_c, 8),
+ make_tuple(4, 5, highbd_variance16x32_c, 8),
+ make_tuple(5, 4, highbd_variance32x16_c, 8),
+ make_tuple(5, 5, highbd_variance32x32_c, 8),
+ make_tuple(5, 6, highbd_variance32x64_c, 8),
+ make_tuple(6, 5, highbd_variance64x32_c, 8),
+ make_tuple(6, 6, highbd_variance64x64_c, 8)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
const vp9_subpixvariance_fn_t subpel_variance4x4_c =
vp9_sub_pixel_variance4x4_c;
const vp9_subpixvariance_fn_t subpel_variance4x8_c =
vp9_sub_pixel_variance64x64_c;
INSTANTIATE_TEST_CASE_P(
C, VP9SubpelVarianceTest,
- ::testing::Values(make_tuple(2, 2, subpel_variance4x4_c),
- make_tuple(2, 3, subpel_variance4x8_c),
- make_tuple(3, 2, subpel_variance8x4_c),
- make_tuple(3, 3, subpel_variance8x8_c),
- make_tuple(3, 4, subpel_variance8x16_c),
- make_tuple(4, 3, subpel_variance16x8_c),
- make_tuple(4, 4, subpel_variance16x16_c),
- make_tuple(4, 5, subpel_variance16x32_c),
- make_tuple(5, 4, subpel_variance32x16_c),
- make_tuple(5, 5, subpel_variance32x32_c),
- make_tuple(5, 6, subpel_variance32x64_c),
- make_tuple(6, 5, subpel_variance64x32_c),
- make_tuple(6, 6, subpel_variance64x64_c)));
-
+ ::testing::Values(make_tuple(2, 2, subpel_variance4x4_c, 0),
+ make_tuple(2, 3, subpel_variance4x8_c, 0),
+ make_tuple(3, 2, subpel_variance8x4_c, 0),
+ make_tuple(3, 3, subpel_variance8x8_c, 0),
+ make_tuple(3, 4, subpel_variance8x16_c, 0),
+ make_tuple(4, 3, subpel_variance16x8_c, 0),
+ make_tuple(4, 4, subpel_variance16x16_c, 0),
+ make_tuple(4, 5, subpel_variance16x32_c, 0),
+ make_tuple(5, 4, subpel_variance32x16_c, 0),
+ make_tuple(5, 5, subpel_variance32x32_c, 0),
+ make_tuple(5, 6, subpel_variance32x64_c, 0),
+ make_tuple(6, 5, subpel_variance64x32_c, 0),
+ make_tuple(6, 6, subpel_variance64x64_c, 0)));
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_c =
vp9_sub_pixel_avg_variance4x4_c;
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_c =
vp9_sub_pixel_avg_variance64x64_c;
// Runs VP9SubpelAvgVarianceTest against the *_c sub-pixel averaging variance
// functions. Each tuple is (log2 of block width, log2 of block height,
// function under test, trailing int); e.g. (6, 5) is paired with 64x32.
// The trailing 0 only appears in the '+' lines. Presumably it is a bit-depth
// argument and 0 selects the non-high-bit-depth path -- TODO confirm against
// the fixture's SetUp.
INSTANTIATE_TEST_CASE_P(
 C, VP9SubpelAvgVarianceTest,
- ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_c),
- make_tuple(2, 3, subpel_avg_variance4x8_c),
- make_tuple(3, 2, subpel_avg_variance8x4_c),
- make_tuple(3, 3, subpel_avg_variance8x8_c),
- make_tuple(3, 4, subpel_avg_variance8x16_c),
- make_tuple(4, 3, subpel_avg_variance16x8_c),
- make_tuple(4, 4, subpel_avg_variance16x16_c),
- make_tuple(4, 5, subpel_avg_variance16x32_c),
- make_tuple(5, 4, subpel_avg_variance32x16_c),
- make_tuple(5, 5, subpel_avg_variance32x32_c),
- make_tuple(5, 6, subpel_avg_variance32x64_c),
- make_tuple(6, 5, subpel_avg_variance64x32_c),
- make_tuple(6, 6, subpel_avg_variance64x64_c)));
+ ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_c, 0),
+ make_tuple(2, 3, subpel_avg_variance4x8_c, 0),
+ make_tuple(3, 2, subpel_avg_variance8x4_c, 0),
+ make_tuple(3, 3, subpel_avg_variance8x8_c, 0),
+ make_tuple(3, 4, subpel_avg_variance8x16_c, 0),
+ make_tuple(4, 3, subpel_avg_variance16x8_c, 0),
+ make_tuple(4, 4, subpel_avg_variance16x16_c, 0),
+ make_tuple(4, 5, subpel_avg_variance16x32_c, 0),
+ make_tuple(5, 4, subpel_avg_variance32x16_c, 0),
+ make_tuple(5, 5, subpel_avg_variance32x32_c, 0),
+ make_tuple(5, 6, subpel_avg_variance32x64_c, 0),
+ make_tuple(6, 5, subpel_avg_variance64x32_c, 0),
+ make_tuple(6, 6, subpel_avg_variance64x64_c, 0)));
+#if CONFIG_VP9_HIGHBITDEPTH
// Short aliases for the C high-bit-depth sub-pixel variance functions, one per
// block size from 4x4 to 64x64, in three families: highbd_10_*, highbd_12_*
// and unprefixed highbd_*. They are the function arguments of the
// VP9SubpelVarianceHighTest tuple list that follows these declarations.
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x4_c =
+ vp9_highbd_10_sub_pixel_variance4x4_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x8_c =
+ vp9_highbd_10_sub_pixel_variance4x8_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_c =
+ vp9_highbd_10_sub_pixel_variance8x4_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_c =
+ vp9_highbd_10_sub_pixel_variance8x8_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_c =
+ vp9_highbd_10_sub_pixel_variance8x16_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_c =
+ vp9_highbd_10_sub_pixel_variance16x8_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_c =
+ vp9_highbd_10_sub_pixel_variance16x16_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_c =
+ vp9_highbd_10_sub_pixel_variance16x32_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_c =
+ vp9_highbd_10_sub_pixel_variance32x16_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_c =
+ vp9_highbd_10_sub_pixel_variance32x32_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_c =
+ vp9_highbd_10_sub_pixel_variance32x64_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_c =
+ vp9_highbd_10_sub_pixel_variance64x32_c;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_c =
+ vp9_highbd_10_sub_pixel_variance64x64_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x4_c =
+ vp9_highbd_12_sub_pixel_variance4x4_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x8_c =
+ vp9_highbd_12_sub_pixel_variance4x8_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_c =
+ vp9_highbd_12_sub_pixel_variance8x4_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_c =
+ vp9_highbd_12_sub_pixel_variance8x8_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_c =
+ vp9_highbd_12_sub_pixel_variance8x16_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_c =
+ vp9_highbd_12_sub_pixel_variance16x8_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_c =
+ vp9_highbd_12_sub_pixel_variance16x16_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_c =
+ vp9_highbd_12_sub_pixel_variance16x32_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_c =
+ vp9_highbd_12_sub_pixel_variance32x16_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_c =
+ vp9_highbd_12_sub_pixel_variance32x32_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_c =
+ vp9_highbd_12_sub_pixel_variance32x64_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_c =
+ vp9_highbd_12_sub_pixel_variance64x32_c;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_c =
+ vp9_highbd_12_sub_pixel_variance64x64_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance4x4_c =
+ vp9_highbd_sub_pixel_variance4x4_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance4x8_c =
+ vp9_highbd_sub_pixel_variance4x8_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_c =
+ vp9_highbd_sub_pixel_variance8x4_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_c =
+ vp9_highbd_sub_pixel_variance8x8_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_c =
+ vp9_highbd_sub_pixel_variance8x16_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_c =
+ vp9_highbd_sub_pixel_variance16x8_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_c =
+ vp9_highbd_sub_pixel_variance16x16_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_c =
+ vp9_highbd_sub_pixel_variance16x32_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_c =
+ vp9_highbd_sub_pixel_variance32x16_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_c =
+ vp9_highbd_sub_pixel_variance32x32_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_c =
+ vp9_highbd_sub_pixel_variance32x64_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_c =
+ vp9_highbd_sub_pixel_variance64x32_c;
+const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_c =
+ vp9_highbd_sub_pixel_variance64x64_c;
// Runs VP9SubpelVarianceHighTest against the C high-bit-depth sub-pixel
// variance functions. Each tuple is (log2 of block width, log2 of block
// height, function under test, bit depth): highbd_10_* entries pass 10,
// highbd_12_* entries pass 12 and the unprefixed highbd_* entries pass 8.
// All 13 block sizes from 4x4 to 64x64 are listed for each of the three.
+INSTANTIATE_TEST_CASE_P(
+ C, VP9SubpelVarianceHighTest,
+ ::testing::Values(make_tuple(2, 2, highbd_10_subpel_variance4x4_c, 10),
+ make_tuple(2, 3, highbd_10_subpel_variance4x8_c, 10),
+ make_tuple(3, 2, highbd_10_subpel_variance8x4_c, 10),
+ make_tuple(3, 3, highbd_10_subpel_variance8x8_c, 10),
+ make_tuple(3, 4, highbd_10_subpel_variance8x16_c, 10),
+ make_tuple(4, 3, highbd_10_subpel_variance16x8_c, 10),
+ make_tuple(4, 4, highbd_10_subpel_variance16x16_c, 10),
+ make_tuple(4, 5, highbd_10_subpel_variance16x32_c, 10),
+ make_tuple(5, 4, highbd_10_subpel_variance32x16_c, 10),
+ make_tuple(5, 5, highbd_10_subpel_variance32x32_c, 10),
+ make_tuple(5, 6, highbd_10_subpel_variance32x64_c, 10),
+ make_tuple(6, 5, highbd_10_subpel_variance64x32_c, 10),
+ make_tuple(6, 6, highbd_10_subpel_variance64x64_c, 10),
+ make_tuple(2, 2, highbd_12_subpel_variance4x4_c, 12),
+ make_tuple(2, 3, highbd_12_subpel_variance4x8_c, 12),
+ make_tuple(3, 2, highbd_12_subpel_variance8x4_c, 12),
+ make_tuple(3, 3, highbd_12_subpel_variance8x8_c, 12),
+ make_tuple(3, 4, highbd_12_subpel_variance8x16_c, 12),
+ make_tuple(4, 3, highbd_12_subpel_variance16x8_c, 12),
+ make_tuple(4, 4, highbd_12_subpel_variance16x16_c, 12),
+ make_tuple(4, 5, highbd_12_subpel_variance16x32_c, 12),
+ make_tuple(5, 4, highbd_12_subpel_variance32x16_c, 12),
+ make_tuple(5, 5, highbd_12_subpel_variance32x32_c, 12),
+ make_tuple(5, 6, highbd_12_subpel_variance32x64_c, 12),
+ make_tuple(6, 5, highbd_12_subpel_variance64x32_c, 12),
+ make_tuple(6, 6, highbd_12_subpel_variance64x64_c, 12),
+ make_tuple(2, 2, highbd_subpel_variance4x4_c, 8),
+ make_tuple(2, 3, highbd_subpel_variance4x8_c, 8),
+ make_tuple(3, 2, highbd_subpel_variance8x4_c, 8),
+ make_tuple(3, 3, highbd_subpel_variance8x8_c, 8),
+ make_tuple(3, 4, highbd_subpel_variance8x16_c, 8),
+ make_tuple(4, 3, highbd_subpel_variance16x8_c, 8),
+ make_tuple(4, 4, highbd_subpel_variance16x16_c, 8),
+ make_tuple(4, 5, highbd_subpel_variance16x32_c, 8),
+ make_tuple(5, 4, highbd_subpel_variance32x16_c, 8),
+ make_tuple(5, 5, highbd_subpel_variance32x32_c, 8),
+ make_tuple(5, 6, highbd_subpel_variance32x64_c, 8),
+ make_tuple(6, 5, highbd_subpel_variance64x32_c, 8),
+ make_tuple(6, 6, highbd_subpel_variance64x64_c, 8)));
// Short aliases for the C high-bit-depth sub-pixel averaging variance
// functions, one per block size from 4x4 to 64x64, in three families:
// highbd_10_*, highbd_12_* and unprefixed highbd_*. They are the function
// arguments of the VP9SubpelAvgVarianceHighTest tuple list that follows.
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance4x4_c =
+ vp9_highbd_10_sub_pixel_avg_variance4x4_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance4x8_c =
+ vp9_highbd_10_sub_pixel_avg_variance4x8_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x4_c =
+ vp9_highbd_10_sub_pixel_avg_variance8x4_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x8_c =
+ vp9_highbd_10_sub_pixel_avg_variance8x8_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x16_c =
+ vp9_highbd_10_sub_pixel_avg_variance8x16_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x8_c =
+ vp9_highbd_10_sub_pixel_avg_variance16x8_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x16_c =
+ vp9_highbd_10_sub_pixel_avg_variance16x16_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x32_c =
+ vp9_highbd_10_sub_pixel_avg_variance16x32_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x16_c =
+ vp9_highbd_10_sub_pixel_avg_variance32x16_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x32_c =
+ vp9_highbd_10_sub_pixel_avg_variance32x32_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x64_c =
+ vp9_highbd_10_sub_pixel_avg_variance32x64_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance64x32_c =
+ vp9_highbd_10_sub_pixel_avg_variance64x32_c;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance64x64_c =
+ vp9_highbd_10_sub_pixel_avg_variance64x64_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance4x4_c =
+ vp9_highbd_12_sub_pixel_avg_variance4x4_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance4x8_c =
+ vp9_highbd_12_sub_pixel_avg_variance4x8_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x4_c =
+ vp9_highbd_12_sub_pixel_avg_variance8x4_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x8_c =
+ vp9_highbd_12_sub_pixel_avg_variance8x8_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x16_c =
+ vp9_highbd_12_sub_pixel_avg_variance8x16_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x8_c =
+ vp9_highbd_12_sub_pixel_avg_variance16x8_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x16_c =
+ vp9_highbd_12_sub_pixel_avg_variance16x16_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x32_c =
+ vp9_highbd_12_sub_pixel_avg_variance16x32_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x16_c =
+ vp9_highbd_12_sub_pixel_avg_variance32x16_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x32_c =
+ vp9_highbd_12_sub_pixel_avg_variance32x32_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x64_c =
+ vp9_highbd_12_sub_pixel_avg_variance32x64_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance64x32_c =
+ vp9_highbd_12_sub_pixel_avg_variance64x32_c;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance64x64_c =
+ vp9_highbd_12_sub_pixel_avg_variance64x64_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance4x4_c =
+ vp9_highbd_sub_pixel_avg_variance4x4_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance4x8_c =
+ vp9_highbd_sub_pixel_avg_variance4x8_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x4_c =
+ vp9_highbd_sub_pixel_avg_variance8x4_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x8_c =
+ vp9_highbd_sub_pixel_avg_variance8x8_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x16_c =
+ vp9_highbd_sub_pixel_avg_variance8x16_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x8_c =
+ vp9_highbd_sub_pixel_avg_variance16x8_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x16_c =
+ vp9_highbd_sub_pixel_avg_variance16x16_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x32_c =
+ vp9_highbd_sub_pixel_avg_variance16x32_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x16_c =
+ vp9_highbd_sub_pixel_avg_variance32x16_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x32_c =
+ vp9_highbd_sub_pixel_avg_variance32x32_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x64_c =
+ vp9_highbd_sub_pixel_avg_variance32x64_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance64x32_c =
+ vp9_highbd_sub_pixel_avg_variance64x32_c;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance64x64_c =
+ vp9_highbd_sub_pixel_avg_variance64x64_c;
// Runs VP9SubpelAvgVarianceHighTest against the C high-bit-depth sub-pixel
// averaging variance functions. Each tuple is (log2 of block width, log2 of
// block height, function under test, bit depth): highbd_10_* entries pass 10,
// highbd_12_* entries pass 12 and the unprefixed highbd_* entries pass 8.
// All 13 block sizes from 4x4 to 64x64 are listed for each of the three.
+INSTANTIATE_TEST_CASE_P(
+ C, VP9SubpelAvgVarianceHighTest,
+ ::testing::Values(
+ make_tuple(2, 2, highbd_10_subpel_avg_variance4x4_c, 10),
+ make_tuple(2, 3, highbd_10_subpel_avg_variance4x8_c, 10),
+ make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_c, 10),
+ make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_c, 10),
+ make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_c, 10),
+ make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_c, 10),
+ make_tuple(4, 4, highbd_10_subpel_avg_variance16x16_c, 10),
+ make_tuple(4, 5, highbd_10_subpel_avg_variance16x32_c, 10),
+ make_tuple(5, 4, highbd_10_subpel_avg_variance32x16_c, 10),
+ make_tuple(5, 5, highbd_10_subpel_avg_variance32x32_c, 10),
+ make_tuple(5, 6, highbd_10_subpel_avg_variance32x64_c, 10),
+ make_tuple(6, 5, highbd_10_subpel_avg_variance64x32_c, 10),
+ make_tuple(6, 6, highbd_10_subpel_avg_variance64x64_c, 10),
+ make_tuple(2, 2, highbd_12_subpel_avg_variance4x4_c, 12),
+ make_tuple(2, 3, highbd_12_subpel_avg_variance4x8_c, 12),
+ make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_c, 12),
+ make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_c, 12),
+ make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_c, 12),
+ make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_c, 12),
+ make_tuple(4, 4, highbd_12_subpel_avg_variance16x16_c, 12),
+ make_tuple(4, 5, highbd_12_subpel_avg_variance16x32_c, 12),
+ make_tuple(5, 4, highbd_12_subpel_avg_variance32x16_c, 12),
+ make_tuple(5, 5, highbd_12_subpel_avg_variance32x32_c, 12),
+ make_tuple(5, 6, highbd_12_subpel_avg_variance32x64_c, 12),
+ make_tuple(6, 5, highbd_12_subpel_avg_variance64x32_c, 12),
+ make_tuple(6, 6, highbd_12_subpel_avg_variance64x64_c, 12),
+ make_tuple(2, 2, highbd_subpel_avg_variance4x4_c, 8),
+ make_tuple(2, 3, highbd_subpel_avg_variance4x8_c, 8),
+ make_tuple(3, 2, highbd_subpel_avg_variance8x4_c, 8),
+ make_tuple(3, 3, highbd_subpel_avg_variance8x8_c, 8),
+ make_tuple(3, 4, highbd_subpel_avg_variance8x16_c, 8),
+ make_tuple(4, 3, highbd_subpel_avg_variance16x8_c, 8),
+ make_tuple(4, 4, highbd_subpel_avg_variance16x16_c, 8),
+ make_tuple(4, 5, highbd_subpel_avg_variance16x32_c, 8),
+ make_tuple(5, 4, highbd_subpel_avg_variance32x16_c, 8),
+ make_tuple(5, 5, highbd_subpel_avg_variance32x32_c, 8),
+ make_tuple(5, 6, highbd_subpel_avg_variance32x64_c, 8),
+ make_tuple(6, 5, highbd_subpel_avg_variance64x32_c, 8),
+ make_tuple(6, 6, highbd_subpel_avg_variance64x64_c, 8)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2
#if CONFIG_USE_X86INC
-
// Runs SumOfSquaresTest with vp9_get_mb_ss_sse2 as its single parameter.
INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
 ::testing::Values(vp9_get_mb_ss_sse2));
const vp9_variance_fn_t variance64x64_sse2 = vp9_variance64x64_sse2;
// Runs VP9VarianceTest against the *_sse2 variance functions. Each tuple is
// (log2 of block width, log2 of block height, function under test, trailing
// int); e.g. (5, 6) is paired with the 32x64 function.
// The trailing 0 only appears in the '+' lines. Presumably it is a bit-depth
// argument and 0 selects the non-high-bit-depth path -- TODO confirm against
// the fixture's SetUp.
INSTANTIATE_TEST_CASE_P(
 SSE2, VP9VarianceTest,
- ::testing::Values(make_tuple(2, 2, variance4x4_sse2),
- make_tuple(2, 3, variance4x8_sse2),
- make_tuple(3, 2, variance8x4_sse2),
- make_tuple(3, 3, variance8x8_sse2),
- make_tuple(3, 4, variance8x16_sse2),
- make_tuple(4, 3, variance16x8_sse2),
- make_tuple(4, 4, variance16x16_sse2),
- make_tuple(4, 5, variance16x32_sse2),
- make_tuple(5, 4, variance32x16_sse2),
- make_tuple(5, 5, variance32x32_sse2),
- make_tuple(5, 6, variance32x64_sse2),
- make_tuple(6, 5, variance64x32_sse2),
- make_tuple(6, 6, variance64x64_sse2)));
-
+ ::testing::Values(make_tuple(2, 2, variance4x4_sse2, 0),
+ make_tuple(2, 3, variance4x8_sse2, 0),
+ make_tuple(3, 2, variance8x4_sse2, 0),
+ make_tuple(3, 3, variance8x8_sse2, 0),
+ make_tuple(3, 4, variance8x16_sse2, 0),
+ make_tuple(4, 3, variance16x8_sse2, 0),
+ make_tuple(4, 4, variance16x16_sse2, 0),
+ make_tuple(4, 5, variance16x32_sse2, 0),
+ make_tuple(5, 4, variance32x16_sse2, 0),
+ make_tuple(5, 5, variance32x32_sse2, 0),
+ make_tuple(5, 6, variance32x64_sse2, 0),
+ make_tuple(6, 5, variance64x32_sse2, 0),
+ make_tuple(6, 6, variance64x64_sse2, 0)));
const vp9_subpixvariance_fn_t subpel_variance4x4_sse =
vp9_sub_pixel_variance4x4_sse;
const vp9_subpixvariance_fn_t subpel_variance4x8_sse =
vp9_sub_pixel_variance64x64_sse2;
// Runs VP9SubpelVarianceTest against the x86 SIMD sub-pixel variance
// functions. Each tuple is (log2 of block width, log2 of block height,
// function under test, trailing int). The 4x4 and 4x8 entries use aliases
// with an _sse suffix; every other size uses an _sse2 alias.
// The trailing 0 only appears in the '+' lines. Presumably it is a bit-depth
// argument and 0 selects the non-high-bit-depth path -- TODO confirm against
// the fixture's SetUp.
INSTANTIATE_TEST_CASE_P(
 SSE2, VP9SubpelVarianceTest,
- ::testing::Values(make_tuple(2, 2, subpel_variance4x4_sse),
- make_tuple(2, 3, subpel_variance4x8_sse),
- make_tuple(3, 2, subpel_variance8x4_sse2),
- make_tuple(3, 3, subpel_variance8x8_sse2),
- make_tuple(3, 4, subpel_variance8x16_sse2),
- make_tuple(4, 3, subpel_variance16x8_sse2),
- make_tuple(4, 4, subpel_variance16x16_sse2),
- make_tuple(4, 5, subpel_variance16x32_sse2),
- make_tuple(5, 4, subpel_variance32x16_sse2),
- make_tuple(5, 5, subpel_variance32x32_sse2),
- make_tuple(5, 6, subpel_variance32x64_sse2),
- make_tuple(6, 5, subpel_variance64x32_sse2),
- make_tuple(6, 6, subpel_variance64x64_sse2)));
-
+ ::testing::Values(make_tuple(2, 2, subpel_variance4x4_sse, 0),
+ make_tuple(2, 3, subpel_variance4x8_sse, 0),
+ make_tuple(3, 2, subpel_variance8x4_sse2, 0),
+ make_tuple(3, 3, subpel_variance8x8_sse2, 0),
+ make_tuple(3, 4, subpel_variance8x16_sse2, 0),
+ make_tuple(4, 3, subpel_variance16x8_sse2, 0),
+ make_tuple(4, 4, subpel_variance16x16_sse2, 0),
+ make_tuple(4, 5, subpel_variance16x32_sse2, 0),
+ make_tuple(5, 4, subpel_variance32x16_sse2, 0),
+ make_tuple(5, 5, subpel_variance32x32_sse2, 0),
+ make_tuple(5, 6, subpel_variance32x64_sse2, 0),
+ make_tuple(6, 5, subpel_variance64x32_sse2, 0),
+ make_tuple(6, 6, subpel_variance64x64_sse2, 0)));
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_sse =
vp9_sub_pixel_avg_variance4x4_sse;
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_sse =
vp9_sub_pixel_avg_variance64x64_sse2;
// Runs VP9SubpelAvgVarianceTest against the x86 SIMD sub-pixel averaging
// variance functions. Each tuple is (log2 of block width, log2 of block
// height, function under test, trailing int). The 4x4 and 4x8 entries use
// aliases with an _sse suffix; every other size uses an _sse2 alias.
// The trailing 0 only appears in the '+' lines. Presumably it is a bit-depth
// argument and 0 selects the non-high-bit-depth path -- TODO confirm against
// the fixture's SetUp.
INSTANTIATE_TEST_CASE_P(
 SSE2, VP9SubpelAvgVarianceTest,
- ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_sse),
- make_tuple(2, 3, subpel_avg_variance4x8_sse),
- make_tuple(3, 2, subpel_avg_variance8x4_sse2),
- make_tuple(3, 3, subpel_avg_variance8x8_sse2),
- make_tuple(3, 4, subpel_avg_variance8x16_sse2),
- make_tuple(4, 3, subpel_avg_variance16x8_sse2),
- make_tuple(4, 4, subpel_avg_variance16x16_sse2),
- make_tuple(4, 5, subpel_avg_variance16x32_sse2),
- make_tuple(5, 4, subpel_avg_variance32x16_sse2),
- make_tuple(5, 5, subpel_avg_variance32x32_sse2),
- make_tuple(5, 6, subpel_avg_variance32x64_sse2),
- make_tuple(6, 5, subpel_avg_variance64x32_sse2),
- make_tuple(6, 6, subpel_avg_variance64x64_sse2)));
-#endif
-#endif
-
+ ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_sse, 0),
+ make_tuple(2, 3, subpel_avg_variance4x8_sse, 0),
+ make_tuple(3, 2, subpel_avg_variance8x4_sse2, 0),
+ make_tuple(3, 3, subpel_avg_variance8x8_sse2, 0),
+ make_tuple(3, 4, subpel_avg_variance8x16_sse2, 0),
+ make_tuple(4, 3, subpel_avg_variance16x8_sse2, 0),
+ make_tuple(4, 4, subpel_avg_variance16x16_sse2, 0),
+ make_tuple(4, 5, subpel_avg_variance16x32_sse2, 0),
+ make_tuple(5, 4, subpel_avg_variance32x16_sse2, 0),
+ make_tuple(5, 5, subpel_avg_variance32x32_sse2, 0),
+ make_tuple(5, 6, subpel_avg_variance32x64_sse2, 0),
+ make_tuple(6, 5, subpel_avg_variance64x32_sse2, 0),
+ make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0)));
+#if CONFIG_VP9_HIGHBITDEPTH
// Short aliases for the SSE2 high-bit-depth variance functions. For each
// block size from 8x8 to 64x64 there are three aliases: unprefixed highbd_*,
// highbd_10_* and highbd_12_*. No 4x4, 4x8 or 8x4 aliases are declared here.
// They are the function arguments of the SSE2 VP9VarianceHighTest tuple list
// that follows these declarations.
+const vp9_variance_fn_t highbd_variance8x8_sse2 = vp9_highbd_variance8x8_sse2;
+const vp9_variance_fn_t highbd_10_variance8x8_sse2 =
+ vp9_highbd_10_variance8x8_sse2;
+const vp9_variance_fn_t highbd_12_variance8x8_sse2 =
+ vp9_highbd_12_variance8x8_sse2;
+const vp9_variance_fn_t highbd_variance8x16_sse2 = vp9_highbd_variance8x16_sse2;
+const vp9_variance_fn_t highbd_10_variance8x16_sse2 =
+ vp9_highbd_10_variance8x16_sse2;
+const vp9_variance_fn_t highbd_12_variance8x16_sse2 =
+ vp9_highbd_12_variance8x16_sse2;
+const vp9_variance_fn_t highbd_variance16x8_sse2 =
+ vp9_highbd_variance16x8_sse2;
+const vp9_variance_fn_t highbd_10_variance16x8_sse2 =
+ vp9_highbd_10_variance16x8_sse2;
+const vp9_variance_fn_t highbd_12_variance16x8_sse2 =
+ vp9_highbd_12_variance16x8_sse2;
+const vp9_variance_fn_t highbd_variance16x16_sse2 =
+ vp9_highbd_variance16x16_sse2;
+const vp9_variance_fn_t highbd_10_variance16x16_sse2 =
+ vp9_highbd_10_variance16x16_sse2;
+const vp9_variance_fn_t highbd_12_variance16x16_sse2 =
+ vp9_highbd_12_variance16x16_sse2;
+const vp9_variance_fn_t highbd_variance16x32_sse2 =
+ vp9_highbd_variance16x32_sse2;
+const vp9_variance_fn_t highbd_10_variance16x32_sse2 =
+ vp9_highbd_10_variance16x32_sse2;
+const vp9_variance_fn_t highbd_12_variance16x32_sse2 =
+ vp9_highbd_12_variance16x32_sse2;
+const vp9_variance_fn_t highbd_variance32x16_sse2 =
+ vp9_highbd_variance32x16_sse2;
+const vp9_variance_fn_t highbd_10_variance32x16_sse2 =
+ vp9_highbd_10_variance32x16_sse2;
+const vp9_variance_fn_t highbd_12_variance32x16_sse2 =
+ vp9_highbd_12_variance32x16_sse2;
+const vp9_variance_fn_t highbd_variance32x32_sse2 =
+ vp9_highbd_variance32x32_sse2;
+const vp9_variance_fn_t highbd_10_variance32x32_sse2 =
+ vp9_highbd_10_variance32x32_sse2;
+const vp9_variance_fn_t highbd_12_variance32x32_sse2 =
+ vp9_highbd_12_variance32x32_sse2;
+const vp9_variance_fn_t highbd_variance32x64_sse2 =
+ vp9_highbd_variance32x64_sse2;
+const vp9_variance_fn_t highbd_10_variance32x64_sse2 =
+ vp9_highbd_10_variance32x64_sse2;
+const vp9_variance_fn_t highbd_12_variance32x64_sse2 =
+ vp9_highbd_12_variance32x64_sse2;
+const vp9_variance_fn_t highbd_variance64x32_sse2 =
+ vp9_highbd_variance64x32_sse2;
+const vp9_variance_fn_t highbd_10_variance64x32_sse2 =
+ vp9_highbd_10_variance64x32_sse2;
+const vp9_variance_fn_t highbd_12_variance64x32_sse2 =
+ vp9_highbd_12_variance64x32_sse2;
+const vp9_variance_fn_t highbd_variance64x64_sse2 =
+ vp9_highbd_variance64x64_sse2;
+const vp9_variance_fn_t highbd_10_variance64x64_sse2 =
+ vp9_highbd_10_variance64x64_sse2;
+const vp9_variance_fn_t highbd_12_variance64x64_sse2 =
+ vp9_highbd_12_variance64x64_sse2;
// Runs VP9VarianceHighTest against the SSE2 high-bit-depth variance
// functions. Each tuple is (log2 of block width, log2 of block height,
// function under test, bit depth): highbd_10_* entries pass 10, highbd_12_*
// entries pass 12 and the unprefixed highbd_* entries pass 8.
// The list covers 8x8 through 64x64; no 4x4, 4x8 or 8x4 entries appear.
+INSTANTIATE_TEST_CASE_P(
+ SSE2, VP9VarianceHighTest,
+ ::testing::Values(make_tuple(3, 3, highbd_10_variance8x8_sse2, 10),
+ make_tuple(3, 4, highbd_10_variance8x16_sse2, 10),
+ make_tuple(4, 3, highbd_10_variance16x8_sse2, 10),
+ make_tuple(4, 4, highbd_10_variance16x16_sse2, 10),
+ make_tuple(4, 5, highbd_10_variance16x32_sse2, 10),
+ make_tuple(5, 4, highbd_10_variance32x16_sse2, 10),
+ make_tuple(5, 5, highbd_10_variance32x32_sse2, 10),
+ make_tuple(5, 6, highbd_10_variance32x64_sse2, 10),
+ make_tuple(6, 5, highbd_10_variance64x32_sse2, 10),
+ make_tuple(6, 6, highbd_10_variance64x64_sse2, 10),
+ make_tuple(3, 3, highbd_12_variance8x8_sse2, 12),
+ make_tuple(3, 4, highbd_12_variance8x16_sse2, 12),
+ make_tuple(4, 3, highbd_12_variance16x8_sse2, 12),
+ make_tuple(4, 4, highbd_12_variance16x16_sse2, 12),
+ make_tuple(4, 5, highbd_12_variance16x32_sse2, 12),
+ make_tuple(5, 4, highbd_12_variance32x16_sse2, 12),
+ make_tuple(5, 5, highbd_12_variance32x32_sse2, 12),
+ make_tuple(5, 6, highbd_12_variance32x64_sse2, 12),
+ make_tuple(6, 5, highbd_12_variance64x32_sse2, 12),
+ make_tuple(6, 6, highbd_12_variance64x64_sse2, 12),
+ make_tuple(3, 3, highbd_variance8x8_sse2, 8),
+ make_tuple(3, 4, highbd_variance8x16_sse2, 8),
+ make_tuple(4, 3, highbd_variance16x8_sse2, 8),
+ make_tuple(4, 4, highbd_variance16x16_sse2, 8),
+ make_tuple(4, 5, highbd_variance16x32_sse2, 8),
+ make_tuple(5, 4, highbd_variance32x16_sse2, 8),
+ make_tuple(5, 5, highbd_variance32x32_sse2, 8),
+ make_tuple(5, 6, highbd_variance32x64_sse2, 8),
+ make_tuple(6, 5, highbd_variance64x32_sse2, 8),
+ make_tuple(6, 6, highbd_variance64x64_sse2, 8)));
// Short aliases for the SSE2 high-bit-depth sub-pixel variance functions, one
// per block size from 8x4 to 64x64, in three families: unprefixed highbd_*,
// highbd_10_* and highbd_12_*. No 4x4 or 4x8 aliases are declared here.
// They are the function arguments of the SSE2 VP9SubpelVarianceHighTest tuple
// list that follows these declarations.
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_sse2 =
+ vp9_highbd_sub_pixel_variance8x4_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_sse2 =
+ vp9_highbd_sub_pixel_variance8x8_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_sse2 =
+ vp9_highbd_sub_pixel_variance8x16_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_sse2 =
+ vp9_highbd_sub_pixel_variance16x8_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_sse2 =
+ vp9_highbd_sub_pixel_variance16x16_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_sse2 =
+ vp9_highbd_sub_pixel_variance16x32_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_sse2 =
+ vp9_highbd_sub_pixel_variance32x16_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_sse2 =
+ vp9_highbd_sub_pixel_variance32x32_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_sse2 =
+ vp9_highbd_sub_pixel_variance32x64_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_sse2 =
+ vp9_highbd_sub_pixel_variance64x32_sse2;
+const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_sse2 =
+ vp9_highbd_sub_pixel_variance64x64_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_sse2 =
+ vp9_highbd_10_sub_pixel_variance8x4_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_sse2 =
+ vp9_highbd_10_sub_pixel_variance8x8_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_sse2 =
+ vp9_highbd_10_sub_pixel_variance8x16_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_sse2 =
+ vp9_highbd_10_sub_pixel_variance16x8_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_sse2 =
+ vp9_highbd_10_sub_pixel_variance16x16_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_sse2 =
+ vp9_highbd_10_sub_pixel_variance16x32_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_sse2 =
+ vp9_highbd_10_sub_pixel_variance32x16_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_sse2 =
+ vp9_highbd_10_sub_pixel_variance32x32_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_sse2 =
+ vp9_highbd_10_sub_pixel_variance32x64_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_sse2 =
+ vp9_highbd_10_sub_pixel_variance64x32_sse2;
+const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_sse2 =
+ vp9_highbd_10_sub_pixel_variance64x64_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_sse2 =
+ vp9_highbd_12_sub_pixel_variance8x4_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_sse2 =
+ vp9_highbd_12_sub_pixel_variance8x8_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_sse2 =
+ vp9_highbd_12_sub_pixel_variance8x16_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_sse2 =
+ vp9_highbd_12_sub_pixel_variance16x8_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_sse2 =
+ vp9_highbd_12_sub_pixel_variance16x16_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_sse2 =
+ vp9_highbd_12_sub_pixel_variance16x32_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_sse2 =
+ vp9_highbd_12_sub_pixel_variance32x16_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_sse2 =
+ vp9_highbd_12_sub_pixel_variance32x32_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_sse2 =
+ vp9_highbd_12_sub_pixel_variance32x64_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_sse2 =
+ vp9_highbd_12_sub_pixel_variance64x32_sse2;
+const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_sse2 =
+ vp9_highbd_12_sub_pixel_variance64x64_sse2;
// Runs VP9SubpelVarianceHighTest against the SSE2 high-bit-depth sub-pixel
// variance functions. Each tuple is (log2 of block width, log2 of block
// height, function under test, bit depth): highbd_10_* entries pass 10,
// highbd_12_* entries pass 12 and the unprefixed highbd_* entries pass 8.
// The list covers 8x4 through 64x64; no 4x4 or 4x8 entries appear.
+INSTANTIATE_TEST_CASE_P(
+ SSE2, VP9SubpelVarianceHighTest,
+ ::testing::Values(make_tuple(3, 2, highbd_10_subpel_variance8x4_sse2, 10),
+ make_tuple(3, 3, highbd_10_subpel_variance8x8_sse2, 10),
+ make_tuple(3, 4, highbd_10_subpel_variance8x16_sse2, 10),
+ make_tuple(4, 3, highbd_10_subpel_variance16x8_sse2, 10),
+ make_tuple(4, 4, highbd_10_subpel_variance16x16_sse2, 10),
+ make_tuple(4, 5, highbd_10_subpel_variance16x32_sse2, 10),
+ make_tuple(5, 4, highbd_10_subpel_variance32x16_sse2, 10),
+ make_tuple(5, 5, highbd_10_subpel_variance32x32_sse2, 10),
+ make_tuple(5, 6, highbd_10_subpel_variance32x64_sse2, 10),
+ make_tuple(6, 5, highbd_10_subpel_variance64x32_sse2, 10),
+ make_tuple(6, 6, highbd_10_subpel_variance64x64_sse2, 10),
+ make_tuple(3, 2, highbd_12_subpel_variance8x4_sse2, 12),
+ make_tuple(3, 3, highbd_12_subpel_variance8x8_sse2, 12),
+ make_tuple(3, 4, highbd_12_subpel_variance8x16_sse2, 12),
+ make_tuple(4, 3, highbd_12_subpel_variance16x8_sse2, 12),
+ make_tuple(4, 4, highbd_12_subpel_variance16x16_sse2, 12),
+ make_tuple(4, 5, highbd_12_subpel_variance16x32_sse2, 12),
+ make_tuple(5, 4, highbd_12_subpel_variance32x16_sse2, 12),
+ make_tuple(5, 5, highbd_12_subpel_variance32x32_sse2, 12),
+ make_tuple(5, 6, highbd_12_subpel_variance32x64_sse2, 12),
+ make_tuple(6, 5, highbd_12_subpel_variance64x32_sse2, 12),
+ make_tuple(6, 6, highbd_12_subpel_variance64x64_sse2, 12),
+ make_tuple(3, 2, highbd_subpel_variance8x4_sse2, 8),
+ make_tuple(3, 3, highbd_subpel_variance8x8_sse2, 8),
+ make_tuple(3, 4, highbd_subpel_variance8x16_sse2, 8),
+ make_tuple(4, 3, highbd_subpel_variance16x8_sse2, 8),
+ make_tuple(4, 4, highbd_subpel_variance16x16_sse2, 8),
+ make_tuple(4, 5, highbd_subpel_variance16x32_sse2, 8),
+ make_tuple(5, 4, highbd_subpel_variance32x16_sse2, 8),
+ make_tuple(5, 5, highbd_subpel_variance32x32_sse2, 8),
+ make_tuple(5, 6, highbd_subpel_variance32x64_sse2, 8),
+ make_tuple(6, 5, highbd_subpel_variance64x32_sse2, 8),
+ make_tuple(6, 6, highbd_subpel_variance64x64_sse2, 8)));
// Short aliases for the SSE2 high-bit-depth sub-pixel averaging variance
// functions, one per block size from 8x4 to 64x64, in three families:
// unprefixed highbd_*, highbd_10_* and highbd_12_*. No 4x4 or 4x8 aliases are
// declared here. They are the function arguments of the SSE2
// VP9SubpelAvgVarianceHighTest tuple list that follows these declarations.
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x4_sse2 =
+ vp9_highbd_sub_pixel_avg_variance8x4_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x8_sse2 =
+ vp9_highbd_sub_pixel_avg_variance8x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance8x16_sse2 =
+ vp9_highbd_sub_pixel_avg_variance8x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x8_sse2 =
+ vp9_highbd_sub_pixel_avg_variance16x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x16_sse2 =
+ vp9_highbd_sub_pixel_avg_variance16x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance16x32_sse2 =
+ vp9_highbd_sub_pixel_avg_variance16x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x16_sse2 =
+ vp9_highbd_sub_pixel_avg_variance32x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x32_sse2 =
+ vp9_highbd_sub_pixel_avg_variance32x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance32x64_sse2 =
+ vp9_highbd_sub_pixel_avg_variance32x64_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance64x32_sse2 =
+ vp9_highbd_sub_pixel_avg_variance64x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance64x64_sse2 =
+ vp9_highbd_sub_pixel_avg_variance64x64_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x4_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance8x4_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x8_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance8x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance8x16_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance8x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x8_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance16x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x16_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance16x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance16x32_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance16x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x16_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance32x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x32_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance32x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance32x64_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance32x64_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance64x32_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance64x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_10_subpel_avg_variance64x64_sse2 =
+ vp9_highbd_10_sub_pixel_avg_variance64x64_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x4_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance8x4_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x8_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance8x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance8x16_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance8x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x8_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance16x8_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x16_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance16x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance16x32_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance16x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x16_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance32x16_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x32_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance32x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance32x64_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance32x64_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance64x32_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance64x32_sse2;
+const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance64x64_sse2 =
+ vp9_highbd_12_sub_pixel_avg_variance64x64_sse2;
// Runs VP9SubpelAvgVarianceHighTest against the SSE2 high-bit-depth sub-pixel
// averaging variance functions. Each tuple is (log2 of block width, log2 of
// block height, function under test, bit depth): highbd_10_* entries pass 10,
// highbd_12_* entries pass 12 and the unprefixed highbd_* entries pass 8.
// The list covers 8x4 through 64x64; no 4x4 or 4x8 entries appear.
+INSTANTIATE_TEST_CASE_P(
+ SSE2, VP9SubpelAvgVarianceHighTest,
+ ::testing::Values(
+ make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_sse2, 10),
+ make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_sse2, 10),
+ make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_sse2, 10),
+ make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_sse2, 10),
+ make_tuple(4, 4, highbd_10_subpel_avg_variance16x16_sse2, 10),
+ make_tuple(4, 5, highbd_10_subpel_avg_variance16x32_sse2, 10),
+ make_tuple(5, 4, highbd_10_subpel_avg_variance32x16_sse2, 10),
+ make_tuple(5, 5, highbd_10_subpel_avg_variance32x32_sse2, 10),
+ make_tuple(5, 6, highbd_10_subpel_avg_variance32x64_sse2, 10),
+ make_tuple(6, 5, highbd_10_subpel_avg_variance64x32_sse2, 10),
+ make_tuple(6, 6, highbd_10_subpel_avg_variance64x64_sse2, 10),
+ make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_sse2, 12),
+ make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_sse2, 12),
+ make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_sse2, 12),
+ make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_sse2, 12),
+ make_tuple(4, 4, highbd_12_subpel_avg_variance16x16_sse2, 12),
+ make_tuple(4, 5, highbd_12_subpel_avg_variance16x32_sse2, 12),
+ make_tuple(5, 4, highbd_12_subpel_avg_variance32x16_sse2, 12),
+ make_tuple(5, 5, highbd_12_subpel_avg_variance32x32_sse2, 12),
+ make_tuple(5, 6, highbd_12_subpel_avg_variance32x64_sse2, 12),
+ make_tuple(6, 5, highbd_12_subpel_avg_variance64x32_sse2, 12),
+ make_tuple(6, 6, highbd_12_subpel_avg_variance64x64_sse2, 12),
+ make_tuple(3, 2, highbd_subpel_avg_variance8x4_sse2, 8),
+ make_tuple(3, 3, highbd_subpel_avg_variance8x8_sse2, 8),
+ make_tuple(3, 4, highbd_subpel_avg_variance8x16_sse2, 8),
+ make_tuple(4, 3, highbd_subpel_avg_variance16x8_sse2, 8),
+ make_tuple(4, 4, highbd_subpel_avg_variance16x16_sse2, 8),
+ make_tuple(4, 5, highbd_subpel_avg_variance16x32_sse2, 8),
+ make_tuple(5, 4, highbd_subpel_avg_variance32x16_sse2, 8),
+ make_tuple(5, 5, highbd_subpel_avg_variance32x32_sse2, 8),
+ make_tuple(5, 6, highbd_subpel_avg_variance32x64_sse2, 8),
+ make_tuple(6, 5, highbd_subpel_avg_variance64x32_sse2, 8),
+ make_tuple(6, 6, highbd_subpel_avg_variance64x64_sse2, 8)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_USE_X86INC
+#endif // HAVE_SSE2
#if HAVE_SSSE3
#if CONFIG_USE_X86INC
vp9_sub_pixel_variance64x64_ssse3;
// Registers the SSSE3 sub-pixel variance kernels with VP9SubpelVarianceTest.
// The first two tuple fields follow the block size in each kernel's name as
// log2 values (2, 2 pairs with 4x4; 6, 6 pairs with 64x64); the third field
// is the function pointer under test.
// This change appends a fourth field, which is 0 for every entry here.
// NOTE(review): the highbd_* entries earlier in this file pass 8, 10 or 12 in
// that slot, so it is presumably a bit depth with 0 meaning "not high bit
// depth" -- confirm against the fixture's parameter handling.
INSTANTIATE_TEST_CASE_P(
SSSE3, VP9SubpelVarianceTest,
- ::testing::Values(make_tuple(2, 2, subpel_variance4x4_ssse3),
- make_tuple(2, 3, subpel_variance4x8_ssse3),
- make_tuple(3, 2, subpel_variance8x4_ssse3),
- make_tuple(3, 3, subpel_variance8x8_ssse3),
- make_tuple(3, 4, subpel_variance8x16_ssse3),
- make_tuple(4, 3, subpel_variance16x8_ssse3),
- make_tuple(4, 4, subpel_variance16x16_ssse3),
- make_tuple(4, 5, subpel_variance16x32_ssse3),
- make_tuple(5, 4, subpel_variance32x16_ssse3),
- make_tuple(5, 5, subpel_variance32x32_ssse3),
- make_tuple(5, 6, subpel_variance32x64_ssse3),
- make_tuple(6, 5, subpel_variance64x32_ssse3),
- make_tuple(6, 6, subpel_variance64x64_ssse3)));
-
+ ::testing::Values(make_tuple(2, 2, subpel_variance4x4_ssse3, 0),
+ make_tuple(2, 3, subpel_variance4x8_ssse3, 0),
+ make_tuple(3, 2, subpel_variance8x4_ssse3, 0),
+ make_tuple(3, 3, subpel_variance8x8_ssse3, 0),
+ make_tuple(3, 4, subpel_variance8x16_ssse3, 0),
+ make_tuple(4, 3, subpel_variance16x8_ssse3, 0),
+ make_tuple(4, 4, subpel_variance16x16_ssse3, 0),
+ make_tuple(4, 5, subpel_variance16x32_ssse3, 0),
+ make_tuple(5, 4, subpel_variance32x16_ssse3, 0),
+ make_tuple(5, 5, subpel_variance32x32_ssse3, 0),
+ make_tuple(5, 6, subpel_variance32x64_ssse3, 0),
+ make_tuple(6, 5, subpel_variance64x32_ssse3, 0),
+ make_tuple(6, 6, subpel_variance64x64_ssse3, 0)));
// Function-pointer aliases for the SSSE3 sub-pixel averaging variance kernels,
// followed by their registration with VP9SubpelAvgVarianceTest.
// NOTE(review): in this excerpt the subpel_avg_variance4x8_ssse3 declaration
// is followed directly by the 64x64 initializer, and the 8x4 ... 64x64 aliases
// used in the tuples are not declared in view; the intermediate lines appear
// to be elided diff context rather than a real 4x8 -> 64x64 binding. Confirm
// against the full file.
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_ssse3 =
vp9_sub_pixel_avg_variance4x4_ssse3;
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_ssse3 =
vp9_sub_pixel_avg_variance64x64_ssse3;
// Tuple layout: log2 of the width and height in the kernel's name, the
// function pointer, and a fourth field added by this change (0 for every
// entry here; presumably a bit depth -- confirm against the fixture).
// The two removed "#endif" lines inside this hunk are replaced by annotated
// "#endif" lines that follow the instantiation.
INSTANTIATE_TEST_CASE_P(
SSSE3, VP9SubpelAvgVarianceTest,
- ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_ssse3),
- make_tuple(2, 3, subpel_avg_variance4x8_ssse3),
- make_tuple(3, 2, subpel_avg_variance8x4_ssse3),
- make_tuple(3, 3, subpel_avg_variance8x8_ssse3),
- make_tuple(3, 4, subpel_avg_variance8x16_ssse3),
- make_tuple(4, 3, subpel_avg_variance16x8_ssse3),
- make_tuple(4, 4, subpel_avg_variance16x16_ssse3),
- make_tuple(4, 5, subpel_avg_variance16x32_ssse3),
- make_tuple(5, 4, subpel_avg_variance32x16_ssse3),
- make_tuple(5, 5, subpel_avg_variance32x32_ssse3),
- make_tuple(5, 6, subpel_avg_variance32x64_ssse3),
- make_tuple(6, 5, subpel_avg_variance64x32_ssse3),
- make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
-#endif
-#endif
+ ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_ssse3, 0),
+ make_tuple(2, 3, subpel_avg_variance4x8_ssse3, 0),
+ make_tuple(3, 2, subpel_avg_variance8x4_ssse3, 0),
+ make_tuple(3, 3, subpel_avg_variance8x8_ssse3, 0),
+ make_tuple(3, 4, subpel_avg_variance8x16_ssse3, 0),
+ make_tuple(4, 3, subpel_avg_variance16x8_ssse3, 0),
+ make_tuple(4, 4, subpel_avg_variance16x16_ssse3, 0),
+ make_tuple(4, 5, subpel_avg_variance16x32_ssse3, 0),
+ make_tuple(5, 4, subpel_avg_variance32x16_ssse3, 0),
+ make_tuple(5, 5, subpel_avg_variance32x32_ssse3, 0),
+ make_tuple(5, 6, subpel_avg_variance32x64_ssse3, 0),
+ make_tuple(6, 5, subpel_avg_variance64x32_ssse3, 0),
+ make_tuple(6, 6, subpel_avg_variance64x64_ssse3, 0)));
+#endif // CONFIG_USE_X86INC
+#endif // HAVE_SSSE3
#if HAVE_AVX2
// Alias for the AVX2 64x64 variance kernel and registration of the AVX2
// variance kernels with VP9VarianceTest.
// Only the 64x64 alias is declared in this excerpt; the other variance*_avx2
// names used below are presumably declared in lines not shown.
// Tuple layout: log2 width, log2 height (4, 4 pairs with 16x16; 6, 6 with
// 64x64), the function pointer, and a fourth field added by this change
// (0 for every entry here; presumably a bit depth -- confirm against the
// fixture).
const vp9_variance_fn_t variance64x64_avx2 = vp9_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
AVX2, VP9VarianceTest,
- ::testing::Values(make_tuple(4, 4, variance16x16_avx2),
- make_tuple(5, 4, variance32x16_avx2),
- make_tuple(5, 5, variance32x32_avx2),
- make_tuple(6, 5, variance64x32_avx2),
- make_tuple(6, 6, variance64x64_avx2)));
+ ::testing::Values(make_tuple(4, 4, variance16x16_avx2, 0),
+ make_tuple(5, 4, variance32x16_avx2, 0),
+ make_tuple(5, 5, variance32x32_avx2, 0),
+ make_tuple(6, 5, variance64x32_avx2, 0),
+ make_tuple(6, 6, variance64x64_avx2, 0)));
// Aliases for the AVX2 sub-pixel variance kernels and their registration with
// VP9SubpelVarianceTest (32x32 and 64x64 only).
// NOTE(review): the bare "vp9_sub_pixel_variance64x64_avx2;" line is the tail
// of a declaration whose first line is not shown in this excerpt.
// This change adds a fourth tuple field, 0 for both entries (presumably a bit
// depth -- confirm against the fixture).
const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
vp9_sub_pixel_variance32x32_avx2;
vp9_sub_pixel_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
AVX2, VP9SubpelVarianceTest,
- ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
- make_tuple(6, 6, subpel_variance64x64_avx2)));
+ ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2, 0),
+ make_tuple(6, 6, subpel_variance64x64_avx2, 0)));
// Aliases for the AVX2 sub-pixel averaging variance kernels and their
// registration with VP9SubpelAvgVarianceTest (32x32 and 64x64 only).
// NOTE(review): the bare "vp9_sub_pixel_avg_variance64x64_avx2;" line is the
// tail of a declaration whose first line is not shown in this excerpt.
// This change adds a fourth tuple field, 0 for both entries (presumably a bit
// depth -- confirm against the fixture).
const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 =
vp9_sub_pixel_avg_variance32x32_avx2;
vp9_sub_pixel_avg_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
AVX2, VP9SubpelAvgVarianceTest,
- ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
- make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
+ ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2, 0),
+ make_tuple(6, 6, subpel_avg_variance64x64_avx2, 0)));
#endif // HAVE_AVX2
#if HAVE_NEON
// Aliases for the NEON variance kernels and their registration with
// VP9VarianceTest (8x8, 16x16 and 32x32).
// The variance16x16_neon alias used below is not declared in this excerpt;
// presumably its declaration sits in lines not shown.
// This change adds a fourth tuple field, 0 for every entry (presumably a bit
// depth -- confirm against the fixture).
const vp9_variance_fn_t variance8x8_neon = vp9_variance8x8_neon;
const vp9_variance_fn_t variance32x32_neon = vp9_variance32x32_neon;
INSTANTIATE_TEST_CASE_P(
NEON, VP9VarianceTest,
- ::testing::Values(make_tuple(3, 3, variance8x8_neon),
- make_tuple(4, 4, variance16x16_neon),
- make_tuple(5, 5, variance32x32_neon)));
+ ::testing::Values(make_tuple(3, 3, variance8x8_neon, 0),
+ make_tuple(4, 4, variance16x16_neon, 0),
+ make_tuple(5, 5, variance32x32_neon, 0)));
// Aliases for the NEON sub-pixel variance kernels and their registration with
// VP9SubpelVarianceTest (8x8, 16x16 and 32x32).
// NOTE(review): the bare "vp9_sub_pixel_variance32x32_neon;" line is the tail
// of a declaration whose first line is not shown in this excerpt.
// This change adds a fourth tuple field, 0 for every entry (presumably a bit
// depth -- confirm against the fixture).
const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
vp9_sub_pixel_variance8x8_neon;
vp9_sub_pixel_variance32x32_neon;
INSTANTIATE_TEST_CASE_P(
NEON, VP9SubpelVarianceTest,
- ::testing::Values(make_tuple(3, 3, subpel_variance8x8_neon),
- make_tuple(4, 4, subpel_variance16x16_neon),
- make_tuple(5, 5, subpel_variance32x32_neon)));
+ ::testing::Values(make_tuple(3, 3, subpel_variance8x8_neon, 0),
+ make_tuple(4, 4, subpel_variance16x16_neon, 0),
+ make_tuple(5, 5, subpel_variance32x32_neon, 0)));
#endif // HAVE_NEON
#endif // CONFIG_VP9_ENCODER
} // namespace vp9
-
} // namespace
# variance
# Prototypes and specializations for the vp9_highbd_variance<W>x<H> family and
# the vp9_highbd_get8x8var / vp9_highbd_get16x16var helpers.
# Each add_proto gives the return type and name, then the argument list; the
# specialize line names the same function.
# This change appends "$sse2_x86inc" to the specialize line for the 32x16
# through 8x8 sizes and for both get*var helpers. The 8x4 and 4x4 specialize
# lines are left without it.
# NOTE(review): "$sse2_x86inc" presumably enables an SSE2 implementation only
# in x86inc-enabled builds -- confirm where the variable is defined.
add_proto qw/unsigned int vp9_highbd_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance32x16/;
+ specialize qw/vp9_highbd_variance32x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance16x32/;
+ specialize qw/vp9_highbd_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance64x32/;
+ specialize qw/vp9_highbd_variance64x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance32x64/;
+ specialize qw/vp9_highbd_variance32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance32x32/;
+ specialize qw/vp9_highbd_variance32x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance64x64/;
+ specialize qw/vp9_highbd_variance64x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance16x16/;
+ specialize qw/vp9_highbd_variance16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance16x8/;
+ specialize qw/vp9_highbd_variance16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance8x16/;
+ specialize qw/vp9_highbd_variance8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance8x8/;
+ specialize qw/vp9_highbd_variance8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_highbd_variance8x4/;
specialize qw/vp9_highbd_variance4x4/;
add_proto qw/void vp9_highbd_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_get8x8var/;
+ specialize qw/vp9_highbd_get8x8var/, "$sse2_x86inc";
add_proto qw/void vp9_highbd_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_get16x16var/;
+ specialize qw/vp9_highbd_get16x16var/, "$sse2_x86inc";
# Prototypes and specializations for the vp9_highbd_10_variance<W>x<H> family
# and the vp9_highbd_10_get8x8var / vp9_highbd_10_get16x16var helpers.
# This change appends "$sse2_x86inc" to the specialize line for the 32x16
# through 8x8 sizes and for both get*var helpers. The 8x4 and 4x4 specialize
# lines are left without it.
# NOTE(review): "$sse2_x86inc" presumably enables an SSE2 implementation only
# in x86inc-enabled builds -- confirm where the variable is defined.
add_proto qw/unsigned int vp9_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance32x16/;
+ specialize qw/vp9_highbd_10_variance32x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance16x32/;
+ specialize qw/vp9_highbd_10_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance64x32/;
+ specialize qw/vp9_highbd_10_variance64x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance32x64/;
+ specialize qw/vp9_highbd_10_variance32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance32x32/;
+ specialize qw/vp9_highbd_10_variance32x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance64x64/;
+ specialize qw/vp9_highbd_10_variance64x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance16x16/;
+ specialize qw/vp9_highbd_10_variance16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance16x8/;
+ specialize qw/vp9_highbd_10_variance16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance8x16/;
+ specialize qw/vp9_highbd_10_variance8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance8x8/;
+ specialize qw/vp9_highbd_10_variance8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_highbd_10_variance8x4/;
specialize qw/vp9_highbd_10_variance4x4/;
add_proto qw/void vp9_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_10_get8x8var/;
+ specialize qw/vp9_highbd_10_get8x8var/, "$sse2_x86inc";
add_proto qw/void vp9_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_10_get16x16var/;
+ specialize qw/vp9_highbd_10_get16x16var/, "$sse2_x86inc";
# Prototypes and specializations for the vp9_highbd_12_variance<W>x<H> family
# and the vp9_highbd_12_get8x8var / vp9_highbd_12_get16x16var helpers.
# This change appends "$sse2_x86inc" to the specialize line for the 32x16
# through 8x8 sizes and for both get*var helpers. The 8x4 and 4x4 specialize
# lines are left without it.
# NOTE(review): "$sse2_x86inc" presumably enables an SSE2 implementation only
# in x86inc-enabled builds -- confirm where the variable is defined.
add_proto qw/unsigned int vp9_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance32x16/;
+ specialize qw/vp9_highbd_12_variance32x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance16x32/;
+ specialize qw/vp9_highbd_12_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance64x32/;
+ specialize qw/vp9_highbd_12_variance64x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance32x64/;
+ specialize qw/vp9_highbd_12_variance32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance32x32/;
+ specialize qw/vp9_highbd_12_variance32x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance64x64/;
+ specialize qw/vp9_highbd_12_variance64x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance16x16/;
+ specialize qw/vp9_highbd_12_variance16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance16x8/;
+ specialize qw/vp9_highbd_12_variance16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance8x16/;
+ specialize qw/vp9_highbd_12_variance8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance8x8/;
+ specialize qw/vp9_highbd_12_variance8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_highbd_12_variance8x4/;
specialize qw/vp9_highbd_12_variance4x4/;
add_proto qw/void vp9_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_12_get8x8var/;
+ specialize qw/vp9_highbd_12_get8x8var/, "$sse2_x86inc";
add_proto qw/void vp9_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_12_get16x16var/;
+ specialize qw/vp9_highbd_12_get16x16var/, "$sse2_x86inc";
# Prototypes and specializations for the vp9_highbd_sub_pixel_variance<W>x<H>
# and vp9_highbd_sub_pixel_avg_variance<W>x<H> families. The avg variants take
# one extra trailing argument, "const uint8_t *second_pred".
# This change appends "$sse2_x86inc" to the specialize line of both variants
# for the 64x64 through 8x4 sizes. The 4x8 and avg 4x4 specialize lines are
# left without it.
# NOTE(review): "$sse2_x86inc" presumably enables an SSE2 implementation only
# in x86inc-enabled builds -- confirm where the variable is defined.
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance64x64/;
+ specialize qw/vp9_highbd_sub_pixel_variance64x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance64x64/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance32x64/;
+ specialize qw/vp9_highbd_sub_pixel_variance32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance32x64/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance64x32/;
+ specialize qw/vp9_highbd_sub_pixel_variance64x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance64x32/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance32x16/;
+ specialize qw/vp9_highbd_sub_pixel_variance32x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance32x16/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance16x32/;
+ specialize qw/vp9_highbd_sub_pixel_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance16x32/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance32x32/;
+ specialize qw/vp9_highbd_sub_pixel_variance32x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance32x32/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance16x16/;
+ specialize qw/vp9_highbd_sub_pixel_variance16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance16x16/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance8x16/;
+ specialize qw/vp9_highbd_sub_pixel_variance8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance8x16/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance16x8/;
+ specialize qw/vp9_highbd_sub_pixel_variance16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance16x8/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance8x8/;
+ specialize qw/vp9_highbd_sub_pixel_variance8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance8x8/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_sub_pixel_variance8x4/;
+ specialize qw/vp9_highbd_sub_pixel_variance8x4/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_sub_pixel_avg_variance8x4/;
+ specialize qw/vp9_highbd_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_highbd_sub_pixel_variance4x8/;
specialize qw/vp9_highbd_sub_pixel_avg_variance4x4/;
# Prototypes and specializations for the
# vp9_highbd_10_sub_pixel_variance<W>x<H> and
# vp9_highbd_10_sub_pixel_avg_variance<W>x<H> families. The avg variants take
# one extra trailing argument, "const uint8_t *second_pred".
# This change appends "$sse2_x86inc" to the specialize line of both variants
# for the 64x64 through 8x4 sizes. The 4x8 and avg 4x4 specialize lines are
# left without it.
# NOTE(review): "$sse2_x86inc" presumably enables an SSE2 implementation only
# in x86inc-enabled builds -- confirm where the variable is defined.
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance64x64/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance64x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance64x64/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance32x64/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance32x64/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance64x32/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance64x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance64x32/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance32x16/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance32x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance32x16/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance16x32/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance16x32/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance32x32/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance32x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance32x32/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance16x16/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance16x16/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance8x16/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance8x16/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance16x8/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance16x8/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance8x8/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance8x8/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_sub_pixel_variance8x4/;
+ specialize qw/vp9_highbd_10_sub_pixel_variance8x4/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_10_sub_pixel_avg_variance8x4/;
+ specialize qw/vp9_highbd_10_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_highbd_10_sub_pixel_variance4x8/;
specialize qw/vp9_highbd_10_sub_pixel_avg_variance4x4/;
# Prototypes and specializations for the
# vp9_highbd_12_sub_pixel_variance<W>x<H> and
# vp9_highbd_12_sub_pixel_avg_variance<W>x<H> families (the 64x64, 32x64,
# 64x32 and 32x16 sizes are visible in this excerpt). The avg variants take one
# extra trailing argument, "const uint8_t *second_pred".
# This change appends "$sse2_x86inc" to every specialize line shown here.
# NOTE(review): "$sse2_x86inc" presumably enables an SSE2 implementation only
# in x86inc-enabled builds -- confirm where the variable is defined.
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance64x64/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance64x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance64x64/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance32x64/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance32x64/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance64x32/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance64x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance64x32/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance32x16/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance32x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance32x16/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance16x32/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance16x32/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance32x32/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance32x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance32x32/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance16x16/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance16x16/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance8x16/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance8x16/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance16x8/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance16x8/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance8x8/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance8x8/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_sub_pixel_variance8x4/;
+ specialize qw/vp9_highbd_12_sub_pixel_variance8x4/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
- specialize qw/vp9_highbd_12_sub_pixel_avg_variance8x4/;
+ specialize qw/vp9_highbd_12_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_highbd_12_sub_pixel_variance4x8/;
specialize qw/vp9_highbd_sad4x4x4d sse2/;
add_proto qw/unsigned int vp9_highbd_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_mse16x16/;
+ specialize qw/vp9_highbd_mse16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_highbd_mse8x16/;
specialize qw/vp9_highbd_mse16x8/;
add_proto qw/unsigned int vp9_highbd_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_mse8x8/;
+ specialize qw/vp9_highbd_mse8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_mse16x16/;
+ specialize qw/vp9_highbd_10_mse16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_highbd_10_mse8x16/;
specialize qw/vp9_highbd_10_mse16x8/;
add_proto qw/unsigned int vp9_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_mse8x8/;
+ specialize qw/vp9_highbd_10_mse8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_mse16x16/;
+ specialize qw/vp9_highbd_12_mse16x16/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_highbd_12_mse8x16/;
specialize qw/vp9_highbd_12_mse16x8/;
add_proto qw/unsigned int vp9_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_mse8x8/;
+ specialize qw/vp9_highbd_12_mse8x8/, "$sse2_x86inc";
# ENCODEMB INVOKE
--- /dev/null
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#include "vp9/common/vp9_common.h"
+
+#include "vp9/encoder/vp9_variance.h"
+#include "vpx_ports/mem.h"
+
+/* Signature shared by the per-tile kernels used in this file: a source and
+ * a reference block of 16-bit samples, each with its stride, plus two
+ * out-pointers through which the tile's sse and sum are reported.  Callers
+ * in this file ignore the uint32_t return value. */
+typedef uint32_t (*high_variance_fn_t) (const uint16_t *src, int src_stride,
+ const uint16_t *ref, int ref_stride,
+ uint32_t *sse, int *sum);
+
+/* 8x8 tile kernel.  Only declared here; the definition is not in the visible
+ * part of this file (presumably the SSE2 assembly -- TODO confirm). */
+uint32_t vp9_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride,
+ const uint16_t *ref, int ref_stride,
+ uint32_t *sse, int *sum);
+
+/* 16x16 tile kernel.  Only declared here; the definition is not in the
+ * visible part of this file (presumably the SSE2 assembly -- TODO confirm). */
+uint32_t vp9_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride,
+ const uint16_t *ref, int ref_stride,
+ uint32_t *sse, int *sum);
+
+/* Covers a w x h area with block_size x block_size tiles, runs var_fn on
+ * every tile and adds each tile's sse and sum straight into *sse and *sum.
+ * Both outputs are zeroed first; the value returned by var_fn is ignored. */
+static void highbd_variance_sse2(const uint16_t *src, int src_stride,
+                                 const uint16_t *ref, int ref_stride,
+                                 int w, int h, uint32_t *sse, int *sum,
+                                 high_variance_fn_t var_fn, int block_size) {
+  int row, col;
+
+  *sse = 0;
+  *sum = 0;
+
+  for (row = 0; row < h; row += block_size) {
+    /* First sample of this row of tiles in each buffer. */
+    const uint16_t *const src_row = src + src_stride * row;
+    const uint16_t *const ref_row = ref + ref_stride * row;
+    for (col = 0; col < w; col += block_size) {
+      unsigned int tile_sse;
+      int tile_sum;
+      var_fn(src_row + col, src_stride, ref_row + col, ref_stride,
+             &tile_sse, &tile_sum);
+      *sse += tile_sse;
+      *sum += tile_sum;
+    }
+  }
+}
+
+/* Same tiling as highbd_variance_sse2(), but the per-tile results are summed
+ * in 64-bit accumulators and only then scaled: the sum goes through
+ * ROUND_POWER_OF_TWO(..., 2) and the sse through ROUND_POWER_OF_TWO(..., 4)
+ * before being stored in *sum and *sse.  var_fn's return value is ignored. */
+static void highbd_10_variance_sse2(const uint16_t *src, int src_stride,
+                                    const uint16_t *ref, int ref_stride,
+                                    int w, int h, uint32_t *sse, int *sum,
+                                    high_variance_fn_t var_fn, int block_size) {
+  uint64_t total_sse = 0;
+  int64_t total_sum = 0;
+  int row, col;
+
+  for (row = 0; row < h; row += block_size) {
+    const uint16_t *const src_row = src + src_stride * row;
+    const uint16_t *const ref_row = ref + ref_stride * row;
+    for (col = 0; col < w; col += block_size) {
+      unsigned int tile_sse;
+      int tile_sum;
+      var_fn(src_row + col, src_stride, ref_row + col, ref_stride,
+             &tile_sse, &tile_sum);
+      total_sse += tile_sse;
+      total_sum += tile_sum;
+    }
+  }
+  *sum = ROUND_POWER_OF_TWO(total_sum, 2);
+  *sse = ROUND_POWER_OF_TWO(total_sse, 4);
+}
+
+/* Same tiling as highbd_variance_sse2(), but the per-tile results are summed
+ * in 64-bit accumulators and only then scaled: the sum goes through
+ * ROUND_POWER_OF_TWO(..., 4) and the sse through ROUND_POWER_OF_TWO(..., 8)
+ * before being stored in *sum and *sse.  var_fn's return value is ignored. */
+static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
+                                    const uint16_t *ref, int ref_stride,
+                                    int w, int h, uint32_t *sse, int *sum,
+                                    high_variance_fn_t var_fn, int block_size) {
+  uint64_t total_sse = 0;
+  int64_t total_sum = 0;
+  int row, col;
+
+  for (row = 0; row < h; row += block_size) {
+    const uint16_t *const src_row = src + src_stride * row;
+    const uint16_t *const ref_row = ref + ref_stride * row;
+    for (col = 0; col < w; col += block_size) {
+      unsigned int tile_sse;
+      int tile_sum;
+      var_fn(src_row + col, src_stride, ref_row + col, ref_stride,
+             &tile_sse, &tile_sum);
+      total_sse += tile_sse;
+      total_sum += tile_sum;
+    }
+  }
+  *sum = ROUND_POWER_OF_TWO(total_sum, 4);
+  *sse = ROUND_POWER_OF_TWO(total_sse, 8);
+}
+
+
+/* HIGH_GET_VAR(S) expands to three S x S functions.  Each converts its
+ * uint8_t-typed buffer arguments with CONVERT_TO_SHORTPTR and forwards them
+ * to vp9_highbd_calc<S>x<S>var_sse2, which fills *sse and *sum:
+ *   vp9_highbd_get...     stores the kernel results unchanged;
+ *   vp9_highbd_10_get...  then rounds *sum by 2 bits and *sse by 4 bits;
+ *   vp9_highbd_12_get...  then rounds *sum by 4 bits and *sse by 8 bits
+ * (rounding is done with ROUND_POWER_OF_TWO). */
+#define HIGH_GET_VAR(S) \
+void vp9_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
+                                       const uint8_t *ref8, int ref_stride, \
+                                       uint32_t *sse, int *sum) { \
+  vp9_highbd_calc##S##x##S##var_sse2(CONVERT_TO_SHORTPTR(src8), src_stride, \
+                                     CONVERT_TO_SHORTPTR(ref8), ref_stride, \
+                                     sse, sum); \
+} \
+\
+void vp9_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
+                                          const uint8_t *ref8, int ref_stride, \
+                                          uint32_t *sse, int *sum) { \
+  vp9_highbd_calc##S##x##S##var_sse2(CONVERT_TO_SHORTPTR(src8), src_stride, \
+                                     CONVERT_TO_SHORTPTR(ref8), ref_stride, \
+                                     sse, sum); \
+  *sum = ROUND_POWER_OF_TWO(*sum, 2); \
+  *sse = ROUND_POWER_OF_TWO(*sse, 4); \
+} \
+\
+void vp9_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
+                                          const uint8_t *ref8, int ref_stride, \
+                                          uint32_t *sse, int *sum) { \
+  vp9_highbd_calc##S##x##S##var_sse2(CONVERT_TO_SHORTPTR(src8), src_stride, \
+                                     CONVERT_TO_SHORTPTR(ref8), ref_stride, \
+                                     sse, sum); \
+  *sum = ROUND_POWER_OF_TWO(*sum, 4); \
+  *sse = ROUND_POWER_OF_TWO(*sse, 8); \
+}
+
+/* Instantiate the wrappers for the two tile sizes that have a kernel. */
+HIGH_GET_VAR(16);
+HIGH_GET_VAR(8);
+
+#undef HIGH_GET_VAR
+
+/* VAR_FN_BD(bd, w, h, block_size, shift) defines vp9_<bd>_variance<w>x<h>_sse2.
+ * The function converts both buffers with CONVERT_TO_SHORTPTR, lets
+ * <bd>_variance_sse2() accumulate *sse and the sum over block_size-square
+ * tiles, and returns *sse - ((sum * sum) >> shift), with the product formed
+ * in 64 bits.  bd is one of highbd, highbd_10, highbd_12. */
+#define VAR_FN_BD(bd, w, h, block_size, shift) \
+uint32_t vp9_##bd##_variance##w##x##h##_sse2( \
+    const uint8_t *src8, int src_stride, \
+    const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
+  int sum; \
+  bd##_variance_sse2( \
+      CONVERT_TO_SHORTPTR(src8), src_stride, \
+      CONVERT_TO_SHORTPTR(ref8), ref_stride, w, h, sse, &sum, \
+      vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
+  return *sse - (((int64_t)sum * sum) >> shift); \
+}
+
+/* VAR_FN emits the three bit-depth flavours of one block size. */
+#define VAR_FN(w, h, block_size, shift) \
+VAR_FN_BD(highbd, w, h, block_size, shift) \
+VAR_FN_BD(highbd_10, w, h, block_size, shift) \
+VAR_FN_BD(highbd_12, w, h, block_size, shift)
+
+/* Arguments: width, height, kernel tile size, shift.  In every row the
+ * shift equals log2(width * height). */
+VAR_FN(64, 64, 16, 12);
+VAR_FN(64, 32, 16, 11);
+VAR_FN(32, 64, 16, 11);
+VAR_FN(32, 32, 16, 10);
+VAR_FN(32, 16, 16, 9);
+VAR_FN(16, 32, 16, 9);
+VAR_FN(16, 16, 16, 8);
+VAR_FN(16, 8, 8, 7);
+VAR_FN(8, 16, 8, 7);
+VAR_FN(8, 8, 8, 6);
+
+#undef VAR_FN
+#undef VAR_FN_BD
+
+/* 16x16 entry point built on highbd_variance_sse2() with the 16x16 kernel.
+ * Stores the accumulated sse in *sse and returns that same value (no
+ * division is applied here); the sum computed alongside is discarded. */
+unsigned int vp9_highbd_mse16x16_sse2(const uint8_t *src8, int src_stride,
+                                      const uint8_t *ref8, int ref_stride,
+                                      unsigned int *sse) {
+  int unused_sum;
+  highbd_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
+                       CONVERT_TO_SHORTPTR(ref8), ref_stride, 16, 16,
+                       sse, &unused_sum, vp9_highbd_calc16x16var_sse2, 16);
+  return *sse;
+}
+
+/* 16x16 entry point built on highbd_10_variance_sse2() with the 16x16
+ * kernel.  Stores the sse produced by that helper in *sse and returns the
+ * same value; the sum computed alongside is discarded. */
+unsigned int vp9_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride,
+                                         const uint8_t *ref8, int ref_stride,
+                                         unsigned int *sse) {
+  int unused_sum;
+  highbd_10_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
+                          CONVERT_TO_SHORTPTR(ref8), ref_stride, 16, 16,
+                          sse, &unused_sum, vp9_highbd_calc16x16var_sse2, 16);
+  return *sse;
+}
+
+/* 16x16 entry point built on highbd_12_variance_sse2() with the 16x16
+ * kernel.  Stores the sse produced by that helper in *sse and returns the
+ * same value; the sum computed alongside is discarded. */
+unsigned int vp9_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride,
+                                         const uint8_t *ref8, int ref_stride,
+                                         unsigned int *sse) {
+  int unused_sum;
+  highbd_12_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
+                          CONVERT_TO_SHORTPTR(ref8), ref_stride, 16, 16,
+                          sse, &unused_sum, vp9_highbd_calc16x16var_sse2, 16);
+  return *sse;
+}
+
+/* 8x8 entry point built on highbd_variance_sse2() with the 8x8 kernel.
+ * Stores the accumulated sse in *sse and returns that same value (no
+ * division is applied here); the sum computed alongside is discarded. */
+unsigned int vp9_highbd_mse8x8_sse2(const uint8_t *src8, int src_stride,
+                                    const uint8_t *ref8, int ref_stride,
+                                    unsigned int *sse) {
+  int unused_sum;
+  highbd_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
+                       CONVERT_TO_SHORTPTR(ref8), ref_stride, 8, 8,
+                       sse, &unused_sum, vp9_highbd_calc8x8var_sse2, 8);
+  return *sse;
+}
+
+/* 8x8 entry point built on highbd_10_variance_sse2() with the 8x8 kernel.
+ * Stores the sse produced by that helper in *sse and returns the same
+ * value; the sum computed alongside is discarded. */
+unsigned int vp9_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride,
+                                       const uint8_t *ref8, int ref_stride,
+                                       unsigned int *sse) {
+  int unused_sum;
+  highbd_10_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
+                          CONVERT_TO_SHORTPTR(ref8), ref_stride, 8, 8,
+                          sse, &unused_sum, vp9_highbd_calc8x8var_sse2, 8);
+  return *sse;
+}
+
+/* 8x8 entry point built on highbd_12_variance_sse2() with the 8x8 kernel.
+ * Stores the sse produced by that helper in *sse and returns the same
+ * value; the sum computed alongside is discarded. */
+unsigned int vp9_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
+                                       const uint8_t *ref8, int ref_stride,
+                                       unsigned int *sse) {
+  int unused_sum;
+  highbd_12_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
+                          CONVERT_TO_SHORTPTR(ref8), ref_stride, 8, 8,
+                          sse, &unused_sum, vp9_highbd_calc8x8var_sse2, 8);
+  return *sse;
+}
+
+/* Prototypes of the column kernels used by the sub-pixel variance wrappers
+ * below.  DECL(width, isa) declares
+ * vp9_highbd_sub_pixel_variance<width>xh_<isa>; the int it returns is used
+ * by the wrappers as the sum of differences, and the sse is written through
+ * the last argument.  The definitions are not part of this file's visible
+ * code. */
+#define DECL(width, isa) \
+int vp9_highbd_sub_pixel_variance##width##xh_##isa( \
+    const uint16_t *src, ptrdiff_t src_stride, \
+    int x_offset, int y_offset, \
+    const uint16_t *dst, ptrdiff_t dst_stride, \
+    int height, unsigned int *sse)
+
+/* DECLS(isa) declares the 8- and 16-wide kernels of one instruction set. */
+#define DECLS(isa) \
+DECL(8, isa); \
+DECL(16, isa)
+
+DECLS(sse2);
+/* The SSSE3 instantiation is left disabled: DECLS(ssse3); */
+#undef DECLS
+#undef DECL
+
+/* FN(w, h, wf, wlog2, hlog2, opt, cast) defines the three bit-depth
+ * flavours of the w x h sub-pixel variance:
+ *   vp9_highbd_sub_pixel_variance<w>x<h>_<opt>
+ *   vp9_highbd_10_sub_pixel_variance<w>x<h>_<opt>
+ *   vp9_highbd_12_sub_pixel_variance<w>x<h>_<opt>
+ *
+ * Each converts src8/dst8 with CONVERT_TO_SHORTPTR and covers the block in
+ * wf-wide columns, calling vp9_highbd_sub_pixel_variance<wf>xh_<opt> once per
+ * column.  The kernel returns the column's sum of differences and writes its
+ * sse through the last argument.  The result stored in *sse_ptr is the
+ * (scaled) total sse; the return value is sse - ((se * se) >> (wlog2 +
+ * hlog2)), where `cast` sets the type used for the se * se product.
+ *
+ * The columns are walked with a loop stepping by wf.  For every
+ * instantiation (w == wf, or wf == 16 with w in {32, 64}) this visits the
+ * offsets 0, 16, 32, 48 exactly as the former hand-unrolled calls did.
+ *
+ * Scaling: the 10-bit flavour rounds se by 2 bits and sse by 4 bits; the
+ * 12-bit flavour rounds se by 4 bits and sse by 8 bits.  The 12-bit flavour
+ * also feeds the kernel at most 16 rows at a time and sums the per-strip
+ * sse in 64 bits (presumably to keep the kernel's 32-bit sse from
+ * overflowing -- TODO confirm). */
+#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
+uint32_t vp9_highbd_sub_pixel_variance##w##x##h##_##opt( \
+    const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
+    const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \
+  const uint16_t *const src = CONVERT_TO_SHORTPTR(src8); \
+  const uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8); \
+  uint32_t sse = 0; \
+  int se = 0; \
+  int col; \
+  for (col = 0; col < w; col += wf) { \
+    unsigned int col_sse; \
+    se += vp9_highbd_sub_pixel_variance##wf##xh_##opt( \
+        src + col, src_stride, x_offset, y_offset, \
+        dst + col, dst_stride, h, &col_sse); \
+    sse += col_sse; \
+  } \
+  *sse_ptr = sse; \
+  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+} \
+\
+uint32_t vp9_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
+    const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
+    const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \
+  const uint16_t *const src = CONVERT_TO_SHORTPTR(src8); \
+  const uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8); \
+  uint32_t sse = 0; \
+  int se = 0; \
+  int col; \
+  for (col = 0; col < w; col += wf) { \
+    unsigned int col_sse; \
+    se += vp9_highbd_sub_pixel_variance##wf##xh_##opt( \
+        src + col, src_stride, x_offset, y_offset, \
+        dst + col, dst_stride, h, &col_sse); \
+    sse += col_sse; \
+  } \
+  se = ROUND_POWER_OF_TWO(se, 2); \
+  sse = ROUND_POWER_OF_TWO(sse, 4); \
+  *sse_ptr = sse; \
+  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+} \
+\
+uint32_t vp9_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
+    const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
+    const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \
+  const uint16_t *const src = CONVERT_TO_SHORTPTR(src8); \
+  const uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8); \
+  uint64_t long_sse = 0; \
+  uint32_t sse; \
+  int se = 0; \
+  int row, col; \
+  for (row = 0; row < h; row += 16) { \
+    const int rows = h - row < 16 ? h - row : 16; \
+    for (col = 0; col < w; col += wf) { \
+      unsigned int strip_sse; \
+      se += vp9_highbd_sub_pixel_variance##wf##xh_##opt( \
+          src + col + (row * src_stride), src_stride, x_offset, y_offset, \
+          dst + col + (row * dst_stride), dst_stride, rows, &strip_sse); \
+      long_sse += strip_sse; \
+    } \
+  } \
+  se = ROUND_POWER_OF_TWO(se, 4); \
+  sse = ROUND_POWER_OF_TWO(long_sse, 8); \
+  *sse_ptr = sse; \
+  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+}
+
+/* FNS(opt) instantiates FN for every supported block size.  Columns are:
+ * width, height, kernel width, log2(width), log2(height), instruction-set
+ * suffix, and the cast applied to the squared sum. */
+#define FNS(opt) \
+FN(64, 64, 16, 6, 6, opt, (int64_t)); \
+FN(64, 32, 16, 6, 5, opt, (int64_t)); \
+FN(32, 64, 16, 5, 6, opt, (int64_t)); \
+FN(32, 32, 16, 5, 5, opt, (int64_t)); \
+FN(32, 16, 16, 5, 4, opt, (int64_t)); \
+FN(16, 32, 16, 4, 5, opt, (int64_t)); \
+FN(16, 16, 16, 4, 4, opt, (int64_t)); \
+FN(16, 8, 16, 4, 3, opt, (int64_t)); \
+FN(8, 16, 8, 3, 4, opt, (int64_t)); \
+FN(8, 8, 8, 3, 3, opt, (int64_t)); \
+FN(8, 4, 8, 3, 2, opt, (int64_t));
+
+FNS(sse2);
+
+#undef FNS
+#undef FN
+
+/* Prototypes of the column kernels used by the sub-pixel averaging variance
+ * wrappers below.  DECL(width, isa) declares
+ * vp9_highbd_sub_pixel_avg_variance<width>xh_<isa>, which additionally takes
+ * the second predictor `sec` and its stride.  The int it returns is used by
+ * the wrappers as the sum of differences, and the sse is written through
+ * the last argument.  The definitions are not part of this file's visible
+ * code. */
+#define DECL(width, isa) \
+int vp9_highbd_sub_pixel_avg_variance##width##xh_##isa( \
+    const uint16_t *src, ptrdiff_t src_stride, \
+    int x_offset, int y_offset, \
+    const uint16_t *dst, ptrdiff_t dst_stride, \
+    const uint16_t *sec, ptrdiff_t sec_stride, \
+    int height, unsigned int *sse)
+
+/* DECLS(isa) declares the 16- and 8-wide kernels of one instruction set. */
+#define DECLS(isa) \
+DECL(16, isa); \
+DECL(8, isa)
+
+DECLS(sse2);
+#undef DECL
+#undef DECLS
+
+/* FN(w, h, wf, wlog2, hlog2, opt, cast) defines the three bit-depth
+ * flavours of the w x h sub-pixel averaging variance:
+ *   vp9_highbd_sub_pixel_avg_variance<w>x<h>_<opt>
+ *   vp9_highbd_10_sub_pixel_avg_variance<w>x<h>_<opt>
+ *   vp9_highbd_12_sub_pixel_avg_variance<w>x<h>_<opt>
+ *
+ * Each converts src8/dst8/sec8 with CONVERT_TO_SHORTPTR and covers the block
+ * in wf-wide columns, calling vp9_highbd_sub_pixel_avg_variance<wf>xh_<opt>
+ * once per column.  The second predictor `sec` is addressed with a stride
+ * of w.  The kernel returns the column's sum of differences and writes its
+ * sse through the last argument.  The result stored in *sse_ptr is the
+ * (scaled) total sse; the return value is sse - ((se * se) >> (wlog2 +
+ * hlog2)), where `cast` sets the type used for the se * se product.
+ *
+ * The columns are walked with a loop stepping by wf.  For every
+ * instantiation (w == wf, or wf == 16 with w in {32, 64}) this visits the
+ * offsets 0, 16, 32, 48 exactly as the former hand-unrolled calls did.
+ *
+ * Scaling: the 10-bit flavour rounds se by 2 bits and sse by 4 bits; the
+ * 12-bit flavour rounds se by 4 bits and sse by 8 bits.  The 12-bit flavour
+ * also feeds the kernel at most 16 rows at a time and sums the per-strip
+ * sse in 64 bits (presumably to keep the kernel's 32-bit sse from
+ * overflowing -- TODO confirm). */
+#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
+uint32_t vp9_highbd_sub_pixel_avg_variance##w##x##h##_##opt( \
+    const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
+    const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
+    const uint8_t *sec8) { \
+  const uint16_t *const src = CONVERT_TO_SHORTPTR(src8); \
+  const uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8); \
+  const uint16_t *const sec = CONVERT_TO_SHORTPTR(sec8); \
+  uint32_t sse = 0; \
+  int se = 0; \
+  int col; \
+  for (col = 0; col < w; col += wf) { \
+    unsigned int col_sse; \
+    se += vp9_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+        src + col, src_stride, x_offset, y_offset, \
+        dst + col, dst_stride, sec + col, w, h, &col_sse); \
+    sse += col_sse; \
+  } \
+  *sse_ptr = sse; \
+  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+} \
+\
+uint32_t vp9_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
+    const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
+    const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
+    const uint8_t *sec8) { \
+  const uint16_t *const src = CONVERT_TO_SHORTPTR(src8); \
+  const uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8); \
+  const uint16_t *const sec = CONVERT_TO_SHORTPTR(sec8); \
+  uint32_t sse = 0; \
+  int se = 0; \
+  int col; \
+  for (col = 0; col < w; col += wf) { \
+    unsigned int col_sse; \
+    se += vp9_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+        src + col, src_stride, x_offset, y_offset, \
+        dst + col, dst_stride, sec + col, w, h, &col_sse); \
+    sse += col_sse; \
+  } \
+  se = ROUND_POWER_OF_TWO(se, 2); \
+  sse = ROUND_POWER_OF_TWO(sse, 4); \
+  *sse_ptr = sse; \
+  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+} \
+\
+uint32_t vp9_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
+    const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
+    const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
+    const uint8_t *sec8) { \
+  const uint16_t *const src = CONVERT_TO_SHORTPTR(src8); \
+  const uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8); \
+  const uint16_t *const sec = CONVERT_TO_SHORTPTR(sec8); \
+  uint64_t long_sse = 0; \
+  uint32_t sse; \
+  int se = 0; \
+  int row, col; \
+  for (row = 0; row < h; row += 16) { \
+    const int rows = h - row < 16 ? h - row : 16; \
+    for (col = 0; col < w; col += wf) { \
+      unsigned int strip_sse; \
+      se += vp9_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+          src + col + (row * src_stride), src_stride, x_offset, y_offset, \
+          dst + col + (row * dst_stride), dst_stride, \
+          sec + col + (row * w), w, rows, &strip_sse); \
+      long_sse += strip_sse; \
+    } \
+  } \
+  se = ROUND_POWER_OF_TWO(se, 4); \
+  sse = ROUND_POWER_OF_TWO(long_sse, 8); \
+  *sse_ptr = sse; \
+  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+}
+
+
+/* FNS(opt1) instantiates FN for every supported block size.  Columns are:
+ * width, height, kernel width, log2(width), log2(height), instruction-set
+ * suffix, and the cast applied to the squared sum.
+ *
+ * The 8x16 row used to pass (4, 3) for the two logarithms.  FN only uses
+ * their sum, so the generated code is unchanged, but the row now states
+ * log2(8) = 3 and log2(16) = 4 like the non-averaging table. */
+#define FNS(opt1) \
+FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
+FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
+FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
+FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
+FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
+FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
+FN(16, 16, 16, 4, 4, opt1, (int64_t)); \
+FN(16, 8, 16, 4, 3, opt1, (int64_t)); \
+FN(8, 16, 8, 3, 4, opt1, (int64_t)); \
+FN(8, 8, 8, 3, 3, opt1, (int64_t)); \
+FN(8, 4, 8, 3, 2, opt1, (int64_t));
+
+FNS(sse2);
+
+#undef FNS
+#undef FN