--- /dev/null
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+#if CONFIG_VP9_HIGHBITDEPTH
+const int number_of_iterations = 1000;  // loop count used by each ErrorBlockTest case below
+
+typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,  // common signature of the tested and the reference block-error function
+ const tran_low_t *dqcoeff, intptr_t block_size,
+ int64_t *ssz, int bps);  // the tests compare both the return value and the value written to *ssz
+typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, vpx_bit_depth_t>  // (function under test, reference function, bit depth)
+ ErrorBlockParam;
+class ErrorBlockTest
+ : public ::testing::TestWithParam<ErrorBlockParam> {
+ public:
+ virtual ~ErrorBlockTest() {}
+ virtual void SetUp() {
+ error_block_op_ = GET_PARAM(0);
+ ref_error_block_op_ = GET_PARAM(1);
+ bit_depth_ = GET_PARAM(2);
+ }
+
+ virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ vpx_bit_depth_t bit_depth_;
+ ErrorBlockFunc error_block_op_;
+ ErrorBlockFunc ref_error_block_op_;
+};
+
+// Runs the tested and the reference function on identical random blocks and
+// requires that both the returned error and the value stored through the ssz
+// pointer agree on every iteration.
+TEST_P(ErrorBlockTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, 4096);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
+  int mismatch_total = 0;
+  int first_failure = -1;
+  for (int iter = 0; iter < number_of_iterations; ++iter) {
+    // Cycles through 16, 32, ..., 4096 coefficients
+    // (all block sizes from 4x4, 8x4 .. 64x64).
+    const intptr_t block_size = 16 << (iter % 9);
+    for (int idx = 0; idx < block_size; ++idx) {
+      coeff[idx] = rnd(2 << 20) - (1 << 20);
+      dqcoeff[idx] = rnd(2 << 20) - (1 << 20);
+    }
+
+    int64_t ref_ssz;
+    const int64_t ref_ret =
+        ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
+
+    int64_t ssz;
+    int64_t ret;
+    ASM_REGISTER_STATE_CHECK(
+        ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_));
+
+    // One mismatch is counted per iteration, whichever output differs.
+    const int mismatch = (ref_ret != ret || ref_ssz != ssz) ? 1 : 0;
+    if (mismatch != 0 && mismatch_total == 0) {
+      first_failure = iter;
+    }
+    mismatch_total += mismatch;
+  }
+  EXPECT_EQ(0, mismatch_total)
+      << "Error: Error Block Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+
+// Compares the tested and the reference function on blocks filled with
+// +/-max_val in all four sign combinations, interleaved with blocks of small
+// random values. max_val is halved each time the pattern cycle reaches the
+// random pattern at the smallest block size, so that successively lower bit
+// boundaries are exercised.
+TEST_P(ErrorBlockTest, ExtremeValues) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, 4096);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
+  int mismatch_total = 0;
+  int first_failure = -1;
+  int max_val = (1 << 20) - 1;
+  for (int iter = 0; iter < number_of_iterations; ++iter) {
+    const int size_step = iter % 9;
+    const int pattern = (iter / 9) % 5;
+
+    // Change the maximum coeff value, to test different bit boundaries
+    if (pattern == 4 && size_step == 0) {
+      max_val >>= 1;
+    }
+
+    // Cycles through 16, 32, ..., 4096 coefficients
+    // (all block sizes from 4x4, 8x4 .. 64x64).
+    const intptr_t block_size = 16 << size_step;
+    if (pattern < 4) {
+      // Constant fill at the extremes: bit 0 of the pattern picks the sign of
+      // coeff, bit 1 picks the sign of dqcoeff.
+      const int coeff_fill = (pattern & 1) ? max_val : -max_val;
+      const int dqcoeff_fill = (pattern & 2) ? max_val : -max_val;
+      for (int idx = 0; idx < block_size; ++idx) {
+        coeff[idx] = coeff_fill;
+        dqcoeff[idx] = dqcoeff_fill;
+      }
+    } else {
+      for (int idx = 0; idx < block_size; ++idx) {
+        coeff[idx] = rnd(2 << 14) - (1 << 14);
+        dqcoeff[idx] = rnd(2 << 14) - (1 << 14);
+      }
+    }
+
+    int64_t ref_ssz;
+    const int64_t ref_ret =
+        ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
+
+    int64_t ssz;
+    int64_t ret;
+    ASM_REGISTER_STATE_CHECK(
+        ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_));
+
+    // One mismatch is counted per iteration, whichever output differs.
+    const int mismatch = (ref_ret != ret || ref_ssz != ssz) ? 1 : 0;
+    if (mismatch != 0 && mismatch_total == 0) {
+      first_failure = iter;
+    }
+    mismatch_total += mismatch;
+  }
+  EXPECT_EQ(0, mismatch_total)
+      << "Error: Error Block Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+
+using std::tr1::make_tuple;
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(  // runs every ErrorBlockTest case with the SSE2 function checked against the C function
+ SSE2_C_COMPARE, ErrorBlockTest,
+ ::testing::Values(  // each tuple is (function under test, reference function, bit depth)
+ make_tuple(&vp9_highbd_block_error_sse2,
+ &vp9_highbd_block_error_c, VPX_BITS_10),
+ make_tuple(&vp9_highbd_block_error_sse2,
+ &vp9_highbd_block_error_c, VPX_BITS_12),
+ make_tuple(&vp9_highbd_block_error_sse2,
+ &vp9_highbd_block_error_c, VPX_BITS_8)));
+#endif // HAVE_SSE2
+#endif // CONFIG_VP9_HIGHBITDEPTH
+} // namespace
--- /dev/null
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+#if CONFIG_VP9_HIGHBITDEPTH
+const int number_of_iterations = 100;  // loop count used by each quantizer test case below
+
+typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,  // common signature of the tested and the reference quantizer
+ int skip_block, const int16_t *zbin,
+ const int16_t *round, const int16_t *quant,
+ const int16_t *quant_shift,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff,  // outputs compared by the tests
+ const int16_t *dequant, int zbin_oq_value,
+ uint16_t *eob, const int16_t *scan,  // *eob is an output and is compared by the tests as well
+ const int16_t *iscan);
+typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t>  // (function under test, reference function, bit depth)
+ QuantizeParam;
+// Value-parameterized fixture for the quantizer comparison tests that cover
+// the 4x4, 8x8 and 16x16 transform sizes. It stores the function under test,
+// the reference function and a mask derived from the bit depth that the tests
+// apply to their random inputs.
+class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
+ public:
+  virtual ~QuantizeTest() {}
+
+  // Unpacks the (tested, reference, bit depth) parameter tuple.
+  virtual void SetUp() {
+    ref_quantize_op_ = GET_PARAM(1);
+    quantize_op_ = GET_PARAM(0);
+    bit_depth_ = GET_PARAM(2);
+    // All-ones mask that is bit_depth_ bits wide.
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
+  QuantizeFunc quantize_op_;
+  QuantizeFunc ref_quantize_op_;
+};
+// Value-parameterized fixture for the 32x32 quantizer comparison tests. It
+// stores the function under test, the reference function and a mask derived
+// from the bit depth that the tests apply to their random inputs.
+class Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
+ public:
+  virtual ~Quantize32Test() {}
+
+  // Unpacks the (tested, reference, bit depth) parameter tuple.
+  virtual void SetUp() {
+    ref_quantize_op_ = GET_PARAM(1);
+    quantize_op_ = GET_PARAM(0);
+    bit_depth_ = GET_PARAM(2);
+    // All-ones mask that is bit_depth_ bits wide.
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
+  QuantizeFunc quantize_op_;
+  QuantizeFunc ref_quantize_op_;
+};
+
+// Runs the tested and the reference quantizer on identical, fully random
+// coefficient blocks and quantizer tables, and requires that the quantized
+// coefficients, the dequantized coefficients and the end-of-block value all
+// agree. Iteration 0 is run with skip_block set.
+TEST_P(QuantizeTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int zbin_oq_value = 0;
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
+    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
+    int err_count = 0;
+    // Both eob outputs start from the same random value so that a function
+    // which fails to write its eob is detected.
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = rnd.Rand16() & mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr,
+                                          zbin_oq_value, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+    // Compare every output coefficient. The bound must be count: sz is the
+    // transform-size enum (0..2 here), so using it as the bound would check
+    // at most two coefficients and none at all for the smallest size.
+    for (int j = 0; j < count; ++j) {
+      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+// Runs the tested and the reference 32x32 quantizer on identical, fully
+// random coefficient blocks and quantizer tables, and requires that the
+// quantized coefficients, the dequantized coefficients and the end-of-block
+// value all agree. Iteration 0 is run with skip_block set.
+TEST_P(Quantize32Test, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int zbin_oq_value = 0;
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = TX_32X32;
+    TX_TYPE tx_type = (TX_TYPE)(i % 4);
+
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 1024
+    int err_count = 0;
+    // Both eob outputs start from the same random value so that a function
+    // which fails to write its eob is detected.
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = rnd.Rand16() & mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr,
+                                          zbin_oq_value, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+    // Compare every output coefficient. The bound must be count: sz is the
+    // transform-size enum, so using it as the bound would check only the
+    // first few of the 1024 coefficients.
+    for (int j = 0; j < count; ++j) {
+      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+// Runs the tested and the reference quantizer on blocks that are zero except
+// for (at most) two randomly placed random values, which stresses the
+// end-of-block computation. The quantized coefficients, the dequantized
+// coefficients and the end-of-block value must all agree. Iteration 0 is run
+// with skip_block set.
+TEST_P(QuantizeTest, EOBCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int zbin_oq_value = 0;
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
+    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
+    int err_count = 0;
+    // Both eob outputs start from the same random value so that a function
+    // which fails to write its eob is detected.
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = 0;
+    }
+    // Two random entries. The position is drawn before the value in separate
+    // statements: in a single assignment expression the order of the two
+    // generator calls is unspecified before C++17, which would make the
+    // "deterministic" input depend on the compiler.
+    for (int n = 0; n < 2; ++n) {
+      const int pos = rnd(count);
+      coeff_ptr[pos] = rnd.Rand16() & mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr,
+                                          zbin_oq_value, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+
+    // Compare every output coefficient. The bound must be count: sz is the
+    // transform-size enum (0..2 here), so using it as the bound would check
+    // at most two coefficients and none at all for the smallest size.
+    for (int j = 0; j < count; ++j) {
+      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+// Runs the tested and the reference 32x32 quantizer on blocks that are zero
+// except for (at most) two randomly placed random values, which stresses the
+// end-of-block computation. The quantized coefficients, the dequantized
+// coefficients and the end-of-block value must all agree. Iteration 0 is run
+// with skip_block set.
+TEST_P(Quantize32Test, EOBCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int zbin_oq_value = 0;
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = TX_32X32;
+    TX_TYPE tx_type = (TX_TYPE)(i % 4);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 1024
+    int err_count = 0;
+    // Both eob outputs start from the same random value so that a function
+    // which fails to write its eob is detected.
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = 0;
+    }
+    // Two random entries. The position is drawn before the value in separate
+    // statements: in a single assignment expression the order of the two
+    // generator calls is unspecified before C++17, which would make the
+    // "deterministic" input depend on the compiler.
+    for (int n = 0; n < 2; ++n) {
+      const int pos = rnd(count);
+      coeff_ptr[pos] = rnd.Rand16() & mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr,
+                                          zbin_oq_value, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+
+    // Compare every output coefficient. The bound must be count: sz is the
+    // transform-size enum, so using it as the bound would check only the
+    // first few of the 1024 coefficients.
+    for (int j = 0; j < count; ++j) {
+      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(  // runs every QuantizeTest case with the SSE2 quantizer checked against the C quantizer
+ SSE2_C_COMPARE, QuantizeTest,
+ ::testing::Values(  // each tuple is (function under test, reference function, bit depth)
+ make_tuple(&vp9_highbd_quantize_b_sse2,
+ &vp9_highbd_quantize_b_c, VPX_BITS_8),
+ make_tuple(&vp9_highbd_quantize_b_sse2,
+ &vp9_highbd_quantize_b_c, VPX_BITS_10),
+ make_tuple(&vp9_highbd_quantize_b_sse2,
+ &vp9_highbd_quantize_b_c, VPX_BITS_12)));
+INSTANTIATE_TEST_CASE_P(  // runs every Quantize32Test case with the SSE2 32x32 quantizer checked against the C 32x32 quantizer
+ SSE2_C_COMPARE, Quantize32Test,
+ ::testing::Values(  // each tuple is (function under test, reference function, bit depth)
+ make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+ &vp9_highbd_quantize_b_32x32_c, VPX_BITS_8),
+ make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+ &vp9_highbd_quantize_b_32x32_c, VPX_BITS_10),
+ make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+ &vp9_highbd_quantize_b_32x32_c, VPX_BITS_12)));
+#endif // HAVE_SSE2
+#endif // CONFIG_VP9_HIGHBITDEPTH
+} // namespace
--- /dev/null
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h> // SSE2
+
+#include "vp9/common/vp9_common.h"
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Quantizes `count` coefficients: writes qcoeff_ptr, dqcoeff_ptr and *eob_ptr. (from vp9_idct.h: typedef int32_t tran_low_t;)
+void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,  // coeff_ptr is read with aligned 16-byte loads, four coefficients at a time
+ int skip_block,  // nonzero: outputs are only zeroed and *eob_ptr is set to 0
+ const int16_t *zbin_ptr, const int16_t *round_ptr,  // two-entry tables: [0] is used for coefficient index 0, [1] for every other index
+ const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,  // outputs, `count` entries each
+ const int16_t *dequant_ptr,
+ int zbin_oq_value, uint16_t *eob_ptr,  // zbin_oq_value is added to both zbin entries
+ const int16_t *scan, const int16_t *iscan) {  // scan is unused; iscan[k] is the position used for the eob computation
+ int i, j, non_zero_regs = (int)count / 4, eob_i = -1;  // non_zero_regs: number of leading 4-coefficient groups that must be quantized; any count % 4 remainder is never visited
+ __m128i zbins[2];  // [0]: thresholds for the first group (lane 0 uses zbin_ptr[0]); [1]: thresholds for all later groups
+ __m128i nzbins[2];  // negated thresholds
+
+ zbins[0] = _mm_set_epi32((int)(zbin_ptr[1] + zbin_oq_value),
+ (int)(zbin_ptr[1] + zbin_oq_value),
+ (int)(zbin_ptr[1] + zbin_oq_value),
+ (int)(zbin_ptr[0] + zbin_oq_value));  // _mm_set_epi32 takes the highest lane first, so this last argument is lane 0
+ zbins[1] = _mm_set1_epi32((int)(zbin_ptr[1] + zbin_oq_value));
+
+ nzbins[0] = _mm_setzero_si128();
+ nzbins[1] = _mm_setzero_si128();
+ nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);  // 0 - zbin
+ nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
+
+ (void)scan;
+
+ vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));  // outputs default to zero; only coefficients that pass the threshold are written below
+ vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass: walk backwards from the last group and drop trailing groups whose four coefficients all lie strictly inside (-zbin, zbin)
+ for (i = ((int)count / 4) - 1; i >= 0; i--) {
+ __m128i coeffs, cmp1, cmp2;
+ int test;
+ coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+ cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);  // coeff < zbin
+ cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);  // coeff > -zbin
+ cmp1 = _mm_and_si128(cmp1, cmp2);
+ test = _mm_movemask_epi8(cmp1);  // one bit per byte, i.e. four bits per 32-bit lane
+ if (test == 0xffff)  // all four lanes are inside the zero bin
+ non_zero_regs--;
+ else
+ break;  // first group (from the end) holding a coefficient outside the zero bin
+ }
+
+ // Quantization pass: process the groups that survived the pre-scan
+ for (i = 0; i < non_zero_regs; i++) {
+ __m128i coeffs, coeffs_sign, tmp1, tmp2;
+ int test;
+ int abs_coeff[4];
+ int coeff_sign[4];
+
+ coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+ coeffs_sign = _mm_srai_epi32(coeffs, 31);  // 0 for non-negative lanes, -1 for negative lanes
+ coeffs = _mm_sub_epi32(
+ _mm_xor_si128(coeffs, coeffs_sign), coeffs_sign);  // absolute value: (x ^ sign) - sign
+ tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]);
+ tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]);
+ tmp1 = _mm_or_si128(tmp1, tmp2);  // |coeff| >= zbin
+ test = _mm_movemask_epi8(tmp1);
+ _mm_storeu_si128((__m128i*)abs_coeff, coeffs);  // spill the lanes so the rest is done in scalar code
+ _mm_storeu_si128((__m128i*)coeff_sign, coeffs_sign);
+
+ for (j = 0; j < 4; j++) {
+ if (test & (1 << (4*j))) {  // bit 4*j is the lowest mask bit of lane j
+ int k = 4 * i + j;  // coefficient index
+ int64_t tmp = clamp(abs_coeff[j] + round_ptr[k != 0],
+ INT32_MIN, INT32_MAX);
+ tmp = ((((tmp * quant_ptr[k != 0]) >> 16) + tmp) *
+ quant_shift_ptr[k != 0]) >> 16; // quantization
+ qcoeff_ptr[k] = (tmp ^ coeff_sign[j]) - coeff_sign[j];  // reapply the sign
+ dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
+ if (tmp)
+ eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;  // track the largest iscan position holding a nonzero quantized value
+ }
+ }
+ }
+ }
+ *eob_ptr = eob_i + 1;  // 0 when no quantized value is nonzero or when skip_block is set
+}
+
+
+void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr,  // quantizes n_coeffs coefficients: writes qcoeff_ptr, dqcoeff_ptr and *eob_ptr; coeff_ptr is read with aligned 16-byte loads
+ intptr_t n_coeffs, int skip_block,  // skip_block nonzero: outputs are only zeroed and *eob_ptr is set to 0
+ const int16_t *zbin_ptr,  // two-entry tables: [0] is used for coefficient index 0, [1] for every other index
+ const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr,  // outputs, n_coeffs entries each
+ tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ int zbin_oq_value, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {  // scan is unused; iscan[rc] is the position used for the eob computation
+ __m128i zbins[2];  // [0]: thresholds for the first group (lane 0 uses the zbin_ptr[0] value); [1]: thresholds for all later groups
+ __m128i nzbins[2];  // negated thresholds
+ int idx = 0;  // number of entries stored in idx_arr
+ int idx_arr[1024];  // indices of coefficients outside the zero bin; one entry per such coefficient, so n_coeffs must not exceed 1024
+ int i, eob = -1;
+ const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1);  // NOTE(review): zbin_oq_value is included here and added again when zbins is built below; confirm this double add matches the C reference
+ const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1);
+ (void)scan;
+ zbins[0] = _mm_set_epi32((zbin1_tmp + zbin_oq_value),
+ (zbin1_tmp + zbin_oq_value),
+ (zbin1_tmp + zbin_oq_value),
+ (zbin0_tmp + zbin_oq_value));  // _mm_set_epi32 takes the highest lane first, so this last argument is lane 0
+ zbins[1] = _mm_set1_epi32((zbin1_tmp + zbin_oq_value));
+
+ nzbins[0] = _mm_setzero_si128();
+ nzbins[1] = _mm_setzero_si128();
+ nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);  // 0 - zbin
+ nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
+
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // outputs default to zero; only coefficients selected by the pre-scan are written below
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass: record the index of every coefficient that is not strictly inside (-zbin, zbin)
+ for (i = 0; i < n_coeffs / 4; i++) {
+ __m128i coeffs, cmp1, cmp2;
+ int test;
+ coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
+ cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);  // coeff < zbin
+ cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);  // coeff > -zbin
+ cmp1 = _mm_and_si128(cmp1, cmp2);
+ test = _mm_movemask_epi8(cmp1);  // one bit per byte, i.e. four bits per 32-bit lane; a set nibble means the lane is inside the zero bin
+ if (!(test & 0xf))  // lane 0 is outside the zero bin
+ idx_arr[idx++] = i*4;
+ if (!(test & 0xf0))  // lane 1
+ idx_arr[idx++] = i*4 + 1;
+ if (!(test & 0xf00))  // lane 2
+ idx_arr[idx++] = i*4 + 2;
+ if (!(test & 0xf000))  // lane 3
+ idx_arr[idx++] = i*4 + 3;
+ }
+
+ // Quantization pass: only process the coefficients selected in
+ // pre-scan pass. Note: idx can be zero.
+ for (i = 0; i < idx; i++) {
+ const int rc = idx_arr[i];  // coefficient index
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);  // presumably 0 or -1 (relies on an arithmetic right shift of a negative int)
+ int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // absolute value: (x ^ sign) - sign
+ int64_t tmp = clamp(abs_coeff +
+ ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
+ INT32_MIN, INT32_MAX);
+ tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+ quant_shift_ptr[rc != 0]) >> 15;  // final shift is 15 here
+
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;  // reapply the sign
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;  // dequantized value is halved
+
+ if (tmp)
+ eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;  // track the largest iscan position holding a nonzero quantized value
+ }
+ }
+ *eob_ptr = eob + 1;  // 0 when no quantized value is nonzero or when skip_block is set
+}
+#endif