From 005fc6970b4c1997d40b98d2ac01d34f39310606 Mon Sep 17 00:00:00 2001 From: Dmitry Kovalev Date: Thu, 6 Feb 2014 11:54:15 -0800 Subject: [PATCH] Finally removing "short" from transform names. Change-Id: I5259b68dc1bcceb153e3ffe638a79a59a3019e9d --- test/dct16x16_test.cc | 18 ++-- test/fdct4x4_test.cc | 18 ++-- test/fdct8x8_test.cc | 18 ++-- vp9/common/vp9_rtcd_defs.sh | 12 +-- vp9/encoder/vp9_dct.c | 179 +++++++++++++++------------------ vp9/encoder/vp9_dct.h | 32 ------ vp9/encoder/vp9_encodemb.c | 7 +- vp9/encoder/vp9_rdopt.c | 2 +- vp9/encoder/x86/vp9_dct_avx2.c | 77 ++++++++------ vp9/encoder/x86/vp9_dct_sse2.c | 83 ++++++++------- vp9/vp9cx.mk | 1 - 11 files changed, 212 insertions(+), 235 deletions(-) delete mode 100644 vp9/encoder/vp9_dct.h diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc index ce0431860..8d115fad3 100644 --- a/test/dct16x16_test.cc +++ b/test/dct16x16_test.cc @@ -273,7 +273,7 @@ void fdct16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { } void fht16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { - vp9_short_fht16x16_c(in, out, stride, tx_type); + vp9_fht16x16_c(in, out, stride, tx_type); } class Trans16x16TestBase { @@ -507,10 +507,10 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( C, Trans16x16HT, ::testing::Values( - make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 0), - make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 1), - make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 2), - make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 3))); + make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0), + make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1), + make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2), + make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3))); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( @@ -521,9 +521,9 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, Trans16x16HT, ::testing::Values( - make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0), - make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1), - make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2), - make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3))); + make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0), + make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1), + make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2), + make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3))); #endif } // namespace diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index 5db5f5cae..dc6668759 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -45,7 +45,7 @@ void fdct4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { } void fht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { - vp9_short_fht4x4_c(in, out, stride, tx_type); + vp9_fht4x4_c(in, out, stride, tx_type); } class Trans4x4TestBase { @@ -281,10 +281,10 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( C, Trans4x4HT, ::testing::Values( - make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 0), - make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 1), - make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 2), - make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 3))); + make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0), + make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1), + make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2), + make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3))); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( @@ -295,10 +295,10 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, Trans4x4HT, ::testing::Values( - make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0), - make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1), - make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2), - make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3))); + make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0), + make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1), + make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2), + make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3))); #endif } // namespace diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index beef98055..98aabe6a2 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -44,7 +44,7 @@ void fdct8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { } void fht8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { - vp9_short_fht8x8_c(in, out, stride, tx_type); + vp9_fht8x8_c(in, out, stride, tx_type); } class FwdTrans8x8TestBase { @@ -308,10 +308,10 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( C, FwdTrans8x8HT, ::testing::Values( - make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 0), - make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 1), - make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 2), - make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 3))); + make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0), + make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1), + make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2), + make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3))); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( @@ -321,9 +321,9 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, FwdTrans8x8HT, ::testing::Values( - make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0), - make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1), - make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2), - make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3))); + make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0), + make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1), + make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2), + make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3))); #endif } // namespace diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 04a40bd58..7bdd11eb0 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -707,14 +707,14 @@ if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then fi # fdct functions -prototype void vp9_short_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_short_fht4x4 sse2 avx2 +prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type" +specialize vp9_fht4x4 sse2 avx2 -prototype void vp9_short_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_short_fht8x8 sse2 avx2 +prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type" +specialize vp9_fht8x8 sse2 avx2 -prototype void vp9_short_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_short_fht16x16 sse2 avx2 +prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type" +specialize vp9_fht16x16 sse2 avx2 prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride" specialize vp9_fwht4x4 diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index a840b480a..a9d168cc8 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -18,8 +18,6 @@ #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_systemdependent.h" -#include "vp9/encoder/vp9_dct.h" - static INLINE int fdct_round_shift(int input) { int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); assert(INT16_MIN <= rv && rv <= INT16_MAX); @@ -157,32 +155,36 @@ static const transform_2d FHT_4[] = { { fadst4, fadst4 } // ADST_ADST = 3 }; -void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, - int stride, int tx_type) { - int16_t out[4 * 4]; - int16_t *outptr = &out[0]; - int i, j; - int16_t temp_in[4], temp_out[4]; - const transform_2d ht = FHT_4[tx_type]; +void vp9_fht4x4_c(const int16_t *input, int16_t *output, + int stride, int tx_type) { + if (tx_type == DCT_DCT) { + vp9_fdct4x4_c(input, output, stride); + } else { + int16_t out[4 * 4]; + int16_t *outptr = &out[0]; + int i, j; + int16_t temp_in[4], temp_out[4]; + const transform_2d ht = FHT_4[tx_type]; - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = input[j * stride + i] * 16; - if (i == 0 && temp_in[0]) - temp_in[0] += 1; - ht.cols(temp_in, temp_out); - for (j = 0; j < 4; ++j) - outptr[j * 4 + i] = temp_out[j]; - } + // Columns + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = input[j * stride + i] * 16; + if (i == 0 && temp_in[0]) + temp_in[0] += 1; + ht.cols(temp_in, temp_out); + for (j = 0; j < 4; ++j) + outptr[j * 4 + i] = temp_out[j]; + } - // Rows - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j + i * 4]; - ht.rows(temp_in, temp_out); - for (j = 0; j < 4; ++j) - output[j + i * 4] = (temp_out[j] + 1) >> 2; + // Rows + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j + i * 4]; + ht.rows(temp_in, temp_out); + for (j = 0; j < 4; ++j) + output[j + i * 4] = (temp_out[j] + 1) >> 2; + } } } @@ -565,30 +567,34 @@ static const transform_2d FHT_8[] = { { fadst8, fadst8 } // ADST_ADST = 3 }; -void vp9_short_fht8x8_c(const int16_t *input, int16_t *output, - int stride, int tx_type) { - int16_t out[64]; - int16_t *outptr = &out[0]; - int i, j; - int16_t temp_in[8], temp_out[8]; - const transform_2d ht = FHT_8[tx_type]; - - // Columns - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = input[j * stride + i] * 4; - ht.cols(temp_in, temp_out); - for (j = 0; j < 8; ++j) - outptr[j * 8 + i] = temp_out[j]; - } +void vp9_fht8x8_c(const int16_t *input, int16_t *output, + int stride, int tx_type) { + if (tx_type == DCT_DCT) { + vp9_fdct8x8_c(input, output, stride); + } else { + int16_t out[64]; + int16_t *outptr = &out[0]; + int i, j; + int16_t temp_in[8], temp_out[8]; + const transform_2d ht = FHT_8[tx_type]; + + // Columns + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = input[j * stride + i] * 4; + ht.cols(temp_in, temp_out); + for (j = 0; j < 8; ++j) + outptr[j * 8 + i] = temp_out[j]; + } - // Rows - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j + i * 8]; - ht.rows(temp_in, temp_out); - for (j = 0; j < 8; ++j) - output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; + // Rows + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j + i * 8]; + ht.rows(temp_in, temp_out); + for (j = 0; j < 8; ++j) + output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; + } } } @@ -958,31 +964,34 @@ static const transform_2d FHT_16[] = { { fadst16, fadst16 } // ADST_ADST = 3 }; -void vp9_short_fht16x16_c(const int16_t *input, int16_t *output, - int stride, int tx_type) { - int16_t out[256]; - int16_t *outptr = &out[0]; - int i, j; - int16_t temp_in[16], temp_out[16]; - const transform_2d ht = FHT_16[tx_type]; - - // Columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = input[j * stride + i] * 4; - ht.cols(temp_in, temp_out); - for (j = 0; j < 16; ++j) - outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; -// outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; - } +void vp9_fht16x16_c(const int16_t *input, int16_t *output, + int stride, int tx_type) { + if (tx_type == DCT_DCT) { + vp9_fdct16x16_c(input, output, stride); + } else { + int16_t out[256]; + int16_t *outptr = &out[0]; + int i, j; + int16_t temp_in[16], temp_out[16]; + const transform_2d ht = FHT_16[tx_type]; + + // Columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = input[j * stride + i] * 4; + ht.cols(temp_in, temp_out); + for (j = 0; j < 16; ++j) + outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; + } - // Rows - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j + i * 16]; - ht.rows(temp_in, temp_out); - for (j = 0; j < 16; ++j) - output[j + i * 16] = temp_out[j]; + // Rows + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j + i * 16]; + ht.rows(temp_in, temp_out); + for (j = 0; j < 16; ++j) + output[j + i * 16] = temp_out[j]; + } } } @@ -1375,27 +1384,3 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { out[j + i * 32] = temp_out[j]; } } - -void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride) { - if (tx_type == DCT_DCT) - vp9_fdct4x4(input, output, stride); - else - vp9_short_fht4x4(input, output, stride, tx_type); -} - -void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride) { - if (tx_type == DCT_DCT) - vp9_fdct8x8(input, output, stride); - else - vp9_short_fht8x8(input, output, stride, tx_type); -} - -void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride) { - if (tx_type == DCT_DCT) - vp9_fdct16x16(input, output, stride); - else - vp9_short_fht16x16(input, output, stride, tx_type); -} diff --git a/vp9/encoder/vp9_dct.h b/vp9/encoder/vp9_dct.h deleted file mode 100644 index cf5f001a9..000000000 --- a/vp9/encoder/vp9_dct.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_ENCODER_VP9_DCT_H_ -#define VP9_ENCODER_VP9_DCT_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride); - -void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride); - -void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_DCT_H_ diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 376a899e0..c7507c13b 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -19,7 +19,6 @@ #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_systemdependent.h" -#include "vp9/encoder/vp9_dct.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_rdopt.h" @@ -571,7 +570,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, if (!x->skip_recode) { vp9_subtract_block(16, 16, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); - vp9_fht16x16(tx_type, src_diff, coeff, diff_stride); + vp9_fht16x16(src_diff, coeff, diff_stride, tx_type); vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, scan_order->scan, @@ -591,7 +590,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, if (!x->skip_recode) { vp9_subtract_block(8, 8, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); - vp9_fht8x8(tx_type, src_diff, coeff, diff_stride); + vp9_fht8x8(src_diff, coeff, diff_stride, tx_type); vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, scan_order->scan, @@ -617,7 +616,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_subtract_block(4, 4, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); if (tx_type != DCT_DCT) - vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type); + vp9_fht4x4(src_diff, coeff, diff_stride, tx_type); else x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e5230feb4..7b17b8582 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1064,7 +1064,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, so = &vp9_scan_orders[TX_4X4][tx_type]; if (tx_type != DCT_DCT) - vp9_short_fht4x4(src_diff, coeff, 8, tx_type); + vp9_fht4x4(src_diff, coeff, 8, tx_type); else x->fwd_txm4x4(src_diff, coeff, 8); diff --git a/vp9/encoder/x86/vp9_dct_avx2.c b/vp9/encoder/x86/vp9_dct_avx2.c index ea031fb07..2b82d9750 100644 --- a/vp9/encoder/x86/vp9_dct_avx2.c +++ b/vp9/encoder/x86/vp9_dct_avx2.c @@ -244,32 +244,36 @@ void fadst4_avx2(__m128i *in) { transpose_4x4_avx2(in); } -void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht4x4_avx2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in[4]; - load_buffer_4x4_avx2(input, in, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct4_avx2(in); - fdct4_avx2(in); + case DCT_DCT: + vp9_fdct4x4_avx2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_4x4_avx2(input, in, stride); fadst4_avx2(in); fdct4_avx2(in); + write_buffer_4x4_avx2(output, in); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_4x4_avx2(input, in, stride); fdct4_avx2(in); fadst4_avx2(in); + write_buffer_4x4_avx2(output, in); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_4x4_avx2(input, in, stride); fadst4_avx2(in); fadst4_avx2(in); + write_buffer_4x4_avx2(output, in); break; default: assert(0); break; } - write_buffer_4x4_avx2(output, in); } void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) { @@ -1028,33 +1032,39 @@ void fadst8_avx2(__m128i *in) { array_transpose_8x8_avx2(in, in); } -void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht8x8_avx2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in[8]; - load_buffer_8x8_avx2(input, in, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct8_avx2(in); - fdct8_avx2(in); + case DCT_DCT: + vp9_fdct8x8_avx2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_8x8_avx2(input, in, stride); fadst8_avx2(in); fdct8_avx2(in); + right_shift_8x8_avx2(in, 1); + write_buffer_8x8_avx2(output, in, 8); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_8x8_avx2(input, in, stride); fdct8_avx2(in); fadst8_avx2(in); + right_shift_8x8_avx2(in, 1); + write_buffer_8x8_avx2(output, in, 8); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_8x8_avx2(input, in, stride); fadst8_avx2(in); fadst8_avx2(in); + right_shift_8x8_avx2(in, 1); + write_buffer_8x8_avx2(output, in, 8); break; default: assert(0); break; } - right_shift_8x8_avx2(in, 1); - write_buffer_8x8_avx2(output, in, 8); } void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) { @@ -2534,36 +2544,39 @@ void fadst16_avx2(__m128i *in0, __m128i *in1) { array_transpose_16x16_avx2(in0, in1); } -void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht16x16_avx2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in0[16], in1[16]; - load_buffer_16x16_avx2(input, in0, in1, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct16_avx2(in0, in1); - right_shift_16x16_avx2(in0, in1); - fdct16_avx2(in0, in1); + case DCT_DCT: + vp9_fdct16x16_avx2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_16x16_avx2(input, in0, in1, stride); fadst16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); fdct16_avx2(in0, in1); + write_buffer_16x16_avx2(output, in0, in1, 16); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_16x16_avx2(input, in0, in1, stride); fdct16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); fadst16_avx2(in0, in1); + write_buffer_16x16_avx2(output, in0, in1, 16); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_16x16_avx2(input, in0, in1, stride); fadst16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); fadst16_avx2(in0, in1); + write_buffer_16x16_avx2(output, in0, in1, 16); break; default: assert(0); break; } - write_buffer_16x16_avx2(output, in0, in1, 16); } #define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2 diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index c876cc273..852cf8667 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -242,32 +242,36 @@ void fadst4_sse2(__m128i *in) { transpose_4x4(in); } -void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht4x4_sse2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in[4]; - load_buffer_4x4(input, in, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct4_sse2(in); - fdct4_sse2(in); + case DCT_DCT: + vp9_fdct4x4_sse2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_4x4(input, in, stride); fadst4_sse2(in); fdct4_sse2(in); + write_buffer_4x4(output, in); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_4x4(input, in, stride); fdct4_sse2(in); fadst4_sse2(in); + write_buffer_4x4(output, in); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_4x4(input, in, stride); fadst4_sse2(in); fadst4_sse2(in); + write_buffer_4x4(output, in); break; - default: - assert(0); - break; + default: + assert(0); + break; } - write_buffer_4x4(output, in); } void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) { @@ -1026,33 +1030,39 @@ void fadst8_sse2(__m128i *in) { array_transpose_8x8(in, in); } -void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht8x8_sse2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in[8]; - load_buffer_8x8(input, in, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct8_sse2(in); - fdct8_sse2(in); + case DCT_DCT: + vp9_fdct8x8_sse2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_8x8(input, in, stride); fadst8_sse2(in); fdct8_sse2(in); + right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_8x8(input, in, stride); fdct8_sse2(in); fadst8_sse2(in); + right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_8x8(input, in, stride); fadst8_sse2(in); fadst8_sse2(in); + right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); break; default: assert(0); break; } - right_shift_8x8(in, 1); - write_buffer_8x8(output, in, 8); } void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { @@ -2532,36 +2542,39 @@ void fadst16_sse2(__m128i *in0, __m128i *in1) { array_transpose_16x16(in0, in1); } -void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht16x16_sse2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in0[16], in1[16]; - load_buffer_16x16(input, in0, in1, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct16_sse2(in0, in1); - right_shift_16x16(in0, in1); - fdct16_sse2(in0, in1); + case DCT_DCT: + vp9_fdct16x16_sse2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_16x16(input, in0, in1, stride); fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); fdct16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_16x16(input, in0, in1, stride); fdct16_sse2(in0, in1); right_shift_16x16(in0, in1); fadst16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_16x16(input, in0, in1, stride); fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); fadst16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); break; default: assert(0); break; } - write_buffer_16x16(output, in0, in1, 16); } #define FDCT32x32_2D vp9_fdct32x32_rd_sse2 diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index c225f54b8..64f9f094c 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -19,7 +19,6 @@ VP9_CX_SRCS-yes += vp9_cx_iface.c VP9_CX_SRCS-yes += encoder/vp9_bitstream.c VP9_CX_SRCS-yes += encoder/vp9_dct.c -VP9_CX_SRCS-yes += encoder/vp9_dct.h VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h VP9_CX_SRCS-yes += encoder/vp9_encodemb.c -- 2.40.0