From cf76ee2cb79ed5c53f754819f94ed9a9dacbe825 Mon Sep 17 00:00:00 2001 From: Linfeng Zhang Date: Wed, 11 Jan 2017 12:22:37 -0800 Subject: [PATCH] Add vpx_idct16x16_38_add_c() When eob is less than or equal to 38 for 16x16 idct, call this function. Change-Id: Ief6f3fb16a49ace3c92cebf4e220bf5bf52a6087 --- test/partial_idct_test.cc | 2 ++ vp9/common/vp9_idct.c | 2 ++ vpx_dsp/inv_txfm.c | 26 ++++++++++++++++++++++++++ vpx_dsp/vpx_dsp_rtcd_defs.pl | 16 ++++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc index 98b283af3..38f58557e 100644 --- a/test/partial_idct_test.cc +++ b/test/partial_idct_test.cc @@ -426,6 +426,8 @@ const PartialInvTxfmParam c_partial_idct_tests[] = { &wrapper<vpx_idct32x32_1_add_c>, TX_32X32, 1, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>, &wrapper<vpx_idct16x16_256_add_c>, TX_16X16, 256, 8, 1), + make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>, + &wrapper<vpx_idct16x16_38_add_c>, TX_16X16, 38, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>, &wrapper<vpx_idct16x16_10_add_c>, TX_16X16, 10, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>, diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index e3a088e28..9340d5d21 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -156,6 +156,8 @@ void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, vpx_idct16x16_1_add(input, dest, stride); else if (eob <= 10) vpx_idct16x16_10_add(input, dest, stride); + else if (eob <= 38) + vpx_idct16x16_38_add(input, dest, stride); else vpx_idct16x16_256_add(input, dest, stride); } diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c index 0f9aff189..5cfc8e0bb 100644 --- a/vpx_dsp/inv_txfm.c +++ b/vpx_dsp/inv_txfm.c @@ -767,6 +767,32 @@ void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, } } +void vpx_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, + int stride) { + int i, j; + tran_low_t out[16 * 16] = { 0 }; + tran_low_t *outptr = out; + tran_low_t temp_in[16], temp_out[16]; + + // First transform rows. 
Since all non-zero dct coefficients are in + // upper-left 8x8 area, we only need to calculate first 8 rows here. + for (i = 0; i < 8; ++i) { + idct16_c(input, outptr); + input += 16; + outptr += 16; + } + + // Then transform columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; + idct16_c(temp_in, temp_out); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); + } + } +} + void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 4674b0cf0..944ed5760 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -650,6 +650,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; @@ -691,6 +693,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_256_add neon sse2/; + add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + specialize qw/vpx_idct16x16_38_add neon sse2/; + $vpx_idct16x16_38_add_neon=vpx_idct16x16_256_add_neon; + $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_10_add neon sse2/; @@ -743,6 +750,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t 
*input, uint8_t *dest, int stride"; + add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; @@ -778,6 +787,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/; + add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + specialize qw/vpx_idct16x16_38_add sse2 neon dspr2 msa/; + $vpx_idct16x16_38_add_neon=vpx_idct16x16_256_add_neon; + $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; + $vpx_idct16x16_38_add_dspr2=vpx_idct16x16_256_add_dspr2; + $vpx_idct16x16_38_add_msa=vpx_idct16x16_256_add_msa; + add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/; -- 2.40.0