From: Debargha Mukherjee
Date: Tue, 15 Mar 2016 05:30:09 +0000 (-0700)
Subject: Adds 1D transforms for ADST/FlipADST to make 16
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1b17559327f129ef790aeb1adaf871c2185eeb6c;p=libvpx

Adds 1D transforms for ADST/FlipADST to make 16

Makes a set of 16 transforms total, adding all 1D combinations of ADST
and FlipADST, and removing all DST transforms. In BDRATE terms, lowres
and midres both improve by about 0.1% and hdres by -0.378%, while using
fewer transforms that are also simpler. Further experiments will
continue later.

Change-Id: I7348a4c0e12078fdea5ae3a2d36a89a319ffcc6e
---
diff --git a/test/vp10_fht16x16_test.cc b/test/vp10_fht16x16_test.cc index d501e10d6..39671493c 100644 --- a/test/vp10_fht16x16_test.cc +++ b/test/vp10_fht16x16_test.cc @@ -103,20 +103,6 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 7, VPX_BITS_8, 256), make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 8, - VPX_BITS_8, 256), - make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 9, - VPX_BITS_8, 256), - make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 10, - VPX_BITS_8, 256), - make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 11, - VPX_BITS_8, 256), - make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 12, - VPX_BITS_8, 256), - make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 13, - VPX_BITS_8, 256), - make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 14, - VPX_BITS_8, 256), - make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 15, VPX_BITS_8, 256))); #endif // !CONFIG_EXT_TX #endif // HAVE_SSE2 diff --git a/test/vp10_fht4x4_test.cc b/test/vp10_fht4x4_test.cc index d2598f9f5..bee1a0cd6 100644 --- a/test/vp10_fht4x4_test.cc +++ b/test/vp10_fht4x4_test.cc @@ -102,20 +102,6 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 7, VPX_BITS_8, 16), make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 8, - VPX_BITS_8, 16), - make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 9, - VPX_BITS_8, 16), - make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 10, - VPX_BITS_8, 16), - make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 11, - VPX_BITS_8, 16), - make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 12, - VPX_BITS_8, 16), - make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 13, - VPX_BITS_8, 16), - make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 14, - VPX_BITS_8, 16), - make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 15, VPX_BITS_8, 16))); #endif // !CONFIG_EXT_TX #endif // HAVE_SSE2 diff --git a/test/vp10_fht8x8_test.cc b/test/vp10_fht8x8_test.cc index 47feb3d6d..96f5632da 100644 --- a/test/vp10_fht8x8_test.cc +++ b/test/vp10_fht8x8_test.cc @@ -102,20 +102,6 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 7, VPX_BITS_8, 64), make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 8, - VPX_BITS_8, 64), - make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 9, - VPX_BITS_8, 64), - make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 10, - VPX_BITS_8, 64), - make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 11, - VPX_BITS_8, 64), - make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 12, - VPX_BITS_8, 64), - make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 13, - VPX_BITS_8, 64), - make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 14, - VPX_BITS_8, 64), - make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 15, VPX_BITS_8,
64))); #endif // !CONFIG_EXT_TX #endif // HAVE_SSE2 diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index ffa3c64be..50b69811a 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -383,10 +383,10 @@ static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) { #define USE_MSKTX_FOR_32X32 0 static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = { - 1, 19, 12, 2 + 1, 16, 12, 2 }; static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = { - 1, 17, 10 + 1, 12, 10 }; #if EXT_TX_SIZES == 4 @@ -437,17 +437,17 @@ static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][TX_SIZES] = { // Transform types used in each intra set static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = { - { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, }, + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0}, }; // Transform types used in each inter set static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = { - { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1}, - { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1}, + {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, }; static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs, diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index d48679e06..8afcbb8cb 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -836,47 +836,27 @@ static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER] [TREE_SIZE(TX_TYPES)] = { { // ToDo(yaowu): remove used entry 0. - -IDTX, 2, - -V_DCT, 4, - -H_DCT, 6, - -DCT_DCT, 8, - -DST_DST, 10, - 12, 22, - 14, 16, - -DST_DCT, -DCT_DST, - 18, 20, - -ADST_DCT, -DCT_ADST, - -FLIPADST_DCT, -DCT_FLIPADST, - 24, 30, - 26, 28, - -DST_ADST, -ADST_DST, - -DST_FLIPADST, -FLIPADST_DST, - 32, 34, - -ADST_ADST, -FLIPADST_FLIPADST, - -ADST_FLIPADST, -FLIPADST_ADST, + 0 }, { -IDTX, 2, - -V_DCT, 4, - -H_DCT, 6, - -DCT_DCT, 8, - -DST_DST, 10, - 12, 22, - 14, 16, - -DST_DCT, -DCT_DST, - 18, 20, + 4, 14, + 6, 8, + -V_DCT, -H_DCT, + 10, 12, + -V_ADST, -H_ADST, + -V_FLIPADST, -H_FLIPADST, + -DCT_DCT, 16, + 18, 24, + 20, 22, -ADST_DCT, -DCT_ADST, -FLIPADST_DCT, -DCT_FLIPADST, - 24, 30, 26, 28, - -DST_ADST, -ADST_DST, - -DST_FLIPADST, -FLIPADST_DST, - 32, 34, -ADST_ADST, -FLIPADST_FLIPADST, - -ADST_FLIPADST, -FLIPADST_ADST, + -ADST_FLIPADST, -FLIPADST_ADST }, { -IDTX, 2, - -V_DCT, 4, - -H_DCT, 6, + 4, 6, + -V_DCT, -H_DCT, -DCT_DCT, 8, 10, 16, 12, 14, @@ -893,39 +873,19 @@ const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER] const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA] [TREE_SIZE(TX_TYPES)] = { { // ToDo(yaowu): remove unused entry 0. 
- -IDTX, 2, - -DCT_DCT, 4, - -DST_DST, 6, - 8, 18, - 10, 12, - -DST_DCT, -DCT_DST, - 14, 16, - -ADST_DCT, -DCT_ADST, - -FLIPADST_DCT, -DCT_FLIPADST, - 20, 26, - 22, 24, - -DST_ADST, -ADST_DST, - -DST_FLIPADST, -FLIPADST_DST, - 28, 30, - -ADST_ADST, -FLIPADST_FLIPADST, - -ADST_FLIPADST, -FLIPADST_ADST, + 0 }, { -IDTX, 2, -DCT_DCT, 4, - -DST_DST, 6, - 8, 18, - 10, 12, - -DST_DCT, -DCT_DST, - 14, 16, + 6, 8, + -V_DCT, -H_DCT, + 10, 16, + 12, 14, -ADST_DCT, -DCT_ADST, -FLIPADST_DCT, -DCT_FLIPADST, - 20, 26, - 22, 24, - -DST_ADST, -ADST_DST, - -DST_FLIPADST, -FLIPADST_DST, - 28, 30, + 18, 20, -ADST_ADST, -FLIPADST_FLIPADST, - -ADST_FLIPADST, -FLIPADST_ADST, + -ADST_FLIPADST, -FLIPADST_ADST }, { -IDTX, 2, -DCT_DCT, 4, @@ -942,33 +902,25 @@ const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA] static const vpx_prob default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = { { // ToDo(yaowu): remove unused entry 0. - { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 }, - { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 }, - { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 }, + { 0 }, + { 0 }, + { 0 }, #if EXT_TX_SIZES == 4 - { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 }, + { 0 }, #endif }, { - { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 }, - { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 }, - { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 }, + { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128}, + { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128}, + { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128}, #if EXT_TX_SIZES == 4 - { 12, 15, 15, 160, 16, 144, 160, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 }, + { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128}, #endif }, { - { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 }, - { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 }, - { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 }, + { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, + { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, + { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, #if EXT_TX_SIZES == 4 - { 12, 15, 15, 160, 128, 128, 128, 128, 128, 128, 128 }, + { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, #endif }, { { 12, }, @@ -985,266 +937,110 @@ default_intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES] [INTRA_MODES][TX_TYPES - 1] = { { // ToDo(yaowu): remove unused entry 0. 
{ - { 8, 11, 24, 112, 87, 137, 127, 134, - 128, 86, 128, 124, 125, 133, 176, 123, }, - { 10, 9, 39, 106, 73, 155, 163, 228, - 35, 62, 129, 127, 133, 114, 213, 234, }, - { 10, 9, 14, 88, 91, 127, 151, 51, - 210, 89, 126, 58, 52, 116, 217, 24, }, - { 9, 6, 29, 113, 98, 131, 149, 210, - 119, 60, 124, 93, 90, 143, 170, 197, }, - { 8, 8, 38, 101, 111, 166, 167, 141, - 130, 105, 128, 75, 75, 118, 197, 117, }, - { 7, 8, 39, 91, 101, 153, 166, 200, - 99, 77, 123, 90, 83, 144, 224, 192, }, - { 7, 10, 26, 86, 119, 154, 130, 101, - 152, 91, 129, 75, 79, 137, 219, 77, }, - { 10, 13, 20, 86, 102, 162, 112, 76, - 171, 86, 134, 122, 106, 124, 196, 44, }, - { 8, 9, 33, 108, 100, 144, 148, 215, - 77, 60, 125, 125, 128, 126, 198, 220, }, - { 3, 10, 29, 111, 69, 141, 204, 141, - 139, 93, 120, 75, 77, 163, 242, 124, }, + { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, }, { - { 2, 53, 18, 147, 96, 98, 136, 133, - 131, 120, 153, 163, 169, 137, 173, 124, }, - { 4, 18, 34, 133, 54, 130, 179, 228, - 28, 72, 153, 164, 168, 118, 227, 239, }, - { 4, 18, 13, 125, 72, 110, 176, 36, - 221, 104, 148, 75, 72, 117, 225, 19, }, - { 8, 33, 24, 162, 113, 99, 147, 226, - 103, 85, 153, 143, 153, 124, 155, 210, }, - { 2, 15, 35, 107, 127, 158, 192, 128, - 126, 116, 151, 95, 88, 182, 241, 119, }, - { 3, 15, 36, 112, 100, 146, 194, 189, - 90, 98, 152, 99, 100, 165, 235, 175, }, - { 3, 16, 29, 109, 103, 140, 182, 76, - 173, 104, 147, 82, 85, 159, 235, 70, }, - { 9, 24, 14, 120, 86, 156, 161, 34, - 177, 121, 142, 128, 128, 126, 185, 37, }, - { 5, 24, 29, 152, 98, 99, 174, 228, - 82, 76, 147, 149, 128, 132, 191, 225, }, - { 2, 15, 29, 111, 77, 126, 200, 135, - 117, 93, 152, 96, 84, 191, 245, 135, }, + { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, }, { - { 2, 69, 13, 173, 111, 69, 137, 159, - 159, 146, 151, 193, 203, 131, 180, 123, }, - { 1, 12, 33, 164, 32, 98, 204, 242, - 23, 99, 149, 215, 232, 110, 239, 245, }, - { 1, 17, 9, 136, 82, 83, 171, 28, - 231, 128, 135, 76, 64, 118, 235, 17, }, - { 4, 41, 17, 195, 131, 58, 161, 237, - 141, 97, 153, 189, 191, 117, 182, 202, }, - { 2, 17, 36, 104, 149, 137, 217, 139, - 191, 119, 125, 107, 115, 223, 249, 110, }, - { 2, 14, 24, 127, 91, 135, 219, 198, - 113, 91, 164, 125, 173, 211, 250, 116, }, - { 3, 19, 24, 120, 102, 130, 209, 81, - 187, 95, 143, 102, 50, 190, 244, 56, }, - { 4, 27, 10, 128, 91, 157, 181, 33, - 181, 150, 141, 141, 166, 114, 215, 25, }, - { 2, 34, 27, 187, 102, 77, 210, 245, - 113, 107, 136, 184, 188, 121, 210, 234, }, - { 1, 15, 22, 141, 59, 94, 208, 133, - 154, 95, 152, 112, 105, 191, 242, 111, }, + { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, #if EXT_TX_SIZES == 4 }, { - { 2, 69, 13, 173, 111, 69, 137, 159, - 159, 146, 151, 193, 203, 131, 180, 123, }, - { 1, 12, 33, 164, 32, 98, 204, 242, - 23, 99, 149, 215, 232, 110, 239, 245, }, - { 1, 17, 9, 136, 82, 83, 171, 28, - 231, 128, 135, 76, 64, 118, 235, 17, }, - { 4, 41, 17, 195, 131, 58, 161, 237, - 141, 97, 153, 189, 191, 117, 182, 202, }, - { 2, 17, 36, 104, 149, 137, 217, 139, - 191, 119, 125, 107, 115, 223, 249, 110, }, - { 2, 14, 24, 127, 91, 135, 219, 198, - 113, 91, 164, 125, 173, 211, 250, 116, }, - { 3, 19, 24, 120, 102, 130, 209, 81, - 187, 95, 143, 102, 50, 190, 244, 56, }, - { 4, 27, 10, 128, 91, 157, 181, 33, - 181, 150, 141, 141, 166, 114, 215, 25, }, - { 2, 34, 27, 187, 102, 77, 210, 245, - 113, 107, 136, 184, 188, 121, 210, 234, }, - { 1, 15, 22, 141, 59, 94, 208, 133, - 154, 95, 152, 112, 105, 191, 242, 111, }, + { 0 }, { 0 }, { 0 }, { 0 
}, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, #endif }, }, { { - { 8, 11, 24, 112, 87, 137, 127, 134, - 128, 86, 128, 124, 125, 133, 176, 123, }, - { 10, 9, 39, 106, 73, 155, 163, 228, - 35, 62, 129, 127, 133, 114, 213, 234, }, - { 10, 9, 14, 88, 91, 127, 151, 51, - 210, 89, 126, 58, 52, 116, 217, 24, }, - { 9, 6, 29, 113, 98, 131, 149, 210, - 119, 60, 124, 93, 90, 143, 170, 197, }, - { 8, 8, 38, 101, 111, 166, 167, 141, - 130, 105, 128, 75, 75, 118, 197, 117, }, - { 7, 8, 39, 91, 101, 153, 166, 200, - 99, 77, 123, 90, 83, 144, 224, 192, }, - { 7, 10, 26, 86, 119, 154, 130, 101, - 152, 91, 129, 75, 79, 137, 219, 77, }, - { 10, 13, 20, 86, 102, 162, 112, 76, - 171, 86, 134, 122, 106, 124, 196, 44, }, - { 8, 9, 33, 108, 100, 144, 148, 215, - 77, 60, 125, 125, 128, 126, 198, 220, }, - { 3, 10, 29, 111, 69, 141, 204, 141, - 139, 93, 120, 75, 77, 163, 242, 124, }, + { 8, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, }, + { 10, 28, 32, 128, 176, 192, 208, 128, 128, 128, 128, }, + { 10, 28, 32, 128, 176, 192, 48, 128, 128, 128, 128, }, + { 9, 160, 32, 128, 128, 128, 128, 128, 128, 128, 128, }, + { 8, 28, 32, 128, 96, 128, 128, 128, 160, 192, 128, }, + { 7, 28, 32, 128, 160, 176, 192, 128, 128, 128, 128, }, + { 7, 20, 32, 128, 160, 176, 64, 128, 128, 128, 128, }, + { 10, 23, 32, 128, 160, 176, 64, 128, 128, 128, 128, }, + { 8, 29, 32, 128, 160, 176, 192, 128, 128, 128, 128, }, + { 3, 20, 32, 128, 96, 128, 128, 128, 160, 192, 128, }, }, { - { 2, 53, 18, 147, 96, 98, 136, 133, - 131, 120, 153, 163, 169, 137, 173, 124, }, - { 4, 18, 34, 133, 54, 130, 179, 228, - 28, 72, 153, 164, 168, 118, 227, 239, }, - { 4, 18, 13, 125, 72, 110, 176, 36, - 221, 104, 148, 75, 72, 117, 225, 19, }, - { 8, 33, 24, 162, 113, 99, 147, 226, - 103, 85, 153, 143, 153, 124, 155, 210, }, - { 2, 15, 35, 107, 127, 158, 192, 128, - 126, 116, 151, 95, 88, 182, 241, 119, }, - { 3, 15, 36, 112, 100, 146, 194, 189, - 90, 98, 152, 99, 100, 165, 235, 175, }, - { 3, 16, 29, 109, 103, 140, 182, 76, - 173, 104, 147, 82, 85, 159, 235, 70, }, - { 9, 24, 14, 120, 86, 156, 161, 34, - 177, 121, 142, 128, 128, 126, 185, 37, }, - { 5, 24, 29, 152, 98, 99, 174, 228, - 82, 76, 147, 149, 128, 132, 191, 225, }, - { 2, 15, 29, 111, 77, 126, 200, 135, - 117, 93, 152, 96, 84, 191, 245, 135, }, + { 2, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, }, + { 4, 28, 32, 128, 176, 192, 208, 128, 128, 128, 128, }, + { 4, 28, 32, 128, 176, 192, 48, 128, 128, 128, 128, }, + { 8, 160, 32, 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 28, 32, 128, 96, 128, 128, 128, 160, 192, 128, }, + { 3, 28, 32, 128, 160, 176, 192, 128, 128, 128, 128, }, + { 3, 26, 32, 128, 160, 176, 64, 128, 128, 128, 128, }, + { 9, 24, 32, 128, 160, 176, 64, 128, 128, 128, 128, }, + { 5, 24, 32, 128, 160, 176, 192, 128, 128, 128, 128, }, + { 2, 25, 32, 128, 96, 128, 128, 128, 160, 192, 128, }, }, { - { 2, 69, 13, 173, 111, 69, 137, 159, - 159, 146, 151, 193, 203, 131, 180, 123, }, - { 1, 12, 33, 164, 32, 98, 204, 242, - 23, 99, 149, 215, 232, 110, 239, 245, }, - { 1, 17, 9, 136, 82, 83, 171, 28, - 231, 128, 135, 76, 64, 118, 235, 17, }, - { 4, 41, 17, 195, 131, 58, 161, 237, - 141, 97, 153, 189, 191, 117, 182, 202, }, - { 2, 17, 36, 104, 149, 137, 217, 139, - 191, 119, 125, 107, 115, 223, 249, 110, }, - { 2, 14, 24, 127, 91, 135, 219, 198, - 113, 91, 164, 125, 173, 211, 250, 116, }, - { 3, 19, 24, 120, 102, 130, 209, 81, - 187, 95, 143, 102, 50, 190, 244, 56, }, - { 4, 27, 10, 128, 91, 157, 181, 33, - 181, 150, 141, 141, 166, 114, 215, 25, }, - { 2, 34, 27, 187, 102, 77, 210, 245, - 
113, 107, 136, 184, 188, 121, 210, 234, }, - { 1, 15, 22, 141, 59, 94, 208, 133, - 154, 95, 152, 112, 105, 191, 242, 111, }, + { 2, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, }, + { 1, 28, 32, 128, 176, 192, 208, 128, 128, 128, 128, }, + { 1, 28, 32, 128, 176, 192, 48, 128, 128, 128, 128, }, + { 4, 160, 32, 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 28, 32, 128, 96, 128, 128, 128, 160, 192, 128, }, + { 2, 28, 32, 128, 160, 176, 192, 128, 128, 128, 128, }, + { 3, 29, 32, 128, 160, 176, 64, 128, 128, 128, 128, }, + { 4, 27, 32, 128, 160, 176, 64, 128, 128, 128, 128, }, + { 2, 34, 32, 128, 160, 176, 192, 128, 128, 128, 128, }, + { 1, 25, 32, 128, 96, 128, 128, 128, 160, 192, 128, }, #if EXT_TX_SIZES == 4 }, { - { 2, 69, 13, 173, 111, 69, 137, 159, - 159, 146, 151, 193, 203, 131, 180, 123, }, - { 1, 12, 33, 164, 32, 98, 204, 242, - 23, 99, 149, 215, 232, 110, 239, 245, }, - { 1, 17, 9, 136, 82, 83, 171, 28, - 231, 128, 135, 76, 64, 118, 235, 17, }, - { 4, 41, 17, 195, 131, 58, 161, 237, - 141, 97, 153, 189, 191, 117, 182, 202, }, - { 2, 17, 36, 104, 149, 137, 217, 139, - 191, 119, 125, 107, 115, 223, 249, 110, }, - { 2, 14, 24, 127, 91, 135, 219, 198, - 113, 91, 164, 125, 173, 211, 250, 116, }, - { 3, 19, 24, 120, 102, 130, 209, 81, - 187, 95, 143, 102, 50, 190, 244, 56, }, - { 4, 27, 10, 128, 91, 157, 181, 33, - 181, 150, 141, 141, 166, 114, 215, 25, }, - { 2, 34, 27, 187, 102, 77, 210, 245, - 113, 107, 136, 184, 188, 121, 210, 234, }, - { 1, 15, 22, 141, 59, 94, 208, 133, - 154, 95, 152, 112, 105, 191, 242, 111, }, + { 2, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, }, + { 1, 12, 32, 128, 160, 176, 192, 128, 128, 128, 128, }, + { 1, 17, 32, 128, 160, 176, 64, 128, 128, 128, 128, }, + { 4, 41, 32, 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 17, 32, 128, 96, 128, 128, 128, 160, 192, 128, }, + { 2, 14, 32, 128, 160, 176, 192, 128, 128, 128, 128, }, + { 3, 19, 32, 128, 160, 176, 64, 128, 128, 128, 128, }, + { 4, 27, 32, 128, 160, 176, 64, 128, 128, 128, 128, }, + { 2, 34, 32, 128, 160, 176, 192, 128, 128, 128, 128, }, + { 1, 15, 32, 128, 96, 128, 128, 128, 160, 192, 128, }, #endif }, }, { { - { 8, 176, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 10, 28, 176, 192, 208, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 10, 28, 176, 192, 48, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 9, 160, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 8, 28, 96, 128, 128, 128, 160, 192, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 7, 28, 160, 176, 192, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 7, 20, 160, 176, 64, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 10, 23, 160, 176, 64, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 8, 29, 160, 176, 192, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 3, 20, 96, 128, 128, 128, 160, 192, - 128, 128, 128, 128, 128, 128, 128, 128, }, + { 8, 176, 128, 128, 128, 128, 128, 128, 128, }, + { 10, 28, 176, 192, 208, 128, 128, 128, 128, }, + { 10, 28, 176, 192, 48, 128, 128, 128, 128, }, + { 9, 160, 128, 128, 128, 128, 128, 128, 128, }, + { 8, 28, 96, 128, 128, 128, 160, 192, 128, }, + { 7, 28, 160, 176, 192, 128, 128, 128, 128, }, + { 7, 20, 160, 176, 64, 128, 128, 128, 128, }, + { 10, 23, 160, 176, 64, 128, 128, 128, 128, }, + { 8, 29, 160, 176, 192, 128, 128, 128, 128, }, + { 3, 20, 96, 128, 128, 128, 160, 192, 128, }, }, { - { 2, 176, 128, 128, 128, 128, 128, 128, - 128, 128, 
128, 128, 128, 128, 128, 128, }, - { 4, 28, 176, 192, 208, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 4, 28, 176, 192, 48, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 8, 160, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 28, 96, 128, 128, 128, 160, 192, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 3, 28, 160, 176, 192, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 3, 26, 160, 176, 64, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 9, 24, 160, 176, 64, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 5, 24, 160, 176, 192, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 25, 96, 128, 128, 128, 160, 192, - 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 176, 128, 128, 128, 128, 128, 128, 128, }, + { 4, 28, 176, 192, 208, 128, 128, 128, 128, }, + { 4, 28, 176, 192, 48, 128, 128, 128, 128, }, + { 8, 160, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 28, 96, 128, 128, 128, 160, 192, 128, }, + { 3, 28, 160, 176, 192, 128, 128, 128, 128, }, + { 3, 26, 160, 176, 64, 128, 128, 128, 128, }, + { 9, 24, 160, 176, 64, 128, 128, 128, 128, }, + { 5, 24, 160, 176, 192, 128, 128, 128, 128, }, + { 2, 25, 96, 128, 128, 128, 160, 192, 128, }, }, { - { 2, 176, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 1, 28, 176, 192, 208, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 1, 28, 176, 192, 48, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 4, 160, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 28, 96, 128, 128, 128, 160, 192, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 28, 160, 176, 192, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 3, 29, 160, 176, 64, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 4, 27, 160, 176, 64, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 34, 160, 176, 192, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 1, 25, 96, 128, 128, 128, 160, 192, - 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 176, 128, 128, 128, 128, 128, 128, 128, }, + { 1, 28, 176, 192, 208, 128, 128, 128, 128, }, + { 1, 28, 176, 192, 48, 128, 128, 128, 128, }, + { 4, 160, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 28, 96, 128, 128, 128, 160, 192, 128, }, + { 2, 28, 160, 176, 192, 128, 128, 128, 128, }, + { 3, 29, 160, 176, 64, 128, 128, 128, 128, }, + { 4, 27, 160, 176, 64, 128, 128, 128, 128, }, + { 2, 34, 160, 176, 192, 128, 128, 128, 128, }, + { 1, 25, 96, 128, 128, 128, 160, 192, 128, }, #if EXT_TX_SIZES == 4 }, { - { 2, 176, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 1, 12, 160, 176, 192, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 1, 17, 160, 176, 64, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 4, 41, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 17, 96, 128, 128, 128, 160, 192, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 14, 160, 176, 192, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 3, 19, 160, 176, 64, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 4, 27, 160, 176, 64, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 2, 34, 160, 176, 192, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, }, - { 1, 15, 96, 128, 128, 128, 160, 192, - 128, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 176, 128, 128, 128, 128, 
128, 128, 128, }, + { 1, 12, 160, 176, 192, 128, 128, 128, 128, }, + { 1, 17, 160, 176, 64, 128, 128, 128, 128, }, + { 4, 41, 128, 128, 128, 128, 128, 128, 128, }, + { 2, 17, 96, 128, 128, 128, 160, 192, 128, }, + { 2, 14, 160, 176, 192, 128, 128, 128, 128, }, + { 3, 19, 160, 176, 64, 128, 128, 128, 128, }, + { 4, 27, 160, 176, 64, 128, 128, 128, 128, }, + { 2, 34, 160, 176, 192, 128, 128, 128, 128, }, + { 1, 15, 96, 128, 128, 128, 160, 192, 128, }, #endif }, }, diff --git a/vp10/common/enums.h b/vp10/common/enums.h index 87bcc8a71..4a0e243ac 100644 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h @@ -111,21 +111,17 @@ typedef enum { FLIPADST_FLIPADST = 6, ADST_FLIPADST = 7, FLIPADST_ADST = 8, - DST_DCT = 9, - DCT_DST = 10, - DST_ADST = 11, - ADST_DST = 12, - DST_FLIPADST = 13, - FLIPADST_DST = 14, - DST_DST = 15, - IDTX = 16, - V_DCT = 17, - H_DCT = 18, + IDTX = 9, + V_DCT = 10, + H_DCT = 11, + V_ADST = 12, + H_ADST = 13, + V_FLIPADST = 14, + H_FLIPADST = 15, #endif // CONFIG_EXT_TX TX_TYPES, } TX_TYPE; - #if CONFIG_EXT_TX #define EXT_TX_SIZES 4 // number of sizes that use extended transforms #define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER diff --git a/vp10/common/idct.c b/vp10/common/idct.c index 863f0db6b..0e211ad67 100644 --- a/vp10/common/idct.c +++ b/vp10/common/idct.c @@ -18,247 +18,6 @@ #include "vpx_dsp/inv_txfm.h" #include "vpx_ports/mem.h" -#if CONFIG_EXT_TX -void idst4_c(const tran_low_t *input, tran_low_t *output) { - tran_low_t step[4]; - tran_high_t temp1, temp2; - // stage 1 - temp1 = (input[3] + input[1]) * cospi_16_64; - temp2 = (input[3] - input[1]) * cospi_16_64; - step[0] = WRAPLOW(dct_const_round_shift(temp1), 8); - step[1] = WRAPLOW(dct_const_round_shift(temp2), 8); - temp1 = input[2] * cospi_24_64 - input[0] * cospi_8_64; - temp2 = input[2] * cospi_8_64 + input[0] * cospi_24_64; - step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); - - // stage 2 - output[0] = WRAPLOW(step[0] + step[3], 8); - output[1] = WRAPLOW(-step[1] - step[2], 8); - output[2] = WRAPLOW(step[1] - step[2], 8); - output[3] = WRAPLOW(step[3] - step[0], 8); -} - -void idst8_c(const tran_low_t *input, tran_low_t *output) { - // vp9_igentx8(input, output, Tx8); - tran_low_t step1[8], step2[8]; - tran_high_t temp1, temp2; - // stage 1 - step1[0] = input[7]; - step1[2] = input[3]; - step1[1] = input[5]; - step1[3] = input[1]; - temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64; - temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); - temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64; - temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); - - // stage 2 - temp1 = (step1[0] + step1[2]) * cospi_16_64; - temp2 = (step1[0] - step1[2]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); - temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); - step2[4] = WRAPLOW(step1[4] + step1[5], 8); - step2[5] = WRAPLOW(step1[4] - step1[5], 8); - step2[6] = WRAPLOW(-step1[6] + step1[7], 8); - step2[7] = WRAPLOW(step1[6] + step1[7], 8); - - // stage 3 - step1[0] = 
WRAPLOW(step2[0] + step2[3], 8); - step1[1] = WRAPLOW(step2[1] + step2[2], 8); - step1[2] = WRAPLOW(step2[1] - step2[2], 8); - step1[3] = WRAPLOW(step2[0] - step2[3], 8); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); - step1[7] = step2[7]; - - // stage 4 - output[0] = WRAPLOW(step1[0] + step1[7], 8); - output[1] = WRAPLOW(-step1[1] - step1[6], 8); - output[2] = WRAPLOW(step1[2] + step1[5], 8); - output[3] = WRAPLOW(-step1[3] - step1[4], 8); - output[4] = WRAPLOW(step1[3] - step1[4], 8); - output[5] = WRAPLOW(-step1[2] + step1[5], 8); - output[6] = WRAPLOW(step1[1] - step1[6], 8); - output[7] = WRAPLOW(-step1[0] + step1[7], 8); -} - -void idst16_c(const tran_low_t *input, tran_low_t *output) { - tran_low_t step1[16], step2[16]; - tran_high_t temp1, temp2; - - // stage 1 - step1[0] = input[15]; - step1[1] = input[7]; - step1[2] = input[11]; - step1[3] = input[3]; - step1[4] = input[13]; - step1[5] = input[5]; - step1[6] = input[9]; - step1[7] = input[1]; - step1[8] = input[14]; - step1[9] = input[6]; - step1[10] = input[10]; - step1[11] = input[2]; - step1[12] = input[12]; - step1[13] = input[4]; - step1[14] = input[8]; - step1[15] = input[0]; - - // stage 2 - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[4] = step1[4]; - step2[5] = step1[5]; - step2[6] = step1[6]; - step2[7] = step1[7]; - - temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; - temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8); - - temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; - temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); - - temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; - temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); - - temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; - temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); - - // stage 3 - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - - temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; - temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); - temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; - temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); - - step1[8] = WRAPLOW(step2[8] + step2[9], 8); - step1[9] = WRAPLOW(step2[8] - step2[9], 8); - step1[10] = WRAPLOW(-step2[10] + step2[11], 8); - step1[11] = WRAPLOW(step2[10] + step2[11], 8); - step1[12] = WRAPLOW(step2[12] + step2[13], 8); - step1[13] = WRAPLOW(step2[12] - step2[13], 8); - step1[14] = WRAPLOW(-step2[14] + step2[15], 8); - step1[15] = WRAPLOW(step2[14] + step2[15], 8); - - // stage 4 - temp1 = (step1[0] + step1[1]) * cospi_16_64; - temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = 
WRAPLOW(dct_const_round_shift(temp1), 8); - step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); - temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); - step2[4] = WRAPLOW(step1[4] + step1[5], 8); - step2[5] = WRAPLOW(step1[4] - step1[5], 8); - step2[6] = WRAPLOW(-step1[6] + step1[7], 8); - step2[7] = WRAPLOW(step1[6] + step1[7], 8); - - step2[8] = step1[8]; - step2[15] = step1[15]; - temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; - temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); - temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; - temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); - step2[11] = step1[11]; - step2[12] = step1[12]; - - // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3], 8); - step1[1] = WRAPLOW(step2[1] + step2[2], 8); - step1[2] = WRAPLOW(step2[1] - step2[2], 8); - step1[3] = WRAPLOW(step2[0] - step2[3], 8); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); - step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); - step1[7] = step2[7]; - - step1[8] = WRAPLOW(step2[8] + step2[11], 8); - step1[9] = WRAPLOW(step2[9] + step2[10], 8); - step1[10] = WRAPLOW(step2[9] - step2[10], 8); - step1[11] = WRAPLOW(step2[8] - step2[11], 8); - step1[12] = WRAPLOW(-step2[12] + step2[15], 8); - step1[13] = WRAPLOW(-step2[13] + step2[14], 8); - step1[14] = WRAPLOW(step2[13] + step2[14], 8); - step1[15] = WRAPLOW(step2[12] + step2[15], 8); - - // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7], 8); - step2[1] = WRAPLOW(step1[1] + step1[6], 8); - step2[2] = WRAPLOW(step1[2] + step1[5], 8); - step2[3] = WRAPLOW(step1[3] + step1[4], 8); - step2[4] = WRAPLOW(step1[3] - step1[4], 8); - step2[5] = WRAPLOW(step1[2] - step1[5], 8); - step2[6] = WRAPLOW(step1[1] - step1[6], 8); - step2[7] = WRAPLOW(step1[0] - step1[7], 8); - step2[8] = step1[8]; - step2[9] = step1[9]; - temp1 = (-step1[10] + step1[13]) * cospi_16_64; - temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); - temp1 = (-step1[11] + step1[12]) * cospi_16_64; - temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); - step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); - step2[14] = step1[14]; - step2[15] = step1[15]; - - // stage 7 - output[0] = WRAPLOW(step2[0] + step2[15], 8); - output[1] = WRAPLOW(-step2[1] - step2[14], 8); - output[2] = WRAPLOW(step2[2] + step2[13], 8); - output[3] = WRAPLOW(-step2[3] - step2[12], 8); - output[4] = WRAPLOW(step2[4] + step2[11], 8); - output[5] = WRAPLOW(-step2[5] - step2[10], 8); - output[6] = WRAPLOW(step2[6] + step2[9], 8); - output[7] = WRAPLOW(-step2[7] - step2[8], 8); - output[8] = WRAPLOW(step2[7] - step2[8], 8); - output[9] = WRAPLOW(-step2[6] + step2[9], 8); - output[10] = WRAPLOW(step2[5] - step2[10], 8); - output[11] = WRAPLOW(-step2[4] + step2[11], 8); - output[12] = WRAPLOW(step2[3] - step2[12], 8); - output[13] = WRAPLOW(-step2[2] + step2[13], 8); - output[14] = WRAPLOW(step2[1] - step2[14], 8); 
- output[15] = WRAPLOW(-step2[0] + step2[15], 8); -} - #if CONFIG_EXT_TX static void iidtx4_c(const tran_low_t *input, tran_low_t *output) { int i; @@ -285,21 +44,6 @@ static void iidtx32_c(const tran_low_t *input, tran_low_t *output) { } // For use in lieu of DST -static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) { - int i; - tran_low_t inputhalf[16]; - for (i = 0; i < 8; ++i) { - output[i] = input[16 + i] * 4; - output[24 + i] = input[24 + i] * 4; - } - // Multiply input by sqrt(2) - for (i = 0; i < 16; ++i) { - inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2); - } - idct16_c(inputhalf, output + 8); - // Note overall scaling factor is 4 times orthogonal -} - static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) { int i; tran_low_t inputhalf[16]; @@ -379,7 +123,6 @@ static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output, // Note overall scaling factor is 4 times orthogonal } #endif // CONFIG_VP9_HIGHBITDEPTH -#endif // CONFIG_EXT_TX // Inverse identity transform and add. static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride, @@ -412,24 +155,21 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride, case ADST_DCT: case DCT_ADST: case ADST_ADST: - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: case IDTX: case V_DCT: case H_DCT: + case V_ADST: + case H_ADST: break; case FLIPADST_DCT: case FLIPADST_ADST: - case FLIPADST_DST: + case V_FLIPADST: // flip UD FLIPUD_PTR(*dst, *dstride, size); break; case DCT_FLIPADST: case ADST_FLIPADST: - case DST_FLIPADST: + case H_FLIPADST: // flip LR FLIPUD_PTR(*src, *sstride, size); break; @@ -716,24 +456,21 @@ static void maybe_flip_strides16(uint16_t **dst, int *dstride, case ADST_DCT: case DCT_ADST: case ADST_ADST: - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: case IDTX: case V_DCT: case H_DCT: + case V_ADST: + case H_ADST: break; case FLIPADST_DCT: case FLIPADST_ADST: - case FLIPADST_DST: + case V_FLIPADST: // flip UD FLIPUD_PTR(*dst, *dstride, size); break; case DCT_FLIPADST: case ADST_FLIPADST: - case DST_FLIPADST: + case H_FLIPADST: // flip LR FLIPUD_PTR(*src, *sstride, size); break; @@ -754,26 +491,23 @@ static void maybe_flip_strides16(uint16_t **dst, int *dstride, void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { static const transform_2d IHT_4[] = { - { idct4_c, idct4_c }, // DCT_DCT = 0, - { iadst4_c, idct4_c }, // ADST_DCT = 1, - { idct4_c, iadst4_c }, // DCT_ADST = 2, - { iadst4_c, iadst4_c }, // ADST_ADST = 3, + { idct4_c, idct4_c }, // DCT_DCT + { iadst4_c, idct4_c }, // ADST_DCT + { idct4_c, iadst4_c }, // DCT_ADST + { iadst4_c, iadst4_c }, // ADST_ADST #if CONFIG_EXT_TX - { iadst4_c, idct4_c }, // FLIPADST_DCT = 4, - { idct4_c, iadst4_c }, // DCT_FLIPADST = 5, - { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST = 6, - { iadst4_c, iadst4_c }, // ADST_FLIPADST = 7, - { iadst4_c, iadst4_c }, // FLIPADST_ADST = 8, - { idst4_c, idct4_c }, // DST_DCT = 9, - { idct4_c, idst4_c }, // DCT_DST = 10, - { idst4_c, iadst4_c }, // DST_ADST = 11, - { iadst4_c, idst4_c }, // ADST_DST = 12, - { idst4_c, iadst4_c }, // DST_FLIPADST = 13, - { iadst4_c, idst4_c }, // FLIPADST_DST = 14, - { idst4_c, idst4_c }, // DST_DST = 15 - { iidtx4_c, iidtx4_c }, // IDTX = 16 - { idct4_c, iidtx4_c }, // V_DCT = 17 - { iidtx4_c, idct4_c }, // H_DCT = 18 + { iadst4_c, idct4_c }, // FLIPADST_DCT + { idct4_c, iadst4_c }, // DCT_FLIPADST + { iadst4_c, iadst4_c }, 
// FLIPADST_FLIPADST + { iadst4_c, iadst4_c }, // ADST_FLIPADST + { iadst4_c, iadst4_c }, // FLIPADST_ADST + { iidtx4_c, iidtx4_c }, // IDTX + { idct4_c, iidtx4_c }, // V_DCT + { iidtx4_c, idct4_c }, // H_DCT + { iadst4_c, iidtx4_c }, // V_ADST + { iidtx4_c, iadst4_c }, // H_ADST + { iadst4_c, iidtx4_c }, // V_FLIPADST + { iidtx4_c, iadst4_c }, // H_FLIPADST #endif // CONFIG_EXT_TX }; @@ -820,26 +554,23 @@ void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { static const transform_2d IHT_8[] = { - { idct8_c, idct8_c }, // DCT_DCT = 0, - { iadst8_c, idct8_c }, // ADST_DCT = 1, - { idct8_c, iadst8_c }, // DCT_ADST = 2, - { iadst8_c, iadst8_c }, // ADST_ADST = 3, + { idct8_c, idct8_c }, // DCT_DCT + { iadst8_c, idct8_c }, // ADST_DCT + { idct8_c, iadst8_c }, // DCT_ADST + { iadst8_c, iadst8_c }, // ADST_ADST #if CONFIG_EXT_TX - { iadst8_c, idct8_c }, // FLIPADST_DCT = 4, - { idct8_c, iadst8_c }, // DCT_FLIPADST = 5, - { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST = 6, - { iadst8_c, iadst8_c }, // ADST_FLIPADST = 7, - { iadst8_c, iadst8_c }, // FLIPADST_ADST = 8, - { idst8_c, idct8_c }, // DST_DCT = 9, - { idct8_c, idst8_c }, // DCT_DST = 10, - { idst8_c, iadst8_c }, // DST_ADST = 11, - { iadst8_c, idst8_c }, // ADST_DST = 12, - { idst8_c, iadst8_c }, // DST_FLIPADST = 13, - { iadst8_c, idst8_c }, // FLIPADST_DST = 14, - { idst8_c, idst8_c }, // DST_DST = 15 - { iidtx8_c, iidtx8_c }, // IDTX = 16 - { idct8_c, iidtx8_c }, // V_DCT = 17 - { iidtx8_c, idct8_c }, // H_DCT = 18 + { iadst8_c, idct8_c }, // FLIPADST_DCT + { idct8_c, iadst8_c }, // DCT_FLIPADST + { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST + { iadst8_c, iadst8_c }, // ADST_FLIPADST + { iadst8_c, iadst8_c }, // FLIPADST_ADST + { iidtx8_c, iidtx8_c }, // IDTX + { idct8_c, iidtx8_c }, // V_DCT + { iidtx8_c, idct8_c }, // H_DCT + { iadst8_c, iidtx8_c }, // V_ADST + { iidtx8_c, iadst8_c }, // H_ADST + { iadst8_c, iidtx8_c }, // V_FLIPADST + { iidtx8_c, iadst8_c }, // H_FLIPADST #endif // CONFIG_EXT_TX }; @@ -886,26 +617,23 @@ void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { static const transform_2d IHT_16[] = { - { idct16_c, idct16_c }, // DCT_DCT = 0, - { iadst16_c, idct16_c }, // ADST_DCT = 1, - { idct16_c, iadst16_c }, // DCT_ADST = 2, - { iadst16_c, iadst16_c }, // ADST_ADST = 3, + { idct16_c, idct16_c }, // DCT_DCT + { iadst16_c, idct16_c }, // ADST_DCT + { idct16_c, iadst16_c }, // DCT_ADST + { iadst16_c, iadst16_c }, // ADST_ADST #if CONFIG_EXT_TX - { iadst16_c, idct16_c }, // FLIPADST_DCT = 4, - { idct16_c, iadst16_c }, // DCT_FLIPADST = 5, - { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST = 6, - { iadst16_c, iadst16_c }, // ADST_FLIPADST = 7, - { iadst16_c, iadst16_c }, // FLIPADST_ADST = 8, - { idst16_c, idct16_c }, // DST_DCT = 9, - { idct16_c, idst16_c }, // DCT_DST = 10, - { idst16_c, iadst16_c }, // DST_ADST = 11, - { iadst16_c, idst16_c }, // ADST_DST = 12, - { idst16_c, iadst16_c }, // DST_FLIPADST = 13, - { iadst16_c, idst16_c }, // FLIPADST_DST = 14, - { idst16_c, idst16_c }, // DST_DST = 15 - { iidtx16_c, iidtx16_c }, // IDTX = 16 - { idct16_c, iidtx16_c }, // V_DCT = 17 - { iidtx16_c, idct16_c }, // H_DCT = 18 + { iadst16_c, idct16_c }, // FLIPADST_DCT + { idct16_c, iadst16_c }, // DCT_FLIPADST + { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST + { iadst16_c, iadst16_c }, // 
ADST_FLIPADST + { iadst16_c, iadst16_c }, // FLIPADST_ADST + { iidtx16_c, iidtx16_c }, // IDTX + { idct16_c, iidtx16_c }, // V_DCT + { iidtx16_c, idct16_c }, // H_DCT + { iadst16_c, iidtx16_c }, // V_ADST + { iidtx16_c, iadst16_c }, // H_ADST + { iadst16_c, iidtx16_c }, // V_FLIPADST + { iidtx16_c, iadst16_c }, // H_FLIPADST #endif // CONFIG_EXT_TX }; @@ -953,25 +681,22 @@ void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { static const transform_2d IHT_32[] = { - { idct32_c, idct32_c }, // DCT_DCT = 0, - { ihalfright32_c, idct32_c }, // ADST_DCT = 1, - { idct32_c, ihalfright32_c }, // DCT_ADST = 2, - { ihalfright32_c, ihalfright32_c }, // ADST_ADST = 3, - { ihalfright32_c, idct32_c }, // FLIPADST_DCT = 4, - { idct32_c, ihalfright32_c }, // DCT_FLIPADST = 5, - { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST = 6, - { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST = 7, - { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST = 8, - { ihalfcenter32_c, idct32_c }, // DST_DCT = 9, - { idct32_c, ihalfcenter32_c }, // DCT_DST = 10, - { ihalfcenter32_c, ihalfright32_c }, // DST_ADST = 11, - { ihalfright32_c, ihalfcenter32_c }, // ADST_DST = 12, - { ihalfcenter32_c, ihalfright32_c }, // DST_FLIPADST = 13, - { ihalfright32_c, ihalfcenter32_c }, // FLIPADST_DST = 14, - { ihalfcenter32_c, ihalfcenter32_c }, // DST_DST = 15 - { iidtx32_c, iidtx32_c }, // IDTX = 16 - { idct32_c, iidtx32_c }, // V_DCT = 17 - { iidtx32_c, idct32_c }, // H_DCT = 18 + { idct32_c, idct32_c }, // DCT_DCT + { ihalfright32_c, idct32_c }, // ADST_DCT + { idct32_c, ihalfright32_c }, // DCT_ADST + { ihalfright32_c, ihalfright32_c }, // ADST_ADST + { ihalfright32_c, idct32_c }, // FLIPADST_DCT + { idct32_c, ihalfright32_c }, // DCT_FLIPADST + { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST + { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST + { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST + { iidtx32_c, iidtx32_c }, // IDTX + { idct32_c, iidtx32_c }, // V_DCT + { iidtx32_c, idct32_c }, // H_DCT + { ihalfright32_c, iidtx16_c }, // V_ADST + { iidtx16_c, ihalfright32_c }, // H_ADST + { ihalfright32_c, iidtx16_c }, // V_FLIPADST + { iidtx16_c, ihalfright32_c }, // H_FLIPADST }; int i, j; @@ -1098,15 +823,12 @@ void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, case FLIPADST_ADST: vp10_iht4x4_16_add(input, dest, stride, tx_type); break; - case DST_DST: - case DST_DCT: - case DCT_DST: - case DST_ADST: - case ADST_DST: - case FLIPADST_DST: - case DST_FLIPADST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: // Use C version since DST only exists in C code vp10_iht4x4_16_add_c(input, dest, stride, tx_type); break; @@ -1139,15 +861,12 @@ void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, case FLIPADST_ADST: vp10_iht8x8_64_add(input, dest, stride, tx_type); break; - case DST_DST: - case DST_DCT: - case DCT_DST: - case DST_ADST: - case ADST_DST: - case FLIPADST_DST: - case DST_FLIPADST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: // Use C version since DST only exists in C code vp10_iht8x8_64_add_c(input, dest, stride, tx_type); break; @@ -1180,15 +899,12 @@ void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, case FLIPADST_ADST: vp10_iht16x16_256_add(input, dest, stride, tx_type); break; - case DST_DST: - case DST_DCT: - 
case DCT_DST: - case DST_ADST: - case ADST_DST: - case FLIPADST_DST: - case DST_FLIPADST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: // Use C version since DST only exists in C code vp10_iht16x16_256_add_c(input, dest, stride, tx_type); break; @@ -1217,15 +933,12 @@ void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - case DST_DST: - case DST_DCT: - case DCT_DST: - case DST_ADST: - case ADST_DST: - case FLIPADST_DST: - case DST_FLIPADST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: vp10_iht32x32_1024_add_c(input, dest, stride, tx_type); break; case IDTX: @@ -1242,26 +955,23 @@ void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { static const highbd_transform_2d HIGH_IHT_4[] = { - { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0, - { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1, - { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2, - { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST = 3, + { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST #if CONFIG_EXT_TX - { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT = 4, - { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST = 5, - { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST = 6, - { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST = 7, - { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST = 8, - { highbd_idst4_c, vpx_highbd_idct4_c }, // DST_DCT = 9, - { vpx_highbd_idct4_c, highbd_idst4_c }, // DCT_DST = 10, - { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_ADST = 11, - { vpx_highbd_iadst4_c, highbd_idst4_c }, // ADST_DST = 12, - { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 13, - { vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 14, - { highbd_idst4_c, highbd_idst4_c }, // DST_DST = 15 - { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX = 16 - { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT = 17 - { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT = 18 + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST + { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX + { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT + { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT + { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_ADST + { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_ADST + { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_FLIPADST + { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_FLIPADST #endif // CONFIG_EXT_TX }; @@ -1311,26 +1021,23 @@ void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { static const highbd_transform_2d HIGH_IHT_8[] = { - { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0, - { vpx_highbd_iadst8_c, 
vpx_highbd_idct8_c }, // ADST_DCT = 1, - { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2, - { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST = 3, + { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST #if CONFIG_EXT_TX - { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT = 4, - { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST = 5, - { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST = 6, - { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST = 7, - { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST = 8, - { highbd_idst8_c, vpx_highbd_idct8_c }, // DST_DCT = 9, - { vpx_highbd_idct8_c, highbd_idst8_c }, // DCT_DST = 10, - { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_ADST = 11, - { vpx_highbd_iadst8_c, highbd_idst8_c }, // ADST_DST = 12, - { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 13, - { vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 14, - { highbd_idst8_c, highbd_idst8_c }, // DST_DST = 15 - { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX = 16 - { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT = 17 - { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT = 18 + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST + { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX + { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT + { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT + { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_ADST + { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_ADST + { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_FLIPADST + { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_FLIPADST #endif // CONFIG_EXT_TX }; @@ -1380,26 +1087,23 @@ void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { static const highbd_transform_2d HIGH_IHT_16[] = { - { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0, - { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1, - { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2, - { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST = 3, + { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST #if CONFIG_EXT_TX - { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT = 4, - { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST = 5, - { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST = 6, - { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST = 7, - { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST = 8, - { highbd_idst16_c, vpx_highbd_idct16_c }, // DST_DCT = 9, - { vpx_highbd_idct16_c, highbd_idst16_c }, // DCT_DST = 10, - { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_ADST = 11, - { vpx_highbd_iadst16_c, highbd_idst16_c }, // ADST_DST = 12, - { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 13, - 
{ vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 14, - { highbd_idst16_c, highbd_idst16_c }, // DST_DST = 15 - { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX = 16 - { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT = 17 - { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT = 18 + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST + { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX + { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT + { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT + { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_ADST + { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_ADST + { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_FLIPADST + { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_FLIPADST #endif // CONFIG_EXT_TX }; @@ -1459,16 +1163,13 @@ void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST - { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT - { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST - { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST - { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST - { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST - { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST - { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST - { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX + { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX { vpx_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT - { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT + { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT + { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_ADST + { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_ADST + { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_FLIPADST + { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_FLIPADST }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -1602,15 +1303,12 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, case FLIPADST_ADST: vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); break; - case DST_DST: - case DST_DCT: - case DCT_DST: - case DST_ADST: - case ADST_DST: - case FLIPADST_DST: - case DST_FLIPADST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: // Use C version since DST only exists in C code vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd); break; @@ -1644,15 +1342,12 @@ void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, case FLIPADST_ADST: vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); break; - case DST_DST: - case DST_DCT: - case DCT_DST: - case DST_ADST: - case ADST_DST: - case FLIPADST_DST: - case DST_FLIPADST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: // Use C version since DST only exists in C code vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd); break; @@ -1686,15 +1381,12 @@ void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, case FLIPADST_ADST: 
vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); break; - case DST_DST: - case DST_DCT: - case DCT_DST: - case DST_ADST: - case ADST_DST: - case FLIPADST_DST: - case DST_FLIPADST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: // Use C version since DST only exists in C code vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd); break; @@ -1724,15 +1416,12 @@ void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - case DST_DST: - case DST_DCT: - case DCT_DST: - case DST_ADST: - case ADST_DST: - case FLIPADST_DST: - case DST_FLIPADST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd); break; case IDTX: diff --git a/vp10/common/scan.c b/vp10/common/scan.c index 6dc560457..2644ecf76 100644 --- a/vp10/common/scan.c +++ b/vp10/common/scan.c @@ -2882,13 +2882,10 @@ const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, + {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors}, + {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, + {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors}, {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors}, }, { // TX_8X8 @@ -2902,13 +2899,10 @@ const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors}, + {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors}, + {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors}, + {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors}, {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors}, {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors}, }, { // TX_16X16 @@ -2930,22 +2924,12 @@ const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { 
default_scan_16x16_neighbors}, {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, - {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, + {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, + {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, + {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, + {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, + {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, + {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, }, { // TX_32X32 {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, @@ -2965,26 +2949,14 @@ const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { qtr_scan_32x32_neighbors}, {qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors}, - {h2_scan_32x32, vp10_h2_iscan_32x32, - h2_scan_32x32_neighbors}, - {v2_scan_32x32, vp10_v2_iscan_32x32, - v2_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, - {h2_scan_32x32, vp10_h2_iscan_32x32, - h2_scan_32x32_neighbors}, - {v2_scan_32x32, vp10_v2_iscan_32x32, - v2_scan_32x32_neighbors}, + {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, + {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, + {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, + {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, + {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, + {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, } }; @@ -3000,13 +2972,10 @@ const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, + {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors}, + {mcol_scan_4x4, 
vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors}, + {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors}, + {mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors}, {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors}, {mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors}, }, { // TX_8X8 @@ -3020,13 +2989,10 @@ const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = { {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, + {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors}, + {mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors}, + {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors}, + {mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors}, {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors}, {mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors}, }, { // TX_16X16 @@ -3050,22 +3016,12 @@ const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = { default_scan_16x16_neighbors}, {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors}, - {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors}, + {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors}, + {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors}, + {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors}, + {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors}, + {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors}, + {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors}, }, { // TX_32X32 {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, @@ -3085,24 +3041,14 @@ const scan_order vp10_inter_scan_orders[TX_SIZES][TX_TYPES] = { qtr_scan_32x32_neighbors}, {qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors}, - {h2_scan_32x32, vp10_h2_iscan_32x32, - h2_scan_32x32_neighbors}, - {v2_scan_32x32, vp10_v2_iscan_32x32, - v2_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - 
qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, + {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, + {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, + {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, + {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, } }; diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c index 8a1ee201c..11d4a8e99 100644 --- a/vp10/encoder/dct.c +++ b/vp10/encoder/dct.c @@ -36,219 +36,6 @@ static INLINE void range_check(const tran_low_t *input, const int size, #endif } -#if CONFIG_EXT_TX -void fdst4(const tran_low_t *input, tran_low_t *output) { - tran_high_t step[4]; - tran_high_t temp1, temp2; - - step[0] = input[0] - input[3]; - step[1] = -input[1] + input[2]; - step[2] = -input[1] - input[2]; - step[3] = input[0] + input[3]; - - temp1 = (step[0] + step[1]) * cospi_16_64; - temp2 = (step[0] - step[1]) * cospi_16_64; - output[3] = fdct_round_shift(temp1); - output[1] = fdct_round_shift(temp2); - temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; - temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; - output[2] = fdct_round_shift(temp1); - output[0] = fdct_round_shift(temp2); -} - -void fdst8(const tran_low_t *input, tran_low_t *output) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 - tran_high_t t0, t1, t2, t3; // needs32 - tran_high_t x0, x1, x2, x3; // canbe16 - - // stage 1 - s0 = input[0] - input[7]; - s1 = -input[1] + input[6]; - s2 = input[2] - input[5]; - s3 = -input[3] + input[4]; - s4 = -input[3] - input[4]; - s5 = input[2] + input[5]; - s6 = -input[1] - input[6]; - s7 = input[0] + input[7]; - - x0 = s0 + s3; - x1 = s1 + s2; - x2 = s1 - s2; - x3 = s0 - s3; - t0 = (x0 + x1) * cospi_16_64; - t1 = (x0 - x1) * cospi_16_64; - t2 = x2 * cospi_24_64 + x3 * cospi_8_64; - t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; - output[7] = fdct_round_shift(t0); - output[5] = fdct_round_shift(t2); - output[3] = fdct_round_shift(t1); - output[1] = fdct_round_shift(t3); - - // Stage 2 - t0 = (s6 - s5) * cospi_16_64; - t1 = (s6 + s5) * cospi_16_64; - t2 = fdct_round_shift(t0); - t3 = fdct_round_shift(t1); - - // Stage 3 - x0 = s4 + t2; - x1 = s4 - t2; - x2 = s7 - t3; - x3 = s7 + t3; - - // Stage 4 - t0 = x0 * cospi_28_64 + x3 * cospi_4_64; - t1 = x1 * cospi_12_64 + x2 * cospi_20_64; - t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; - t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - output[6] = fdct_round_shift(t0); - output[4] = fdct_round_shift(t2); - output[2] = fdct_round_shift(t1); - output[0] = fdct_round_shift(t3); -} - -void fdst16(const tran_low_t *input, tran_low_t *output) { - tran_high_t step1[8]; // canbe16 - tran_high_t step2[8]; // canbe16 - tran_high_t step3[8]; // canbe16 - tran_high_t in[8]; // canbe16 - tran_high_t temp1, temp2; // needs32 - - // step 1 - in[0] = input[0] - input[15]; - in[1] = -input[1] + input[14]; - in[2] = input[2] - input[13]; - in[3] = -input[3] + input[12]; - in[4] = input[4] - input[11]; - in[5] = -input[5] + input[10]; - in[6] = input[6] - input[ 9]; - in[7] = -input[7] + input[ 8]; - - step1[0] = -input[7] - input[ 8]; - step1[1] = input[6] + input[ 9]; - step1[2] = -input[5] - input[10]; - step1[3] = input[4] + input[11]; - step1[4] = -input[3] - input[12]; - step1[5] = input[2] + input[13]; 
- step1[6] = -input[1] - input[14]; - step1[7] = input[0] + input[15]; - - // fdct8(step, step); - { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 - tran_high_t t0, t1, t2, t3; // needs32 - tran_high_t x0, x1, x2, x3; // canbe16 - - // stage 1 - s0 = in[0] + in[7]; - s1 = in[1] + in[6]; - s2 = in[2] + in[5]; - s3 = in[3] + in[4]; - s4 = in[3] - in[4]; - s5 = in[2] - in[5]; - s6 = in[1] - in[6]; - s7 = in[0] - in[7]; - - // fdct4(step, step); - x0 = s0 + s3; - x1 = s1 + s2; - x2 = s1 - s2; - x3 = s0 - s3; - t0 = (x0 + x1) * cospi_16_64; - t1 = (x0 - x1) * cospi_16_64; - t2 = x3 * cospi_8_64 + x2 * cospi_24_64; - t3 = x3 * cospi_24_64 - x2 * cospi_8_64; - output[15] = fdct_round_shift(t0); - output[11] = fdct_round_shift(t2); - output[7] = fdct_round_shift(t1); - output[3] = fdct_round_shift(t3); - - // Stage 2 - t0 = (s6 - s5) * cospi_16_64; - t1 = (s6 + s5) * cospi_16_64; - t2 = fdct_round_shift(t0); - t3 = fdct_round_shift(t1); - - // Stage 3 - x0 = s4 + t2; - x1 = s4 - t2; - x2 = s7 - t3; - x3 = s7 + t3; - - // Stage 4 - t0 = x0 * cospi_28_64 + x3 * cospi_4_64; - t1 = x1 * cospi_12_64 + x2 * cospi_20_64; - t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; - t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - output[13] = fdct_round_shift(t0); - output[9] = fdct_round_shift(t2); - output[5] = fdct_round_shift(t1); - output[1] = fdct_round_shift(t3); - } - - // step 2 - temp1 = (step1[5] - step1[2]) * cospi_16_64; - temp2 = (step1[4] - step1[3]) * cospi_16_64; - step2[2] = fdct_round_shift(temp1); - step2[3] = fdct_round_shift(temp2); - temp1 = (step1[4] + step1[3]) * cospi_16_64; - temp2 = (step1[5] + step1[2]) * cospi_16_64; - step2[4] = fdct_round_shift(temp1); - step2[5] = fdct_round_shift(temp2); - - // step 3 - step3[0] = step1[0] + step2[3]; - step3[1] = step1[1] + step2[2]; - step3[2] = step1[1] - step2[2]; - step3[3] = step1[0] - step2[3]; - step3[4] = step1[7] - step2[4]; - step3[5] = step1[6] - step2[5]; - step3[6] = step1[6] + step2[5]; - step3[7] = step1[7] + step2[4]; - - // step 4 - temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; - temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; - step2[1] = fdct_round_shift(temp1); - step2[2] = fdct_round_shift(temp2); - temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; - temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; - step2[5] = fdct_round_shift(temp1); - step2[6] = fdct_round_shift(temp2); - - // step 5 - step1[0] = step3[0] + step2[1]; - step1[1] = step3[0] - step2[1]; - step1[2] = step3[3] + step2[2]; - step1[3] = step3[3] - step2[2]; - step1[4] = step3[4] - step2[5]; - step1[5] = step3[4] + step2[5]; - step1[6] = step3[7] - step2[6]; - step1[7] = step3[7] + step2[6]; - - // step 6 - temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; - temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; - output[14] = fdct_round_shift(temp1); - output[6] = fdct_round_shift(temp2); - - temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; - temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; - output[10] = fdct_round_shift(temp1); - output[2] = fdct_round_shift(temp2); - - temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; - temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; - output[12] = fdct_round_shift(temp1); - output[4] = fdct_round_shift(temp2); - - temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; - temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; - output[8] = fdct_round_shift(temp1); - output[0] = fdct_round_shift(temp2); -} -#endif // CONFIG_EXT_TX - static void 
fdct4(const tran_low_t *input, tran_low_t *output) { tran_high_t temp; tran_low_t step[4]; @@ -1236,22 +1023,6 @@ static void fidtx32(const tran_low_t *input, tran_low_t *output) { output[i] = input[i] * 4; } -// For use in lieu of DST -static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) { - int i; - tran_low_t inputhalf[16]; - for (i = 0; i < 8; ++i) { - output[16 + i] = input[i] * 4; - output[24 + i] = input[24 + i] * 4; - } - // Multiply input by sqrt(2) - for (i = 0; i < 16; ++i) { - inputhalf[i] = (tran_low_t)fdct_round_shift(input[i + 8] * Sqrt2); - } - fdct16(inputhalf, output); - // Note overall scaling factor is 4 times orthogonal -} - // For use in lieu of ADST static void fhalfright32(const tran_low_t *input, tran_low_t *output) { int i; @@ -1334,25 +1105,22 @@ static void maybe_flip_input(const int16_t **src, int *src_stride, int l, case ADST_DCT: case DCT_ADST: case ADST_ADST: - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: case IDTX: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: break; case FLIPADST_DCT: case FLIPADST_ADST: - case FLIPADST_DST: + case V_FLIPADST: copy_flipud(*src, *src_stride, l, buff, l); *src = buff; *src_stride = l; break; case DCT_FLIPADST: case ADST_FLIPADST: - case DST_FLIPADST: + case H_FLIPADST: copy_fliplr(*src, *src_stride, l, buff, l); *src = buff; *src_stride = l; @@ -1370,98 +1138,86 @@ static void maybe_flip_input(const int16_t **src, int *src_stride, int l, #endif // CONFIG_EXT_TX static const transform_2d FHT_4[] = { - { fdct4, fdct4 }, // DCT_DCT = 0, - { fadst4, fdct4 }, // ADST_DCT = 1, - { fdct4, fadst4 }, // DCT_ADST = 2, - { fadst4, fadst4 }, // ADST_ADST = 3, + { fdct4, fdct4 }, // DCT_DCT + { fadst4, fdct4 }, // ADST_DCT + { fdct4, fadst4 }, // DCT_ADST + { fadst4, fadst4 }, // ADST_ADST #if CONFIG_EXT_TX - { fadst4, fdct4 }, // FLIPADST_DCT = 4, - { fdct4, fadst4 }, // DCT_FLIPADST = 5, - { fadst4, fadst4 }, // FLIPADST_FLIPADST = 6, - { fadst4, fadst4 }, // ADST_FLIPADST = 7, - { fadst4, fadst4 }, // FLIPADST_ADST = 8, - { fdst4, fdct4 }, // DST_DCT = 9, - { fdct4, fdst4 }, // DCT_DST = 10, - { fdst4, fadst4 }, // DST_ADST = 11, - { fadst4, fdst4 }, // ADST_DST = 12, - { fdst4, fadst4 }, // DST_FLIPADST = 13, - { fadst4, fdst4 }, // FLIPADST_DST = 14, - { fdst4, fdst4 }, // DST_DST = 15 - { fidtx4, fidtx4 }, // IDTX = 16 - { fdct4, fidtx4 }, // V_DCT = 17 - { fidtx4, fdct4 }, // H_DCT = 18 + { fadst4, fdct4 }, // FLIPADST_DCT + { fdct4, fadst4 }, // DCT_FLIPADST + { fadst4, fadst4 }, // FLIPADST_FLIPADST + { fadst4, fadst4 }, // ADST_FLIPADST + { fadst4, fadst4 }, // FLIPADST_ADST + { fidtx4, fidtx4 }, // IDTX + { fdct4, fidtx4 }, // V_DCT + { fidtx4, fdct4 }, // H_DCT + { fadst4, fidtx4 }, // V_ADST + { fidtx4, fadst4 }, // H_ADST + { fadst4, fidtx4 }, // V_FLIPADST + { fidtx4, fadst4 }, // H_FLIPADST #endif // CONFIG_EXT_TX }; static const transform_2d FHT_8[] = { - { fdct8, fdct8 }, // DCT_DCT = 0, - { fadst8, fdct8 }, // ADST_DCT = 1, - { fdct8, fadst8 }, // DCT_ADST = 2, - { fadst8, fadst8 }, // ADST_ADST = 3, + { fdct8, fdct8 }, // DCT_DCT + { fadst8, fdct8 }, // ADST_DCT + { fdct8, fadst8 }, // DCT_ADST + { fadst8, fadst8 }, // ADST_ADST #if CONFIG_EXT_TX - { fadst8, fdct8 }, // FLIPADST_DCT = 4, - { fdct8, fadst8 }, // DCT_FLIPADST = 5, - { fadst8, fadst8 }, // FLIPADST_FLIPADST = 6, - { fadst8, fadst8 }, // ADST_FLIPADST = 7, - { fadst8, fadst8 }, // FLIPADST_ADST = 8, - { fdst8, fdct8 }, // DST_DCT = 9, - { fdct8, fdst8 }, // DCT_DST = 10, - { 
fdst8, fadst8 }, // DST_ADST = 11, - { fadst8, fdst8 }, // ADST_DST = 12, - { fdst8, fadst8 }, // DST_FLIPADST = 13, - { fadst8, fdst8 }, // FLIPADST_DST = 14, - { fdst8, fdst8 }, // DST_DST = 15 - { fidtx8, fidtx8 }, // IDTX = 16 - { fdct8, fidtx8 }, // V_DCT = 17 - { fidtx8, fdct8 }, // H_DCT = 18 + { fadst8, fdct8 }, // FLIPADST_DCT + { fdct8, fadst8 }, // DCT_FLIPADST + { fadst8, fadst8 }, // FLIPADST_FLIPADST + { fadst8, fadst8 }, // ADST_FLIPADST + { fadst8, fadst8 }, // FLIPADST_ADST + { fidtx8, fidtx8 }, // IDTX + { fdct8, fidtx8 }, // V_DCT + { fidtx8, fdct8 }, // H_DCT + { fadst8, fidtx8 }, // V_ADST + { fidtx8, fadst8 }, // H_ADST + { fadst8, fidtx8 }, // V_FLIPADST + { fidtx8, fadst8 }, // H_FLIPADST #endif // CONFIG_EXT_TX }; static const transform_2d FHT_16[] = { - { fdct16, fdct16 }, // DCT_DCT = 0, - { fadst16, fdct16 }, // ADST_DCT = 1, - { fdct16, fadst16 }, // DCT_ADST = 2, - { fadst16, fadst16 }, // ADST_ADST = 3, + { fdct16, fdct16 }, // DCT_DCT + { fadst16, fdct16 }, // ADST_DCT + { fdct16, fadst16 }, // DCT_ADST + { fadst16, fadst16 }, // ADST_ADST #if CONFIG_EXT_TX - { fadst16, fdct16 }, // FLIPADST_DCT = 4, - { fdct16, fadst16 }, // DCT_FLIPADST = 5, - { fadst16, fadst16 }, // FLIPADST_FLIPADST = 6, - { fadst16, fadst16 }, // ADST_FLIPADST = 7, - { fadst16, fadst16 }, // FLIPADST_ADST = 8, - { fdst16, fdct16 }, // DST_DCT = 9, - { fdct16, fdst16 }, // DCT_DST = 10, - { fdst16, fadst16 }, // DST_ADST = 11, - { fadst16, fdst16 }, // ADST_DST = 12, - { fdst16, fadst16 }, // DST_FLIPADST = 13, - { fadst16, fdst16 }, // FLIPADST_DST = 14, - { fdst16, fdst16 }, // DST_DST = 15 - { fidtx16, fidtx16 }, // IDTX = 16 - { fdct16, fidtx16 }, // V_DCT = 17 - { fidtx16, fdct16 }, // H_DCT = 18 + { fadst16, fdct16 }, // FLIPADST_DCT + { fdct16, fadst16 }, // DCT_FLIPADST + { fadst16, fadst16 }, // FLIPADST_FLIPADST + { fadst16, fadst16 }, // ADST_FLIPADST + { fadst16, fadst16 }, // FLIPADST_ADST + { fidtx16, fidtx16 }, // IDTX + { fdct16, fidtx16 }, // V_DCT + { fidtx16, fdct16 }, // H_DCT + { fadst16, fidtx16 }, // V_ADST + { fidtx16, fadst16 }, // H_ADST + { fadst16, fidtx16 }, // V_FLIPADST + { fidtx16, fadst16 }, // H_FLIPADST #endif // CONFIG_EXT_TX }; #if CONFIG_EXT_TX static const transform_2d FHT_32[] = { - { fdct32, fdct32 }, // DCT_DCT = 0, - { fhalfright32, fdct32 }, // ADST_DCT = 1, - { fdct32, fhalfright32 }, // DCT_ADST = 2, - { fhalfright32, fhalfright32 }, // ADST_ADST = 3, - { fhalfright32, fdct32 }, // FLIPADST_DCT = 4, - { fdct32, fhalfright32 }, // DCT_FLIPADST = 5, - { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST = 6, - { fhalfright32, fhalfright32 }, // ADST_FLIPADST = 7, - { fhalfright32, fhalfright32 }, // FLIPADST_ADST = 8, - { fhalfcenter32, fdct32 }, // DST_DCT = 9, - { fdct32, fhalfcenter32 }, // DCT_DST = 10, - { fhalfcenter32, fhalfright32 }, // DST_ADST = 11, - { fhalfright32, fhalfcenter32 }, // ADST_DST = 12, - { fhalfcenter32, fhalfright32 }, // DST_FLIPADST = 13, - { fhalfright32, fhalfcenter32 }, // FLIPADST_DST = 14, - { fhalfcenter32, fhalfcenter32 }, // DST_DST = 15 - { fidtx32, fidtx32 }, // IDTX = 16 - { fdct32, fidtx32 }, // V_DCT = 17 - { fidtx32, fdct32 }, // H_DCT = 18 + { fdct32, fdct32 }, // DCT_DCT + { fhalfright32, fdct32 }, // ADST_DCT + { fdct32, fhalfright32 }, // DCT_ADST + { fhalfright32, fhalfright32 }, // ADST_ADST + { fhalfright32, fdct32 }, // FLIPADST_DCT + { fdct32, fhalfright32 }, // DCT_FLIPADST + { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST + { fhalfright32, fhalfright32 }, // ADST_FLIPADST + { 
fhalfright32, fhalfright32 }, // FLIPADST_ADST + { fidtx32, fidtx32 }, // IDTX + { fdct32, fidtx32 }, // V_DCT + { fidtx32, fdct32 }, // H_DCT + { fhalfright32, fidtx32 }, // V_ADST + { fidtx32, fhalfright32 }, // H_ADST + { fhalfright32, fidtx32 }, // V_FLIPADST + { fidtx32, fhalfright32 }, // H_FLIPADST }; #endif // CONFIG_EXT_TX diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index 2c47be9b5..c0045c2a4 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -5314,11 +5314,6 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, #if CONFIG_EXT_TX if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue; - if (ext_tx_set == 1 && - tx_type >= DST_ADST && tx_type < IDTX && *best_tx == DCT_DCT) { - tx_type = IDTX - 1; - continue; - } #else if (tx_size >= TX_32X32 && tx_type != DCT_DCT) continue; diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c index faedb4349..785fef088 100644 --- a/vp10/encoder/hybrid_fwd_txfm.c +++ b/vp10/encoder/hybrid_fwd_txfm.c @@ -54,17 +54,14 @@ void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: - case DST_FLIPADST: - case FLIPADST_DST: vp10_fht4x4(src_diff, coeff, diff_stride, tx_type); break; - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type); break; case IDTX: @@ -96,17 +93,14 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: - case DST_FLIPADST: - case FLIPADST_DST: vp10_fht8x8(src_diff, coeff, diff_stride, tx_type); break; - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type); break; case IDTX: @@ -138,17 +132,14 @@ static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: - case DST_FLIPADST: - case FLIPADST_DST: vp10_fht16x16(src_diff, coeff, diff_stride, tx_type); break; - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type); break; case IDTX: @@ -180,17 +171,14 @@ static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: - case DST_FLIPADST: - case FLIPADST_DST: vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type); break; - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type); break; case IDTX: @@ -227,15 +215,12 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, case FLIPADST_ADST: vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type); break; - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: - case DST_FLIPADST: - case FLIPADST_DST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: 
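/* The V_/H_ hybrid transform types currently exist only as C code, so the
 * C variant is called explicitly for them. */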
vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type); break; case IDTX: @@ -270,15 +255,12 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, case FLIPADST_ADST: vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); break; - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: - case DST_FLIPADST: - case FLIPADST_DST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: // Use C version since DST exists only in C vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type); break; @@ -314,15 +296,12 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, case FLIPADST_ADST: vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); break; - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: - case DST_FLIPADST: - case FLIPADST_DST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: // Use C version since DST exists only in C vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type); break; @@ -355,15 +334,12 @@ static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - case DST_DST: - case DCT_DST: - case DST_DCT: - case DST_ADST: - case ADST_DST: - case DST_FLIPADST: - case FLIPADST_DST: - case H_DCT: case V_DCT: + case H_DCT: + case V_ADST: + case H_ADST: + case V_FLIPADST: + case H_FLIPADST: vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type); break; case IDTX: diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 3d9259196..9b1928a71 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -1349,11 +1349,6 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, if (cpi->sf.tx_type_search > 0) { if (!do_tx_type_search(tx_type, prune)) continue; - } else if (ext_tx_set == 1 && - tx_type >= DST_ADST && tx_type < IDTX && - best_tx_type == DCT_DCT) { - tx_type = IDTX - 1; - continue; } } else { if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) { @@ -1362,12 +1357,6 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, } if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue; - if (ext_tx_set == 1 && - tx_type >= DST_ADST && tx_type < IDTX && - best_tx_type == DCT_DCT) { - tx_type = IDTX - 1; - continue; - } } mbmi->tx_type = tx_type; @@ -1533,11 +1522,6 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, if (cpi->sf.tx_type_search > 0) { if (!do_tx_type_search(tx_type, prune)) continue; - } else if (ext_tx_set == 1 && - tx_type >= DST_ADST && tx_type < IDTX && - best_tx_type == DCT_DCT) { - tx_type = IDTX - 1; - continue; } } else { if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) { @@ -1546,12 +1530,6 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, } if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue; - if (ext_tx_set == 1 && - tx_type >= DST_ADST && tx_type < IDTX && - best_tx_type == DCT_DCT) { - tx_type = IDTX - 1; - break; - } } mbmi->tx_type = tx_type; txfm_rd_in_plane(x, @@ -3169,11 +3147,6 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, if (cpi->sf.tx_type_search > 0) { if (!do_tx_type_search(tx_type, prune)) continue; - } else if (ext_tx_set == 1 && - tx_type >= DST_ADST && tx_type < IDTX && - best_tx_type == DCT_DCT) { - tx_type = IDTX - 1; - continue; } } else { if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) { @@ -3182,12 +3155,6 @@ static void 
select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, } if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue; - if (ext_tx_set == 1 && - tx_type >= DST_ADST && tx_type < IDTX && - best_tx_type == DCT_DCT) { - tx_type = IDTX - 1; - break; - } } mbmi->tx_type = tx_type; diff --git a/vp10/encoder/x86/dct_sse2.c b/vp10/encoder/x86/dct_sse2.c index 8ff7c9c79..8a55425a0 100644 --- a/vp10/encoder/x86/dct_sse2.c +++ b/vp10/encoder/x86/dct_sse2.c @@ -172,42 +172,6 @@ static void fadst4_sse2(__m128i *in) { transpose_4x4(in); } -#if CONFIG_EXT_TX -static void fdst4_sse2(__m128i *in) { - const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t) cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64); - const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - - __m128i u[4], v[4]; - - u[0] = _mm_unpacklo_epi16(in[0], in[1]); - u[1] = _mm_unpacklo_epi16(in[3], in[2]); - - v[0] = _mm_add_epi16(u[0], u[1]); - v[1] = _mm_sub_epi16(u[0], u[1]); - - u[0] = _mm_madd_epi16(v[0], k__cospi_p24_p08); - u[1] = _mm_madd_epi16(v[1], k__cospi_p16_p16); - u[2] = _mm_madd_epi16(v[0], k__cospi_p08_m24); - u[3] = _mm_madd_epi16(v[1], k__cospi_p16_m16); - - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); - - in[0] = _mm_packs_epi32(u[0], u[2]); - in[1] = _mm_packs_epi32(u[1], u[3]); - transpose_4x4(in); -} -#endif // CONFIG_EXT_TX - void vp10_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type) { __m128i in[4]; @@ -265,48 +229,6 @@ void vp10_fht4x4_sse2(const int16_t *input, tran_low_t *output, fadst4_sse2(in); write_buffer_4x4(output, in); break; - case DST_DST: - load_buffer_4x4(input, in, stride, 0, 0); - fdst4_sse2(in); - fdst4_sse2(in); - write_buffer_4x4(output, in); - break; - case DCT_DST: - load_buffer_4x4(input, in, stride, 0, 0); - fdct4_sse2(in); - fdst4_sse2(in); - write_buffer_4x4(output, in); - break; - case DST_DCT: - load_buffer_4x4(input, in, stride, 0, 0); - fdst4_sse2(in); - fdct4_sse2(in); - write_buffer_4x4(output, in); - break; - case DST_ADST: - load_buffer_4x4(input, in, stride, 0, 0); - fdst4_sse2(in); - fadst4_sse2(in); - write_buffer_4x4(output, in); - break; - case ADST_DST: - load_buffer_4x4(input, in, stride, 0, 0); - fadst4_sse2(in); - fdst4_sse2(in); - write_buffer_4x4(output, in); - break; - case DST_FLIPADST: - load_buffer_4x4(input, in, stride, 0, 1); - fdst4_sse2(in); - fadst4_sse2(in); - write_buffer_4x4(output, in); - break; - case FLIPADST_DST: - load_buffer_4x4(input, in, stride, 1, 0); - fadst4_sse2(in); - fdst4_sse2(in); - write_buffer_4x4(output, in); - break; #endif // CONFIG_EXT_TX default: assert(0); @@ -1288,155 +1210,6 @@ static void fadst8_sse2(__m128i *in) { array_transpose_8x8(in, in); } -#if CONFIG_EXT_TX -static void fdst8_sse2(__m128i *in) { - // Constants - const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t) cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t) -cospi_16_64); - const 
__m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); - const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64); - const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); - const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - - __m128i s0, s1, s2, s3, s4, s5, s6, s7; - __m128i x0, x1, x2, x3, x4, x5, x6, x7; - __m128i t0, t1, t2, t3, t4, t5, t6, t7; - - s0 = _mm_sub_epi16(in[0], in[7]); - s1 = _mm_sub_epi16(in[1], in[6]); // -s1 - s2 = _mm_sub_epi16(in[2], in[5]); - s3 = _mm_sub_epi16(in[3], in[4]); // -s3 - s4 = _mm_add_epi16(in[3], in[4]); // -s4 - s5 = _mm_add_epi16(in[2], in[5]); - s6 = _mm_add_epi16(in[1], in[6]); // -s6 - s7 = _mm_add_epi16(in[0], in[7]); - - x0 = _mm_sub_epi16(s0, s3); - x1 = _mm_sub_epi16(s1, s2); // -x1 - x2 = _mm_add_epi16(s1, s2); // -x2 - x3 = _mm_add_epi16(s0, s3); - - // Interleave - t0 = _mm_unpacklo_epi16(x0, x1); - t1 = _mm_unpackhi_epi16(x0, x1); - t2 = _mm_unpacklo_epi16(x2, x3); - t3 = _mm_unpackhi_epi16(x2, x3); - - // Perform butterfly multiplication/addition - x0 = _mm_madd_epi16(t0, k__cospi_p16_m16); - x1 = _mm_madd_epi16(t1, k__cospi_p16_m16); - x2 = _mm_madd_epi16(t0, k__cospi_p16_p16); - x3 = _mm_madd_epi16(t1, k__cospi_p16_p16); - x4 = _mm_madd_epi16(t2, k__cospi_m24_p08); - x5 = _mm_madd_epi16(t3, k__cospi_m24_p08); - x6 = _mm_madd_epi16(t2, k__cospi_p08_p24); - x7 = _mm_madd_epi16(t3, k__cospi_p08_p24); - - // Rounding - t0 = _mm_add_epi32(x0, k__DCT_CONST_ROUNDING); - t1 = _mm_add_epi32(x1, k__DCT_CONST_ROUNDING); - t2 = _mm_add_epi32(x2, k__DCT_CONST_ROUNDING); - t3 = _mm_add_epi32(x3, k__DCT_CONST_ROUNDING); - t4 = _mm_add_epi32(x4, k__DCT_CONST_ROUNDING); - t5 = _mm_add_epi32(x5, k__DCT_CONST_ROUNDING); - t6 = _mm_add_epi32(x6, k__DCT_CONST_ROUNDING); - t7 = _mm_add_epi32(x7, k__DCT_CONST_ROUNDING); - // Shift - x0 = _mm_srai_epi32(t0, DCT_CONST_BITS); - x1 = _mm_srai_epi32(t1, DCT_CONST_BITS); - x2 = _mm_srai_epi32(t2, DCT_CONST_BITS); - x3 = _mm_srai_epi32(t3, DCT_CONST_BITS); - x4 = _mm_srai_epi32(t4, DCT_CONST_BITS); - x5 = _mm_srai_epi32(t5, DCT_CONST_BITS); - x6 = _mm_srai_epi32(t6, DCT_CONST_BITS); - x7 = _mm_srai_epi32(t7, DCT_CONST_BITS); - - // Pack 32b integer to 16b with signed saturation - in[7] = _mm_packs_epi32(x0, x1); - in[5] = _mm_packs_epi32(x4, x5); - in[3] = _mm_packs_epi32(x2, x3); - in[1] = _mm_packs_epi32(x6, x7); - - // Interleave - s0 = _mm_unpacklo_epi16(s6, s5); - s1 = _mm_unpackhi_epi16(s6, s5); - - // Perform butterfly multiplication/addition - x0 = _mm_madd_epi16(s0, k__cospi_m16_m16); - x1 = _mm_madd_epi16(s1, k__cospi_m16_m16); - x2 = _mm_madd_epi16(s0, k__cospi_m16_p16); - x3 = _mm_madd_epi16(s1, k__cospi_m16_p16); - - // Rounding - t0 = _mm_add_epi32(x0, k__DCT_CONST_ROUNDING); - t1 = _mm_add_epi32(x1, k__DCT_CONST_ROUNDING); - t2 = _mm_add_epi32(x2, k__DCT_CONST_ROUNDING); - t3 = _mm_add_epi32(x3, k__DCT_CONST_ROUNDING); - - // Shift - x0 = _mm_srai_epi32(t0, DCT_CONST_BITS); - x1 = _mm_srai_epi32(t1, DCT_CONST_BITS); - x2 = _mm_srai_epi32(t2, DCT_CONST_BITS); - x3 = _mm_srai_epi32(t3, DCT_CONST_BITS); - - // Pack 32b integer to 16b with signed saturation - t2 = _mm_packs_epi32(x0, x1); - t3 = _mm_packs_epi32(x2, x3); - - x0 = 
_mm_sub_epi16(t2, s4); - x1 = _mm_add_epi16(t2, s4); // -x1 - x2 = _mm_sub_epi16(s7, t3); - x3 = _mm_add_epi16(s7, t3); - - s0 = _mm_unpacklo_epi16(x0, x3); - s1 = _mm_unpackhi_epi16(x0, x3); - s2 = _mm_unpacklo_epi16(x1, x2); - s3 = _mm_unpackhi_epi16(x1, x2); - - t0 = _mm_madd_epi16(s0, k__cospi_p28_p04); - t1 = _mm_madd_epi16(s1, k__cospi_p28_p04); - t2 = _mm_madd_epi16(s2, k__cospi_m12_p20); - t3 = _mm_madd_epi16(s3, k__cospi_m12_p20); - t4 = _mm_madd_epi16(s2, k__cospi_p20_p12); - t5 = _mm_madd_epi16(s3, k__cospi_p20_p12); - t6 = _mm_madd_epi16(s0, k__cospi_m04_p28); - t7 = _mm_madd_epi16(s1, k__cospi_m04_p28); - - // Rounding - x0 = _mm_add_epi32(t0, k__DCT_CONST_ROUNDING); - x1 = _mm_add_epi32(t1, k__DCT_CONST_ROUNDING); - x2 = _mm_add_epi32(t2, k__DCT_CONST_ROUNDING); - x3 = _mm_add_epi32(t3, k__DCT_CONST_ROUNDING); - x4 = _mm_add_epi32(t4, k__DCT_CONST_ROUNDING); - x5 = _mm_add_epi32(t5, k__DCT_CONST_ROUNDING); - x6 = _mm_add_epi32(t6, k__DCT_CONST_ROUNDING); - x7 = _mm_add_epi32(t7, k__DCT_CONST_ROUNDING); - // Shift - s0 = _mm_srai_epi32(x0, DCT_CONST_BITS); - s1 = _mm_srai_epi32(x1, DCT_CONST_BITS); - s2 = _mm_srai_epi32(x2, DCT_CONST_BITS); - s3 = _mm_srai_epi32(x3, DCT_CONST_BITS); - s4 = _mm_srai_epi32(x4, DCT_CONST_BITS); - s5 = _mm_srai_epi32(x5, DCT_CONST_BITS); - s6 = _mm_srai_epi32(x6, DCT_CONST_BITS); - s7 = _mm_srai_epi32(x7, DCT_CONST_BITS); - - in[6] = _mm_packs_epi32(s0, s1); - in[4] = _mm_packs_epi32(s4, s5); - in[2] = _mm_packs_epi32(s2, s3); - in[0] = _mm_packs_epi32(s6, s7); - - // coeffs: [x3 x2 x1 x0, x7 x6 x5 x4] - // Transpose - array_transpose_8x8(in, in); -} -#endif // CONFIG_EXT_TX - void vp10_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type) { __m128i in[8]; @@ -1502,55 +1275,6 @@ void vp10_fht8x8_sse2(const int16_t *input, tran_low_t *output, right_shift_8x8(in, 1); write_buffer_8x8(output, in, 8); break; - case DST_DST: - load_buffer_8x8(input, in, stride, 0, 0); - fdst8_sse2(in); - fdst8_sse2(in); - right_shift_8x8(in, 1); - write_buffer_8x8(output, in, 8); - break; - case DCT_DST: - load_buffer_8x8(input, in, stride, 0, 0); - fdct8_sse2(in); - fdst8_sse2(in); - right_shift_8x8(in, 1); - write_buffer_8x8(output, in, 8); - break; - case DST_DCT: - load_buffer_8x8(input, in, stride, 0, 0); - fdst8_sse2(in); - fdct8_sse2(in); - right_shift_8x8(in, 1); - write_buffer_8x8(output, in, 8); - break; - case DST_ADST: - load_buffer_8x8(input, in, stride, 0, 0); - fdst8_sse2(in); - fadst8_sse2(in); - right_shift_8x8(in, 1); - write_buffer_8x8(output, in, 8); - break; - case ADST_DST: - load_buffer_8x8(input, in, stride, 0, 0); - fadst8_sse2(in); - fdst8_sse2(in); - right_shift_8x8(in, 1); - write_buffer_8x8(output, in, 8); - break; - case DST_FLIPADST: - load_buffer_8x8(input, in, stride, 0, 1); - fdst8_sse2(in); - fadst8_sse2(in); - right_shift_8x8(in, 1); - write_buffer_8x8(output, in, 8); - break; - case FLIPADST_DST: - load_buffer_8x8(input, in, stride, 1, 0); - fadst8_sse2(in); - fdst8_sse2(in); - right_shift_8x8(in, 1); - write_buffer_8x8(output, in, 8); - break; #endif // CONFIG_EXT_TX default: assert(0); @@ -2420,351 +2144,6 @@ static void fadst16_8col(__m128i *in) { in[15] = _mm_sub_epi16(kZero, s[1]); } -#if CONFIG_EXT_TX -static void fdst16_8col(__m128i *in) { - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t) cospi_16_64); - const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); - const __m128i k__cospi_p08_p24 = 
pair_set_epi16(cospi_8_64, cospi_24_64); - - const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t) -cospi_16_64); - const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); - const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64); - const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); - const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); - - const __m128i k__cospi_m08_m24 = pair_set_epi16(-cospi_8_64, -cospi_24_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - - const __m128i k__cospi_m30_p02 = pair_set_epi16(-cospi_30_64, cospi_2_64); - const __m128i k__cospi_m14_p18 = pair_set_epi16(-cospi_14_64, cospi_18_64); - const __m128i k__cospi_m22_p10 = pair_set_epi16(-cospi_22_64, cospi_10_64); - const __m128i k__cospi_m06_p26 = pair_set_epi16(-cospi_6_64, cospi_26_64); - const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); - const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); - - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - - __m128i u0, u1, u2, u3, u4, u5, u6, u7; - __m128i v0, v1, v2, v3, v4, v5, v6, v7; - __m128i s0, s1, s2, s3, s4, s5, s6, s7; - __m128i x0, x1, x2, x3, t0, t1, t2, t3; - __m128i y0, y1, y2, y3, y4, y5, y6, y7; - __m128i w0, w1, w2, w3, w4, w5, w6, w7; - - // (1) - u0 = _mm_sub_epi16(in[0], in[15]); - v7 = _mm_add_epi16(in[0], in[15]); - - u1 = _mm_sub_epi16(in[1], in[14]); // -u1 - v6 = _mm_add_epi16(in[1], in[14]); // -v6 - - u2 = _mm_sub_epi16(in[2], in[13]); - v5 = _mm_add_epi16(in[2], in[13]); - - u3 = _mm_sub_epi16(in[3], in[12]); // -u3 - v4 = _mm_add_epi16(in[3], in[12]); // -v4 - - u4 = _mm_sub_epi16(in[4], in[11]); - v3 = _mm_add_epi16(in[4], in[11]); - - u5 = _mm_sub_epi16(in[5], in[10]); // -u5 - v2 = _mm_add_epi16(in[5], in[10]); // -v2 - - u6 = _mm_sub_epi16(in[6], in[9]); - v1 = _mm_add_epi16(in[6], in[9]); - - u7 = _mm_sub_epi16(in[7], in[8]); // -u7 - v0 = _mm_add_epi16(in[7], in[8]); // -v0 - - s0 = _mm_sub_epi16(u0, u7); - s1 = _mm_sub_epi16(u1, u6); // -s1 - s2 = _mm_sub_epi16(u2, u5); - s3 = _mm_sub_epi16(u3, u4); // -s3 - s4 = _mm_add_epi16(u3, u4); // -s4 - s5 = _mm_add_epi16(u2, u5); - s6 = _mm_add_epi16(u1, u6); // -s6 - s7 = _mm_add_epi16(u0, u7); - - x0 = _mm_sub_epi16(s0, s3); - x1 = _mm_sub_epi16(s1, s2); // -x1 - x2 = _mm_add_epi16(s1, s2); // -x2 - x3 = _mm_add_epi16(s0, s3); - - y0 = _mm_unpacklo_epi16(x0, x1); - y1 = _mm_unpackhi_epi16(x0, x1); - y2 = _mm_unpacklo_epi16(x2, x3); - y3 = _mm_unpackhi_epi16(x2, x3); - - t0 = _mm_madd_epi16(y0, k__cospi_p16_m16); - t1 = _mm_madd_epi16(y1, k__cospi_p16_m16); - t2 = _mm_madd_epi16(y0, k__cospi_p16_p16); - t3 = _mm_madd_epi16(y1, k__cospi_p16_p16); - x0 = _mm_madd_epi16(y2, k__cospi_m24_p08); - x1 = _mm_madd_epi16(y3, k__cospi_m24_p08); - x2 = _mm_madd_epi16(y2, k__cospi_p08_p24); - x3 = _mm_madd_epi16(y3, k__cospi_p08_p24); - - y0 = _mm_add_epi32(t0, k__DCT_CONST_ROUNDING); - y1 = _mm_add_epi32(t1, k__DCT_CONST_ROUNDING); - y2 = _mm_add_epi32(t2, k__DCT_CONST_ROUNDING); - y3 = _mm_add_epi32(t3, k__DCT_CONST_ROUNDING); - y4 = _mm_add_epi32(x0, k__DCT_CONST_ROUNDING); - y5 = _mm_add_epi32(x1, k__DCT_CONST_ROUNDING); - y6 = _mm_add_epi32(x2, k__DCT_CONST_ROUNDING); - y7 = 
_mm_add_epi32(x3, k__DCT_CONST_ROUNDING); - - t0 = _mm_srai_epi32(y0, DCT_CONST_BITS); - t1 = _mm_srai_epi32(y1, DCT_CONST_BITS); - t2 = _mm_srai_epi32(y2, DCT_CONST_BITS); - t3 = _mm_srai_epi32(y3, DCT_CONST_BITS); - x0 = _mm_srai_epi32(y4, DCT_CONST_BITS); - x1 = _mm_srai_epi32(y5, DCT_CONST_BITS); - x2 = _mm_srai_epi32(y6, DCT_CONST_BITS); - x3 = _mm_srai_epi32(y7, DCT_CONST_BITS); - - in[15] = _mm_packs_epi32(t0, t1); - in[11] = _mm_packs_epi32(x0, x1); - in[7] = _mm_packs_epi32(t2, t3); - in[3] = _mm_packs_epi32(x2, x3); - - // (2) - t0 = _mm_unpacklo_epi16(s6, s5); - t1 = _mm_unpackhi_epi16(s6, s5); - - y0 = _mm_madd_epi16(t0, k__cospi_m16_m16); - y1 = _mm_madd_epi16(t1, k__cospi_m16_m16); - y2 = _mm_madd_epi16(t0, k__cospi_m16_p16); - y3 = _mm_madd_epi16(t1, k__cospi_m16_p16); - - x0 = _mm_add_epi32(y0, k__DCT_CONST_ROUNDING); - x1 = _mm_add_epi32(y1, k__DCT_CONST_ROUNDING); - x2 = _mm_add_epi32(y2, k__DCT_CONST_ROUNDING); - x3 = _mm_add_epi32(y3, k__DCT_CONST_ROUNDING); - - y4 = _mm_srai_epi32(x0, DCT_CONST_BITS); - y5 = _mm_srai_epi32(x1, DCT_CONST_BITS); - y6 = _mm_srai_epi32(x2, DCT_CONST_BITS); - y7 = _mm_srai_epi32(x3, DCT_CONST_BITS); - - t2 = _mm_packs_epi32(y4, y5); - t3 = _mm_packs_epi32(y6, y7); - - x0 = _mm_sub_epi16(s4, t2); // -x0 - x1 = _mm_add_epi16(s4, t2); // -x1 - x2 = _mm_sub_epi16(s7, t3); - x3 = _mm_add_epi16(s7, t3); - - y0 = _mm_unpacklo_epi16(x0, x3); - y1 = _mm_unpackhi_epi16(x0, x3); - y2 = _mm_unpacklo_epi16(x1, x2); - y3 = _mm_unpackhi_epi16(x1, x2); - - w0 = _mm_madd_epi16(y0, k__cospi_m28_p04); - w1 = _mm_madd_epi16(y1, k__cospi_m28_p04); - w2 = _mm_madd_epi16(y2, k__cospi_m12_p20); - w3 = _mm_madd_epi16(y3, k__cospi_m12_p20); - w4 = _mm_madd_epi16(y2, k__cospi_p20_p12); - w5 = _mm_madd_epi16(y3, k__cospi_p20_p12); - w6 = _mm_madd_epi16(y0, k__cospi_p04_p28); - w7 = _mm_madd_epi16(y1, k__cospi_p04_p28); - - u0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); - u1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); - u2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); - u3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); - u4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); - u5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); - u6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); - u7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); - - y0 = _mm_srai_epi32(u0, DCT_CONST_BITS); - y1 = _mm_srai_epi32(u1, DCT_CONST_BITS); - y2 = _mm_srai_epi32(u2, DCT_CONST_BITS); - y3 = _mm_srai_epi32(u3, DCT_CONST_BITS); - y4 = _mm_srai_epi32(u4, DCT_CONST_BITS); - y5 = _mm_srai_epi32(u5, DCT_CONST_BITS); - y6 = _mm_srai_epi32(u6, DCT_CONST_BITS); - y7 = _mm_srai_epi32(u7, DCT_CONST_BITS); - - in[13] = _mm_packs_epi32(y0, y1); - in[9] = _mm_packs_epi32(y4, y5); - in[5] = _mm_packs_epi32(y2, y3); - in[1] = _mm_packs_epi32(y6, y7); - - // (3) - y0 = _mm_unpacklo_epi16(v5, v2); - y1 = _mm_unpackhi_epi16(v5, v2); - y2 = _mm_unpacklo_epi16(v4, v3); - y3 = _mm_unpackhi_epi16(v4, v3); - - u0 = _mm_madd_epi16(y0, k__cospi_p16_p16); - u1 = _mm_madd_epi16(y1, k__cospi_p16_p16); - u2 = _mm_madd_epi16(y2, k__cospi_m16_m16); - u3 = _mm_madd_epi16(y3, k__cospi_m16_m16); - u4 = _mm_madd_epi16(y2, k__cospi_m16_p16); - u5 = _mm_madd_epi16(y3, k__cospi_m16_p16); - u6 = _mm_madd_epi16(y0, k__cospi_p16_m16); - u7 = _mm_madd_epi16(y1, k__cospi_p16_m16); - - w0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); - w1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); - w2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); - w3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); - w4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); - w5 = _mm_add_epi32(u5, 
k__DCT_CONST_ROUNDING); - w6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); - w7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); - - s0 = _mm_srai_epi32(w0, DCT_CONST_BITS); - s1 = _mm_srai_epi32(w1, DCT_CONST_BITS); - s2 = _mm_srai_epi32(w2, DCT_CONST_BITS); - s3 = _mm_srai_epi32(w3, DCT_CONST_BITS); - s4 = _mm_srai_epi32(w4, DCT_CONST_BITS); - s5 = _mm_srai_epi32(w5, DCT_CONST_BITS); - s6 = _mm_srai_epi32(w6, DCT_CONST_BITS); - s7 = _mm_srai_epi32(w7, DCT_CONST_BITS); - - y2 = _mm_packs_epi32(s0, s1); - y3 = _mm_packs_epi32(s2, s3); - y4 = _mm_packs_epi32(s4, s5); - y5 = _mm_packs_epi32(s6, s7); - - // step 3 - w0 = _mm_sub_epi16(v0, y3); // -w0 - w1 = _mm_add_epi16(v1, y2); - w2 = _mm_sub_epi16(v1, y2); - w3 = _mm_add_epi16(v0, y3); // -w3 - w4 = _mm_sub_epi16(v7, y4); - w5 = _mm_add_epi16(v6, y5); // -w5 - w6 = _mm_sub_epi16(v6, y5); // -w6 - w7 = _mm_add_epi16(v7, y4); - - // step 4 - x0 = _mm_unpacklo_epi16(w1, w6); - x1 = _mm_unpackhi_epi16(w1, w6); - x2 = _mm_unpacklo_epi16(w2, w5); - x3 = _mm_unpackhi_epi16(w2, w5); - - u0 = _mm_madd_epi16(x0, k__cospi_m08_m24); - u1 = _mm_madd_epi16(x1, k__cospi_m08_m24); - u2 = _mm_madd_epi16(x2, k__cospi_p24_m08); - u3 = _mm_madd_epi16(x3, k__cospi_p24_m08); - u4 = _mm_madd_epi16(x2, k__cospi_p08_p24); - u5 = _mm_madd_epi16(x3, k__cospi_p08_p24); - u6 = _mm_madd_epi16(x0, k__cospi_p24_m08); - u7 = _mm_madd_epi16(x1, k__cospi_p24_m08); - - s0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); - s1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); - s2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); - s3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); - s4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); - s5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING); - s6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); - s7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); - - u0 = _mm_srai_epi32(s0, DCT_CONST_BITS); - u1 = _mm_srai_epi32(s1, DCT_CONST_BITS); - u2 = _mm_srai_epi32(s2, DCT_CONST_BITS); - u3 = _mm_srai_epi32(s3, DCT_CONST_BITS); - u4 = _mm_srai_epi32(s4, DCT_CONST_BITS); - u5 = _mm_srai_epi32(s5, DCT_CONST_BITS); - u6 = _mm_srai_epi32(s6, DCT_CONST_BITS); - u7 = _mm_srai_epi32(s7, DCT_CONST_BITS); - - y1 = _mm_packs_epi32(u0, u1); - y2 = _mm_packs_epi32(u2, u3); - y5 = _mm_packs_epi32(u4, u5); - y6 = _mm_packs_epi32(u6, u7); - - // step 5 - v0 = _mm_sub_epi16(w0, y1); // -v0 - v1 = _mm_add_epi16(w0, y1); // -v1 - v2 = _mm_sub_epi16(w3, y2); // -v2 - v3 = _mm_add_epi16(w3, y2); // -v3 - v4 = _mm_sub_epi16(w4, y5); - v5 = _mm_add_epi16(w4, y5); - v6 = _mm_sub_epi16(w7, y6); - v7 = _mm_add_epi16(w7, y6); - - u0 = _mm_unpacklo_epi16(v0, v7); - u1 = _mm_unpackhi_epi16(v0, v7); - u2 = _mm_unpacklo_epi16(v1, v6); - u3 = _mm_unpackhi_epi16(v1, v6); - u4 = _mm_unpacklo_epi16(v2, v5); - u5 = _mm_unpackhi_epi16(v2, v5); - u6 = _mm_unpacklo_epi16(v3, v4); - u7 = _mm_unpackhi_epi16(v3, v4); - - s0 = _mm_madd_epi16(u0, k__cospi_m30_p02); // x0 - s1 = _mm_madd_epi16(u1, k__cospi_m30_p02); - s2 = _mm_madd_epi16(u2, k__cospi_m14_p18); // x1 - s3 = _mm_madd_epi16(u3, k__cospi_m14_p18); - s4 = _mm_madd_epi16(u4, k__cospi_m22_p10); // x2 - s5 = _mm_madd_epi16(u5, k__cospi_m22_p10); - s6 = _mm_madd_epi16(u6, k__cospi_m06_p26); // x3 - s7 = _mm_madd_epi16(u7, k__cospi_m06_p26); - - w0 = _mm_madd_epi16(u6, k__cospi_p26_p06); // x4 - w1 = _mm_madd_epi16(u7, k__cospi_p26_p06); - w2 = _mm_madd_epi16(u4, k__cospi_p10_p22); // x5 - w3 = _mm_madd_epi16(u5, k__cospi_p10_p22); - w4 = _mm_madd_epi16(u2, k__cospi_p18_p14); // x6 - w5 = _mm_madd_epi16(u3, k__cospi_p18_p14); - w6 = _mm_madd_epi16(u0, k__cospi_p02_p30); // 
x7 - w7 = _mm_madd_epi16(u1, k__cospi_p02_p30); - - v0 = _mm_add_epi32(s0, k__DCT_CONST_ROUNDING); - v1 = _mm_add_epi32(s1, k__DCT_CONST_ROUNDING); - v2 = _mm_add_epi32(s2, k__DCT_CONST_ROUNDING); - v3 = _mm_add_epi32(s3, k__DCT_CONST_ROUNDING); - v4 = _mm_add_epi32(s4, k__DCT_CONST_ROUNDING); - v5 = _mm_add_epi32(s5, k__DCT_CONST_ROUNDING); - v6 = _mm_add_epi32(s6, k__DCT_CONST_ROUNDING); - v7 = _mm_add_epi32(s7, k__DCT_CONST_ROUNDING); - - y0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); - y1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); - y2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); - y3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); - y4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); - y5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); - y6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); - y7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); - - u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - - s0 = _mm_srai_epi32(y0, DCT_CONST_BITS); - s1 = _mm_srai_epi32(y1, DCT_CONST_BITS); - s2 = _mm_srai_epi32(y2, DCT_CONST_BITS); - s3 = _mm_srai_epi32(y3, DCT_CONST_BITS); - s4 = _mm_srai_epi32(y4, DCT_CONST_BITS); - s5 = _mm_srai_epi32(y5, DCT_CONST_BITS); - s6 = _mm_srai_epi32(y6, DCT_CONST_BITS); - s7 = _mm_srai_epi32(y7, DCT_CONST_BITS); - - in[14] = _mm_packs_epi32(u0, u1); - in[6] = _mm_packs_epi32(u2, u3); - in[10] = _mm_packs_epi32(u4, u5); - in[2] = _mm_packs_epi32(u6, u7); - in[12] = _mm_packs_epi32(s0, s1); - in[4] = _mm_packs_epi32(s2, s3); - in[8] = _mm_packs_epi32(s4, s5); - in[0] = _mm_packs_epi32(s6, s7); -} -#endif // CONFIG_EXT_TX - static void fdct16_sse2(__m128i *in0, __m128i *in1) { fdct16_8col(in0); fdct16_8col(in1); @@ -2777,14 +2156,6 @@ static void fadst16_sse2(__m128i *in0, __m128i *in1) { array_transpose_16x16(in0, in1); } -#if CONFIG_EXT_TX -static void fdst16_sse2(__m128i *in0, __m128i *in1) { - fdst16_8col(in0); - fdst16_8col(in1); - array_transpose_16x16(in0, in1); -} -#endif // CONFIG_EXT_TX - void vp10_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type) { __m128i in0[16], in1[16]; @@ -2850,55 +2221,6 @@ void vp10_fht16x16_sse2(const int16_t *input, tran_low_t *output, fadst16_sse2(in0, in1); write_buffer_16x16(output, in0, in1, 16); break; - case DST_DST: - load_buffer_16x16(input, in0, in1, stride, 0, 0); - fdst16_sse2(in0, in1); - right_shift_16x16(in0, in1); - fdst16_sse2(in0, in1); - write_buffer_16x16(output, in0, in1, 16); - break; - case DCT_DST: - load_buffer_16x16(input, in0, in1, stride, 0, 0); - fdct16_sse2(in0, in1); - right_shift_16x16(in0, in1); - fdst16_sse2(in0, in1); - write_buffer_16x16(output, in0, in1, 16); - break; - case DST_DCT: - load_buffer_16x16(input, in0, in1, stride, 0, 0); - fdst16_sse2(in0, in1); - right_shift_16x16(in0, in1); - fdct16_sse2(in0, in1); - write_buffer_16x16(output, in0, in1, 16); - break; - case DST_ADST: - load_buffer_16x16(input, in0, in1, stride, 0, 0); - fdst16_sse2(in0, in1); - right_shift_16x16(in0, in1); - fadst16_sse2(in0, in1); - write_buffer_16x16(output, in0, in1, 16); - break; - case ADST_DST: - load_buffer_16x16(input, in0, in1, stride, 0, 0); - fadst16_sse2(in0, in1); - right_shift_16x16(in0, in1); - fdst16_sse2(in0, in1); - write_buffer_16x16(output, in0, in1, 16); - break; - case DST_FLIPADST: - 
load_buffer_16x16(input, in0, in1, stride, 0, 1);
-      fdst16_sse2(in0, in1);
-      right_shift_16x16(in0, in1);
-      fadst16_sse2(in0, in1);
-      write_buffer_16x16(output, in0, in1, 16);
-      break;
-    case FLIPADST_DST:
-      load_buffer_16x16(input, in0, in1, stride, 1, 0);
-      fadst16_sse2(in0, in1);
-      right_shift_16x16(in0, in1);
-      fdst16_sse2(in0, in1);
-      write_buffer_16x16(output, in0, in1, 16);
-      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
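/*
 * A minimal sketch (not part of the patch, and not libvpx code) of the model
 * behind the 16-entry tables above: every 2D transform type is one 1D kernel
 * run down the columns and one run across the rows, with the FLIPADST types
 * realized as plain ADST on a vertically or horizontally flipped input, as
 * maybe_flip_input() in vp10/encoder/dct.c does.  All names with a _sketch
 * suffix are hypothetical, and the identity kernel below omits the scaling
 * that the real fidtx4() applies.
 */
typedef int tran_low_t_sketch;  /* stand-in for libvpx's tran_low_t */
typedef void (*tx1d_sketch)(const tran_low_t_sketch *in,
                            tran_low_t_sketch *out);

/* Unscaled identity kernel, the 1D half of IDTX and the V_/H_ hybrids. */
void idtx4_sketch(const tran_low_t_sketch *in, tran_low_t_sketch *out) {
  int i;
  for (i = 0; i < 4; ++i) out[i] = in[i];
}

/* Generic 4x4 driver: optional flips, then column pass, then row pass. */
void fht4x4_sketch(const tran_low_t_sketch in[16], tran_low_t_sketch out[16],
                   tx1d_sketch col_tx, tx1d_sketch row_tx,
                   int flip_ud, int flip_lr) {
  tran_low_t_sketch buf[16], v[4], r[4];
  int row, col;
  /* FLIPADST_xx types flip up/down; xx_FLIPADST types flip left/right. */
  for (row = 0; row < 4; ++row)
    for (col = 0; col < 4; ++col)
      buf[row * 4 + col] =
          in[(flip_ud ? 3 - row : row) * 4 + (flip_lr ? 3 - col : col)];
  /* Vertical pass: col_tx plays the role of the first entry in each
   * { cols, rows } pair of the FHT_4 table. */
  for (col = 0; col < 4; ++col) {
    for (row = 0; row < 4; ++row) v[row] = buf[row * 4 + col];
    col_tx(v, r);
    for (row = 0; row < 4; ++row) buf[row * 4 + col] = r[row];
  }
  /* Horizontal pass. */
  for (row = 0; row < 4; ++row) row_tx(&buf[row * 4], &out[row * 4]);
}

/*
 * Under this model, assuming a 1D adst4 kernel with the same signature:
 *   V_ADST     -> fht4x4_sketch(in, out, adst4, idtx4_sketch, 0, 0);
 *   H_FLIPADST -> fht4x4_sketch(in, out, idtx4_sketch, adst4, 0, 1);
 * which is consistent with maybe_flip_input() grouping H_FLIPADST with
 * DCT_FLIPADST and ADST_FLIPADST under the left/right flip.
 */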