From: Linfeng Zhang Date: Wed, 10 May 2017 18:52:32 +0000 (-0700) Subject: Update specializations of idct functions X-Git-Tag: v1.7.0~480^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=764b3b80904fe6a7f5cf5679e06e5acb030a288e;p=libvpx Update specializations of idct functions Introduced append situation in Commit 0178d97 which could be confusing. Clean a little bit and add some comments. Change-Id: I69ad336f805aca7ce9d45515b8cd237423fadbb2 --- diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 3126ae6c8..5f9da7520 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -565,63 +565,61 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { if (vpx_config("CONFIG_VP9") eq "yes") { add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { + # Note that there are more specializations appended when CONFIG_VP9_HIGHBITDEPTH is off. specialize qw/vpx_idct4x4_16_add neon sse2/; - specialize qw/vpx_idct4x4_1_add neon sse2/; - specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/; - specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/; - specialize qw/vpx_idct8x8_1_add neon sse2/; - specialize qw/vpx_idct16x16_256_add neon sse2/; - specialize qw/vpx_idct16x16_38_add neon sse2/; $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; - specialize qw/vpx_idct16x16_10_add neon sse2/; - specialize qw/vpx_idct16x16_1_add neon sse2/; - specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/; - specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/; $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; - specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/; - specialize qw/vpx_idct32x32_1_add neon sse2/; + + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") { + # Note that these specializations appends to the above ones. + specialize qw/vpx_idct4x4_16_add dspr2 msa/; + specialize qw/vpx_idct4x4_1_add dspr2 msa/; + specialize qw/vpx_idct8x8_64_add dspr2 msa/; + specialize qw/vpx_idct8x8_12_add dspr2 msa/; + specialize qw/vpx_idct8x8_1_add dspr2 msa/; + specialize qw/vpx_idct16x16_256_add dspr2 msa/; + specialize qw/vpx_idct16x16_38_add dspr2 msa/; + $vpx_idct16x16_38_add_dspr2=vpx_idct16x16_256_add_dspr2; + $vpx_idct16x16_38_add_msa=vpx_idct16x16_256_add_msa; + specialize qw/vpx_idct16x16_10_add dspr2 msa/; + specialize qw/vpx_idct16x16_1_add dspr2 msa/; + specialize qw/vpx_idct32x32_1024_add dspr2 msa/; + specialize qw/vpx_idct32x32_135_add dspr2 msa/; + $vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2; + $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa; + specialize qw/vpx_idct32x32_34_add dspr2 msa/; + specialize qw/vpx_idct32x32_1_add dspr2 msa/; + specialize qw/vpx_iwht4x4_16_add msa sse2/; + specialize qw/vpx_iwht4x4_1_add msa/; + } # !CONFIG_VP9_HIGHBITDEPTH } # !CONFIG_EMULATE_HARDWARE if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { @@ -630,95 +628,41 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_iwht4x4_16_add sse2/; add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct4x4_1_add neon/; add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct8x8_1_add neon/; add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_1_add neon/; add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct32x32_1_add neon sse2/; add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { specialize qw/vpx_highbd_idct4x4_16_add neon sse2/; - specialize qw/vpx_highbd_idct8x8_64_add neon sse2/; - specialize qw/vpx_highbd_idct8x8_12_add neon sse2/; - specialize qw/vpx_highbd_idct16x16_256_add neon sse2/; - specialize qw/vpx_highbd_idct16x16_38_add neon sse2/; $vpx_highbd_idct16x16_38_add_sse2=vpx_highbd_idct16x16_256_add_sse2; - specialize qw/vpx_highbd_idct16x16_10_add neon sse2/; - specialize qw/vpx_highbd_idct32x32_1024_add neon/; - specialize qw/vpx_highbd_idct32x32_135_add neon/; - specialize qw/vpx_highbd_idct32x32_34_add neon/; } # !CONFIG_EMULATE_HARDWARE -} else { - if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { - specialize qw/vpx_idct4x4_16_add dspr2 msa/; - - specialize qw/vpx_idct4x4_1_add dspr2 msa/; - - specialize qw/vpx_idct8x8_64_add dspr2 msa/; - - specialize qw/vpx_idct8x8_12_add dspr2 msa/; - - specialize qw/vpx_idct8x8_1_add dspr2 msa/; - - specialize qw/vpx_idct16x16_256_add dspr2 msa/; - - specialize qw/vpx_idct16x16_38_add dspr2 msa/; - $vpx_idct16x16_38_add_dspr2=vpx_idct16x16_256_add_dspr2; - $vpx_idct16x16_38_add_msa=vpx_idct16x16_256_add_msa; - - specialize qw/vpx_idct16x16_10_add dspr2 msa/; - - specialize qw/vpx_idct16x16_1_add dspr2 msa/; - - specialize qw/vpx_idct32x32_1024_add dspr2 msa/; - - specialize qw/vpx_idct32x32_135_add dspr2 msa/; - $vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2; - $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa; - - specialize qw/vpx_idct32x32_34_add dspr2 msa/; - - specialize qw/vpx_idct32x32_1_add dspr2 msa/; - - specialize qw/vpx_iwht4x4_16_add msa sse2/; - - specialize qw/vpx_iwht4x4_1_add msa/; - } # !CONFIG_EMULATE_HARDWARE } # CONFIG_VP9_HIGHBITDEPTH } # CONFIG_VP9