From 2992739b5d5ee0b87e8654b3caaeee3a7b468465 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Fri, 17 Jul 2015 12:31:53 -0700 Subject: [PATCH] Rename loop filter function from vp9_ to vpx_ Change-Id: I6f424bb8daec26bf8482b5d75dd9b0e45c11a665 --- test/lpf_8_test.cc | 240 +++++++++--------- test/test.mk | 2 +- .../mips/dspr2/vp9_convolve2_avg_dspr2.c | 10 +- .../dspr2/vp9_convolve2_avg_horiz_dspr2.c | 38 +-- vp9/common/mips/dspr2/vp9_convolve2_dspr2.c | 24 +- .../mips/dspr2/vp9_convolve2_horiz_dspr2.c | 40 +-- .../mips/dspr2/vp9_convolve2_vert_dspr2.c | 8 +- .../mips/dspr2/vp9_convolve8_avg_dspr2.c | 54 ++-- .../dspr2/vp9_convolve8_avg_horiz_dspr2.c | 38 +-- vp9/common/mips/dspr2/vp9_convolve8_dspr2.c | 68 ++--- .../mips/dspr2/vp9_convolve8_horiz_dspr2.c | 40 +-- .../mips/dspr2/vp9_convolve8_vert_dspr2.c | 10 +- vp9/common/mips/dspr2/vp9_itrans16_dspr2.c | 22 +- .../mips/dspr2/vp9_itrans32_cols_dspr2.c | 16 +- vp9/common/mips/dspr2/vp9_itrans32_dspr2.c | 4 +- vp9/common/mips/dspr2/vp9_itrans4_dspr2.c | 16 +- vp9/common/mips/dspr2/vp9_itrans8_dspr2.c | 16 +- vp9/common/vp9_loopfilter.c | 124 ++++----- vpx_dsp/arm/loopfilter_16_neon.asm | 16 +- vpx_dsp/arm/loopfilter_16_neon.c | 10 +- vpx_dsp/arm/loopfilter_4_neon.asm | 38 +-- vpx_dsp/arm/loopfilter_4_neon.c | 22 +- vpx_dsp/arm/loopfilter_8_neon.asm | 36 +-- vpx_dsp/arm/loopfilter_8_neon.c | 20 +- vpx_dsp/arm/loopfilter_mb_neon.asm | 26 +- vpx_dsp/arm/loopfilter_neon.c | 24 +- vpx_dsp/loopfilter.c | 76 +++--- vpx_dsp/mips/common_dspr2.h | 8 +- vpx_dsp/mips/loopfilter_16_msa.c | 34 +-- vpx_dsp/mips/loopfilter_4_msa.c | 8 +- vpx_dsp/mips/loopfilter_8_msa.c | 8 +- vpx_dsp/mips/loopfilter_filters_dspr2.c | 54 ++-- vpx_dsp/mips/loopfilter_filters_dspr2.h | 176 ++++++------- vpx_dsp/mips/loopfilter_masks_dspr2.h | 46 ++-- vpx_dsp/mips/loopfilter_mb_dspr2.c | 68 ++--- vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c | 88 +++---- vpx_dsp/mips/loopfilter_mb_vert_dspr2.c | 88 +++---- vpx_dsp/vpx_dsp_rtcd_defs.pl | 98 +++---- vpx_dsp/x86/highbd_loopfilter_sse2.c | 42 +-- vpx_dsp/x86/loopfilter_avx2.c | 6 +- vpx_dsp/x86/loopfilter_mmx.asm | 12 +- vpx_dsp/x86/loopfilter_sse2.c | 34 +-- 42 files changed, 904 insertions(+), 904 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 2a251c13d..96aaa23df 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -60,49 +60,49 @@ typedef std::tr1::tuple dualloop8_param_t; void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { - vp9_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd); + vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd); } void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { - vp9_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd); + vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd); } void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { - vp9_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd); + vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd); } void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { - vp9_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd); + vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd); } #else void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { - vp9_lpf_vertical_16_sse2(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh); } void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { - vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh); } void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { - vp9_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh); } void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { - vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh); } #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_SSE2 @@ -114,25 +114,25 @@ void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { - vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh); } void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { - vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh); } void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { - vp9_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh); } void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { - vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh); } #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON_ASM @@ -141,13 +141,13 @@ void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { - vp9_lpf_vertical_16_msa(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh); } void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { - vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh); } #endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH) @@ -534,46 +534,46 @@ using std::tr1::make_tuple; INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, ::testing::Values( - make_tuple(&vp9_highbd_lpf_horizontal_4_sse2, - &vp9_highbd_lpf_horizontal_4_c, 8, 1), - make_tuple(&vp9_highbd_lpf_vertical_4_sse2, - &vp9_highbd_lpf_vertical_4_c, 8, 1), - make_tuple(&vp9_highbd_lpf_horizontal_8_sse2, - &vp9_highbd_lpf_horizontal_8_c, 8, 1), - make_tuple(&vp9_highbd_lpf_horizontal_16_sse2, - &vp9_highbd_lpf_horizontal_16_c, 8, 1), - make_tuple(&vp9_highbd_lpf_horizontal_16_sse2, - &vp9_highbd_lpf_horizontal_16_c, 8, 2), - make_tuple(&vp9_highbd_lpf_vertical_8_sse2, - &vp9_highbd_lpf_vertical_8_c, 8, 1), + make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, + &vpx_highbd_lpf_horizontal_4_c, 8, 1), + make_tuple(&vpx_highbd_lpf_vertical_4_sse2, + &vpx_highbd_lpf_vertical_4_c, 8, 1), + make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, + &vpx_highbd_lpf_horizontal_8_c, 8, 1), + make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, + &vpx_highbd_lpf_horizontal_16_c, 8, 1), + make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, + &vpx_highbd_lpf_horizontal_16_c, 8, 2), + make_tuple(&vpx_highbd_lpf_vertical_8_sse2, + &vpx_highbd_lpf_vertical_8_c, 8, 1), make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1), - make_tuple(&vp9_highbd_lpf_horizontal_4_sse2, - &vp9_highbd_lpf_horizontal_4_c, 10, 1), - make_tuple(&vp9_highbd_lpf_vertical_4_sse2, - &vp9_highbd_lpf_vertical_4_c, 10, 1), - make_tuple(&vp9_highbd_lpf_horizontal_8_sse2, - &vp9_highbd_lpf_horizontal_8_c, 10, 1), - make_tuple(&vp9_highbd_lpf_horizontal_16_sse2, - &vp9_highbd_lpf_horizontal_16_c, 10, 1), - make_tuple(&vp9_highbd_lpf_horizontal_16_sse2, - &vp9_highbd_lpf_horizontal_16_c, 10, 2), - make_tuple(&vp9_highbd_lpf_vertical_8_sse2, - &vp9_highbd_lpf_vertical_8_c, 10, 1), + make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, + &vpx_highbd_lpf_horizontal_4_c, 10, 1), + make_tuple(&vpx_highbd_lpf_vertical_4_sse2, + &vpx_highbd_lpf_vertical_4_c, 10, 1), + make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, + &vpx_highbd_lpf_horizontal_8_c, 10, 1), + make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, + &vpx_highbd_lpf_horizontal_16_c, 10, 1), + make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, + &vpx_highbd_lpf_horizontal_16_c, 10, 2), + make_tuple(&vpx_highbd_lpf_vertical_8_sse2, + &vpx_highbd_lpf_vertical_8_c, 10, 1), make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 10, 1), - make_tuple(&vp9_highbd_lpf_horizontal_4_sse2, - &vp9_highbd_lpf_horizontal_4_c, 12, 1), - make_tuple(&vp9_highbd_lpf_vertical_4_sse2, - &vp9_highbd_lpf_vertical_4_c, 12, 1), - make_tuple(&vp9_highbd_lpf_horizontal_8_sse2, - &vp9_highbd_lpf_horizontal_8_c, 12, 1), - make_tuple(&vp9_highbd_lpf_horizontal_16_sse2, - &vp9_highbd_lpf_horizontal_16_c, 12, 1), - make_tuple(&vp9_highbd_lpf_horizontal_16_sse2, - &vp9_highbd_lpf_horizontal_16_c, 12, 2), - make_tuple(&vp9_highbd_lpf_vertical_8_sse2, - &vp9_highbd_lpf_vertical_8_c, 12, 1), + make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, + &vpx_highbd_lpf_horizontal_4_c, 12, 1), + make_tuple(&vpx_highbd_lpf_vertical_4_sse2, + &vpx_highbd_lpf_vertical_4_c, 12, 1), + make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, + &vpx_highbd_lpf_horizontal_8_c, 12, 1), + make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, + &vpx_highbd_lpf_horizontal_16_c, 12, 1), + make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, + &vpx_highbd_lpf_horizontal_16_c, 12, 2), + make_tuple(&vpx_highbd_lpf_vertical_8_sse2, + &vpx_highbd_lpf_vertical_8_c, 12, 1), make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 12, 1), make_tuple(&wrapper_vertical_16_dual_sse2, @@ -586,10 +586,10 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, ::testing::Values( - make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8, 1), - make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 1), - make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 2), - make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2), + make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1), make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif @@ -598,8 +598,8 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( AVX2, Loop8Test6Param, ::testing::Values( - make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8, 1), - make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8, + make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 2))); #endif @@ -608,42 +608,42 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test9Param, ::testing::Values( - make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2, - &vp9_highbd_lpf_horizontal_4_dual_c, 8), - make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2, - &vp9_highbd_lpf_horizontal_8_dual_c, 8), - make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2, - &vp9_highbd_lpf_vertical_4_dual_c, 8), - make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2, - &vp9_highbd_lpf_vertical_8_dual_c, 8), - make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2, - &vp9_highbd_lpf_horizontal_4_dual_c, 10), - make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2, - &vp9_highbd_lpf_horizontal_8_dual_c, 10), - make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2, - &vp9_highbd_lpf_vertical_4_dual_c, 10), - make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2, - &vp9_highbd_lpf_vertical_8_dual_c, 10), - make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2, - &vp9_highbd_lpf_horizontal_4_dual_c, 12), - make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2, - &vp9_highbd_lpf_horizontal_8_dual_c, 12), - make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2, - &vp9_highbd_lpf_vertical_4_dual_c, 12), - make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2, - &vp9_highbd_lpf_vertical_8_dual_c, 12))); + make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2, + &vpx_highbd_lpf_horizontal_4_dual_c, 8), + make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2, + &vpx_highbd_lpf_horizontal_8_dual_c, 8), + make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2, + &vpx_highbd_lpf_vertical_4_dual_c, 8), + make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2, + &vpx_highbd_lpf_vertical_8_dual_c, 8), + make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2, + &vpx_highbd_lpf_horizontal_4_dual_c, 10), + make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2, + &vpx_highbd_lpf_horizontal_8_dual_c, 10), + make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2, + &vpx_highbd_lpf_vertical_4_dual_c, 10), + make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2, + &vpx_highbd_lpf_vertical_8_dual_c, 10), + make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2, + &vpx_highbd_lpf_horizontal_4_dual_c, 12), + make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2, + &vpx_highbd_lpf_horizontal_8_dual_c, 12), + make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2, + &vpx_highbd_lpf_vertical_4_dual_c, 12), + make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2, + &vpx_highbd_lpf_vertical_8_dual_c, 12))); #else INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test9Param, ::testing::Values( - make_tuple(&vp9_lpf_horizontal_4_dual_sse2, - &vp9_lpf_horizontal_4_dual_c, 8), - make_tuple(&vp9_lpf_horizontal_8_dual_sse2, - &vp9_lpf_horizontal_8_dual_c, 8), - make_tuple(&vp9_lpf_vertical_4_dual_sse2, - &vp9_lpf_vertical_4_dual_c, 8), - make_tuple(&vp9_lpf_vertical_8_dual_sse2, - &vp9_lpf_vertical_8_dual_c, 8))); + make_tuple(&vpx_lpf_horizontal_4_dual_sse2, + &vpx_lpf_horizontal_4_dual_c, 8), + make_tuple(&vpx_lpf_horizontal_8_dual_sse2, + &vpx_lpf_horizontal_8_dual_c, 8), + make_tuple(&vpx_lpf_vertical_4_dual_sse2, + &vpx_lpf_vertical_4_dual_c, 8), + make_tuple(&vpx_lpf_vertical_8_dual_sse2, + &vpx_lpf_vertical_8_dual_c, 8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif @@ -657,36 +657,36 @@ INSTANTIATE_TEST_CASE_P( #if HAVE_NEON_ASM // Using #if inside the macro is unsupported on MSVS but the tests are not // currently built for MSVS with ARM and NEON. - make_tuple(&vp9_lpf_horizontal_16_neon, - &vp9_lpf_horizontal_16_c, 8, 1), - make_tuple(&vp9_lpf_horizontal_16_neon, - &vp9_lpf_horizontal_16_c, 8, 2), + make_tuple(&vpx_lpf_horizontal_16_neon, + &vpx_lpf_horizontal_16_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_16_neon, + &vpx_lpf_horizontal_16_c, 8, 2), make_tuple(&wrapper_vertical_16_neon, &wrapper_vertical_16_c, 8, 1), make_tuple(&wrapper_vertical_16_dual_neon, &wrapper_vertical_16_dual_c, 8, 1), #endif // HAVE_NEON_ASM - make_tuple(&vp9_lpf_horizontal_8_neon, - &vp9_lpf_horizontal_8_c, 8, 1), - make_tuple(&vp9_lpf_vertical_8_neon, - &vp9_lpf_vertical_8_c, 8, 1), - make_tuple(&vp9_lpf_horizontal_4_neon, - &vp9_lpf_horizontal_4_c, 8, 1), - make_tuple(&vp9_lpf_vertical_4_neon, - &vp9_lpf_vertical_4_c, 8, 1))); + make_tuple(&vpx_lpf_horizontal_8_neon, + &vpx_lpf_horizontal_8_c, 8, 1), + make_tuple(&vpx_lpf_vertical_8_neon, + &vpx_lpf_vertical_8_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_4_neon, + &vpx_lpf_horizontal_4_c, 8, 1), + make_tuple(&vpx_lpf_vertical_4_neon, + &vpx_lpf_vertical_4_c, 8, 1))); INSTANTIATE_TEST_CASE_P( NEON, Loop8Test9Param, ::testing::Values( #if HAVE_NEON_ASM - make_tuple(&vp9_lpf_horizontal_8_dual_neon, - &vp9_lpf_horizontal_8_dual_c, 8), - make_tuple(&vp9_lpf_vertical_8_dual_neon, - &vp9_lpf_vertical_8_dual_c, 8), + make_tuple(&vpx_lpf_horizontal_8_dual_neon, + &vpx_lpf_horizontal_8_dual_c, 8), + make_tuple(&vpx_lpf_vertical_8_dual_neon, + &vpx_lpf_vertical_8_dual_c, 8), #endif // HAVE_NEON_ASM - make_tuple(&vp9_lpf_horizontal_4_dual_neon, - &vp9_lpf_horizontal_4_dual_c, 8), - make_tuple(&vp9_lpf_vertical_4_dual_neon, - &vp9_lpf_vertical_4_dual_c, 8))); + make_tuple(&vpx_lpf_horizontal_4_dual_neon, + &vpx_lpf_horizontal_4_dual_c, 8), + make_tuple(&vpx_lpf_vertical_4_dual_neon, + &vpx_lpf_vertical_4_dual_c, 8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON @@ -694,23 +694,23 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( MSA, Loop8Test6Param, ::testing::Values( - make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1), - make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1), - make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2), - make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), + make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1), make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1))); INSTANTIATE_TEST_CASE_P( MSA, Loop8Test9Param, ::testing::Values( - make_tuple(&vp9_lpf_horizontal_4_dual_msa, - &vp9_lpf_horizontal_4_dual_c, 8), - make_tuple(&vp9_lpf_horizontal_8_dual_msa, - &vp9_lpf_horizontal_8_dual_c, 8), - make_tuple(&vp9_lpf_vertical_4_dual_msa, - &vp9_lpf_vertical_4_dual_c, 8), - make_tuple(&vp9_lpf_vertical_8_dual_msa, - &vp9_lpf_vertical_8_dual_c, 8))); + make_tuple(&vpx_lpf_horizontal_4_dual_msa, + &vpx_lpf_horizontal_4_dual_c, 8), + make_tuple(&vpx_lpf_horizontal_8_dual_msa, + &vpx_lpf_horizontal_8_dual_c, 8), + make_tuple(&vpx_lpf_vertical_4_dual_msa, + &vpx_lpf_vertical_4_dual_c, 8), + make_tuple(&vpx_lpf_vertical_8_dual_msa, + &vpx_lpf_vertical_8_dual_c, 8))); #endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH) } // namespace diff --git a/test/test.mk b/test/test.mk index a8a365ec0..8ecc856b0 100644 --- a/test/test.mk +++ b/test/test.mk @@ -91,6 +91,7 @@ endif ## shared library builds don't make these functions accessible. ## ifeq ($(CONFIG_SHARED),) +LIBVPX_TEST_SRCS-$(CONFIG_VP9) += lpf_8_test.cc ## VP8 ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),) @@ -142,7 +143,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc -LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc diff --git a/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c index 91d62bc49..aad7c4514 100644 --- a/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c @@ -44,7 +44,7 @@ static void convolve_bi_avg_vert_4_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_store(dst + dst_stride); + prefetch_store(dst + dst_stride); for (x = 0; x < w; x += 4) { src_ptr = src + x; @@ -148,8 +148,8 @@ static void convolve_bi_avg_vert_64_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_store(dst + dst_stride); - vp9_prefetch_store(dst + dst_stride + 32); + prefetch_store(dst + dst_stride); + prefetch_store(dst + dst_stride + 32); for (x = 0; x < 64; x += 4) { src_ptr = src + x; @@ -245,7 +245,7 @@ void vp9_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, : [pos] "r" (pos) ); - vp9_prefetch_store(dst); + prefetch_store(dst); switch (w) { case 4: @@ -257,7 +257,7 @@ void vp9_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, filter_y, w, h); break; case 64: - vp9_prefetch_store(dst + 32); + prefetch_store(dst + 32); convolve_bi_avg_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h); diff --git a/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c index 148b20fba..bc60e9332 100644 --- a/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c @@ -40,9 +40,9 @@ static void convolve_bi_avg_horiz_4_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -135,9 +135,9 @@ static void convolve_bi_avg_horiz_8_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -290,9 +290,9 @@ static void convolve_bi_avg_horiz_16_dspr2(const uint8_t *src_ptr, dst = dst_ptr; /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); - vp9_prefetch_store(dst_ptr + dst_stride); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); + prefetch_store(dst_ptr + dst_stride); for (c = 0; c < count; c++) { __asm__ __volatile__ ( @@ -539,11 +539,11 @@ static void convolve_bi_avg_horiz_64_dspr2(const uint8_t *src_ptr, dst = dst_ptr; /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); - vp9_prefetch_load(src_ptr + src_stride + 64); - vp9_prefetch_store(dst_ptr + dst_stride); - vp9_prefetch_store(dst_ptr + dst_stride + 32); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); + prefetch_load(src_ptr + src_stride + 64); + prefetch_store(dst_ptr + dst_stride); + prefetch_store(dst_ptr + dst_stride + 32); for (c = 0; c < 4; c++) { __asm__ __volatile__ ( @@ -781,9 +781,9 @@ void vp9_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, ); /* prefetch data to cache memory */ - vp9_prefetch_load(src); - vp9_prefetch_load(src + 32); - vp9_prefetch_store(dst); + prefetch_load(src); + prefetch_load(src + 32); + prefetch_store(dst); switch (w) { case 4: @@ -807,8 +807,8 @@ void vp9_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, filter_x, h, 2); break; case 64: - vp9_prefetch_load(src + 64); - vp9_prefetch_store(dst + 32); + prefetch_load(src + 64); + prefetch_store(dst + 32); convolve_bi_avg_horiz_64_dspr2(src, src_stride, dst, dst_stride, diff --git a/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c index 92644f2c8..b714f9a4d 100644 --- a/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c @@ -41,8 +41,8 @@ static void convolve_bi_horiz_4_transposed_dspr2(const uint8_t *src, for (y = h; y--;) { dst_ptr = dst; /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -132,8 +132,8 @@ static void convolve_bi_horiz_8_transposed_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); dst_ptr = dst; odd_dst = (dst_ptr + dst_stride); @@ -272,8 +272,8 @@ static void convolve_bi_horiz_16_transposed_dspr2(const uint8_t *src_ptr, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); src = src_ptr; dst = dst_ptr; @@ -504,9 +504,9 @@ static void convolve_bi_horiz_64_transposed_dspr2(const uint8_t *src_ptr, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); - vp9_prefetch_load(src_ptr + src_stride + 64); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); + prefetch_load(src_ptr + src_stride + 64); src = src_ptr; dst = dst_ptr; @@ -747,8 +747,8 @@ void vp9_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, ); /* prefetch data to cache memory */ - vp9_prefetch_load(src); - vp9_prefetch_load(src + 32); + prefetch_load(src); + prefetch_load(src + 32); switch (w) { case 4: @@ -769,7 +769,7 @@ void vp9_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, (w/16)); break; case 64: - vp9_prefetch_load(src + 32); + prefetch_load(src + 32); convolve_bi_horiz_64_transposed_dspr2(src, src_stride, dst, dst_stride, filter, h); diff --git a/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c index 1debdb4c0..27ea100cd 100644 --- a/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c @@ -39,9 +39,9 @@ static void convolve_bi_horiz_4_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -122,9 +122,9 @@ static void convolve_bi_horiz_8_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -252,9 +252,9 @@ static void convolve_bi_horiz_16_dspr2(const uint8_t *src_ptr, dst = dst_ptr; /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); - vp9_prefetch_store(dst_ptr + dst_stride); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); + prefetch_store(dst_ptr + dst_stride); for (c = 0; c < count; c++) { __asm__ __volatile__ ( @@ -459,11 +459,11 @@ static void convolve_bi_horiz_64_dspr2(const uint8_t *src_ptr, dst = dst_ptr; /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); - vp9_prefetch_load(src_ptr + src_stride + 64); - vp9_prefetch_store(dst_ptr + dst_stride); - vp9_prefetch_store(dst_ptr + dst_stride + 32); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); + prefetch_load(src_ptr + src_stride + 64); + prefetch_store(dst_ptr + dst_stride); + prefetch_store(dst_ptr + dst_stride + 32); for (c = 0; c < 4; c++) { __asm__ __volatile__ ( @@ -651,7 +651,7 @@ void vp9_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, if (16 == x_step_q4) { uint32_t pos = 38; - vp9_prefetch_load((const uint8_t *)filter_x); + prefetch_load((const uint8_t *)filter_x); /* bit positon for extract from acc */ __asm__ __volatile__ ( @@ -661,9 +661,9 @@ void vp9_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, ); /* prefetch data to cache memory */ - vp9_prefetch_load(src); - vp9_prefetch_load(src + 32); - vp9_prefetch_store(dst); + prefetch_load(src); + prefetch_load(src + 32); + prefetch_store(dst); switch (w) { case 4: @@ -687,8 +687,8 @@ void vp9_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, filter_x, (int32_t)h, 2); break; case 64: - vp9_prefetch_load(src + 64); - vp9_prefetch_store(dst + 32); + prefetch_load(src + 64); + prefetch_store(dst + 32); convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride, dst, (int32_t)dst_stride, diff --git a/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c index bf01f1154..32f5fb663 100644 --- a/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c @@ -44,7 +44,7 @@ static void convolve_bi_vert_4_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_store(dst + dst_stride); + prefetch_store(dst + dst_stride); for (x = 0; x < w; x += 4) { src_ptr = src + x; @@ -141,7 +141,7 @@ static void convolve_bi_vert_64_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_store(dst + dst_stride); + prefetch_store(dst + dst_stride); for (x = 0; x < 64; x += 4) { src_ptr = src + x; @@ -230,7 +230,7 @@ void vp9_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, : [pos] "r" (pos) ); - vp9_prefetch_store(dst); + prefetch_store(dst); switch (w) { case 4 : @@ -242,7 +242,7 @@ void vp9_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, filter_y, w, h); break; case 64 : - vp9_prefetch_store(dst + 32); + prefetch_store(dst + 32); convolve_bi_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h); diff --git a/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c index 17422798c..d9cbfe68f 100644 --- a/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c @@ -49,7 +49,7 @@ static void convolve_avg_vert_4_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_store(dst + dst_stride); + prefetch_store(dst + dst_stride); for (x = 0; x < w; x += 4) { src_ptr = src + x; @@ -210,8 +210,8 @@ static void convolve_avg_vert_64_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_store(dst + dst_stride); - vp9_prefetch_store(dst + dst_stride + 32); + prefetch_store(dst + dst_stride); + prefetch_store(dst + dst_stride + 32); for (x = 0; x < 64; x += 4) { src_ptr = src + x; @@ -372,7 +372,7 @@ void vp9_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, : [pos] "r" (pos) ); - vp9_prefetch_store(dst); + prefetch_store(dst); switch (w) { case 4: @@ -384,7 +384,7 @@ void vp9_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, filter_y, w, h); break; case 64: - vp9_prefetch_store(dst + 32); + prefetch_store(dst + 32); convolve_avg_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h); @@ -452,17 +452,17 @@ void vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint32_t tp3, tp4, tn2; /* prefetch data to cache memory */ - vp9_prefetch_load(src); - vp9_prefetch_load(src + 32); - vp9_prefetch_store(dst); + prefetch_load(src); + prefetch_load(src + 32); + prefetch_store(dst); switch (w) { case 4: /* 1 word storage */ for (y = h; y--; ) { - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -482,9 +482,9 @@ void vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, case 8: /* 2 word storage */ for (y = h; y--; ) { - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -509,9 +509,9 @@ void vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, case 16: /* 4 word storage */ for (y = h; y--; ) { - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -544,9 +544,9 @@ void vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, case 32: /* 8 word storage */ for (y = h; y--; ) { - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -593,16 +593,16 @@ void vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, } break; case 64: - vp9_prefetch_load(src + 64); - vp9_prefetch_store(dst + 32); + prefetch_load(src + 64); + prefetch_store(dst + 32); /* 16 word storage */ for (y = h; y--; ) { - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_load(src + src_stride + 64); - vp9_prefetch_store(dst + dst_stride); - vp9_prefetch_store(dst + dst_stride + 32); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_load(src + src_stride + 64); + prefetch_store(dst + dst_stride); + prefetch_store(dst + dst_stride + 32); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" diff --git a/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c index 69da1cfd6..cdb831237 100644 --- a/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c @@ -43,9 +43,9 @@ static void convolve_avg_horiz_4_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -165,9 +165,9 @@ static void convolve_avg_horiz_8_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -357,9 +357,9 @@ static void convolve_avg_horiz_16_dspr2(const uint8_t *src_ptr, dst = dst_ptr; /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); - vp9_prefetch_store(dst_ptr + dst_stride); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); + prefetch_store(dst_ptr + dst_stride); for (c = 0; c < count; c++) { __asm__ __volatile__ ( @@ -668,11 +668,11 @@ static void convolve_avg_horiz_64_dspr2(const uint8_t *src_ptr, dst = dst_ptr; /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); - vp9_prefetch_load(src_ptr + src_stride + 64); - vp9_prefetch_store(dst_ptr + dst_stride); - vp9_prefetch_store(dst_ptr + dst_stride + 32); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); + prefetch_load(src_ptr + src_stride + 64); + prefetch_store(dst_ptr + dst_stride); + prefetch_store(dst_ptr + dst_stride + 32); for (c = 0; c < 4; c++) { __asm__ __volatile__ ( @@ -985,9 +985,9 @@ void vp9_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, ); /* prefetch data to cache memory */ - vp9_prefetch_load(src); - vp9_prefetch_load(src + 32); - vp9_prefetch_store(dst); + prefetch_load(src); + prefetch_load(src + 32); + prefetch_store(dst); switch (w) { case 4: @@ -1011,8 +1011,8 @@ void vp9_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, filter_x, h, 2); break; case 64: - vp9_prefetch_load(src + 64); - vp9_prefetch_store(dst + 32); + prefetch_load(src + 64); + prefetch_store(dst + 32); convolve_avg_horiz_64_dspr2(src, src_stride, dst, dst_stride, diff --git a/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c index 58b50d2df..a1309d1ee 100644 --- a/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c @@ -60,8 +60,8 @@ static void convolve_horiz_4_transposed_dspr2(const uint8_t *src, for (y = h; y--;) { dst_ptr = dst; /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -176,8 +176,8 @@ static void convolve_horiz_8_transposed_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); dst_ptr = dst; odd_dst = (dst_ptr + dst_stride); @@ -355,8 +355,8 @@ static void convolve_horiz_16_transposed_dspr2(const uint8_t *src_ptr, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); src = src_ptr; dst = dst_ptr; @@ -645,9 +645,9 @@ static void convolve_horiz_64_transposed_dspr2(const uint8_t *src_ptr, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); - vp9_prefetch_load(src_ptr + src_stride + 64); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); + prefetch_load(src_ptr + src_stride + 64); src = src_ptr; dst = dst_ptr; @@ -993,8 +993,8 @@ void vp9_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, src -= (src_stride * 3 + 3); /* prefetch data to cache memory */ - vp9_prefetch_load(src); - vp9_prefetch_load(src + 32); + prefetch_load(src); + prefetch_load(src + 32); switch (w) { case 4: @@ -1015,7 +1015,7 @@ void vp9_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, (w/16)); break; case 64: - vp9_prefetch_load(src + 32); + prefetch_load(src + 32); convolve_horiz_64_transposed_dspr2(src, src_stride, temp, intermediate_height, filter_x, intermediate_height); @@ -1078,9 +1078,9 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride, int x, y; /* prefetch data to cache memory */ - vp9_prefetch_load(src); - vp9_prefetch_load(src + 32); - vp9_prefetch_store(dst); + prefetch_load(src); + prefetch_load(src + 32); + prefetch_store(dst); switch (w) { case 4: @@ -1089,9 +1089,9 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride, /* 1 word storage */ for (y = h; y--; ) { - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], (%[src]) \n\t" @@ -1112,9 +1112,9 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride, /* 2 word storage */ for (y = h; y--; ) { - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -1137,9 +1137,9 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride, /* 4 word storage */ for (y = h; y--; ) { - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -1169,9 +1169,9 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride, /* 8 word storage */ for (y = h; y--; ) { - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -1209,16 +1209,16 @@ void vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint32_t tp1, tp2, tp3, tp4; uint32_t tp5, tp6, tp7, tp8; - vp9_prefetch_load(src + 64); - vp9_prefetch_store(dst + 32); + prefetch_load(src + 64); + prefetch_store(dst + 32); /* 16 word storage */ for (y = h; y--; ) { - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_load(src + src_stride + 64); - vp9_prefetch_store(dst + dst_stride); - vp9_prefetch_store(dst + dst_stride + 32); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_load(src + src_stride + 64); + prefetch_store(dst + dst_stride); + prefetch_store(dst + dst_stride + 32); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" diff --git a/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c index 030389615..d0e3095b3 100644 --- a/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c @@ -43,9 +43,9 @@ static void convolve_horiz_4_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -154,9 +154,9 @@ static void convolve_horiz_8_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_load(src + src_stride); - vp9_prefetch_load(src + src_stride + 32); - vp9_prefetch_store(dst + dst_stride); + prefetch_load(src + src_stride); + prefetch_load(src + src_stride + 32); + prefetch_store(dst + dst_stride); __asm__ __volatile__ ( "ulw %[tp1], 0(%[src]) \n\t" @@ -323,9 +323,9 @@ static void convolve_horiz_16_dspr2(const uint8_t *src_ptr, dst = dst_ptr; /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); - vp9_prefetch_store(dst_ptr + dst_stride); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); + prefetch_store(dst_ptr + dst_stride); for (c = 0; c < count; c++) { __asm__ __volatile__ ( @@ -593,11 +593,11 @@ static void convolve_horiz_64_dspr2(const uint8_t *src_ptr, dst = dst_ptr; /* prefetch data to cache memory */ - vp9_prefetch_load(src_ptr + src_stride); - vp9_prefetch_load(src_ptr + src_stride + 32); - vp9_prefetch_load(src_ptr + src_stride + 64); - vp9_prefetch_store(dst_ptr + dst_stride); - vp9_prefetch_store(dst_ptr + dst_stride + 32); + prefetch_load(src_ptr + src_stride); + prefetch_load(src_ptr + src_stride + 32); + prefetch_load(src_ptr + src_stride + 64); + prefetch_store(dst_ptr + dst_stride); + prefetch_store(dst_ptr + dst_stride + 32); for (c = 0; c < 4; c++) { __asm__ __volatile__ ( @@ -859,7 +859,7 @@ void vp9_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, if (16 == x_step_q4) { uint32_t pos = 38; - vp9_prefetch_load((const uint8_t *)filter_x); + prefetch_load((const uint8_t *)filter_x); src -= 3; /* bit positon for extract from acc */ @@ -870,9 +870,9 @@ void vp9_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, ); /* prefetch data to cache memory */ - vp9_prefetch_load(src); - vp9_prefetch_load(src + 32); - vp9_prefetch_store(dst); + prefetch_load(src); + prefetch_load(src + 32); + prefetch_store(dst); switch (w) { case 4: @@ -896,8 +896,8 @@ void vp9_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, filter_x, (int32_t)h, 2); break; case 64: - vp9_prefetch_load(src + 64); - vp9_prefetch_store(dst + 32); + prefetch_load(src + 64); + prefetch_store(dst + 32); convolve_horiz_64_dspr2(src, (int32_t)src_stride, dst, (int32_t)dst_stride, diff --git a/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c index 0930bb3d8..98acb81ba 100644 --- a/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c @@ -49,7 +49,7 @@ static void convolve_vert_4_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_store(dst + dst_stride); + prefetch_store(dst + dst_stride); for (x = 0; x < w; x += 4) { src_ptr = src + x; @@ -203,8 +203,8 @@ static void convolve_vert_64_dspr2(const uint8_t *src, for (y = h; y--;) { /* prefetch data to cache memory */ - vp9_prefetch_store(dst + dst_stride); - vp9_prefetch_store(dst + dst_stride + 32); + prefetch_store(dst + dst_stride); + prefetch_store(dst + dst_stride + 32); for (x = 0; x < 64; x += 4) { src_ptr = src + x; @@ -358,7 +358,7 @@ void vp9_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, : [pos] "r" (pos) ); - vp9_prefetch_store(dst); + prefetch_store(dst); switch (w) { case 4 : @@ -370,7 +370,7 @@ void vp9_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, filter_y, w, h); break; case 64 : - vp9_prefetch_store(dst + 32); + prefetch_store(dst + 32); convolve_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h); diff --git a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c index 202d91381..10a24f33d 100644 --- a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c @@ -34,7 +34,7 @@ static void idct16_rows_dspr2(const int16_t *input, int16_t *output, for (i = no_rows; i--; ) { /* prefetch row */ - vp9_prefetch_load((const uint8_t *)(input + 16)); + prefetch_load((const uint8_t *)(input + 16)); __asm__ __volatile__ ( "lh %[load1], 0(%[input]) \n\t" @@ -421,14 +421,14 @@ static void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, uint8_t *cm = vp9_ff_cropTbl; /* prefetch vp9_ff_cropTbl */ - vp9_prefetch_load(vp9_ff_cropTbl); - vp9_prefetch_load(vp9_ff_cropTbl + 32); - vp9_prefetch_load(vp9_ff_cropTbl + 64); - vp9_prefetch_load(vp9_ff_cropTbl + 96); - vp9_prefetch_load(vp9_ff_cropTbl + 128); - vp9_prefetch_load(vp9_ff_cropTbl + 160); - vp9_prefetch_load(vp9_ff_cropTbl + 192); - vp9_prefetch_load(vp9_ff_cropTbl + 224); + prefetch_load(vp9_ff_cropTbl); + prefetch_load(vp9_ff_cropTbl + 32); + prefetch_load(vp9_ff_cropTbl + 64); + prefetch_load(vp9_ff_cropTbl + 96); + prefetch_load(vp9_ff_cropTbl + 128); + prefetch_load(vp9_ff_cropTbl + 160); + prefetch_load(vp9_ff_cropTbl + 192); + prefetch_load(vp9_ff_cropTbl + 224); for (i = 0; i < 16; ++i) { dest_pix = (dest + i); @@ -1124,7 +1124,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, for (i = 0; i < 16; ++i) { /* prefetch row */ - vp9_prefetch_load((const uint8_t *)(input + 16)); + prefetch_load((const uint8_t *)(input + 16)); iadst16(input, outptr); input += 16; @@ -1144,7 +1144,7 @@ void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, for (i = 0; i < 16; ++i) { /* prefetch row */ - vp9_prefetch_load((const uint8_t *)(input + 16)); + prefetch_load((const uint8_t *)(input + 16)); iadst16(input, outptr); input += 16; diff --git a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c index 7ceebb6d8..a25614581 100644 --- a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c @@ -44,14 +44,14 @@ void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, uint8_t *cm = vp9_ff_cropTbl; /* prefetch vp9_ff_cropTbl */ - vp9_prefetch_load(vp9_ff_cropTbl); - vp9_prefetch_load(vp9_ff_cropTbl + 32); - vp9_prefetch_load(vp9_ff_cropTbl + 64); - vp9_prefetch_load(vp9_ff_cropTbl + 96); - vp9_prefetch_load(vp9_ff_cropTbl + 128); - vp9_prefetch_load(vp9_ff_cropTbl + 160); - vp9_prefetch_load(vp9_ff_cropTbl + 192); - vp9_prefetch_load(vp9_ff_cropTbl + 224); + prefetch_load(vp9_ff_cropTbl); + prefetch_load(vp9_ff_cropTbl + 32); + prefetch_load(vp9_ff_cropTbl + 64); + prefetch_load(vp9_ff_cropTbl + 96); + prefetch_load(vp9_ff_cropTbl + 128); + prefetch_load(vp9_ff_cropTbl + 160); + prefetch_load(vp9_ff_cropTbl + 192); + prefetch_load(vp9_ff_cropTbl + 224); for (i = 0; i < 32; ++i) { dest_pix = dest + i; diff --git a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c index 74a90b02c..dd18831fc 100644 --- a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c @@ -96,8 +96,8 @@ static void idct32_rows_dspr2(const int16_t *input, int16_t *output, } /* prefetch row */ - vp9_prefetch_load((const uint8_t *)(input + 32)); - vp9_prefetch_load((const uint8_t *)(input + 48)); + prefetch_load((const uint8_t *)(input + 32)); + prefetch_load((const uint8_t *)(input + 48)); __asm__ __volatile__ ( "lh %[load1], 2(%[input]) \n\t" diff --git a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c index 280190a39..4e31f9fee 100644 --- a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c @@ -115,14 +115,14 @@ static void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, uint8_t *cm = vp9_ff_cropTbl; /* prefetch vp9_ff_cropTbl */ - vp9_prefetch_load(vp9_ff_cropTbl); - vp9_prefetch_load(vp9_ff_cropTbl + 32); - vp9_prefetch_load(vp9_ff_cropTbl + 64); - vp9_prefetch_load(vp9_ff_cropTbl + 96); - vp9_prefetch_load(vp9_ff_cropTbl + 128); - vp9_prefetch_load(vp9_ff_cropTbl + 160); - vp9_prefetch_load(vp9_ff_cropTbl + 192); - vp9_prefetch_load(vp9_ff_cropTbl + 224); + prefetch_load(vp9_ff_cropTbl); + prefetch_load(vp9_ff_cropTbl + 32); + prefetch_load(vp9_ff_cropTbl + 64); + prefetch_load(vp9_ff_cropTbl + 96); + prefetch_load(vp9_ff_cropTbl + 128); + prefetch_load(vp9_ff_cropTbl + 160); + prefetch_load(vp9_ff_cropTbl + 192); + prefetch_load(vp9_ff_cropTbl + 224); for (i = 0; i < 4; ++i) { dest_pix = (dest + i); diff --git a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c index 04d226663..6898d569c 100644 --- a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c @@ -211,14 +211,14 @@ static void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, uint8_t *cm = vp9_ff_cropTbl; /* prefetch vp9_ff_cropTbl */ - vp9_prefetch_load(vp9_ff_cropTbl); - vp9_prefetch_load(vp9_ff_cropTbl + 32); - vp9_prefetch_load(vp9_ff_cropTbl + 64); - vp9_prefetch_load(vp9_ff_cropTbl + 96); - vp9_prefetch_load(vp9_ff_cropTbl + 128); - vp9_prefetch_load(vp9_ff_cropTbl + 160); - vp9_prefetch_load(vp9_ff_cropTbl + 192); - vp9_prefetch_load(vp9_ff_cropTbl + 224); + prefetch_load(vp9_ff_cropTbl); + prefetch_load(vp9_ff_cropTbl + 32); + prefetch_load(vp9_ff_cropTbl + 64); + prefetch_load(vp9_ff_cropTbl + 96); + prefetch_load(vp9_ff_cropTbl + 128); + prefetch_load(vp9_ff_cropTbl + 160); + prefetch_load(vp9_ff_cropTbl + 192); + prefetch_load(vp9_ff_cropTbl + 224); for (i = 0; i < 8; ++i) { dest_pix = (dest + i); diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 5e35fc51d..0915918e5 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -327,55 +327,55 @@ static void filter_selectively_vert_row2(int subsampling_factor, if (mask & 1) { if ((mask_16x16_0 | mask_16x16_1) & 1) { if ((mask_16x16_0 & mask_16x16_1) & 1) { - vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, + vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); } else if (mask_16x16_0 & 1) { - vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, + vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); } else { - vp9_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, + vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, lfi1->lim, lfi1->hev_thr); } } if ((mask_8x8_0 | mask_8x8_1) & 1) { if ((mask_8x8_0 & mask_8x8_1) & 1) { - vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, + vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr); } else if (mask_8x8_0 & 1) { - vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, + vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, 1); } else { - vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, + vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, lfi1->hev_thr, 1); } } if ((mask_4x4_0 | mask_4x4_1) & 1) { if ((mask_4x4_0 & mask_4x4_1) & 1) { - vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, + vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr); } else if (mask_4x4_0 & 1) { - vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, + vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, 1); } else { - vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, + vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, lfi1->hev_thr, 1); } } if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { - vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, + vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr); } else if (mask_4x4_int_0 & 1) { - vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, + vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, 1); } else { - vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, + vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, lfi1->hev_thr, 1); } } @@ -427,55 +427,55 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, if (mask & 1) { if ((mask_16x16_0 | mask_16x16_1) & 1) { if ((mask_16x16_0 & mask_16x16_1) & 1) { - vp9_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, + vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, bd); } else if (mask_16x16_0 & 1) { - vp9_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, + vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, bd); } else { - vp9_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, + vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, lfi1->lim, lfi1->hev_thr, bd); } } if ((mask_8x8_0 | mask_8x8_1) & 1) { if ((mask_8x8_0 & mask_8x8_1) & 1) { - vp9_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, + vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr, bd); } else if (mask_8x8_0 & 1) { - vp9_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, + vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, 1, bd); } else { - vp9_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, + vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, lfi1->hev_thr, 1, bd); } } if ((mask_4x4_0 | mask_4x4_1) & 1) { if ((mask_4x4_0 & mask_4x4_1) & 1) { - vp9_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, + vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr, bd); } else if (mask_4x4_0 & 1) { - vp9_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, + vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, 1, bd); } else { - vp9_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, + vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, lfi1->hev_thr, 1, bd); } } if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { - vp9_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, + vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr, bd); } else if (mask_4x4_int_0 & 1) { - vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, + vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, 1, bd); } else { - vp9_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, + vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, lfi1->hev_thr, 1, bd); } } @@ -513,11 +513,11 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, + vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 2); count = 2; } else { - vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, + vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } } else if (mask_8x8 & 1) { @@ -525,28 +525,28 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, // Next block's thresholds. const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); - vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, + vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr); if ((mask_4x4_int & 3) == 3) { - vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr); } else { if (mask_4x4_int & 1) - vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); else if (mask_4x4_int & 2) - vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, lfin->lim, lfin->hev_thr, 1); } count = 2; } else { - vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); if (mask_4x4_int & 1) - vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } } else if (mask_4x4 & 1) { @@ -554,31 +554,31 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, // Next block's thresholds. const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); - vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, + vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr); if ((mask_4x4_int & 3) == 3) { - vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr); } else { if (mask_4x4_int & 1) - vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); else if (mask_4x4_int & 2) - vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, lfin->lim, lfin->hev_thr, 1); } count = 2; } else { - vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); if (mask_4x4_int & 1) - vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } } else if (mask_4x4_int & 1) { - vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } } @@ -610,11 +610,11 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 2, bd); count = 2; } else { - vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); } } else if (mask_8x8 & 1) { @@ -622,31 +622,31 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, // Next block's thresholds. const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); - vp9_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr, bd); if ((mask_4x4_int & 3) == 3) { - vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr, bd); } else { if (mask_4x4_int & 1) { - vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); } else if (mask_4x4_int & 2) { - vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, lfin->lim, lfin->hev_thr, 1, bd); } } count = 2; } else { - vp9_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); if (mask_4x4_int & 1) { - vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); } } @@ -655,35 +655,35 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, // Next block's thresholds. const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); - vp9_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr, bd); if ((mask_4x4_int & 3) == 3) { - vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr, bd); } else { if (mask_4x4_int & 1) { - vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); } else if (mask_4x4_int & 2) { - vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, lfin->lim, lfin->hev_thr, 1, bd); } } count = 2; } else { - vp9_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); if (mask_4x4_int & 1) { - vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); } } } else if (mask_4x4_int & 1) { - vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); } } @@ -1094,15 +1094,15 @@ static void filter_selectively_vert(uint8_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { - vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_8x8 & 1) { - vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } else if (mask_4x4 & 1) { - vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } } if (mask_4x4_int & 1) - vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); s += 8; lfl += 1; mask_16x16 >>= 1; @@ -1128,18 +1128,18 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { - vp9_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } else if (mask_8x8 & 1) { - vp9_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); } else if (mask_4x4 & 1) { - vp9_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); } } if (mask_4x4_int & 1) - vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, + vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); s += 8; lfl += 1; diff --git a/vpx_dsp/arm/loopfilter_16_neon.asm b/vpx_dsp/arm/loopfilter_16_neon.asm index 5b8ec2028..5a8fdd6af 100644 --- a/vpx_dsp/arm/loopfilter_16_neon.asm +++ b/vpx_dsp/arm/loopfilter_16_neon.asm @@ -8,12 +8,12 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_lpf_horizontal_4_dual_neon| + EXPORT |vpx_lpf_horizontal_4_dual_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p, +;void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p, ; const uint8_t *blimit0, ; const uint8_t *limit0, ; const uint8_t *thresh0, @@ -29,7 +29,7 @@ ; sp+8 const uint8_t *limit1, ; sp+12 const uint8_t *thresh1, -|vp9_lpf_horizontal_4_dual_neon| PROC +|vpx_lpf_horizontal_4_dual_neon| PROC push {lr} ldr r12, [sp, #4] ; load thresh0 @@ -66,7 +66,7 @@ sub r2, r2, r1, lsl #1 sub r3, r3, r1, lsl #1 - bl vp9_loop_filter_neon_16 + bl vpx_loop_filter_neon_16 vst1.u8 {q5}, [r2@64], r1 ; store op1 vst1.u8 {q6}, [r3@64], r1 ; store op0 @@ -76,9 +76,9 @@ vpop {d8-d15} ; restore neon registers pop {pc} - ENDP ; |vp9_lpf_horizontal_4_dual_neon| + ENDP ; |vpx_lpf_horizontal_4_dual_neon| -; void vp9_loop_filter_neon_16(); +; void vpx_loop_filter_neon_16(); ; This is a helper function for the loopfilters. The invidual functions do the ; necessary load, transpose (if necessary) and store. This function uses ; registers d8-d15, so the calling function must save those registers. @@ -101,7 +101,7 @@ ; q6 op0 ; q7 oq0 ; q8 oq1 -|vp9_loop_filter_neon_16| PROC +|vpx_loop_filter_neon_16| PROC ; filter_mask vabd.u8 q11, q3, q4 ; m1 = abs(p3 - p2) @@ -194,6 +194,6 @@ veor q8, q12, q10 ; *oq1 = u^0x80 bx lr - ENDP ; |vp9_loop_filter_neon_16| + ENDP ; |vpx_loop_filter_neon_16| END diff --git a/vpx_dsp/arm/loopfilter_16_neon.c b/vpx_dsp/arm/loopfilter_16_neon.c index 27c282763..d24e6adc8 100644 --- a/vpx_dsp/arm/loopfilter_16_neon.c +++ b/vpx_dsp/arm/loopfilter_16_neon.c @@ -14,7 +14,7 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" -static INLINE void vp9_loop_filter_neon_16( +static INLINE void loop_filter_neon_16( uint8x16_t qblimit, // blimit uint8x16_t qlimit, // limit uint8x16_t qthresh, // thresh @@ -124,7 +124,7 @@ static INLINE void vp9_loop_filter_neon_16( return; } -void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */, +void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, @@ -163,9 +163,9 @@ void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */, s += p; q10u8 = vld1q_u8(s); - vp9_loop_filter_neon_16(qblimit, qlimit, qthresh, - q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, - &q5u8, &q6u8, &q7u8, &q8u8); + loop_filter_neon_16(qblimit, qlimit, qthresh, + q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, + &q5u8, &q6u8, &q7u8, &q8u8); s -= (p * 5); vst1q_u8(s, q5u8); diff --git a/vpx_dsp/arm/loopfilter_4_neon.asm b/vpx_dsp/arm/loopfilter_4_neon.asm index 7738e0d3a..e45e34cd4 100644 --- a/vpx_dsp/arm/loopfilter_4_neon.asm +++ b/vpx_dsp/arm/loopfilter_4_neon.asm @@ -8,18 +8,18 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_lpf_horizontal_4_neon| - EXPORT |vp9_lpf_vertical_4_neon| + EXPORT |vpx_lpf_horizontal_4_neon| + EXPORT |vpx_lpf_vertical_4_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 -; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter +; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. ; TODO(fgalligan): See about removing the count code as this function is only ; called with a count of 1. ; -; void vp9_lpf_horizontal_4_neon(uint8_t *s, +; void vpx_lpf_horizontal_4_neon(uint8_t *s, ; int p /* pitch */, ; const uint8_t *blimit, ; const uint8_t *limit, @@ -32,7 +32,7 @@ ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, ; sp+4 int count -|vp9_lpf_horizontal_4_neon| PROC +|vpx_lpf_horizontal_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit @@ -41,7 +41,7 @@ add r1, r1, r1 ; double pitch cmp r12, #0 - beq end_vp9_lf_h_edge + beq end_vpx_lf_h_edge vld1.8 {d1[]}, [r3] ; duplicate *limit vld1.8 {d2[]}, [r2] ; duplicate *thresh @@ -62,7 +62,7 @@ count_lf_h_loop sub r2, r2, r1, lsl #1 sub r3, r3, r1, lsl #1 - bl vp9_loop_filter_neon + bl vpx_loop_filter_neon vst1.u8 {d4}, [r2@64], r1 ; store op1 vst1.u8 {d5}, [r3@64], r1 ; store op0 @@ -73,16 +73,16 @@ count_lf_h_loop subs r12, r12, #1 bne count_lf_h_loop -end_vp9_lf_h_edge +end_vpx_lf_h_edge pop {pc} - ENDP ; |vp9_lpf_horizontal_4_neon| + ENDP ; |vpx_lpf_horizontal_4_neon| -; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter +; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. ; TODO(fgalligan): See about removing the count code as this function is only ; called with a count of 1. ; -; void vp9_lpf_vertical_4_neon(uint8_t *s, +; void vpx_lpf_vertical_4_neon(uint8_t *s, ; int p /* pitch */, ; const uint8_t *blimit, ; const uint8_t *limit, @@ -95,7 +95,7 @@ end_vp9_lf_h_edge ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, ; sp+4 int count -|vp9_lpf_vertical_4_neon| PROC +|vpx_lpf_vertical_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit @@ -105,7 +105,7 @@ end_vp9_lf_h_edge ldr r3, [sp, #4] ; load thresh sub r2, r0, #4 ; move s pointer down by 4 columns cmp r12, #0 - beq end_vp9_lf_v_edge + beq end_vpx_lf_v_edge vld1.8 {d2[]}, [r3] ; duplicate *thresh @@ -135,7 +135,7 @@ count_lf_v_loop vtrn.8 d7, d16 vtrn.8 d17, d18 - bl vp9_loop_filter_neon + bl vpx_loop_filter_neon sub r0, r0, #2 @@ -154,11 +154,11 @@ count_lf_v_loop subne r2, r0, #4 ; move s pointer down by 4 columns bne count_lf_v_loop -end_vp9_lf_v_edge +end_vpx_lf_v_edge pop {pc} - ENDP ; |vp9_lpf_vertical_4_neon| + ENDP ; |vpx_lpf_vertical_4_neon| -; void vp9_loop_filter_neon(); +; void vpx_loop_filter_neon(); ; This is a helper function for the loopfilters. The invidual functions do the ; necessary load, transpose (if necessary) and store. The function does not use ; registers d8-d15. @@ -182,7 +182,7 @@ end_vp9_lf_v_edge ; d5 op0 ; d6 oq0 ; d7 oq1 -|vp9_loop_filter_neon| PROC +|vpx_loop_filter_neon| PROC ; filter_mask vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2) vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1) @@ -272,6 +272,6 @@ end_vp9_lf_v_edge veor d7, d20, d18 ; *oq1 = u^0x80 bx lr - ENDP ; |vp9_loop_filter_neon| + ENDP ; |vpx_loop_filter_neon| END diff --git a/vpx_dsp/arm/loopfilter_4_neon.c b/vpx_dsp/arm/loopfilter_4_neon.c index 476ed3412..7ad411aea 100644 --- a/vpx_dsp/arm/loopfilter_4_neon.c +++ b/vpx_dsp/arm/loopfilter_4_neon.c @@ -12,7 +12,7 @@ #include "./vpx_dsp_rtcd.h" -static INLINE void vp9_loop_filter_neon( +static INLINE void loop_filter_neon( uint8x8_t dblimit, // flimit uint8x8_t dlimit, // limit uint8x8_t dthresh, // thresh @@ -110,7 +110,7 @@ static INLINE void vp9_loop_filter_neon( return; } -void vp9_lpf_horizontal_4_neon( +void vpx_lpf_horizontal_4_neon( uint8_t *src, int pitch, const uint8_t *blimit, @@ -122,7 +122,7 @@ void vp9_lpf_horizontal_4_neon( uint8x8_t dblimit, dlimit, dthresh; uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8; - if (count == 0) // end_vp9_lf_h_edge + if (count == 0) // end_vpx_lf_h_edge return; dblimit = vld1_u8(blimit); @@ -149,9 +149,9 @@ void vp9_lpf_horizontal_4_neon( s += pitch; d18u8 = vld1_u8(s); - vp9_loop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, - &d4u8, &d5u8, &d6u8, &d7u8); + loop_filter_neon(dblimit, dlimit, dthresh, + d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, + &d4u8, &d5u8, &d6u8, &d7u8); s -= (pitch * 5); vst1_u8(s, d4u8); @@ -165,7 +165,7 @@ void vp9_lpf_horizontal_4_neon( return; } -void vp9_lpf_vertical_4_neon( +void vpx_lpf_vertical_4_neon( uint8_t *src, int pitch, const uint8_t *blimit, @@ -181,7 +181,7 @@ void vp9_lpf_vertical_4_neon( uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; uint8x8x4_t d4Result; - if (count == 0) // end_vp9_lf_h_edge + if (count == 0) // end_vpx_lf_h_edge return; dblimit = vld1_u8(blimit); @@ -244,9 +244,9 @@ void vp9_lpf_vertical_4_neon( d17u8 = d2tmp11.val[0]; d18u8 = d2tmp11.val[1]; - vp9_loop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, - &d4u8, &d5u8, &d6u8, &d7u8); + loop_filter_neon(dblimit, dlimit, dthresh, + d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, + &d4u8, &d5u8, &d6u8, &d7u8); d4Result.val[0] = d4u8; d4Result.val[1] = d5u8; diff --git a/vpx_dsp/arm/loopfilter_8_neon.asm b/vpx_dsp/arm/loopfilter_8_neon.asm index 91aaec04e..e81734c04 100644 --- a/vpx_dsp/arm/loopfilter_8_neon.asm +++ b/vpx_dsp/arm/loopfilter_8_neon.asm @@ -8,18 +8,18 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_lpf_horizontal_8_neon| - EXPORT |vp9_lpf_vertical_8_neon| + EXPORT |vpx_lpf_horizontal_8_neon| + EXPORT |vpx_lpf_vertical_8_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 -; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter +; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. ; TODO(fgalligan): See about removing the count code as this function is only ; called with a count of 1. ; -; void vp9_lpf_horizontal_8_neon(uint8_t *s, int p, +; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p, ; const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh, @@ -30,7 +30,7 @@ ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, ; sp+4 int count -|vp9_lpf_horizontal_8_neon| PROC +|vpx_lpf_horizontal_8_neon| PROC push {r4-r5, lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit @@ -39,7 +39,7 @@ add r1, r1, r1 ; double pitch cmp r12, #0 - beq end_vp9_mblf_h_edge + beq end_vpx_mblf_h_edge vld1.8 {d1[]}, [r3] ; duplicate *limit vld1.8 {d2[]}, [r2] ; duplicate *thresh @@ -60,7 +60,7 @@ count_mblf_h_loop sub r3, r3, r1, lsl #1 sub r2, r2, r1, lsl #2 - bl vp9_mbloop_filter_neon + bl vpx_mbloop_filter_neon vst1.u8 {d0}, [r2@64], r1 ; store op2 vst1.u8 {d1}, [r3@64], r1 ; store op1 @@ -73,12 +73,12 @@ count_mblf_h_loop subs r12, r12, #1 bne count_mblf_h_loop -end_vp9_mblf_h_edge +end_vpx_mblf_h_edge pop {r4-r5, pc} - ENDP ; |vp9_lpf_horizontal_8_neon| + ENDP ; |vpx_lpf_horizontal_8_neon| -; void vp9_lpf_vertical_8_neon(uint8_t *s, +; void vpx_lpf_vertical_8_neon(uint8_t *s, ; int pitch, ; const uint8_t *blimit, ; const uint8_t *limit, @@ -91,7 +91,7 @@ end_vp9_mblf_h_edge ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, ; sp+4 int count -|vp9_lpf_vertical_8_neon| PROC +|vpx_lpf_vertical_8_neon| PROC push {r4-r5, lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit @@ -101,7 +101,7 @@ end_vp9_mblf_h_edge ldr r3, [sp, #12] ; load thresh sub r2, r0, #4 ; move s pointer down by 4 columns cmp r12, #0 - beq end_vp9_mblf_v_edge + beq end_vpx_mblf_v_edge vld1.8 {d2[]}, [r3] ; duplicate *thresh @@ -134,7 +134,7 @@ count_mblf_v_loop sub r2, r0, #3 add r3, r0, #1 - bl vp9_mbloop_filter_neon + bl vpx_mbloop_filter_neon ;store op2, op1, op0, oq0 vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r2], r1 @@ -161,11 +161,11 @@ count_mblf_v_loop subne r2, r0, #4 ; move s pointer down by 4 columns bne count_mblf_v_loop -end_vp9_mblf_v_edge +end_vpx_mblf_v_edge pop {r4-r5, pc} - ENDP ; |vp9_lpf_vertical_8_neon| + ENDP ; |vpx_lpf_vertical_8_neon| -; void vp9_mbloop_filter_neon(); +; void vpx_mbloop_filter_neon(); ; This is a helper function for the loopfilters. The invidual functions do the ; necessary load, transpose (if necessary) and store. The function does not use ; registers d8-d15. @@ -191,7 +191,7 @@ end_vp9_mblf_v_edge ; d3 oq0 ; d4 oq1 ; d5 oq2 -|vp9_mbloop_filter_neon| PROC +|vpx_mbloop_filter_neon| PROC ; filter_mask vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2) vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1) @@ -446,6 +446,6 @@ filter_branch_only bx lr - ENDP ; |vp9_mbloop_filter_neon| + ENDP ; |vpx_mbloop_filter_neon| END diff --git a/vpx_dsp/arm/loopfilter_8_neon.c b/vpx_dsp/arm/loopfilter_8_neon.c index 2179e5fe5..a887e2ee5 100644 --- a/vpx_dsp/arm/loopfilter_8_neon.c +++ b/vpx_dsp/arm/loopfilter_8_neon.c @@ -12,7 +12,7 @@ #include "./vpx_dsp_rtcd.h" -static INLINE void vp9_mbloop_filter_neon( +static INLINE void mbloop_filter_neon( uint8x8_t dblimit, // mblimit uint8x8_t dlimit, // limit uint8x8_t dthresh, // thresh @@ -263,7 +263,7 @@ static INLINE void vp9_mbloop_filter_neon( return; } -void vp9_lpf_horizontal_8_neon( +void vpx_lpf_horizontal_8_neon( uint8_t *src, int pitch, const uint8_t *blimit, @@ -276,7 +276,7 @@ void vp9_lpf_horizontal_8_neon( uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; uint8x8_t d16u8, d17u8, d18u8; - if (count == 0) // end_vp9_mblf_h_edge + if (count == 0) // end_vpx_mblf_h_edge return; dblimit = vld1_u8(blimit); @@ -303,9 +303,9 @@ void vp9_lpf_horizontal_8_neon( s += pitch; d18u8 = vld1_u8(s); - vp9_mbloop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, - &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); + mbloop_filter_neon(dblimit, dlimit, dthresh, + d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, + &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); s -= (pitch * 6); vst1_u8(s, d0u8); @@ -323,7 +323,7 @@ void vp9_lpf_horizontal_8_neon( return; } -void vp9_lpf_vertical_8_neon( +void vpx_lpf_vertical_8_neon( uint8_t *src, int pitch, const uint8_t *blimit, @@ -403,9 +403,9 @@ void vp9_lpf_vertical_8_neon( d17u8 = d2tmp11.val[0]; d18u8 = d2tmp11.val[1]; - vp9_mbloop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, - &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); + mbloop_filter_neon(dblimit, dlimit, dthresh, + d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, + &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); d4Result.val[0] = d0u8; d4Result.val[1] = d1u8; diff --git a/vpx_dsp/arm/loopfilter_mb_neon.asm b/vpx_dsp/arm/loopfilter_mb_neon.asm index 5fe2bba46..20d9cfb11 100644 --- a/vpx_dsp/arm/loopfilter_mb_neon.asm +++ b/vpx_dsp/arm/loopfilter_mb_neon.asm @@ -8,13 +8,13 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_lpf_horizontal_16_neon| - EXPORT |vp9_lpf_vertical_16_neon| + EXPORT |vpx_lpf_horizontal_16_neon| + EXPORT |vpx_lpf_vertical_16_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 -; void vp9_lpf_horizontal_16_neon(uint8_t *s, int p, +; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p, ; const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh @@ -24,7 +24,7 @@ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -|vp9_lpf_horizontal_16_neon| PROC +|vpx_lpf_horizontal_16_neon| PROC push {r4-r8, lr} vpush {d8-d15} ldr r4, [sp, #88] ; load thresh @@ -54,7 +54,7 @@ h_count vld1.u8 {d14}, [r8@64], r1 ; q6 vld1.u8 {d15}, [r8@64], r1 ; q7 - bl vp9_wide_mbfilter_neon + bl vpx_wide_mbfilter_neon tst r7, #1 beq h_mbfilter @@ -115,9 +115,9 @@ h_next vpop {d8-d15} pop {r4-r8, pc} - ENDP ; |vp9_lpf_horizontal_16_neon| + ENDP ; |vpx_lpf_horizontal_16_neon| -; void vp9_lpf_vertical_16_neon(uint8_t *s, int p, +; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, ; const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh) @@ -126,7 +126,7 @@ h_next ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -|vp9_lpf_vertical_16_neon| PROC +|vpx_lpf_vertical_16_neon| PROC push {r4-r8, lr} vpush {d8-d15} ldr r4, [sp, #88] ; load thresh @@ -176,7 +176,7 @@ h_next vtrn.8 d12, d13 vtrn.8 d14, d15 - bl vp9_wide_mbfilter_neon + bl vpx_wide_mbfilter_neon tst r7, #1 beq v_mbfilter @@ -279,9 +279,9 @@ v_end vpop {d8-d15} pop {r4-r8, pc} - ENDP ; |vp9_lpf_vertical_16_neon| + ENDP ; |vpx_lpf_vertical_16_neon| -; void vp9_wide_mbfilter_neon(); +; void vpx_wide_mbfilter_neon(); ; This is a helper function for the loopfilters. The invidual functions do the ; necessary load, transpose (if necessary) and store. ; @@ -305,7 +305,7 @@ v_end ; d13 q5 ; d14 q6 ; d15 q7 -|vp9_wide_mbfilter_neon| PROC +|vpx_wide_mbfilter_neon| PROC mov r7, #0 ; filter_mask @@ -601,6 +601,6 @@ v_end vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m) bx lr - ENDP ; |vp9_wide_mbfilter_neon| + ENDP ; |vpx_wide_mbfilter_neon| END diff --git a/vpx_dsp/arm/loopfilter_neon.c b/vpx_dsp/arm/loopfilter_neon.c index 9ef322933..eff87d29b 100644 --- a/vpx_dsp/arm/loopfilter_neon.c +++ b/vpx_dsp/arm/loopfilter_neon.c @@ -14,45 +14,45 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" -void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p, +void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1); + vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); } #if HAVE_NEON_ASM -void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */, +void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1); + vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1); } -void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p, +void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1); + vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); } -void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p, +void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { - vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh); - vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh); + vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh); } #endif // HAVE_NEON_ASM diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index 73b448e93..dc8aca5c3 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -115,7 +115,7 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1, *op1 = signed_char_clamp(ps1 + filter) ^ 0x80; } -void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, +void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { int i; @@ -132,15 +132,15 @@ void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, } } -void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, +void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1); + vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1); } -void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, +void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { int i; @@ -157,12 +157,12 @@ void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, } } -void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, +void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1); - vp9_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, + vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1); + vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1, 1); } @@ -187,7 +187,7 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, } } -void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, +void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { int i; @@ -207,15 +207,15 @@ void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, } } -void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, +void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1); + vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1); } -void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, +void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { int i; @@ -232,12 +232,12 @@ void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, } } -void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, +void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1); - vp9_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, + vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1); + vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1, 1); } @@ -292,7 +292,7 @@ static INLINE void filter16(int8_t mask, uint8_t thresh, } } -void vp9_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit, +void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { int i; @@ -341,12 +341,12 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, } } -void vp9_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, +void vpx_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8); } -void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, +void vpx_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16); } @@ -446,7 +446,7 @@ static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1, *op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift); } -void vp9_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, +void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { int i; @@ -469,7 +469,7 @@ void vp9_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, } } -void vp9_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p, +void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, @@ -477,11 +477,11 @@ void vp9_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vp9_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd); - vp9_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd); + vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd); + vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd); } -void vp9_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, +void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { int i; @@ -498,7 +498,7 @@ void vp9_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, } } -void vp9_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, +void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, @@ -506,8 +506,8 @@ void vp9_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vp9_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd); - vp9_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, + vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd); + vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1, 1, bd); } @@ -532,7 +532,7 @@ static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, } } -void vp9_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit, +void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { int i; @@ -554,7 +554,7 @@ void vp9_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit, } } -void vp9_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p, +void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, @@ -562,11 +562,11 @@ void vp9_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vp9_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd); - vp9_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd); + vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd); + vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd); } -void vp9_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, +void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { int i; @@ -586,7 +586,7 @@ void vp9_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, } } -void vp9_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, +void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, @@ -594,8 +594,8 @@ void vp9_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vp9_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd); - vp9_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, + vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd); + vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1, 1, bd); } @@ -662,7 +662,7 @@ static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, } } -void vp9_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, +void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { int i; @@ -727,13 +727,13 @@ static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, } } -void vp9_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit, +void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd); } -void vp9_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p, +void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, diff --git a/vpx_dsp/mips/common_dspr2.h b/vpx_dsp/mips/common_dspr2.h index 879e2d0c6..8278101dc 100644 --- a/vpx_dsp/mips/common_dspr2.h +++ b/vpx_dsp/mips/common_dspr2.h @@ -21,7 +21,7 @@ extern "C" { #if HAVE_DSPR2 #define CROP_WIDTH 512 -static INLINE void vp9_prefetch_load(const unsigned char *src) { +static INLINE void prefetch_load(const unsigned char *src) { __asm__ __volatile__ ( "pref 0, 0(%[src]) \n\t" : @@ -30,7 +30,7 @@ static INLINE void vp9_prefetch_load(const unsigned char *src) { } /* prefetch data for store */ -static INLINE void vp9_prefetch_store(unsigned char *dst) { +static INLINE void prefetch_store(unsigned char *dst) { __asm__ __volatile__ ( "pref 1, 0(%[dst]) \n\t" : @@ -38,7 +38,7 @@ static INLINE void vp9_prefetch_store(unsigned char *dst) { ); } -static INLINE void vp9_prefetch_load_streamed(const unsigned char *src) { +static INLINE void prefetch_load_streamed(const unsigned char *src) { __asm__ __volatile__ ( "pref 4, 0(%[src]) \n\t" : @@ -47,7 +47,7 @@ static INLINE void vp9_prefetch_load_streamed(const unsigned char *src) { } /* prefetch data for store */ -static INLINE void vp9_prefetch_store_streamed(unsigned char *dst) { +static INLINE void prefetch_store_streamed(unsigned char *dst) { __asm__ __volatile__ ( "pref 5, 0(%[dst]) \n\t" : diff --git a/vpx_dsp/mips/loopfilter_16_msa.c b/vpx_dsp/mips/loopfilter_16_msa.c index b0b99b47e..b7c9f7bd0 100644 --- a/vpx_dsp/mips/loopfilter_16_msa.c +++ b/vpx_dsp/mips/loopfilter_16_msa.c @@ -11,7 +11,7 @@ #include "vpx_ports/mem.h" #include "vpx_dsp/mips/loopfilter_msa.h" -int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, +int32_t vpx_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, uint8_t *filter48, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, @@ -79,7 +79,7 @@ int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, } } -void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) { +void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) { v16u8 flat, flat2, filter8; v16i8 zero = { 0 }; v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; @@ -405,7 +405,7 @@ void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) { } } -void vp9_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr, @@ -415,15 +415,15 @@ void vp9_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch, (void)count; - early_exit = vp9_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr, + early_exit = vpx_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr, limit_ptr, thresh_ptr); if (0 == early_exit) { - vp9_hz_lpf_t16_16w(src, pitch, filter48); + vpx_hz_lpf_t16_16w(src, pitch, filter48); } } -void vp9_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr, @@ -643,7 +643,7 @@ void vp9_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch, } } } else { - vp9_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr, + vpx_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, count); } } @@ -744,7 +744,7 @@ static void transpose_16x16(uint8_t *input, int32_t in_pitch, ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch); } -int32_t vp9_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48, +int32_t vpx_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48, uint8_t *src_org, int32_t pitch_org, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, @@ -812,7 +812,7 @@ int32_t vp9_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48, } } -int32_t vp9_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch, +int32_t vpx_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch, uint8_t *filter48) { v16i8 zero = { 0 }; v16u8 filter8, flat, flat2; @@ -1032,7 +1032,7 @@ int32_t vp9_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch, } } -void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_vertical_16_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { @@ -1042,12 +1042,12 @@ void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch, transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16); - early_exit = vp9_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8), + early_exit = vpx_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8), &filter48[0], src, pitch, b_limit_ptr, limit_ptr, thresh_ptr); if (0 == early_exit) { - early_exit = vp9_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch, + early_exit = vpx_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch, &filter48[0]); if (0 == early_exit) { @@ -1056,7 +1056,7 @@ void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch, } } -int32_t vp9_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48, +int32_t vpx_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48, uint8_t *src_org, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, @@ -1134,7 +1134,7 @@ int32_t vp9_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48, } } -int32_t vp9_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch, +int32_t vpx_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch, uint8_t *filter48) { v16u8 flat, flat2, filter8; v16i8 zero = { 0 }; @@ -1455,7 +1455,7 @@ int32_t vp9_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch, } } -void vp9_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { @@ -1465,12 +1465,12 @@ void vp9_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch, transpose_16x16((src - 8), pitch, &transposed_input[0], 16); - early_exit = vp9_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8), + early_exit = vpx_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8), &filter48[0], src, pitch, b_limit_ptr, limit_ptr, thresh_ptr); if (0 == early_exit) { - early_exit = vp9_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch, + early_exit = vpx_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch, &filter48[0]); if (0 == early_exit) { diff --git a/vpx_dsp/mips/loopfilter_4_msa.c b/vpx_dsp/mips/loopfilter_4_msa.c index 24db78705..daf5f38bf 100644 --- a/vpx_dsp/mips/loopfilter_4_msa.c +++ b/vpx_dsp/mips/loopfilter_4_msa.c @@ -10,7 +10,7 @@ #include "vpx_dsp/mips/loopfilter_msa.h" -void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr, @@ -39,7 +39,7 @@ void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch, SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch); } -void vp9_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit0_ptr, const uint8_t *limit0_ptr, const uint8_t *thresh0_ptr, @@ -71,7 +71,7 @@ void vp9_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch, ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch); } -void vp9_lpf_vertical_4_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr, @@ -102,7 +102,7 @@ void vp9_lpf_vertical_4_msa(uint8_t *src, int32_t pitch, ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch); } -void vp9_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit0_ptr, const uint8_t *limit0_ptr, const uint8_t *thresh0_ptr, diff --git a/vpx_dsp/mips/loopfilter_8_msa.c b/vpx_dsp/mips/loopfilter_8_msa.c index 85c34ebfe..00b6db550 100644 --- a/vpx_dsp/mips/loopfilter_8_msa.c +++ b/vpx_dsp/mips/loopfilter_8_msa.c @@ -10,7 +10,7 @@ #include "vpx_dsp/mips/loopfilter_msa.h" -void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr, @@ -83,7 +83,7 @@ void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch, } } -void vp9_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit0, const uint8_t *limit0, const uint8_t *thresh0, @@ -158,7 +158,7 @@ void vp9_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch, } } -void vp9_lpf_vertical_8_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr, @@ -237,7 +237,7 @@ void vp9_lpf_vertical_8_msa(uint8_t *src, int32_t pitch, } } -void vp9_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch, +void vpx_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit0, const uint8_t *limit0, const uint8_t *thresh0, diff --git a/vpx_dsp/mips/loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c index a9cbc2bf1..99a96d89b 100644 --- a/vpx_dsp/mips/loopfilter_filters_dspr2.c +++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c @@ -19,7 +19,7 @@ #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 -void vp9_lpf_horizontal_4_dspr2(unsigned char *s, +void vpx_lpf_horizontal_4_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, @@ -49,7 +49,7 @@ void vp9_lpf_horizontal_4_dspr2(unsigned char *s, ); /* prefetch data for store */ - vp9_prefetch_store(s); + prefetch_store(s); /* loop filter designed to work using chars so that we can make maximum use of 8 bit simd instructions. */ @@ -87,14 +87,14 @@ void vp9_lpf_horizontal_4_dspr2(unsigned char *s, : [sm1] "r" (sm1), [s0] "r" (s0), [s5] "r" (s5), [s6] "r" (s6) ); - vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, - pm1, p0, p3, p4, p5, p6, - thresh_vec, &hev, &mask); + filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, + pm1, p0, p3, p4, p5, p6, + thresh_vec, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ - vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4); + filter_dspr2(mask, hev, &p1, &p2, &p3, &p4); __asm__ __volatile__ ( "sw %[p1], (%[s1]) \n\t" @@ -113,7 +113,7 @@ void vp9_lpf_horizontal_4_dspr2(unsigned char *s, } } -void vp9_lpf_vertical_4_dspr2(unsigned char *s, +void vpx_lpf_vertical_4_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, @@ -143,7 +143,7 @@ void vp9_lpf_vertical_4_dspr2(unsigned char *s, ); /* prefetch data for store */ - vp9_prefetch_store(s + pitch); + prefetch_store(s + pitch); for (i = 0; i < 2; i++) { s1 = s; @@ -216,14 +216,14 @@ void vp9_lpf_vertical_4_dspr2(unsigned char *s, * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { - vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, - p0, p3, p4, p5, p6, thresh_vec, - &hev, &mask); + filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, + p0, p3, p4, p5, p6, thresh_vec, + &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ - vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4); + filter_dspr2(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood * don't use transpose on output data @@ -306,56 +306,56 @@ void vp9_lpf_vertical_4_dspr2(unsigned char *s, } } -void vp9_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */, +void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1); + vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); } -void vp9_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */, +void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1); + vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); } -void vp9_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, +void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1); + vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1); } -void vp9_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, +void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vp9_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1); - vp9_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, + vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1); + vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1); } -void vp9_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, +void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { - vp9_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh); - vp9_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh); + vpx_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh); + vpx_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh); } #endif // #if HAVE_DSPR2 diff --git a/vpx_dsp/mips/loopfilter_filters_dspr2.h b/vpx_dsp/mips/loopfilter_filters_dspr2.h index f0efad9ca..4a1506ba1 100644 --- a/vpx_dsp/mips/loopfilter_filters_dspr2.h +++ b/vpx_dsp/mips/loopfilter_filters_dspr2.h @@ -24,10 +24,10 @@ extern "C" { #if HAVE_DSPR2 /* inputs & outputs are quad-byte vectors */ -static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev, - uint32_t *ps1, uint32_t *ps0, - uint32_t *qs0, uint32_t *qs1) { - int32_t vp9_filter_l, vp9_filter_r; +static INLINE void filter_dspr2(uint32_t mask, uint32_t hev, + uint32_t *ps1, uint32_t *ps0, + uint32_t *qs0, uint32_t *qs1) { + int32_t vpx_filter_l, vpx_filter_r; int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r; int32_t subr_r, subr_l; uint32_t t1, t2, HWM, t3; @@ -73,34 +73,34 @@ static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev, hev_r = hev_r & HWM; __asm__ __volatile__ ( - /* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */ - "subq_s.ph %[vp9_filter_l], %[vps1_l], %[vqs1_l] \n\t" - "subq_s.ph %[vp9_filter_r], %[vps1_r], %[vqs1_r] \n\t" + /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */ + "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t" + "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t" /* qs0 - ps0 */ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" - /* vp9_filter &= hev; */ - "and %[vp9_filter_l], %[vp9_filter_l], %[hev_l] \n\t" - "and %[vp9_filter_r], %[vp9_filter_r], %[hev_r] \n\t" + /* vpx_filter &= hev; */ + "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t" + "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t" - /* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */ - "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t" - "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t" + /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */ + "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" "xor %[invhev_l], %[hev_l], %[HWM] \n\t" - "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t" - "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t" + "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" "xor %[invhev_r], %[hev_r], %[HWM] \n\t" - "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t" - "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t" + "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" - /* vp9_filter &= mask; */ - "and %[vp9_filter_l], %[vp9_filter_l], %[mask_l] \n\t" - "and %[vp9_filter_r], %[vp9_filter_r], %[mask_r] \n\t" + /* vpx_filter &= mask; */ + "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t" + "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t" - : [vp9_filter_l] "=&r" (vp9_filter_l), - [vp9_filter_r] "=&r" (vp9_filter_r), + : [vpx_filter_l] "=&r" (vpx_filter_l), + [vpx_filter_r] "=&r" (vpx_filter_r), [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r), [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r) : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l), @@ -113,13 +113,13 @@ static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev, /* save bottom 3 bits so that we round one side +4 and the other +3 */ __asm__ __volatile__ ( - /* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >>= 3; */ - "addq_s.ph %[Filter1_l], %[vp9_filter_l], %[t2] \n\t" - "addq_s.ph %[Filter1_r], %[vp9_filter_r], %[t2] \n\t" + /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */ + "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t" + "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t" - /* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >>= 3; */ - "addq_s.ph %[Filter2_l], %[vp9_filter_l], %[t1] \n\t" - "addq_s.ph %[Filter2_r], %[vp9_filter_r], %[t1] \n\t" + /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */ + "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t" + "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t" "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" @@ -142,23 +142,23 @@ static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev, [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM), - [vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r) + [vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r) ); __asm__ __volatile__ ( - /* (vp9_filter += 1) >>= 1 */ + /* (vpx_filter += 1) >>= 1 */ "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t" "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t" - /* vp9_filter &= ~hev; */ + /* vpx_filter &= ~hev; */ "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t" "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t" - /* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */ + /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */ "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t" "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t" - /* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */ + /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */ "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t" "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t" @@ -196,12 +196,12 @@ static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev, *qs1 = vqs1 ^ N128; } -static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev, - uint32_t ps1, uint32_t ps0, - uint32_t qs0, uint32_t qs1, - uint32_t *p1_f0, uint32_t *p0_f0, - uint32_t *q0_f0, uint32_t *q1_f0) { - int32_t vp9_filter_l, vp9_filter_r; +static INLINE void filter1_dspr2(uint32_t mask, uint32_t hev, + uint32_t ps1, uint32_t ps0, + uint32_t qs0, uint32_t qs1, + uint32_t *p1_f0, uint32_t *p0_f0, + uint32_t *q0_f0, uint32_t *q1_f0) { + int32_t vpx_filter_l, vpx_filter_r; int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r; int32_t subr_r, subr_l; uint32_t t1, t2, HWM, t3; @@ -247,34 +247,34 @@ static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev, hev_r = hev_r & HWM; __asm__ __volatile__ ( - /* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */ - "subq_s.ph %[vp9_filter_l], %[vps1_l], %[vqs1_l] \n\t" - "subq_s.ph %[vp9_filter_r], %[vps1_r], %[vqs1_r] \n\t" + /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */ + "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t" + "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t" /* qs0 - ps0 */ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" - /* vp9_filter &= hev; */ - "and %[vp9_filter_l], %[vp9_filter_l], %[hev_l] \n\t" - "and %[vp9_filter_r], %[vp9_filter_r], %[hev_r] \n\t" + /* vpx_filter &= hev; */ + "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t" + "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t" - /* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */ - "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t" - "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t" + /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */ + "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" "xor %[invhev_l], %[hev_l], %[HWM] \n\t" - "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t" - "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t" + "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" "xor %[invhev_r], %[hev_r], %[HWM] \n\t" - "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t" - "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t" + "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" + "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" - /* vp9_filter &= mask; */ - "and %[vp9_filter_l], %[vp9_filter_l], %[mask_l] \n\t" - "and %[vp9_filter_r], %[vp9_filter_r], %[mask_r] \n\t" + /* vpx_filter &= mask; */ + "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t" + "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t" - : [vp9_filter_l] "=&r" (vp9_filter_l), - [vp9_filter_r] "=&r" (vp9_filter_r), + : [vpx_filter_l] "=&r" (vpx_filter_l), + [vpx_filter_r] "=&r" (vpx_filter_r), [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r), [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r) : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l), @@ -286,13 +286,13 @@ static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev, /* save bottom 3 bits so that we round one side +4 and the other +3 */ __asm__ __volatile__ ( - /* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >>= 3; */ - "addq_s.ph %[Filter1_l], %[vp9_filter_l], %[t2] \n\t" - "addq_s.ph %[Filter1_r], %[vp9_filter_r], %[t2] \n\t" + /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */ + "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t" + "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t" - /* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >>= 3; */ - "addq_s.ph %[Filter2_l], %[vp9_filter_l], %[t1] \n\t" - "addq_s.ph %[Filter2_r], %[vp9_filter_r], %[t1] \n\t" + /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */ + "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t" + "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t" "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" @@ -315,23 +315,23 @@ static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev, [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r), [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r) : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM), - [vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r) + [vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r) ); __asm__ __volatile__ ( - /* (vp9_filter += 1) >>= 1 */ + /* (vpx_filter += 1) >>= 1 */ "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t" "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t" - /* vp9_filter &= ~hev; */ + /* vpx_filter &= ~hev; */ "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t" "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t" - /* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */ + /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */ "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t" "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t" - /* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */ + /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */ "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t" "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t" @@ -369,10 +369,10 @@ static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev, *q1_f0 = vqs1 ^ N128; } -static INLINE void vp9_mbfilter_dspr2(uint32_t *op3, uint32_t *op2, - uint32_t *op1, uint32_t *op0, - uint32_t *oq0, uint32_t *oq1, - uint32_t *oq2, uint32_t *oq3) { +static INLINE void mbfilter_dspr2(uint32_t *op3, uint32_t *op2, + uint32_t *op1, uint32_t *op0, + uint32_t *oq0, uint32_t *oq1, + uint32_t *oq2, uint32_t *oq3) { /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */ const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; @@ -446,14 +446,14 @@ static INLINE void vp9_mbfilter_dspr2(uint32_t *op3, uint32_t *op2, *oq2 = res_oq2; } -static INLINE void vp9_mbfilter1_dspr2(uint32_t p3, uint32_t p2, - uint32_t p1, uint32_t p0, - uint32_t q0, uint32_t q1, - uint32_t q2, uint32_t q3, - uint32_t *op2_f1, - uint32_t *op1_f1, uint32_t *op0_f1, - uint32_t *oq0_f1, uint32_t *oq1_f1, - uint32_t *oq2_f1) { +static INLINE void mbfilter1_dspr2(uint32_t p3, uint32_t p2, + uint32_t p1, uint32_t p0, + uint32_t q0, uint32_t q1, + uint32_t q2, uint32_t q3, + uint32_t *op2_f1, + uint32_t *op1_f1, uint32_t *op0_f1, + uint32_t *oq0_f1, uint32_t *oq1_f1, + uint32_t *oq2_f1) { /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */ uint32_t res_op2, res_op1, res_op0; uint32_t res_oq0, res_oq1, res_oq2; @@ -524,14 +524,14 @@ static INLINE void vp9_mbfilter1_dspr2(uint32_t p3, uint32_t p2, *oq2_f1 = res_oq2; } -static INLINE void vp9_wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6, - uint32_t *op5, uint32_t *op4, - uint32_t *op3, uint32_t *op2, - uint32_t *op1, uint32_t *op0, - uint32_t *oq0, uint32_t *oq1, - uint32_t *oq2, uint32_t *oq3, - uint32_t *oq4, uint32_t *oq5, - uint32_t *oq6, uint32_t *oq7) { +static INLINE void wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6, + uint32_t *op5, uint32_t *op4, + uint32_t *op3, uint32_t *op2, + uint32_t *op1, uint32_t *op0, + uint32_t *oq0, uint32_t *oq1, + uint32_t *oq2, uint32_t *oq3, + uint32_t *oq4, uint32_t *oq5, + uint32_t *oq6, uint32_t *oq7) { const uint32_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4; const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; diff --git a/vpx_dsp/mips/loopfilter_masks_dspr2.h b/vpx_dsp/mips/loopfilter_masks_dspr2.h index 3a69441ba..e82dfb7eb 100644 --- a/vpx_dsp/mips/loopfilter_masks_dspr2.h +++ b/vpx_dsp/mips/loopfilter_masks_dspr2.h @@ -24,13 +24,13 @@ extern "C" { #if HAVE_DSPR2 /* processing 4 pixels at the same time * compute hev and mask in the same function */ -static INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit, - uint32_t p1, uint32_t p0, - uint32_t p3, uint32_t p2, - uint32_t q0, uint32_t q1, - uint32_t q2, uint32_t q3, - uint32_t thresh, uint32_t *hev, - uint32_t *mask) { +static INLINE void filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit, + uint32_t p1, uint32_t p0, + uint32_t p3, uint32_t p2, + uint32_t q0, uint32_t q1, + uint32_t q2, uint32_t q3, + uint32_t thresh, uint32_t *hev, + uint32_t *mask) { uint32_t c, r, r3, r_k; uint32_t s1, s2, s3; uint32_t ones = 0xFFFFFFFF; @@ -129,16 +129,16 @@ static INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit, *mask = s2; } -static INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit, - uint32_t flimit, - uint32_t thresh, - uint32_t p1, uint32_t p0, - uint32_t p3, uint32_t p2, - uint32_t q0, uint32_t q1, - uint32_t q2, uint32_t q3, - uint32_t *hev, - uint32_t *mask, - uint32_t *flat) { +static INLINE void filter_hev_mask_flatmask4_dspr2(uint32_t limit, + uint32_t flimit, + uint32_t thresh, + uint32_t p1, uint32_t p0, + uint32_t p3, uint32_t p2, + uint32_t q0, uint32_t q1, + uint32_t q2, uint32_t q3, + uint32_t *hev, + uint32_t *mask, + uint32_t *flat) { uint32_t c, r, r3, r_k, r_flat; uint32_t s1, s2, s3; uint32_t ones = 0xFFFFFFFF; @@ -279,12 +279,12 @@ static INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit, *flat = flat1; } -static INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3, - uint32_t p2, uint32_t p1, - uint32_t p0, uint32_t q0, - uint32_t q1, uint32_t q2, - uint32_t q3, uint32_t q4, - uint32_t *flat2) { +static INLINE void flatmask5(uint32_t p4, uint32_t p3, + uint32_t p2, uint32_t p1, + uint32_t p0, uint32_t q0, + uint32_t q1, uint32_t q2, + uint32_t q3, uint32_t q4, + uint32_t *flat2) { uint32_t c, r, r_k, r_flat; uint32_t ones = 0xFFFFFFFF; uint32_t flat_thresh = 0x01010101; diff --git a/vpx_dsp/mips/loopfilter_mb_dspr2.c b/vpx_dsp/mips/loopfilter_mb_dspr2.c index f0abf50ec..4138f5697 100644 --- a/vpx_dsp/mips/loopfilter_mb_dspr2.c +++ b/vpx_dsp/mips/loopfilter_mb_dspr2.c @@ -19,7 +19,7 @@ #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 -void vp9_lpf_horizontal_8_dspr2(unsigned char *s, +void vpx_lpf_horizontal_8_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, @@ -52,7 +52,7 @@ void vp9_lpf_horizontal_8_dspr2(unsigned char *s, ); /* prefetch data for store */ - vp9_prefetch_store(s); + prefetch_store(s); for (i = 0; i < 2; i++) { sp3 = s - (pitch << 2); @@ -80,13 +80,13 @@ void vp9_lpf_horizontal_8_dspr2(unsigned char *s, [sq3] "r" (sq3), [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0) ); - vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, - p1, p0, p3, p2, q0, q1, q2, q3, - &hev, &mask, &flat); + filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, + p1, p0, p3, p2, q0, q1, q2, q3, + &hev, &mask, &flat); if ((flat == 0) && (mask != 0)) { - vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, - &p1_f0, &p0_f0, &q0_f0, &q1_f0); + filter1_dspr2(mask, hev, p1, p0, q0, q1, + &p1_f0, &p0_f0, &q0_f0, &q1_f0); __asm__ __volatile__ ( "sw %[p1_f0], (%[sp1]) \n\t" @@ -103,13 +103,13 @@ void vp9_lpf_horizontal_8_dspr2(unsigned char *s, } else if ((mask & flat) == 0xFFFFFFFF) { /* left 2 element operation */ PACK_LEFT_0TO3() - vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l); + mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() - vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r); + mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r); COMBINE_LEFT_RIGHT_0TO2() @@ -129,18 +129,18 @@ void vp9_lpf_horizontal_8_dspr2(unsigned char *s, ); } else if ((flat != 0) && (mask != 0)) { /* filtering */ - vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, - &p1_f0, &p0_f0, &q0_f0, &q1_f0); + filter1_dspr2(mask, hev, p1, p0, q0, q1, + &p1_f0, &p0_f0, &q0_f0, &q1_f0); /* left 2 element operation */ PACK_LEFT_0TO3() - vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l); + mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() - vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r); + mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r); if (mask & flat & 0x000000FF) { __asm__ __volatile__ ( @@ -318,7 +318,7 @@ void vp9_lpf_horizontal_8_dspr2(unsigned char *s, } } -void vp9_lpf_vertical_8_dspr2(unsigned char *s, +void vpx_lpf_vertical_8_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, @@ -350,7 +350,7 @@ void vp9_lpf_vertical_8_dspr2(unsigned char *s, : [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit) ); - vp9_prefetch_store(s + pitch); + prefetch_store(s + pitch); for (i = 0; i < 2; i++) { s1 = s; @@ -450,39 +450,39 @@ void vp9_lpf_vertical_8_dspr2(unsigned char *s, : ); - vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, - p1, p0, p3, p2, q0, q1, q2, q3, - &hev, &mask, &flat); + filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, + p1, p0, p3, p2, q0, q1, q2, q3, + &hev, &mask, &flat); if ((flat == 0) && (mask != 0)) { - vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, - &p1_f0, &p0_f0, &q0_f0, &q1_f0); + filter1_dspr2(mask, hev, p1, p0, q0, q1, + &p1_f0, &p0_f0, &q0_f0, &q1_f0); STORE_F0() } else if ((mask & flat) == 0xFFFFFFFF) { /* left 2 element operation */ PACK_LEFT_0TO3() - vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l); + mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() - vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r); + mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r); STORE_F1() } else if ((flat != 0) && (mask != 0)) { - vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, - &p1_f0, &p0_f0, &q0_f0, &q1_f0); + filter1_dspr2(mask, hev, p1, p0, q0, q1, + &p1_f0, &p0_f0, &q0_f0, &q1_f0); /* left 2 element operation */ PACK_LEFT_0TO3() - vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l); + mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() - vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r); + mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r); if (mask & flat & 0x000000FF) { __asm__ __volatile__ ( diff --git a/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c index c5d4f11f6..8a4865073 100644 --- a/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c +++ b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c @@ -19,7 +19,7 @@ #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 -void vp9_lpf_horizontal_16_dspr2(unsigned char *s, +void vpx_lpf_horizontal_16_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, @@ -57,7 +57,7 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s, ); /* prefetch data for store */ - vp9_prefetch_store(s); + prefetch_store(s); for (i = 0; i < (2 * count); i++) { sp7 = s - (pitch << 3); @@ -109,17 +109,17 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s, [sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6), [sq7] "r" (sq7) ); - vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, - p1, p0, p3, p2, q0, q1, q2, q3, - &hev, &mask, &flat); + filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, + p1, p0, p3, p2, q0, q1, q2, q3, + &hev, &mask, &flat); - vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2); + flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2); /* f0 */ if (((flat2 == 0) && (flat == 0) && (mask != 0)) || ((flat2 != 0) && (flat == 0) && (mask != 0))) { - vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, - &p1_f0, &p0_f0, &q0_f0, &q1_f0); + filter1_dspr2(mask, hev, p1, p0, q0, q1, + &p1_f0, &p0_f0, &q0_f0, &q1_f0); __asm__ __volatile__ ( "sw %[p1_f0], (%[sp1]) \n\t" @@ -138,17 +138,17 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s, /* f2 */ PACK_LEFT_0TO3() PACK_LEFT_4TO7() - vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, - &p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l, - &q4_l, &q5_l, &q6_l, &q7_l); + wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, + &p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l, + &q4_l, &q5_l, &q6_l, &q7_l); PACK_RIGHT_0TO3() PACK_RIGHT_4TO7() - vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, - &p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r, - &q4_r, &q5_r, &q6_r, &q7_r); + wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, + &p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r, + &q4_r, &q5_r, &q6_r, &q7_r); COMBINE_LEFT_RIGHT_0TO2() COMBINE_LEFT_RIGHT_3TO6() @@ -188,13 +188,13 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s, /* f1 */ /* left 2 element operation */ PACK_LEFT_0TO3() - vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l); + mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() - vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r); + mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r); COMBINE_LEFT_RIGHT_0TO2() @@ -214,18 +214,18 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s, ); } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) { /* f0+f1 */ - vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, - &p1_f0, &p0_f0, &q0_f0, &q1_f0); + filter1_dspr2(mask, hev, p1, p0, q0, q1, + &p1_f0, &p0_f0, &q0_f0, &q1_f0); /* left 2 element operation */ PACK_LEFT_0TO3() - vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l); + mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() - vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r); + mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r); if (mask & flat & 0x000000FF) { __asm__ __volatile__ ( @@ -398,36 +398,36 @@ void vp9_lpf_horizontal_16_dspr2(unsigned char *s, } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) { /* f0 + f1 + f2 */ /* f0 function */ - vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, - &p1_f0, &p0_f0, &q0_f0, &q1_f0); + filter1_dspr2(mask, hev, p1, p0, q0, q1, + &p1_f0, &p0_f0, &q0_f0, &q1_f0); /* f1 function */ /* left 2 element operation */ PACK_LEFT_0TO3() - vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, - q0_l, q1_l, q2_l, q3_l, - &p2_l_f1, &p1_l_f1, &p0_l_f1, - &q0_l_f1, &q1_l_f1, &q2_l_f1); + mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, + q0_l, q1_l, q2_l, q3_l, + &p2_l_f1, &p1_l_f1, &p0_l_f1, + &q0_l_f1, &q1_l_f1, &q2_l_f1); /* right 2 element operation */ PACK_RIGHT_0TO3() - vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, - q0_r, q1_r, q2_r, q3_r, - &p2_r_f1, &p1_r_f1, &p0_r_f1, - &q0_r_f1, &q1_r_f1, &q2_r_f1); + mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, + q0_r, q1_r, q2_r, q3_r, + &p2_r_f1, &p1_r_f1, &p0_r_f1, + &q0_r_f1, &q1_r_f1, &q2_r_f1); /* f2 function */ PACK_LEFT_4TO7() - vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, - &p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l, - &q4_l, &q5_l, &q6_l, &q7_l); + wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, + &p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l, + &q4_l, &q5_l, &q6_l, &q7_l); PACK_RIGHT_4TO7() - vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, - &p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r, - &q4_r, &q5_r, &q6_r, &q7_r); + wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, + &p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r, + &q4_r, &q5_r, &q6_r, &q7_r); if (mask & flat & flat2 & 0x000000FF) { __asm__ __volatile__ ( diff --git a/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c b/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c index c9f6a1e63..e580f014e 100644 --- a/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c +++ b/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c @@ -19,7 +19,7 @@ #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 -void vp9_lpf_vertical_16_dspr2(uint8_t *s, +void vpx_lpf_vertical_16_dspr2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, @@ -54,7 +54,7 @@ void vp9_lpf_vertical_16_dspr2(uint8_t *s, : [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit) ); - vp9_prefetch_store(s + pitch); + prefetch_store(s + pitch); for (i = 0; i < 2; i++) { s1 = s; @@ -247,61 +247,61 @@ void vp9_lpf_vertical_16_dspr2(uint8_t *s, : ); - vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, - p1, p0, p3, p2, q0, q1, q2, q3, - &hev, &mask, &flat); + filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, + p1, p0, p3, p2, q0, q1, q2, q3, + &hev, &mask, &flat); - vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2); + flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2); /* f0 */ if (((flat2 == 0) && (flat == 0) && (mask != 0)) || ((flat2 != 0) && (flat == 0) && (mask != 0))) { - vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, - &p1_f0, &p0_f0, &q0_f0, &q1_f0); + filter1_dspr2(mask, hev, p1, p0, q0, q1, + &p1_f0, &p0_f0, &q0_f0, &q1_f0); STORE_F0() } else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) { /* f2 */ PACK_LEFT_0TO3() PACK_LEFT_4TO7() - vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, - &p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l, - &q4_l, &q5_l, &q6_l, &q7_l); + wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, + &p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l, + &q4_l, &q5_l, &q6_l, &q7_l); PACK_RIGHT_0TO3() PACK_RIGHT_4TO7() - vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, - &p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r, - &q4_r, &q5_r, &q6_r, &q7_r); + wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, + &p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r, + &q4_r, &q5_r, &q6_r, &q7_r); STORE_F2() } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) { /* f1 */ PACK_LEFT_0TO3() - vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l); + mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l); PACK_RIGHT_0TO3() - vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r); + mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r); STORE_F1() } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) { /* f0 + f1 */ - vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, - &p1_f0, &p0_f0, &q0_f0, &q1_f0); + filter1_dspr2(mask, hev, p1, p0, q0, q1, + &p1_f0, &p0_f0, &q0_f0, &q1_f0); /* left 2 element operation */ PACK_LEFT_0TO3() - vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l); + mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() - vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r); + mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r); if (mask & flat & 0x000000FF) { __asm__ __volatile__ ( @@ -465,32 +465,32 @@ void vp9_lpf_vertical_16_dspr2(uint8_t *s, } } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) { /* f0+f1+f2 */ - vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1, - &p1_f0, &p0_f0, &q0_f0, &q1_f0); + filter1_dspr2(mask, hev, p1, p0, q0, q1, + &p1_f0, &p0_f0, &q0_f0, &q1_f0); PACK_LEFT_0TO3() - vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, - q0_l, q1_l, q2_l, q3_l, - &p2_l_f1, &p1_l_f1, &p0_l_f1, - &q0_l_f1, &q1_l_f1, &q2_l_f1); + mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, + q0_l, q1_l, q2_l, q3_l, + &p2_l_f1, &p1_l_f1, &p0_l_f1, + &q0_l_f1, &q1_l_f1, &q2_l_f1); PACK_RIGHT_0TO3() - vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, - q0_r, q1_r, q2_r, q3_r, - &p2_r_f1, &p1_r_f1, &p0_r_f1, - &q0_r_f1, &q1_r_f1, &q2_r_f1); + mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, + q0_r, q1_r, q2_r, q3_r, + &p2_r_f1, &p1_r_f1, &p0_r_f1, + &q0_r_f1, &q1_r_f1, &q2_r_f1); PACK_LEFT_4TO7() - vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, - &p3_l, &p2_l, &p1_l, &p0_l, - &q0_l, &q1_l, &q2_l, &q3_l, - &q4_l, &q5_l, &q6_l, &q7_l); + wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, + &p3_l, &p2_l, &p1_l, &p0_l, + &q0_l, &q1_l, &q2_l, &q3_l, + &q4_l, &q5_l, &q6_l, &q7_l); PACK_RIGHT_4TO7() - vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, - &p3_r, &p2_r, &p1_r, &p0_r, - &q0_r, &q1_r, &q2_r, &q3_r, - &q4_r, &q5_r, &q6_r, &q7_r); + wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, + &p3_r, &p2_r, &p1_r, &p0_r, + &q0_r, &q1_r, &q2_r, &q3_r, + &q4_r, &q5_r, &q6_r, &q7_r); if (mask & flat & flat2 & 0x000000FF) { __asm__ __volatile__ ( diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 90666e048..2af0c71cc 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -38,77 +38,77 @@ if ($opts{arch} eq "x86_64") { # # Loopfilter # -add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; -specialize qw/vp9_lpf_vertical_16 sse2 neon_asm msa/; -$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon; +add_proto qw/void vpx_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vpx_lpf_vertical_16 sse2 neon_asm msa/; +$vpx_lpf_vertical_16_neon_asm=vpx_lpf_vertical_16_neon; -add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; -specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm msa/; -$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon; +add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm msa/; +$vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon; -add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_vertical_8 sse2 neon msa/; +add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vpx_lpf_vertical_8 sse2 neon msa/; -add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm msa/; -$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon; +add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm msa/; +$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon; -add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_vertical_4 mmx neon msa/; +add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vpx_lpf_vertical_4 mmx neon msa/; -add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_vertical_4_dual sse2 neon msa/; +add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vpx_lpf_vertical_4_dual sse2 neon msa/; -add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm msa/; -$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon; +add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm msa/; +$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon; -add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_8 sse2 neon msa/; +add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vpx_lpf_horizontal_8 sse2 neon msa/; -add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm msa/; -$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon; +add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm msa/; +$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon; -add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_4 mmx neon msa/; +add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vpx_lpf_horizontal_4 mmx neon msa/; -add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_horizontal_4_dual sse2 neon msa/; +add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vpx_lpf_horizontal_4_dual sse2 neon msa/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - add_proto qw/void vp9_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; - specialize qw/vp9_highbd_lpf_vertical_16 sse2/; + add_proto qw/void vpx_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/vpx_highbd_lpf_vertical_16 sse2/; - add_proto qw/void vp9_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; - specialize qw/vp9_highbd_lpf_vertical_16_dual sse2/; + add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/; - add_proto qw/void vp9_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vp9_highbd_lpf_vertical_8 sse2/; + add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + specialize qw/vpx_highbd_lpf_vertical_8 sse2/; - add_proto qw/void vp9_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; - specialize qw/vp9_highbd_lpf_vertical_8_dual sse2/; + add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; + specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/; - add_proto qw/void vp9_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vp9_highbd_lpf_vertical_4 sse2/; + add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + specialize qw/vpx_highbd_lpf_vertical_4 sse2/; - add_proto qw/void vp9_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; - specialize qw/vp9_highbd_lpf_vertical_4_dual sse2/; + add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; + specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/; - add_proto qw/void vp9_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vp9_highbd_lpf_horizontal_16 sse2/; + add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_16 sse2/; - add_proto qw/void vp9_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vp9_highbd_lpf_horizontal_8 sse2/; + add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_8 sse2/; - add_proto qw/void vp9_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; - specialize qw/vp9_highbd_lpf_horizontal_8_dual sse2/; + add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/; - add_proto qw/void vp9_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vp9_highbd_lpf_horizontal_4 sse2/; + add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_4 sse2/; - add_proto qw/void vp9_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; - specialize qw/vp9_highbd_lpf_horizontal_4_dual sse2/; + add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_4_dual sse2/; } # CONFIG_VP9_HIGHBITDEPTH if (vpx_config("CONFIG_ENCODERS") eq "yes") { diff --git a/vpx_dsp/x86/highbd_loopfilter_sse2.c b/vpx_dsp/x86/highbd_loopfilter_sse2.c index 358008f18..c4fd5e1a0 100644 --- a/vpx_dsp/x86/highbd_loopfilter_sse2.c +++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c @@ -508,7 +508,7 @@ static void highbd_mb_lpf_horizontal_edge_w_sse2_16(uint16_t *s, } // TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly. -void vp9_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, const uint8_t *_blimit, const uint8_t *_limit, const uint8_t *_thresh, @@ -519,7 +519,7 @@ void vp9_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd); } -void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, const uint8_t *_blimit, const uint8_t *_limit, const uint8_t *_thresh, @@ -687,7 +687,7 @@ void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, filt = _mm_adds_epi16(filt, work_a); filt = _mm_adds_epi16(filt, work_a); filt = _mm_adds_epi16(filt, work_a); - // (vp9_filter + 3 * (qs0 - ps0)) & mask + // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = signed_char_clamp_bd_sse2(filt, bd); filt = _mm_and_si128(filt, mask); @@ -756,7 +756,7 @@ void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, _mm_store_si128((__m128i *)(s + 2 * p), q2); } -void vp9_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0, const uint8_t *_thresh0, @@ -764,12 +764,12 @@ void vp9_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p, const uint8_t *_limit1, const uint8_t *_thresh1, int bd) { - vp9_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd); - vp9_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, + vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd); + vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1, bd); } -void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, const uint8_t *_blimit, const uint8_t *_limit, const uint8_t *_thresh, @@ -891,7 +891,7 @@ void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, filt = _mm_adds_epi16(filt, work_a); filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd); - // (vp9_filter + 3 * (qs0 - ps0)) & mask + // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd); @@ -936,7 +936,7 @@ void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, _mm_storeu_si128((__m128i *)(s + 1 * p), q1); } -void vp9_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0, const uint8_t *_thresh0, @@ -944,8 +944,8 @@ void vp9_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p, const uint8_t *_limit1, const uint8_t *_thresh1, int bd) { - vp9_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd); - vp9_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1, + vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd); + vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1, bd); } @@ -1054,7 +1054,7 @@ static INLINE void highbd_transpose8x16(uint16_t *in0, uint16_t *in1, highbd_transpose(src1, in_p, dest1, out_p, 1); } -void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, @@ -1071,7 +1071,7 @@ void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, highbd_transpose(src, p, dst, 8, 1); // Loop filtering - vp9_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1, + vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1, bd); src[0] = t_dst; @@ -1081,7 +1081,7 @@ void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, highbd_transpose(src, 8, dst, p, 1); } -void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, @@ -1097,7 +1097,7 @@ void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p, highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); // Loop filtering - vp9_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, + vpx_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bd); src[0] = t_dst; src[1] = t_dst + 8; @@ -1108,7 +1108,7 @@ void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p, highbd_transpose(src, 16, dst, p, 2); } -void vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, @@ -1125,7 +1125,7 @@ void vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, highbd_transpose(src, p, dst, 8, 1); // Loop filtering - vp9_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1, + vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1, bd); src[0] = t_dst; @@ -1135,7 +1135,7 @@ void vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, highbd_transpose(src, 8, dst, p, 1); } -void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, @@ -1151,7 +1151,7 @@ void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p, highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); // Loop filtering - vp9_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, + vpx_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bd); src[0] = t_dst; src[1] = t_dst + 8; @@ -1163,7 +1163,7 @@ void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p, highbd_transpose(src, 16, dst, p, 2); } -void vp9_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, @@ -1192,7 +1192,7 @@ void vp9_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, highbd_transpose(src, 8, dst, p, 2); } -void vp9_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, +void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, diff --git a/vpx_dsp/x86/loopfilter_avx2.c b/vpx_dsp/x86/loopfilter_avx2.c index 36f59c9e5..23a97dd05 100644 --- a/vpx_dsp/x86/loopfilter_avx2.c +++ b/vpx_dsp/x86/loopfilter_avx2.c @@ -103,7 +103,7 @@ static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - /* (vp9_filter + 3 * (qs0 - ps0)) & mask */ + /* (vpx_filter + 3 * (qs0 - ps0)) & mask */ filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); @@ -515,7 +515,7 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - /* (vp9_filter + 3 * (qs0 - ps0)) & mask */ + /* (vpx_filter + 3 * (qs0 - ps0)) & mask */ filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); @@ -976,7 +976,7 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, } } -void vp9_lpf_horizontal_16_avx2(unsigned char *s, int p, +void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p, const unsigned char *_blimit, const unsigned char *_limit, const unsigned char *_thresh, int count) { if (count == 1) diff --git a/vpx_dsp/x86/loopfilter_mmx.asm b/vpx_dsp/x86/loopfilter_mmx.asm index f5f7d5af7..b9c18b680 100644 --- a/vpx_dsp/x86/loopfilter_mmx.asm +++ b/vpx_dsp/x86/loopfilter_mmx.asm @@ -12,7 +12,7 @@ %include "vpx_ports/x86_abi_support.asm" -;void vp9_lpf_horizontal_4_mmx +;void vpx_lpf_horizontal_4_mmx ;( ; unsigned char *src_ptr, ; int src_pixel_step, @@ -21,8 +21,8 @@ ; const char *thresh, ; int count ;) -global sym(vp9_lpf_horizontal_4_mmx) PRIVATE -sym(vp9_lpf_horizontal_4_mmx): +global sym(vpx_lpf_horizontal_4_mmx) PRIVATE +sym(vpx_lpf_horizontal_4_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 @@ -224,7 +224,7 @@ sym(vp9_lpf_horizontal_4_mmx): ret -;void vp9_lpf_vertical_4_mmx +;void vpx_lpf_vertical_4_mmx ;( ; unsigned char *src_ptr, ; int src_pixel_step, @@ -233,8 +233,8 @@ sym(vp9_lpf_horizontal_4_mmx): ; const char *thresh, ; int count ;) -global sym(vp9_lpf_vertical_4_mmx) PRIVATE -sym(vp9_lpf_vertical_4_mmx): +global sym(vpx_lpf_vertical_4_mmx) PRIVATE +sym(vpx_lpf_vertical_4_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 diff --git a/vpx_dsp/x86/loopfilter_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c index 977f46b6f..ed1012736 100644 --- a/vpx_dsp/x86/loopfilter_sse2.c +++ b/vpx_dsp/x86/loopfilter_sse2.c @@ -100,7 +100,7 @@ static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - // (vp9_filter + 3 * (qs0 - ps0)) & mask + // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); @@ -495,7 +495,7 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - // (vp9_filter + 3 * (qs0 - ps0)) & mask + // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); @@ -717,7 +717,7 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s, } // TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly. -void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p, +void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, const unsigned char *_blimit, const unsigned char *_limit, const unsigned char *_thresh, int count) { @@ -727,7 +727,7 @@ void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p, mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh); } -void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p, +void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, const unsigned char *_blimit, const unsigned char *_limit, const unsigned char *_thresh, int count) { @@ -874,7 +874,7 @@ void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - // (vp9_filter + 3 * (qs0 - ps0)) & mask + // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); @@ -943,7 +943,7 @@ void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p, } } -void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, +void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0, const uint8_t *_thresh0, @@ -1115,7 +1115,7 @@ void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - // (vp9_filter + 3 * (qs0 - ps0)) & mask + // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); @@ -1190,7 +1190,7 @@ void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, } } -void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, +void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, const unsigned char *_blimit0, const unsigned char *_limit0, const unsigned char *_thresh0, @@ -1286,7 +1286,7 @@ void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); - // (vp9_filter + 3 * (qs0 - ps0)) & mask + // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); @@ -1464,7 +1464,7 @@ static INLINE void transpose(unsigned char *src[], int in_p, } while (++idx8x8 < num_8x8_to_transpose); } -void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, +void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, @@ -1478,7 +1478,7 @@ void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); // Loop filtering - vp9_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, + vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, blimit1, limit1, thresh1); src[0] = t_dst; src[1] = t_dst + 8; @@ -1489,7 +1489,7 @@ void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, transpose(src, 16, dst, p, 2); } -void vp9_lpf_vertical_8_sse2(unsigned char *s, int p, +void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { @@ -1505,7 +1505,7 @@ void vp9_lpf_vertical_8_sse2(unsigned char *s, int p, transpose(src, p, dst, 8, 1); // Loop filtering - vp9_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1); + vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1); src[0] = t_dst; dst[0] = s - 4; @@ -1514,7 +1514,7 @@ void vp9_lpf_vertical_8_sse2(unsigned char *s, int p, transpose(src, 8, dst, p, 1); } -void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, +void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, @@ -1528,7 +1528,7 @@ void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); // Loop filtering - vp9_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, + vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, blimit1, limit1, thresh1); src[0] = t_dst; src[1] = t_dst + 8; @@ -1540,7 +1540,7 @@ void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, transpose(src, 16, dst, p, 2); } -void vp9_lpf_vertical_16_sse2(unsigned char *s, int p, +void vpx_lpf_vertical_16_sse2(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh) { @@ -1568,7 +1568,7 @@ void vp9_lpf_vertical_16_sse2(unsigned char *s, int p, transpose(src, 8, dst, p, 2); } -void vp9_lpf_vertical_16_dual_sse2(unsigned char *s, int p, +void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { DECLARE_ALIGNED(16, unsigned char, t_dst[256]); -- 2.40.0