From 50adfdf5baf4243eecacb94282141568ef4b666a Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Wed, 15 Jul 2015 19:14:54 -0700 Subject: [PATCH] Migrate loop filter functions from vp9/ to vpx_dsp/ The various tap loop filter operations are common functions across codecs. This commit moves them along with SIMD optimizations to the vpx_dsp folder. Change-Id: Ia5fa0b2e5289cdb98467502a549c380b9c60e92c --- test/lpf_8_test.cc | 4 +- vp9/common/vp9_loopfilter.c | 1 + vp9/common/vp9_rtcd_defs.pl | 79 ------------------- vp9/vp9_common.mk | 19 ----- .../arm/loopfilter_16_neon.asm | 0 .../arm/loopfilter_16_neon.c | 2 +- .../arm/loopfilter_4_neon.asm | 0 .../arm/loopfilter_4_neon.c | 18 ++--- .../arm/loopfilter_8_neon.asm | 0 .../arm/loopfilter_8_neon.c | 18 ++--- .../arm/loopfilter_mb_neon.asm | 0 .../arm/loopfilter_neon.c | 2 +- .../loopfilter.c | 2 +- .../mips/loopfilter_16_msa.c | 2 +- .../mips/loopfilter_4_msa.c | 2 +- .../mips/loopfilter_8_msa.c | 2 +- .../mips/loopfilter_msa.h | 6 +- vpx_dsp/vpx_dsp.mk | 30 +++++++ vpx_dsp/vpx_dsp_rtcd_defs.pl | 76 ++++++++++++++++++ .../x86/highbd_loopfilter_sse2.c | 2 +- .../x86/loopfilter_avx2.c | 2 +- .../x86/loopfilter_mmx.asm | 0 .../x86/loopfilter_sse2.c | 4 +- 23 files changed, 140 insertions(+), 131 deletions(-) rename vp9/common/arm/neon/vp9_loopfilter_16_neon_asm.asm => vpx_dsp/arm/loopfilter_16_neon.asm (100%) rename vp9/common/arm/neon/vp9_loopfilter_16_neon.c => vpx_dsp/arm/loopfilter_16_neon.c (99%) rename vp9/common/arm/neon/vp9_loopfilter_4_neon_asm.asm => vpx_dsp/arm/loopfilter_4_neon.asm (100%) rename vp9/common/arm/neon/vp9_loopfilter_4_neon.c => vpx_dsp/arm/loopfilter_4_neon.c (96%) rename vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm => vpx_dsp/arm/loopfilter_8_neon.asm (100%) rename vp9/common/arm/neon/vp9_loopfilter_8_neon.c => vpx_dsp/arm/loopfilter_8_neon.c (98%) rename vp9/common/arm/neon/vp9_mb_lpf_neon.asm => vpx_dsp/arm/loopfilter_mb_neon.asm (100%) rename vp9/common/arm/neon/vp9_loopfilter_neon.c => 
vpx_dsp/arm/loopfilter_neon.c (98%) rename vp9/common/vp9_loopfilter_filters.c => vpx_dsp/loopfilter.c (99%) rename vp9/common/mips/msa/vp9_loopfilter_16_msa.c => vpx_dsp/mips/loopfilter_16_msa.c (99%) rename vp9/common/mips/msa/vp9_loopfilter_4_msa.c => vpx_dsp/mips/loopfilter_4_msa.c (99%) rename vp9/common/mips/msa/vp9_loopfilter_8_msa.c => vpx_dsp/mips/loopfilter_8_msa.c (99%) rename vp9/common/mips/msa/vp9_loopfilter_msa.h => vpx_dsp/mips/loopfilter_msa.h (99%) rename vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c => vpx_dsp/x86/highbd_loopfilter_sse2.c (99%) rename vp9/common/x86/vp9_loopfilter_intrin_avx2.c => vpx_dsp/x86/loopfilter_avx2.c (99%) rename vp9/common/x86/vp9_loopfilter_mmx.asm => vpx_dsp/x86/loopfilter_mmx.asm (100%) rename vp9/common/x86/vp9_loopfilter_intrin_sse2.c => vpx_dsp/x86/loopfilter_sse2.c (99%) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index ba51309ad..2a251c13d 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -19,7 +19,7 @@ #include "test/util.h" #include "./vpx_config.h" -#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_loopfilter.h" #include "vpx/vpx_integer.h" @@ -665,11 +665,11 @@ INSTANTIATE_TEST_CASE_P( &wrapper_vertical_16_c, 8, 1), make_tuple(&wrapper_vertical_16_dual_neon, &wrapper_vertical_16_dual_c, 8, 1), +#endif // HAVE_NEON_ASM make_tuple(&vp9_lpf_horizontal_8_neon, &vp9_lpf_horizontal_8_c, 8, 1), make_tuple(&vp9_lpf_vertical_8_neon, &vp9_lpf_vertical_8_c, 8, 1), -#endif // HAVE_NEON_ASM make_tuple(&vp9_lpf_horizontal_4_neon, &vp9_lpf_horizontal_4_c, 8, 1), make_tuple(&vp9_lpf_vertical_4_neon, diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 981672836..5e35fc51d 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -9,6 +9,7 @@ */ #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" #include 
"vp9/common/vp9_reconinter.h" diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 538f1ed7e..c54409f85 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -219,49 +219,6 @@ specialize qw/vp9_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc"; add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc"; -# -# Loopfilter -# -add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; -specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2 msa/; -$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon; - -add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; -specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/; -$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon; - -add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2 msa/; -$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon; - -add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/; -$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon; - -add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_vertical_4 mmx neon dspr2 msa/; - -add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t 
*blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2 msa/; - -add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/; -$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon; - -add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2 msa/; -$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon; - -add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/; -$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon; - -add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2 msa/; - -add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; -specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2 msa/; - # # post proc # @@ -667,42 +624,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps"; specialize qw/vp9_highbd_convolve8_avg_vert/, "$sse2_x86_64"; - # - # Loopfilter - # - add_proto qw/void vp9_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const 
uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; - specialize qw/vp9_highbd_lpf_vertical_16 sse2/; - - add_proto qw/void vp9_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; - specialize qw/vp9_highbd_lpf_vertical_16_dual sse2/; - - add_proto qw/void vp9_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vp9_highbd_lpf_vertical_8 sse2/; - - add_proto qw/void vp9_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; - specialize qw/vp9_highbd_lpf_vertical_8_dual sse2/; - - add_proto qw/void vp9_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vp9_highbd_lpf_vertical_4 sse2/; - - add_proto qw/void vp9_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; - specialize qw/vp9_highbd_lpf_vertical_4_dual sse2/; - - add_proto qw/void vp9_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vp9_highbd_lpf_horizontal_16 sse2/; - - add_proto qw/void vp9_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vp9_highbd_lpf_horizontal_8 sse2/; - - add_proto qw/void vp9_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; - 
specialize qw/vp9_highbd_lpf_horizontal_8_dual sse2/; - - add_proto qw/void vp9_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vp9_highbd_lpf_horizontal_4 sse2/; - - add_proto qw/void vp9_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; - specialize qw/vp9_highbd_lpf_horizontal_4_dual sse2/; - # # post proc # diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index bcab5582c..9e27257c6 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -54,7 +54,6 @@ VP9_COMMON_SRCS-yes += common/vp9_textblit.h VP9_COMMON_SRCS-yes += common/vp9_tile_common.h VP9_COMMON_SRCS-yes += common/vp9_tile_common.c VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c -VP9_COMMON_SRCS-yes += common/vp9_loopfilter_filters.c VP9_COMMON_SRCS-yes += common/vp9_thread_common.c VP9_COMMON_SRCS-yes += common/vp9_mvref_common.c VP9_COMMON_SRCS-yes += common/vp9_mvref_common.h @@ -69,14 +68,11 @@ VP9_COMMON_SRCS-yes += common/vp9_scan.h VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/convolve.h VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c -VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c -VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_loopfilter_intrin_avx2.c VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_sse2.asm -VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm 
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm @@ -95,7 +91,6 @@ VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm endif ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) -VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_loopfilter_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_8t_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_bilinear_sse2.asm ifeq ($(CONFIG_USE_X86INC),yes) @@ -147,10 +142,6 @@ VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct_msa.h VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_intra_predict_msa.c -VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_4_msa.c -VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_8_msa.c -VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_16_msa.c -VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_msa.h ifeq ($(CONFIG_VP9_POSTPROC),yes) VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c @@ -165,16 +156,12 @@ VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3_x86_64.asm endif endif -VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon_asm$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_8_neon_asm$(ASM) -VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_mb_lpf_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM) ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c endif -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon.c # neon with assembly and intrinsics implementations. If both are available # prefer assembly. 
@@ -193,7 +180,6 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon_asm$(ASM) VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon_asm$(ASM) VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon_asm$(ASM) VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon_asm$(ASM) -VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_4_neon_asm$(ASM) VP9_COMMON_SRCS-yes += common/arm/neon/vp9_reconintra_neon_asm$(ASM) else ifeq ($(HAVE_NEON), yes) @@ -211,11 +197,6 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon.c VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon.c VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon.c VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon.c -VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_16_neon.c -VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_4_neon.c -# TODO(johannkoenig): re-enable when chromium build is fixed -# # https://code.google.com/p/chromium/issues/detail?id=443839 -#VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_8_neon.c endif # HAVE_NEON endif # HAVE_NEON_ASM diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon_asm.asm b/vpx_dsp/arm/loopfilter_16_neon.asm similarity index 100% rename from vp9/common/arm/neon/vp9_loopfilter_16_neon_asm.asm rename to vpx_dsp/arm/loopfilter_16_neon.asm diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c b/vpx_dsp/arm/loopfilter_16_neon.c similarity index 99% rename from vp9/common/arm/neon/vp9_loopfilter_16_neon.c rename to vpx_dsp/arm/loopfilter_16_neon.c index c69ee1009..27c282763 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c +++ b/vpx_dsp/arm/loopfilter_16_neon.c @@ -10,7 +10,7 @@ #include -#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" diff --git a/vp9/common/arm/neon/vp9_loopfilter_4_neon_asm.asm b/vpx_dsp/arm/loopfilter_4_neon.asm similarity index 100% rename from vp9/common/arm/neon/vp9_loopfilter_4_neon_asm.asm 
rename to vpx_dsp/arm/loopfilter_4_neon.asm diff --git a/vp9/common/arm/neon/vp9_loopfilter_4_neon.c b/vpx_dsp/arm/loopfilter_4_neon.c similarity index 96% rename from vp9/common/arm/neon/vp9_loopfilter_4_neon.c rename to vpx_dsp/arm/loopfilter_4_neon.c index fd9db6187..476ed3412 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_4_neon.c +++ b/vpx_dsp/arm/loopfilter_4_neon.c @@ -10,7 +10,7 @@ #include -#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" static INLINE void vp9_loop_filter_neon( uint8x8_t dblimit, // flimit @@ -111,11 +111,11 @@ static INLINE void vp9_loop_filter_neon( } void vp9_lpf_horizontal_4_neon( - unsigned char *src, + uint8_t *src, int pitch, - unsigned char *blimit, - unsigned char *limit, - unsigned char *thresh, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int count) { int i; uint8_t *s, *psrc; @@ -166,11 +166,11 @@ void vp9_lpf_horizontal_4_neon( } void vp9_lpf_vertical_4_neon( - unsigned char *src, + uint8_t *src, int pitch, - unsigned char *blimit, - unsigned char *limit, - unsigned char *thresh, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int count) { int i, pitch8; uint8_t *s; diff --git a/vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm b/vpx_dsp/arm/loopfilter_8_neon.asm similarity index 100% rename from vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm rename to vpx_dsp/arm/loopfilter_8_neon.asm diff --git a/vp9/common/arm/neon/vp9_loopfilter_8_neon.c b/vpx_dsp/arm/loopfilter_8_neon.c similarity index 98% rename from vp9/common/arm/neon/vp9_loopfilter_8_neon.c rename to vpx_dsp/arm/loopfilter_8_neon.c index 33068a8a2..2179e5fe5 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_8_neon.c +++ b/vpx_dsp/arm/loopfilter_8_neon.c @@ -10,7 +10,7 @@ #include -#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" static INLINE void vp9_mbloop_filter_neon( uint8x8_t dblimit, // mblimit @@ -264,11 +264,11 @@ static INLINE void vp9_mbloop_filter_neon( } void vp9_lpf_horizontal_8_neon( 
- unsigned char *src, + uint8_t *src, int pitch, - unsigned char *blimit, - unsigned char *limit, - unsigned char *thresh, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int count) { int i; uint8_t *s, *psrc; @@ -324,11 +324,11 @@ void vp9_lpf_horizontal_8_neon( } void vp9_lpf_vertical_8_neon( - unsigned char *src, + uint8_t *src, int pitch, - unsigned char *blimit, - unsigned char *limit, - unsigned char *thresh, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int count) { int i; uint8_t *s; diff --git a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm b/vpx_dsp/arm/loopfilter_mb_neon.asm similarity index 100% rename from vp9/common/arm/neon/vp9_mb_lpf_neon.asm rename to vpx_dsp/arm/loopfilter_mb_neon.asm diff --git a/vp9/common/arm/neon/vp9_loopfilter_neon.c b/vpx_dsp/arm/loopfilter_neon.c similarity index 98% rename from vp9/common/arm/neon/vp9_loopfilter_neon.c rename to vpx_dsp/arm/loopfilter_neon.c index 31fcc63ba..9ef322933 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_neon.c +++ b/vpx_dsp/arm/loopfilter_neon.c @@ -10,7 +10,7 @@ #include -#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" diff --git a/vp9/common/vp9_loopfilter_filters.c b/vpx_dsp/loopfilter.c similarity index 99% rename from vp9/common/vp9_loopfilter_filters.c rename to vpx_dsp/loopfilter.c index 4876f9e3e..73b448e93 100644 --- a/vp9/common/vp9_loopfilter_filters.c +++ b/vpx_dsp/loopfilter.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source diff --git a/vp9/common/mips/msa/vp9_loopfilter_16_msa.c b/vpx_dsp/mips/loopfilter_16_msa.c similarity index 99% rename from vp9/common/mips/msa/vp9_loopfilter_16_msa.c rename to vpx_dsp/mips/loopfilter_16_msa.c index aeaa48e4e..8892b618d 100644 --- a/vp9/common/mips/msa/vp9_loopfilter_16_msa.c +++ b/vpx_dsp/mips/loopfilter_16_msa.c @@ -9,7 +9,7 @@ */ #include "vpx_ports/mem.h" -#include "vp9/common/mips/msa/vp9_loopfilter_msa.h" +#include "vpx_dsp/mips/loopfilter_msa.h" int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, uint8_t *filter48, diff --git a/vp9/common/mips/msa/vp9_loopfilter_4_msa.c b/vpx_dsp/mips/loopfilter_4_msa.c similarity index 99% rename from vp9/common/mips/msa/vp9_loopfilter_4_msa.c rename to vpx_dsp/mips/loopfilter_4_msa.c index 7f691355a..24db78705 100644 --- a/vp9/common/mips/msa/vp9_loopfilter_4_msa.c +++ b/vpx_dsp/mips/loopfilter_4_msa.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "vp9/common/mips/msa/vp9_loopfilter_msa.h" +#include "vpx_dsp/mips/loopfilter_msa.h" void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, diff --git a/vp9/common/mips/msa/vp9_loopfilter_8_msa.c b/vpx_dsp/mips/loopfilter_8_msa.c similarity index 99% rename from vp9/common/mips/msa/vp9_loopfilter_8_msa.c rename to vpx_dsp/mips/loopfilter_8_msa.c index 26a858d6e..85c34ebfe 100644 --- a/vp9/common/mips/msa/vp9_loopfilter_8_msa.c +++ b/vpx_dsp/mips/loopfilter_8_msa.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "vp9/common/mips/msa/vp9_loopfilter_msa.h" +#include "vpx_dsp/mips/loopfilter_msa.h" void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, diff --git a/vp9/common/mips/msa/vp9_loopfilter_msa.h b/vpx_dsp/mips/loopfilter_msa.h similarity index 99% rename from vp9/common/mips/msa/vp9_loopfilter_msa.h rename to vpx_dsp/mips/loopfilter_msa.h index bfbe8708f..62b170610 100644 --- a/vp9/common/mips/msa/vp9_loopfilter_msa.h +++ b/vpx_dsp/mips/loopfilter_msa.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_ -#define VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_ +#ifndef VPX_DSP_LOOPFILTER_MSA_H_ +#define VPX_DSP_LOOPFILTER_MSA_H_ #include "vpx_dsp/mips/macros_msa.h" @@ -243,4 +243,4 @@ mask_out = limit_in < (v16u8)mask_out; \ mask_out = __msa_xori_b(mask_out, 0xff); \ } -#endif /* VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_ */ +#endif /* VPX_DSP_LOOPFILTER_MSA_H_ */ diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 22d1fed61..4a43c98ff 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -13,6 +13,36 @@ DSP_SRCS-yes += vpx_dsp_common.h DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h +# loop filters +DSP_SRCS-yes += loopfilter.c + +DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/loopfilter_sse2.c +DSP_SRCS-$(HAVE_AVX2) += x86/loopfilter_avx2.c +DSP_SRCS-$(HAVE_MMX) += x86/loopfilter_mmx.asm + +DSP_SRCS-$(HAVE_NEON) += arm/loopfilter_neon.c +ifeq ($(HAVE_NEON_ASM),yes) +DSP_SRCS-yes += arm/loopfilter_mb_neon$(ASM) +DSP_SRCS-yes += arm/loopfilter_16_neon$(ASM) +DSP_SRCS-yes += arm/loopfilter_8_neon$(ASM) +DSP_SRCS-yes += arm/loopfilter_4_neon$(ASM) +else +ifeq ($(HAVE_NEON),yes) +DSP_SRCS-yes += arm/loopfilter_16_neon.c +DSP_SRCS-yes += arm/loopfilter_8_neon.c +DSP_SRCS-yes += arm/loopfilter_4_neon.c +endif # HAVE_NEON +endif # HAVE_NEON_ASM + +DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_msa.h +DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_16_msa.c 
+DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_8_msa.c +DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_4_msa.c + +ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) +DSP_SRCS-$(HAVE_SSE2) += x86/highbd_loopfilter_sse2.c +endif # CONFIG_VP9_HIGHBITDEPTH + ifeq ($(CONFIG_ENCODERS),yes) DSP_SRCS-yes += sad.c DSP_SRCS-yes += subtract.c diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 8e4e96634..90666e048 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -35,6 +35,82 @@ if ($opts{arch} eq "x86_64") { $avx_x86_64 = $avx2_x86_64 = ''; } +# +# Loopfilter +# +add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vp9_lpf_vertical_16 sse2 neon_asm msa/; +$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon; + +add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm msa/; +$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon; + +add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_vertical_8 sse2 neon msa/; + +add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm msa/; +$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon; + +add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_vertical_4 mmx neon msa/; + +add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, 
const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_vertical_4_dual sse2 neon msa/; + +add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm msa/; +$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon; + +add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_8 sse2 neon msa/; + +add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm msa/; +$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon; + +add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_4 mmx neon msa/; + +add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_horizontal_4_dual sse2 neon msa/; + +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vp9_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/vp9_highbd_lpf_vertical_16 sse2/; + + add_proto qw/void vp9_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/vp9_highbd_lpf_vertical_16_dual sse2/; + + add_proto qw/void vp9_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t 
*thresh, int count, int bd"; + specialize qw/vp9_highbd_lpf_vertical_8 sse2/; + + add_proto qw/void vp9_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; + specialize qw/vp9_highbd_lpf_vertical_8_dual sse2/; + + add_proto qw/void vp9_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + specialize qw/vp9_highbd_lpf_vertical_4 sse2/; + + add_proto qw/void vp9_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; + specialize qw/vp9_highbd_lpf_vertical_4_dual sse2/; + + add_proto qw/void vp9_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + specialize qw/vp9_highbd_lpf_horizontal_16 sse2/; + + add_proto qw/void vp9_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + specialize qw/vp9_highbd_lpf_horizontal_8 sse2/; + + add_proto qw/void vp9_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; + specialize qw/vp9_highbd_lpf_horizontal_8_dual sse2/; + + add_proto qw/void vp9_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + specialize qw/vp9_highbd_lpf_horizontal_4 sse2/; + + add_proto qw/void vp9_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, 
const uint8_t *thresh1, int bd"; + specialize qw/vp9_highbd_lpf_horizontal_4_dual sse2/; +} # CONFIG_VP9_HIGHBITDEPTH + if (vpx_config("CONFIG_ENCODERS") eq "yes") { # # Block subtraction diff --git a/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c b/vpx_dsp/x86/highbd_loopfilter_sse2.c similarity index 99% rename from vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c rename to vpx_dsp/x86/highbd_loopfilter_sse2.c index b40669c63..8cddf9b69 100644 --- a/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c +++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c @@ -10,7 +10,7 @@ #include // SSE2 -#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #include "vp9/common/vp9_loopfilter.h" #include "vpx_ports/emmintrin_compat.h" diff --git a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c b/vpx_dsp/x86/loopfilter_avx2.c similarity index 99% rename from vp9/common/x86/vp9_loopfilter_intrin_avx2.c rename to vpx_dsp/x86/loopfilter_avx2.c index 770a65f4c..36f59c9e5 100644 --- a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c +++ b/vpx_dsp/x86/loopfilter_avx2.c @@ -10,7 +10,7 @@ #include /* AVX2 */ -#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p, diff --git a/vp9/common/x86/vp9_loopfilter_mmx.asm b/vpx_dsp/x86/loopfilter_mmx.asm similarity index 100% rename from vp9/common/x86/vp9_loopfilter_mmx.asm rename to vpx_dsp/x86/loopfilter_mmx.asm diff --git a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c similarity index 99% rename from vp9/common/x86/vp9_loopfilter_intrin_sse2.c rename to vpx_dsp/x86/loopfilter_sse2.c index fe8af5463..977f46b6f 100644 --- a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c +++ b/vpx_dsp/x86/loopfilter_sse2.c @@ -10,8 +10,8 @@ #include // SSE2 -#include "./vp9_rtcd.h" -#include "vp9/common/vp9_loopfilter.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx_ports/mem.h" #include "vpx_ports/emmintrin_compat.h" static INLINE 
__m128i abs_diff(__m128i a, __m128i b) { -- 2.40.0