granicus.if.org Git - libvpx/commitdiff
Code refactor on InterpKernel
authorZoe Liu <zoeliu@google.com>
Wed, 22 Jul 2015 17:40:42 +0000 (10:40 -0700)
committerZoe Liu <zoeliu@google.com>
Fri, 31 Jul 2015 17:27:33 +0000 (10:27 -0700)
It in essence refactors the code for both the interpolation
filtering and the convolution. This change includes the moving
of all the files as well as the changing of the code from vp9_
prefix to vpx_ prefix accordingly, for underneath architectures:
(1) x86;
(2) arm/neon; and
(3) mips/msa.
The work on mips/dspr2 will be done in a separate change list.

Change-Id: Ic3ce7fb7f81210db7628b373c73553db68793c46

57 files changed:
test/convolve_test.cc
vp9/common/arm/neon/vp9_convolve8_avg_neon.c [deleted file]
vp9/common/arm/neon/vp9_convolve8_neon.c [deleted file]
vp9/common/arm/neon/vp9_convolve_avg_neon.c [deleted file]
vp9/common/arm/neon/vp9_copy_neon.c [deleted file]
vp9/common/vp9_entropymode.h
vp9/common/vp9_filter.h
vp9/common/vp9_idct.h
vp9/common/vp9_reconinter.c
vp9/common/vp9_reconinter.h
vp9/common/vp9_reconintra.c
vp9/common/vp9_rtcd_defs.pl
vp9/common/vp9_scale.c
vp9/common/vp9_scale.h
vp9/decoder/vp9_decodeframe.c
vp9/encoder/vp9_blockiness.c
vp9/encoder/vp9_denoiser.c
vp9/encoder/vp9_encoder.c
vp9/encoder/vp9_pickmode.c
vp9/encoder/vp9_resize.c
vp9/vp9_common.mk
vpx_dsp/arm/vpx_convolve8_avg_neon.c [new file with mode: 0644]
vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm [moved from vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm with 92% similarity]
vpx_dsp/arm/vpx_convolve8_neon.c [new file with mode: 0644]
vpx_dsp/arm/vpx_convolve8_neon_asm.asm [moved from vp9/common/arm/neon/vp9_convolve8_neon_asm.asm with 92% similarity]
vpx_dsp/arm/vpx_convolve_avg_neon.c [new file with mode: 0644]
vpx_dsp/arm/vpx_convolve_avg_neon_asm.asm [moved from vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm with 98% similarity]
vpx_dsp/arm/vpx_convolve_copy_neon.c [new file with mode: 0644]
vpx_dsp/arm/vpx_convolve_copy_neon_asm.asm [moved from vp9/common/arm/neon/vp9_copy_neon_asm.asm with 97% similarity]
vpx_dsp/arm/vpx_convolve_neon.c [moved from vp9/common/arm/neon/vp9_convolve_neon.c with 85% similarity]
vpx_dsp/loopfilter.c
vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c [moved from vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c with 98% similarity]
vpx_dsp/mips/vpx_convolve8_avg_msa.c [moved from vp9/common/mips/msa/vp9_convolve8_avg_msa.c with 98% similarity]
vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c [moved from vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c with 98% similarity]
vpx_dsp/mips/vpx_convolve8_horiz_msa.c [moved from vp9/common/mips/msa/vp9_convolve8_horiz_msa.c with 98% similarity]
vpx_dsp/mips/vpx_convolve8_msa.c [moved from vp9/common/mips/msa/vp9_convolve8_msa.c with 98% similarity]
vpx_dsp/mips/vpx_convolve8_vert_msa.c [moved from vp9/common/mips/msa/vp9_convolve8_vert_msa.c with 98% similarity]
vpx_dsp/mips/vpx_convolve_avg_msa.c [moved from vp9/common/mips/msa/vp9_convolve_avg_msa.c with 99% similarity]
vpx_dsp/mips/vpx_convolve_copy_msa.c [moved from vp9/common/mips/msa/vp9_convolve_copy_msa.c with 99% similarity]
vpx_dsp/mips/vpx_convolve_msa.h [moved from vp9/common/mips/msa/vp9_convolve_msa.h with 97% similarity]
vpx_dsp/vpx_convolve.c [moved from vp9/common/vp9_convolve.c with 93% similarity]
vpx_dsp/vpx_convolve.h [moved from vp9/common/vp9_convolve.h with 92% similarity]
vpx_dsp/vpx_dsp.mk
vpx_dsp/vpx_dsp_common.h
vpx_dsp/vpx_dsp_rtcd_defs.pl
vpx_dsp/vpx_filter.h [new file with mode: 0644]
vpx_dsp/x86/convolve.h [moved from vp9/common/x86/convolve.h with 85% similarity]
vpx_dsp/x86/vpx_asm_stubs.c [moved from vp9/common/x86/vp9_asm_stubs.c with 60% similarity]
vpx_dsp/x86/vpx_convolve_copy_sse2.asm [moved from vp9/common/x86/vp9_copy_sse2.asm with 99% similarity]
vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm [moved from vp9/common/x86/vp9_high_subpixel_8t_sse2.asm with 94% similarity]
vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm [moved from vp9/common/x86/vp9_high_subpixel_bilinear_sse2.asm with 89% similarity]
vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c [moved from vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c with 92% similarity]
vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c [moved from vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c with 89% similarity]
vpx_dsp/x86/vpx_subpixel_8t_sse2.asm [moved from vp9/common/x86/vp9_subpixel_8t_sse2.asm with 94% similarity]
vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm [moved from vp9/common/x86/vp9_subpixel_8t_ssse3.asm with 95% similarity]
vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm [moved from vp9/common/x86/vp9_subpixel_bilinear_sse2.asm with 89% similarity]
vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm [moved from vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm with 88% similarity]

index b66da683cb0c2b679f32b5bb0b588661cb3aaa7b..90f9579a39ce162c44ef0c7e968867f93d6ca771 100644 (file)
 
 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_filter.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
 
@@ -945,7 +948,7 @@ void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
+  vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
                                   filter_x_stride, filter_y, filter_y_stride,
                                   w, h, 8);
 }
@@ -957,7 +960,7 @@ void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                      const int16_t *filter_y,
                                      int filter_y_stride,
                                      int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                       filter_x, filter_x_stride,
                                       filter_y, filter_y_stride, w, h, 8);
 }
@@ -969,7 +972,7 @@ void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
-  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 8);
 }
@@ -981,7 +984,7 @@ void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                     const int16_t *filter_y,
                                     int filter_y_stride,
                                     int w, int h) {
-  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                      filter_x, filter_x_stride,
                                      filter_y, filter_y_stride, w, h, 8);
 }
@@ -993,7 +996,7 @@ void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                            const int16_t *filter_y,
                            int filter_y_stride,
                            int w, int h) {
-  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 8);
 }
@@ -1005,7 +1008,7 @@ void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
-  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 8);
 }
@@ -1017,7 +1020,7 @@ void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 10);
 }
@@ -1029,7 +1032,7 @@ void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                       const int16_t *filter_y,
                                       int filter_y_stride,
                                       int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                       filter_x, filter_x_stride,
                                       filter_y, filter_y_stride, w, h, 10);
 }
@@ -1041,7 +1044,7 @@ void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 10);
 }
@@ -1053,7 +1056,7 @@ void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                      const int16_t *filter_y,
                                      int filter_y_stride,
                                      int w, int h) {
-  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                      filter_x, filter_x_stride,
                                      filter_y, filter_y_stride, w, h, 10);
 }
@@ -1065,7 +1068,7 @@ void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 10);
 }
@@ -1077,7 +1080,7 @@ void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
-  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 10);
 }
@@ -1089,7 +1092,7 @@ void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 12);
 }
@@ -1101,7 +1104,7 @@ void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                       const int16_t *filter_y,
                                       int filter_y_stride,
                                       int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                       filter_x, filter_x_stride,
                                       filter_y, filter_y_stride, w, h, 12);
 }
@@ -1113,7 +1116,7 @@ void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 12);
 }
@@ -1125,7 +1128,7 @@ void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                      const int16_t *filter_y,
                                      int filter_y_stride,
                                      int w, int h) {
-  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                      filter_x, filter_x_stride,
                                      filter_y, filter_y_stride, w, h, 12);
 }
@@ -1137,7 +1140,7 @@ void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 12);
 }
@@ -1149,7 +1152,7 @@ void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
-  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 12);
 }
@@ -1162,7 +1165,7 @@ void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 8);
 }
@@ -1174,7 +1177,7 @@ void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
                            const int16_t *filter_y,
                            int filter_y_stride,
                            int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 8);
 }
@@ -1186,7 +1189,7 @@ void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 8);
 }
@@ -1198,7 +1201,7 @@ void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                    filter_x, filter_x_stride,
                                    filter_y, filter_y_stride, w, h, 8);
 }
@@ -1210,7 +1213,7 @@ void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 8);
 }
@@ -1222,7 +1225,7 @@ void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 8);
 }
@@ -1234,7 +1237,7 @@ void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride,
                         const int16_t *filter_y,
                         int filter_y_stride,
                         int w, int h) {
-  vp9_highbd_convolve8_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
                          filter_x, filter_x_stride,
                          filter_y, filter_y_stride, w, h, 8);
 }
@@ -1246,7 +1249,7 @@ void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 8);
 }
@@ -1258,7 +1261,7 @@ void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 10);
 }
@@ -1270,7 +1273,7 @@ void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 10);
 }
@@ -1282,7 +1285,7 @@ void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
-  vp9_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 10);
 }
@@ -1294,7 +1297,7 @@ void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
                                    const int16_t *filter_y,
                                    int filter_y_stride,
                                    int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                    filter_x, filter_x_stride,
                                    filter_y, filter_y_stride, w, h, 10);
 }
@@ -1306,7 +1309,7 @@ void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 10);
 }
@@ -1318,7 +1321,7 @@ void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 10);
 }
@@ -1330,7 +1333,7 @@ void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride,
                          const int16_t *filter_y,
                          int filter_y_stride,
                          int w, int h) {
-  vp9_highbd_convolve8_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
                          filter_x, filter_x_stride,
                          filter_y, filter_y_stride, w, h, 10);
 }
@@ -1342,7 +1345,7 @@ void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 10);
 }
@@ -1354,7 +1357,7 @@ void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 12);
 }
@@ -1366,7 +1369,7 @@ void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 12);
 }
@@ -1378,7 +1381,7 @@ void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
-  vp9_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 12);
 }
@@ -1390,7 +1393,7 @@ void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
                                    const int16_t *filter_y,
                                    int filter_y_stride,
                                    int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                    filter_x, filter_x_stride,
                                    filter_y, filter_y_stride, w, h, 12);
 }
@@ -1402,7 +1405,7 @@ void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 12);
 }
@@ -1414,7 +1417,7 @@ void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 12);
 }
@@ -1426,7 +1429,7 @@ void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride,
                          const int16_t *filter_y,
                          int filter_y_stride,
                          int w, int h) {
-  vp9_highbd_convolve8_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
                          filter_x, filter_x_stride,
                          filter_y, filter_y_stride, w, h, 12);
 }
@@ -1438,7 +1441,7 @@ void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 12);
 }
@@ -1504,10 +1507,10 @@ INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
 #else
 
 const ConvolveFunctions convolve8_c(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
-    vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
-    vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
-    vp9_convolve8_c, vp9_convolve8_avg_c, 0);
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
+    vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
+    vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
+    vpx_convolve8_c, vpx_convolve8_avg_c, 0);
 
 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
     make_tuple(4, 4, &convolve8_c),
@@ -1585,13 +1588,13 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
 #else
 const ConvolveFunctions convolve8_sse2(
 #if CONFIG_USE_X86INC
-    vp9_convolve_copy_sse2, vp9_convolve_avg_sse2,
+    vpx_convolve_copy_sse2, vpx_convolve_avg_sse2,
 #else
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
 #endif  // CONFIG_USE_X86INC
-    vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
-    vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
-    vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0);
+    vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2,
+    vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2,
+    vpx_convolve8_sse2, vpx_convolve8_avg_sse2, 0);
 
 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
     make_tuple(4, 4, &convolve8_sse2),
@@ -1612,10 +1615,10 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
 
 #if HAVE_SSSE3
 const ConvolveFunctions convolve8_ssse3(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
-    vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
-    vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
-    vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0);
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
+    vpx_convolve8_horiz_ssse3, vpx_convolve8_avg_horiz_ssse3,
+    vpx_convolve8_vert_ssse3, vpx_convolve8_avg_vert_ssse3,
+    vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3, 0);
 
 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
     make_tuple(4, 4, &convolve8_ssse3),
@@ -1635,10 +1638,10 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
 
 #if HAVE_AVX2 && HAVE_SSSE3
 const ConvolveFunctions convolve8_avx2(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
-    vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
-    vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
-    vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0);
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
+    vpx_convolve8_horiz_avx2, vpx_convolve8_avg_horiz_ssse3,
+    vpx_convolve8_vert_avx2, vpx_convolve8_avg_vert_ssse3,
+    vpx_convolve8_avx2, vpx_convolve8_avg_ssse3, 0);
 
 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
     make_tuple(4, 4, &convolve8_avx2),
@@ -1659,16 +1662,16 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
 #if HAVE_NEON
 #if HAVE_NEON_ASM
 const ConvolveFunctions convolve8_neon(
-    vp9_convolve_copy_neon, vp9_convolve_avg_neon,
-    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
-    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
-    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
+    vpx_convolve_copy_neon, vpx_convolve_avg_neon,
+    vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
+    vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
+    vpx_convolve8_neon, vpx_convolve8_avg_neon, 0);
 #else  // HAVE_NEON
 const ConvolveFunctions convolve8_neon(
-    vp9_convolve_copy_neon, vp9_convolve_avg_neon,
-    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
-    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
-    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
+    vpx_convolve_copy_neon, vpx_convolve_avg_neon,
+    vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
+    vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
+    vpx_convolve8_neon, vpx_convolve8_avg_neon, 0);
 #endif  // HAVE_NEON_ASM
 
 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
@@ -1689,10 +1692,10 @@ INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
 
 #if HAVE_DSPR2
 const ConvolveFunctions convolve8_dspr2(
-    vp9_convolve_copy_dspr2, vp9_convolve_avg_dspr2,
-    vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2,
-    vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2,
-    vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0);
+    vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2,
+    vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2,
+    vpx_convolve8_vert_dspr2, vpx_convolve8_avg_vert_dspr2,
+    vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2, 0);
 
 INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
     make_tuple(4, 4, &convolve8_dspr2),
@@ -1712,10 +1715,10 @@ INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
 
 #if HAVE_MSA
 const ConvolveFunctions convolve8_msa(
-    vp9_convolve_copy_msa, vp9_convolve_avg_msa,
-    vp9_convolve8_horiz_msa, vp9_convolve8_avg_horiz_msa,
-    vp9_convolve8_vert_msa, vp9_convolve8_avg_vert_msa,
-    vp9_convolve8_msa, vp9_convolve8_avg_msa, 0);
+    vpx_convolve_copy_msa, vpx_convolve_avg_msa,
+    vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa,
+    vpx_convolve8_vert_msa, vpx_convolve8_avg_vert_msa,
+    vpx_convolve8_msa, vpx_convolve8_avg_msa, 0);
 
 INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
     make_tuple(4, 4, &convolve8_msa),
diff --git a/vp9/common/arm/neon/vp9_convolve8_avg_neon.c b/vp9/common/arm/neon/vp9_convolve8_avg_neon.c
deleted file mode 100644 (file)
index dd569d3..0000000
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stddef.h>
-#include <arm_neon.h>
-
-#include "./vpx_config.h"
-#include "vpx_ports/mem.h"
-
-void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4,
-                               int w, int h);
-void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4,
-                               int w, int h);
-
-static INLINE int32x4_t MULTIPLY_BY_Q0(
-        int16x4_t dsrc0,
-        int16x4_t dsrc1,
-        int16x4_t dsrc2,
-        int16x4_t dsrc3,
-        int16x4_t dsrc4,
-        int16x4_t dsrc5,
-        int16x4_t dsrc6,
-        int16x4_t dsrc7,
-        int16x8_t q0s16) {
-    int32x4_t qdst;
-    int16x4_t d0s16, d1s16;
-
-    d0s16 = vget_low_s16(q0s16);
-    d1s16 = vget_high_s16(q0s16);
-
-    qdst = vmull_lane_s16(dsrc0, d0s16, 0);
-    qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1);
-    qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2);
-    qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3);
-    qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0);
-    qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1);
-    qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2);
-    qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3);
-    return qdst;
-}
-
-void vp9_convolve8_avg_horiz_neon(
-        uint8_t *src,
-        ptrdiff_t src_stride,
-        uint8_t *dst,
-        ptrdiff_t dst_stride,
-        const int16_t *filter_x,
-        int x_step_q4,
-        const int16_t *filter_y,  // unused
-        int y_step_q4,            // unused
-        int w,
-        int h) {
-    int width;
-    uint8_t *s, *d;
-    uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8;
-    uint32x2_t d2u32, d3u32, d6u32, d7u32, d28u32, d29u32, d30u32, d31u32;
-    uint8x16_t q1u8, q3u8, q12u8, q13u8, q14u8, q15u8;
-    int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16;
-    int16x4_t d24s16, d25s16, d26s16, d27s16;
-    uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16;
-    int16x8_t q0s16;
-    uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
-    int32x4_t q1s32, q2s32, q14s32, q15s32;
-    uint16x8x2_t q0x2u16;
-    uint8x8x2_t d0x2u8, d1x2u8;
-    uint32x2x2_t d0x2u32;
-    uint16x4x2_t d0x2u16, d1x2u16;
-    uint32x4x2_t q0x2u32;
-
-    if (x_step_q4 != 16) {
-        vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
-                                  filter_x, x_step_q4,
-                                  filter_y, y_step_q4, w, h);
-        return;
-    }
-
-    q0s16 = vld1q_s16(filter_x);
-
-    src -= 3;  // adjust for taps
-    for (; h > 0; h -= 4) {  // loop_horiz_v
-        s = src;
-        d24u8 = vld1_u8(s);
-        s += src_stride;
-        d25u8 = vld1_u8(s);
-        s += src_stride;
-        d26u8 = vld1_u8(s);
-        s += src_stride;
-        d27u8 = vld1_u8(s);
-
-        q12u8 = vcombine_u8(d24u8, d25u8);
-        q13u8 = vcombine_u8(d26u8, d27u8);
-
-        q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8),
-                            vreinterpretq_u16_u8(q13u8));
-        d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0]));
-        d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0]));
-        d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1]));
-        d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1]));
-        d0x2u8 = vtrn_u8(d24u8, d25u8);
-        d1x2u8 = vtrn_u8(d26u8, d27u8);
-
-        __builtin_prefetch(src + src_stride * 4);
-        __builtin_prefetch(src + src_stride * 5);
-
-        q8u16 = vmovl_u8(d0x2u8.val[0]);
-        q9u16 = vmovl_u8(d0x2u8.val[1]);
-        q10u16 = vmovl_u8(d1x2u8.val[0]);
-        q11u16 = vmovl_u8(d1x2u8.val[1]);
-
-        src += 7;
-        d16u16 = vget_low_u16(q8u16);
-        d17u16 = vget_high_u16(q8u16);
-        d18u16 = vget_low_u16(q9u16);
-        d19u16 = vget_high_u16(q9u16);
-        q8u16 = vcombine_u16(d16u16, d18u16);  // vswp 17 18
-        q9u16 = vcombine_u16(d17u16, d19u16);
-
-        d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
-        d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));  // vmov 23 21
-        for (width = w;
-             width > 0;
-             width -= 4, src += 4, dst += 4) {  // loop_horiz
-            s = src;
-            d28u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d29u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d31u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d30u32 = vld1_dup_u32((const uint32_t *)s);
-
-            __builtin_prefetch(src + 64);
-
-            d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32),
-                               vreinterpret_u16_u32(d31u32));
-            d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32),
-                               vreinterpret_u16_u32(d30u32));
-            d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]),   // d28
-                             vreinterpret_u8_u16(d1x2u16.val[0]));  // d29
-            d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]),   // d31
-                             vreinterpret_u8_u16(d1x2u16.val[1]));  // d30
-
-            __builtin_prefetch(src + 64 + src_stride);
-
-            q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
-            q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]);
-            q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8),
-                                vreinterpretq_u32_u8(q15u8));
-
-            d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0]));
-            d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0]));
-            q12u16 = vmovl_u8(d28u8);
-            q13u16 = vmovl_u8(d29u8);
-
-            __builtin_prefetch(src + 64 + src_stride * 2);
-
-            d = dst;
-            d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0);
-            d += dst_stride;
-            d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0);
-            d += dst_stride;
-            d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1);
-            d += dst_stride;
-            d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1);
-
-            d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
-            d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
-            d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
-            d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
-            d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-            d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-            d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-            d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-            d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-
-            q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16,
-                                    d18s16, d19s16, d23s16, d24s16, q0s16);
-            q2s32  = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16,
-                                    d19s16, d23s16, d24s16, d26s16, q0s16);
-            q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16,
-                                    d23s16, d24s16, d26s16, d27s16, q0s16);
-            q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16,
-                                    d24s16, d26s16, d27s16, d25s16, q0s16);
-
-            __builtin_prefetch(src + 64 + src_stride * 3);
-
-            d2u16 = vqrshrun_n_s32(q1s32, 7);
-            d3u16 = vqrshrun_n_s32(q2s32, 7);
-            d4u16 = vqrshrun_n_s32(q14s32, 7);
-            d5u16 = vqrshrun_n_s32(q15s32, 7);
-
-            q1u16 = vcombine_u16(d2u16, d3u16);
-            q2u16 = vcombine_u16(d4u16, d5u16);
-
-            d2u8 = vqmovn_u16(q1u16);
-            d3u8 = vqmovn_u16(q2u16);
-
-            d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8),
-                               vreinterpret_u16_u8(d3u8));
-            d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]),
-                               vreinterpret_u32_u16(d0x2u16.val[1]));
-            d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]),
-                             vreinterpret_u8_u32(d0x2u32.val[1]));
-
-            q1u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
-            q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32));
-
-            q1u8 = vrhaddq_u8(q1u8, q3u8);
-
-            d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8));
-            d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8));
-
-            d = dst;
-            vst1_lane_u32((uint32_t *)d, d2u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d2u32, 1);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 1);
-
-            q8u16 = q9u16;
-            d20s16 = d23s16;
-            q11u16 = q12u16;
-            q9u16 = q13u16;
-            d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
-        }
-        src += src_stride * 4 - w - 7;
-        dst += dst_stride * 4 - w;
-    }
-    return;
-}
-
-void vp9_convolve8_avg_vert_neon(
-        uint8_t *src,
-        ptrdiff_t src_stride,
-        uint8_t *dst,
-        ptrdiff_t dst_stride,
-        const int16_t *filter_x,  // unused
-        int x_step_q4,            // unused
-        const int16_t *filter_y,
-        int y_step_q4,
-        int w,
-        int h) {
-    int height;
-    uint8_t *s, *d;
-    uint8x8_t d2u8, d3u8;
-    uint32x2_t d2u32, d3u32, d6u32, d7u32;
-    uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32;
-    uint8x16_t q1u8, q3u8;
-    int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16;
-    int16x4_t d24s16, d25s16, d26s16, d27s16;
-    uint16x4_t d2u16, d3u16, d4u16, d5u16;
-    int16x8_t q0s16;
-    uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
-    int32x4_t q1s32, q2s32, q14s32, q15s32;
-
-    if (y_step_q4 != 16) {
-        vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
-                                 filter_x, x_step_q4,
-                                 filter_y, y_step_q4, w, h);
-        return;
-    }
-
-    src -= src_stride * 3;
-    q0s16 = vld1q_s16(filter_y);
-    for (; w > 0; w -= 4, src += 4, dst += 4) {  // loop_vert_h
-        s = src;
-        d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0);
-        s += src_stride;
-        d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1);
-        s += src_stride;
-        d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0);
-        s += src_stride;
-        d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1);
-        s += src_stride;
-        d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0);
-        s += src_stride;
-        d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1);
-        s += src_stride;
-        d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0);
-        s += src_stride;
-
-        q8u16  = vmovl_u8(vreinterpret_u8_u32(d16u32));
-        q9u16  = vmovl_u8(vreinterpret_u8_u32(d18u32));
-        q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32));
-        q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32));
-
-        d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
-        d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
-        d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-        d = dst;
-        for (height = h; height > 0; height -= 4) {  // loop_vert
-            d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0);
-            s += src_stride;
-            d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0);
-            s += src_stride;
-            d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1);
-            s += src_stride;
-            d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1);
-            s += src_stride;
-
-            q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32));
-            q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32));
-
-            d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0);
-            d += dst_stride;
-            d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1);
-            d += dst_stride;
-            d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0);
-            d += dst_stride;
-            d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1);
-            d -= dst_stride * 3;
-
-            d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
-            d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
-            d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
-            d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));
-            d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-            d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-            d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-            d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-
-            __builtin_prefetch(s);
-            __builtin_prefetch(s + src_stride);
-            q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16,
-                                    d20s16, d21s16, d22s16, d24s16, q0s16);
-            __builtin_prefetch(s + src_stride * 2);
-            __builtin_prefetch(s + src_stride * 3);
-            q2s32  = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16,
-                                    d21s16, d22s16, d24s16, d26s16, q0s16);
-            __builtin_prefetch(d);
-            __builtin_prefetch(d + dst_stride);
-            q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16,
-                                    d22s16, d24s16, d26s16, d27s16, q0s16);
-            __builtin_prefetch(d + dst_stride * 2);
-            __builtin_prefetch(d + dst_stride * 3);
-            q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16,
-                                    d24s16, d26s16, d27s16, d25s16, q0s16);
-
-            d2u16 = vqrshrun_n_s32(q1s32, 7);
-            d3u16 = vqrshrun_n_s32(q2s32, 7);
-            d4u16 = vqrshrun_n_s32(q14s32, 7);
-            d5u16 = vqrshrun_n_s32(q15s32, 7);
-
-            q1u16 = vcombine_u16(d2u16, d3u16);
-            q2u16 = vcombine_u16(d4u16, d5u16);
-
-            d2u8 = vqmovn_u16(q1u16);
-            d3u8 = vqmovn_u16(q2u16);
-
-            q1u8 = vcombine_u8(d2u8, d3u8);
-            q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32));
-
-            q1u8 = vrhaddq_u8(q1u8, q3u8);
-
-            d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8));
-            d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8));
-
-            vst1_lane_u32((uint32_t *)d, d2u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d2u32, 1);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 1);
-            d += dst_stride;
-
-            q8u16 = q10u16;
-            d18s16 = d22s16;
-            d19s16 = d24s16;
-            q10u16 = q13u16;
-            d22s16 = d25s16;
-        }
-    }
-    return;
-}
diff --git a/vp9/common/arm/neon/vp9_convolve8_neon.c b/vp9/common/arm/neon/vp9_convolve8_neon.c
deleted file mode 100644 (file)
index 5c555c4..0000000
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stddef.h>
-#include <arm_neon.h>
-
-#include "./vpx_config.h"
-#include "vpx_ports/mem.h"
-
-void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
-                           uint8_t *dst, ptrdiff_t dst_stride,
-                           const int16_t *filter_x, int x_step_q4,
-                           const int16_t *filter_y, int y_step_q4,
-                           int w, int h);
-void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
-                           uint8_t *dst, ptrdiff_t dst_stride,
-                           const int16_t *filter_x, int x_step_q4,
-                           const int16_t *filter_y, int y_step_q4,
-                           int w, int h);
-
-static INLINE int32x4_t MULTIPLY_BY_Q0(
-        int16x4_t dsrc0,
-        int16x4_t dsrc1,
-        int16x4_t dsrc2,
-        int16x4_t dsrc3,
-        int16x4_t dsrc4,
-        int16x4_t dsrc5,
-        int16x4_t dsrc6,
-        int16x4_t dsrc7,
-        int16x8_t q0s16) {
-    int32x4_t qdst;
-    int16x4_t d0s16, d1s16;
-
-    d0s16 = vget_low_s16(q0s16);
-    d1s16 = vget_high_s16(q0s16);
-
-    qdst = vmull_lane_s16(dsrc0, d0s16, 0);
-    qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1);
-    qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2);
-    qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3);
-    qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0);
-    qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1);
-    qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2);
-    qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3);
-    return qdst;
-}
-
-void vp9_convolve8_horiz_neon(
-        uint8_t *src,
-        ptrdiff_t src_stride,
-        uint8_t *dst,
-        ptrdiff_t dst_stride,
-        const int16_t *filter_x,
-        int x_step_q4,
-        const int16_t *filter_y,  // unused
-        int y_step_q4,            // unused
-        int w,
-        int h) {
-    int width;
-    uint8_t *s, *d, *psrc, *pdst;
-    uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8;
-    uint32x2_t d2u32, d3u32, d28u32, d29u32, d30u32, d31u32;
-    uint8x16_t q12u8, q13u8, q14u8, q15u8;
-    int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16;
-    int16x4_t d24s16, d25s16, d26s16, d27s16;
-    uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16;
-    int16x8_t q0s16;
-    uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
-    int32x4_t q1s32, q2s32, q14s32, q15s32;
-    uint16x8x2_t q0x2u16;
-    uint8x8x2_t d0x2u8, d1x2u8;
-    uint32x2x2_t d0x2u32;
-    uint16x4x2_t d0x2u16, d1x2u16;
-    uint32x4x2_t q0x2u32;
-
-    if (x_step_q4 != 16) {
-        vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
-                              filter_x, x_step_q4,
-                              filter_y, y_step_q4, w, h);
-        return;
-    }
-
-    q0s16 = vld1q_s16(filter_x);
-
-    src -= 3;  // adjust for taps
-    for (; h > 0; h -= 4,
-        src += src_stride * 4,
-        dst += dst_stride * 4) {  // loop_horiz_v
-        s = src;
-        d24u8 = vld1_u8(s);
-        s += src_stride;
-        d25u8 = vld1_u8(s);
-        s += src_stride;
-        d26u8 = vld1_u8(s);
-        s += src_stride;
-        d27u8 = vld1_u8(s);
-
-        q12u8 = vcombine_u8(d24u8, d25u8);
-        q13u8 = vcombine_u8(d26u8, d27u8);
-
-        q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8),
-                            vreinterpretq_u16_u8(q13u8));
-        d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0]));
-        d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0]));
-        d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1]));
-        d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1]));
-        d0x2u8 = vtrn_u8(d24u8, d25u8);
-        d1x2u8 = vtrn_u8(d26u8, d27u8);
-
-        __builtin_prefetch(src + src_stride * 4);
-        __builtin_prefetch(src + src_stride * 5);
-        __builtin_prefetch(src + src_stride * 6);
-
-        q8u16  = vmovl_u8(d0x2u8.val[0]);
-        q9u16  = vmovl_u8(d0x2u8.val[1]);
-        q10u16 = vmovl_u8(d1x2u8.val[0]);
-        q11u16 = vmovl_u8(d1x2u8.val[1]);
-
-        d16u16 = vget_low_u16(q8u16);
-        d17u16 = vget_high_u16(q8u16);
-        d18u16 = vget_low_u16(q9u16);
-        d19u16 = vget_high_u16(q9u16);
-        q8u16 = vcombine_u16(d16u16, d18u16);  // vswp 17 18
-        q9u16 = vcombine_u16(d17u16, d19u16);
-
-        d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
-        d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));  // vmov 23 21
-        for (width = w, psrc = src + 7, pdst = dst;
-             width > 0;
-             width -= 4, psrc += 4, pdst += 4) {  // loop_horiz
-            s = psrc;
-            d28u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d29u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d31u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d30u32 = vld1_dup_u32((const uint32_t *)s);
-
-            __builtin_prefetch(psrc + 64);
-
-            d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32),
-                               vreinterpret_u16_u32(d31u32));
-            d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32),
-                               vreinterpret_u16_u32(d30u32));
-            d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]),   // d28
-                             vreinterpret_u8_u16(d1x2u16.val[0]));  // d29
-            d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]),   // d31
-                             vreinterpret_u8_u16(d1x2u16.val[1]));  // d30
-
-            __builtin_prefetch(psrc + 64 + src_stride);
-
-            q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
-            q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]);
-            q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8),
-                                vreinterpretq_u32_u8(q15u8));
-
-            d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0]));
-            d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0]));
-            q12u16 = vmovl_u8(d28u8);
-            q13u16 = vmovl_u8(d29u8);
-
-            __builtin_prefetch(psrc + 64 + src_stride * 2);
-
-            d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
-            d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
-            d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
-            d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
-            d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-            d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-            d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-            d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-            d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-
-            q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16,
-                                    d18s16, d19s16, d23s16, d24s16, q0s16);
-            q2s32  = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16,
-                                    d19s16, d23s16, d24s16, d26s16, q0s16);
-            q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16,
-                                    d23s16, d24s16, d26s16, d27s16, q0s16);
-            q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16,
-                                    d24s16, d26s16, d27s16, d25s16, q0s16);
-
-            __builtin_prefetch(psrc + 60 + src_stride * 3);
-
-            d2u16 = vqrshrun_n_s32(q1s32, 7);
-            d3u16 = vqrshrun_n_s32(q2s32, 7);
-            d4u16 = vqrshrun_n_s32(q14s32, 7);
-            d5u16 = vqrshrun_n_s32(q15s32, 7);
-
-            q1u16 = vcombine_u16(d2u16, d3u16);
-            q2u16 = vcombine_u16(d4u16, d5u16);
-
-            d2u8 = vqmovn_u16(q1u16);
-            d3u8 = vqmovn_u16(q2u16);
-
-            d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8),
-                               vreinterpret_u16_u8(d3u8));
-            d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]),
-                               vreinterpret_u32_u16(d0x2u16.val[1]));
-            d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]),
-                             vreinterpret_u8_u32(d0x2u32.val[1]));
-
-            d2u32 = vreinterpret_u32_u8(d0x2u8.val[0]);
-            d3u32 = vreinterpret_u32_u8(d0x2u8.val[1]);
-
-            d = pdst;
-            vst1_lane_u32((uint32_t *)d, d2u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d2u32, 1);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 1);
-
-            q8u16 = q9u16;
-            d20s16 = d23s16;
-            q11u16 = q12u16;
-            q9u16 = q13u16;
-            d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
-        }
-    }
-    return;
-}
-
-void vp9_convolve8_vert_neon(
-        uint8_t *src,
-        ptrdiff_t src_stride,
-        uint8_t *dst,
-        ptrdiff_t dst_stride,
-        const int16_t *filter_x,  // unused
-        int x_step_q4,            // unused
-        const int16_t *filter_y,
-        int y_step_q4,
-        int w,
-        int h) {
-    int height;
-    uint8_t *s, *d;
-    uint32x2_t d2u32, d3u32;
-    uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32;
-    int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16;
-    int16x4_t d24s16, d25s16, d26s16, d27s16;
-    uint16x4_t d2u16, d3u16, d4u16, d5u16;
-    int16x8_t q0s16;
-    uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
-    int32x4_t q1s32, q2s32, q14s32, q15s32;
-
-    if (y_step_q4 != 16) {
-        vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
-                             filter_x, x_step_q4,
-                             filter_y, y_step_q4, w, h);
-        return;
-    }
-
-    src -= src_stride * 3;
-    q0s16 = vld1q_s16(filter_y);
-    for (; w > 0; w -= 4, src += 4, dst += 4) {  // loop_vert_h
-        s = src;
-        d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0);
-        s += src_stride;
-        d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1);
-        s += src_stride;
-        d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0);
-        s += src_stride;
-        d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1);
-        s += src_stride;
-        d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0);
-        s += src_stride;
-        d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1);
-        s += src_stride;
-        d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0);
-        s += src_stride;
-
-        q8u16  = vmovl_u8(vreinterpret_u8_u32(d16u32));
-        q9u16  = vmovl_u8(vreinterpret_u8_u32(d18u32));
-        q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32));
-        q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32));
-
-        d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
-        d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
-        d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-        d = dst;
-        for (height = h; height > 0; height -= 4) {  // loop_vert
-            d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0);
-            s += src_stride;
-            d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0);
-            s += src_stride;
-            d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1);
-            s += src_stride;
-            d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1);
-            s += src_stride;
-
-            q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32));
-            q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32));
-
-            d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
-            d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
-            d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
-            d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));
-            d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-            d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-            d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-            d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-
-            __builtin_prefetch(d);
-            __builtin_prefetch(d + dst_stride);
-            q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16,
-                                    d20s16, d21s16, d22s16, d24s16, q0s16);
-            __builtin_prefetch(d + dst_stride * 2);
-            __builtin_prefetch(d + dst_stride * 3);
-            q2s32  = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16,
-                                    d21s16, d22s16, d24s16, d26s16, q0s16);
-            __builtin_prefetch(s);
-            __builtin_prefetch(s + src_stride);
-            q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16,
-                                    d22s16, d24s16, d26s16, d27s16, q0s16);
-            __builtin_prefetch(s + src_stride * 2);
-            __builtin_prefetch(s + src_stride * 3);
-            q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16,
-                                    d24s16, d26s16, d27s16, d25s16, q0s16);
-
-            d2u16 = vqrshrun_n_s32(q1s32, 7);
-            d3u16 = vqrshrun_n_s32(q2s32, 7);
-            d4u16 = vqrshrun_n_s32(q14s32, 7);
-            d5u16 = vqrshrun_n_s32(q15s32, 7);
-
-            q1u16 = vcombine_u16(d2u16, d3u16);
-            q2u16 = vcombine_u16(d4u16, d5u16);
-
-            d2u32 = vreinterpret_u32_u8(vqmovn_u16(q1u16));
-            d3u32 = vreinterpret_u32_u8(vqmovn_u16(q2u16));
-
-            vst1_lane_u32((uint32_t *)d, d2u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d2u32, 1);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 1);
-            d += dst_stride;
-
-            q8u16 = q10u16;
-            d18s16 = d22s16;
-            d19s16 = d24s16;
-            q10u16 = q13u16;
-            d22s16 = d25s16;
-        }
-    }
-    return;
-}
diff --git a/vp9/common/arm/neon/vp9_convolve_avg_neon.c b/vp9/common/arm/neon/vp9_convolve_avg_neon.c
deleted file mode 100644 (file)
index 3a3db35..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stddef.h>
-#include <arm_neon.h>
-
-void vp9_convolve_avg_neon(
-        const uint8_t *src,    // r0
-        ptrdiff_t src_stride,  // r1
-        uint8_t *dst,          // r2
-        ptrdiff_t dst_stride,  // r3
-        const int16_t *filter_x,
-        int filter_x_stride,
-        const int16_t *filter_y,
-        int filter_y_stride,
-        int w,
-        int h) {
-    uint8_t *d;
-    uint8x8_t d0u8, d1u8, d2u8, d3u8;
-    uint32x2_t d0u32, d2u32;
-    uint8x16_t q0u8, q1u8, q2u8, q3u8, q8u8, q9u8, q10u8, q11u8;
-    (void)filter_x;  (void)filter_x_stride;
-    (void)filter_y;  (void)filter_y_stride;
-
-    d = dst;
-    if (w > 32) {  // avg64
-        for (; h > 0; h -= 1) {
-            q0u8  = vld1q_u8(src);
-            q1u8  = vld1q_u8(src + 16);
-            q2u8  = vld1q_u8(src + 32);
-            q3u8  = vld1q_u8(src + 48);
-            src += src_stride;
-            q8u8  = vld1q_u8(d);
-            q9u8  = vld1q_u8(d + 16);
-            q10u8 = vld1q_u8(d + 32);
-            q11u8 = vld1q_u8(d + 48);
-            d += dst_stride;
-
-            q0u8 = vrhaddq_u8(q0u8, q8u8);
-            q1u8 = vrhaddq_u8(q1u8, q9u8);
-            q2u8 = vrhaddq_u8(q2u8, q10u8);
-            q3u8 = vrhaddq_u8(q3u8, q11u8);
-
-            vst1q_u8(dst, q0u8);
-            vst1q_u8(dst + 16, q1u8);
-            vst1q_u8(dst + 32, q2u8);
-            vst1q_u8(dst + 48, q3u8);
-            dst += dst_stride;
-        }
-    } else if (w == 32) {  // avg32
-        for (; h > 0; h -= 2) {
-            q0u8 = vld1q_u8(src);
-            q1u8 = vld1q_u8(src + 16);
-            src += src_stride;
-            q2u8 = vld1q_u8(src);
-            q3u8 = vld1q_u8(src + 16);
-            src += src_stride;
-            q8u8 = vld1q_u8(d);
-            q9u8 = vld1q_u8(d + 16);
-            d += dst_stride;
-            q10u8 = vld1q_u8(d);
-            q11u8 = vld1q_u8(d + 16);
-            d += dst_stride;
-
-            q0u8 = vrhaddq_u8(q0u8, q8u8);
-            q1u8 = vrhaddq_u8(q1u8, q9u8);
-            q2u8 = vrhaddq_u8(q2u8, q10u8);
-            q3u8 = vrhaddq_u8(q3u8, q11u8);
-
-            vst1q_u8(dst, q0u8);
-            vst1q_u8(dst + 16, q1u8);
-            dst += dst_stride;
-            vst1q_u8(dst, q2u8);
-            vst1q_u8(dst + 16, q3u8);
-            dst += dst_stride;
-        }
-    } else if (w > 8) {  // avg16
-        for (; h > 0; h -= 2) {
-            q0u8 = vld1q_u8(src);
-            src += src_stride;
-            q1u8 = vld1q_u8(src);
-            src += src_stride;
-            q2u8 = vld1q_u8(d);
-            d += dst_stride;
-            q3u8 = vld1q_u8(d);
-            d += dst_stride;
-
-            q0u8 = vrhaddq_u8(q0u8, q2u8);
-            q1u8 = vrhaddq_u8(q1u8, q3u8);
-
-            vst1q_u8(dst, q0u8);
-            dst += dst_stride;
-            vst1q_u8(dst, q1u8);
-            dst += dst_stride;
-        }
-    } else if (w == 8) {  // avg8
-        for (; h > 0; h -= 2) {
-            d0u8 = vld1_u8(src);
-            src += src_stride;
-            d1u8 = vld1_u8(src);
-            src += src_stride;
-            d2u8 = vld1_u8(d);
-            d += dst_stride;
-            d3u8 = vld1_u8(d);
-            d += dst_stride;
-
-            q0u8 = vcombine_u8(d0u8, d1u8);
-            q1u8 = vcombine_u8(d2u8, d3u8);
-            q0u8 = vrhaddq_u8(q0u8, q1u8);
-
-            vst1_u8(dst, vget_low_u8(q0u8));
-            dst += dst_stride;
-            vst1_u8(dst, vget_high_u8(q0u8));
-            dst += dst_stride;
-        }
-    } else {  // avg4
-        for (; h > 0; h -= 2) {
-            d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 0);
-            src += src_stride;
-            d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 1);
-            src += src_stride;
-            d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 0);
-            d += dst_stride;
-            d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 1);
-            d += dst_stride;
-
-            d0u8 = vrhadd_u8(vreinterpret_u8_u32(d0u32),
-                             vreinterpret_u8_u32(d2u32));
-
-            d0u32 = vreinterpret_u32_u8(d0u8);
-            vst1_lane_u32((uint32_t *)dst, d0u32, 0);
-            dst += dst_stride;
-            vst1_lane_u32((uint32_t *)dst, d0u32, 1);
-            dst += dst_stride;
-        }
-    }
-    return;
-}
diff --git a/vp9/common/arm/neon/vp9_copy_neon.c b/vp9/common/arm/neon/vp9_copy_neon.c
deleted file mode 100644 (file)
index f334abe..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stddef.h>
-#include <arm_neon.h>
-
-void vp9_convolve_copy_neon(
-        const uint8_t *src,    // r0
-        ptrdiff_t src_stride,  // r1
-        uint8_t *dst,          // r2
-        ptrdiff_t dst_stride,  // r3
-        const int16_t *filter_x,
-        int filter_x_stride,
-        const int16_t *filter_y,
-        int filter_y_stride,
-        int w,
-        int h) {
-    uint8x8_t d0u8, d2u8;
-    uint8x16_t q0u8, q1u8, q2u8, q3u8;
-    (void)filter_x;  (void)filter_x_stride;
-    (void)filter_y;  (void)filter_y_stride;
-
-    if (w > 32) {  // copy64
-        for (; h > 0; h--) {
-            q0u8 = vld1q_u8(src);
-            q1u8 = vld1q_u8(src + 16);
-            q2u8 = vld1q_u8(src + 32);
-            q3u8 = vld1q_u8(src + 48);
-            src += src_stride;
-
-            vst1q_u8(dst, q0u8);
-            vst1q_u8(dst + 16, q1u8);
-            vst1q_u8(dst + 32, q2u8);
-            vst1q_u8(dst + 48, q3u8);
-            dst += dst_stride;
-        }
-    } else if (w == 32) {  // copy32
-        for (; h > 0; h -= 2) {
-            q0u8 = vld1q_u8(src);
-            q1u8 = vld1q_u8(src + 16);
-            src += src_stride;
-            q2u8 = vld1q_u8(src);
-            q3u8 = vld1q_u8(src + 16);
-            src += src_stride;
-
-            vst1q_u8(dst, q0u8);
-            vst1q_u8(dst + 16, q1u8);
-            dst += dst_stride;
-            vst1q_u8(dst, q2u8);
-            vst1q_u8(dst + 16, q3u8);
-            dst += dst_stride;
-        }
-    } else if (w > 8) {  // copy16
-        for (; h > 0; h -= 2) {
-            q0u8 = vld1q_u8(src);
-            src += src_stride;
-            q1u8 = vld1q_u8(src);
-            src += src_stride;
-
-            vst1q_u8(dst, q0u8);
-            dst += dst_stride;
-            vst1q_u8(dst, q1u8);
-            dst += dst_stride;
-        }
-    } else if (w == 8) {  // copy8
-        for (; h > 0; h -= 2) {
-            d0u8 = vld1_u8(src);
-            src += src_stride;
-            d2u8 = vld1_u8(src);
-            src += src_stride;
-
-            vst1_u8(dst, d0u8);
-            dst += dst_stride;
-            vst1_u8(dst, d2u8);
-            dst += dst_stride;
-        }
-    } else {  // copy4
-        for (; h > 0; h--) {
-            *(uint32_t *)dst = *(const uint32_t *)src;
-            src += src_stride;
-            dst += dst_stride;
-        }
-    }
-    return;
-}
index 371738aba33102bea7765a9e636679a0dd83003a..0285be1557342ebf8ed53d2a6004719f12a81034 100644 (file)
 #ifndef VP9_COMMON_VP9_ENTROPYMODE_H_
 #define VP9_COMMON_VP9_ENTROPYMODE_H_
 
-#include "vp9/common/vp9_filter.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_entropymv.h"
+#include "vp9/common/vp9_filter.h"
+#include "vpx_dsp/vpx_filter.h"
 
 #ifdef __cplusplus
 extern "C" {
index 40d6a0d6a78abaee35481ab02169588db669a3b3..efa24bc67b8468e6898b1389b4548acb5666c619 100644 (file)
@@ -13,6 +13,7 @@
 
 #include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_filter.h"
 #include "vpx_ports/mem.h"
 
 
 extern "C" {
 #endif
 
-#define FILTER_BITS 7
-
-#define SUBPEL_BITS 4
-#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
-#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
-#define SUBPEL_TAPS 8
-
 #define EIGHTTAP            0
 #define EIGHTTAP_SMOOTH     1
 #define EIGHTTAP_SHARP      2
@@ -36,9 +30,8 @@ extern "C" {
 // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
 #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
 #define SWITCHABLE 4 /* should be the last one */
-typedef uint8_t INTERP_FILTER;
 
-typedef int16_t InterpKernel[SUBPEL_TAPS];
+typedef uint8_t INTERP_FILTER;
 
 extern const InterpKernel *vp9_filter_kernels[4];
 
index 2aa8ee9787eb56c1aba731dec8aa41db7094c6ca..1a4ea2f04d9761f8ac42f8366b4abc4b22498958 100644 (file)
@@ -15,6 +15,9 @@
 
 #include "./vpx_config.h"
 #include "vpx_dsp/txfm_common.h"
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "vpx_dsp/vpx_dsp_common.h"
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #include "vpx_ports/mem.h"
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_enums.h"
index 6d38fabd1f0733a3c6a172467f2d94039489f8f2..db9971da7b2c4a9bf755d52f52fec472f7f7ea5a 100644 (file)
@@ -16,7 +16,6 @@
 #include "vpx/vpx_integer.h"
 
 #include "vp9/common/vp9_blockd.h"
-#include "vp9/common/vp9_filter.h"
 #include "vp9/common/vp9_reconinter.h"
 #include "vp9/common/vp9_reconintra.h"
 
index 7f63f5a3006e7d70f74c9f78a3917ec6a4570a41..9bc62900a7088d4b4f5933993b9c00bd268ec057 100644 (file)
 #ifndef VP9_COMMON_VP9_RECONINTER_H_
 #define VP9_COMMON_VP9_RECONINTER_H_
 
-#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_filter.h"
 #include "vp9/common/vp9_onyxc_int.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_filter.h"
 
 #ifdef __cplusplus
 extern "C" {
index d87a0430bed407a4675d882435fd267410d9f41d..46f08a7bd13a57076db634da29953eeaf0646aa1 100644 (file)
@@ -11,6 +11,9 @@
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "vpx_dsp/vpx_dsp_common.h"
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
 #include "vpx_ports/vpx_once.h"
index ee99d7a5e04706c52131184df7b9c14f45583f46..883733f55e78cc7086ab480ff15f8015b742ae2f 100644 (file)
@@ -54,12 +54,6 @@ if ($opts{arch} eq "x86_64") {
   $avx2_x86_64 = 'avx2';
 }
 
-# optimizations which depend on multiple features
-$avx2_ssse3 = '';
-if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
-  $avx2_ssse3 = 'avx2';
-}
-
 #
 # post proc
 #
@@ -87,33 +81,6 @@ add_proto qw/void vp9_filter_by_weight8x8/, "const uint8_t *src, int src_stride,
 specialize qw/vp9_filter_by_weight8x8 sse2 msa/;
 }
 
-#
-# Sub Pixel Filters
-#
-add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve_copy neon dspr2 msa/, "$sse2_x86inc";
-
-add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve_avg neon dspr2 msa/, "$sse2_x86inc";
-
-add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
-
-add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
-
-add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
-
-add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2 msa/;
-
-add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/;
-
-add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/;
-
 #
 # dct
 #
index 6db8f9caa54cf2b1b036efd0f334a7013c32b3ce..8f5c72e7c6c067a80411a657764fcbd8adde3db0 100644 (file)
@@ -8,9 +8,10 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "vp9/common/vp9_filter.h"
 #include "vp9/common/vp9_scale.h"
+#include "vpx_dsp/vpx_filter.h"
 
 static INLINE int scaled_x(int val, const struct scale_factors *sf) {
   return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT);
@@ -81,85 +82,85 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
   if (sf->x_step_q4 == 16) {
     if (sf->y_step_q4 == 16) {
       // No scaling in either direction.
-      sf->predict[0][0][0] = vp9_convolve_copy;
-      sf->predict[0][0][1] = vp9_convolve_avg;
-      sf->predict[0][1][0] = vp9_convolve8_vert;
-      sf->predict[0][1][1] = vp9_convolve8_avg_vert;
-      sf->predict[1][0][0] = vp9_convolve8_horiz;
-      sf->predict[1][0][1] = vp9_convolve8_avg_horiz;
+      sf->predict[0][0][0] = vpx_convolve_copy;
+      sf->predict[0][0][1] = vpx_convolve_avg;
+      sf->predict[0][1][0] = vpx_convolve8_vert;
+      sf->predict[0][1][1] = vpx_convolve8_avg_vert;
+      sf->predict[1][0][0] = vpx_convolve8_horiz;
+      sf->predict[1][0][1] = vpx_convolve8_avg_horiz;
     } else {
       // No scaling in x direction. Must always scale in the y direction.
-      sf->predict[0][0][0] = vp9_convolve8_vert;
-      sf->predict[0][0][1] = vp9_convolve8_avg_vert;
-      sf->predict[0][1][0] = vp9_convolve8_vert;
-      sf->predict[0][1][1] = vp9_convolve8_avg_vert;
-      sf->predict[1][0][0] = vp9_convolve8;
-      sf->predict[1][0][1] = vp9_convolve8_avg;
+      sf->predict[0][0][0] = vpx_convolve8_vert;
+      sf->predict[0][0][1] = vpx_convolve8_avg_vert;
+      sf->predict[0][1][0] = vpx_convolve8_vert;
+      sf->predict[0][1][1] = vpx_convolve8_avg_vert;
+      sf->predict[1][0][0] = vpx_convolve8;
+      sf->predict[1][0][1] = vpx_convolve8_avg;
     }
   } else {
     if (sf->y_step_q4 == 16) {
       // No scaling in the y direction. Must always scale in the x direction.
-      sf->predict[0][0][0] = vp9_convolve8_horiz;
-      sf->predict[0][0][1] = vp9_convolve8_avg_horiz;
-      sf->predict[0][1][0] = vp9_convolve8;
-      sf->predict[0][1][1] = vp9_convolve8_avg;
-      sf->predict[1][0][0] = vp9_convolve8_horiz;
-      sf->predict[1][0][1] = vp9_convolve8_avg_horiz;
+      sf->predict[0][0][0] = vpx_convolve8_horiz;
+      sf->predict[0][0][1] = vpx_convolve8_avg_horiz;
+      sf->predict[0][1][0] = vpx_convolve8;
+      sf->predict[0][1][1] = vpx_convolve8_avg;
+      sf->predict[1][0][0] = vpx_convolve8_horiz;
+      sf->predict[1][0][1] = vpx_convolve8_avg_horiz;
     } else {
       // Must always scale in both directions.
-      sf->predict[0][0][0] = vp9_convolve8;
-      sf->predict[0][0][1] = vp9_convolve8_avg;
-      sf->predict[0][1][0] = vp9_convolve8;
-      sf->predict[0][1][1] = vp9_convolve8_avg;
-      sf->predict[1][0][0] = vp9_convolve8;
-      sf->predict[1][0][1] = vp9_convolve8_avg;
+      sf->predict[0][0][0] = vpx_convolve8;
+      sf->predict[0][0][1] = vpx_convolve8_avg;
+      sf->predict[0][1][0] = vpx_convolve8;
+      sf->predict[0][1][1] = vpx_convolve8_avg;
+      sf->predict[1][0][0] = vpx_convolve8;
+      sf->predict[1][0][1] = vpx_convolve8_avg;
     }
   }
   // 2D subpel motion always gets filtered in both directions
-  sf->predict[1][1][0] = vp9_convolve8;
-  sf->predict[1][1][1] = vp9_convolve8_avg;
+  sf->predict[1][1][0] = vpx_convolve8;
+  sf->predict[1][1][1] = vpx_convolve8_avg;
 #if CONFIG_VP9_HIGHBITDEPTH
   if (use_highbd) {
     if (sf->x_step_q4 == 16) {
       if (sf->y_step_q4 == 16) {
         // No scaling in either direction.
-        sf->highbd_predict[0][0][0] = vp9_highbd_convolve_copy;
-        sf->highbd_predict[0][0][1] = vp9_highbd_convolve_avg;
-        sf->highbd_predict[0][1][0] = vp9_highbd_convolve8_vert;
-        sf->highbd_predict[0][1][1] = vp9_highbd_convolve8_avg_vert;
-        sf->highbd_predict[1][0][0] = vp9_highbd_convolve8_horiz;
-        sf->highbd_predict[1][0][1] = vp9_highbd_convolve8_avg_horiz;
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve_copy;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve_avg;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz;
       } else {
         // No scaling in x direction. Must always scale in the y direction.
-        sf->highbd_predict[0][0][0] = vp9_highbd_convolve8_vert;
-        sf->highbd_predict[0][0][1] = vp9_highbd_convolve8_avg_vert;
-        sf->highbd_predict[0][1][0] = vp9_highbd_convolve8_vert;
-        sf->highbd_predict[0][1][1] = vp9_highbd_convolve8_avg_vert;
-        sf->highbd_predict[1][0][0] = vp9_highbd_convolve8;
-        sf->highbd_predict[1][0][1] = vp9_highbd_convolve8_avg;
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_vert;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_vert;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg;
       }
     } else {
       if (sf->y_step_q4 == 16) {
         // No scaling in the y direction. Must always scale in the x direction.
-        sf->highbd_predict[0][0][0] = vp9_highbd_convolve8_horiz;
-        sf->highbd_predict[0][0][1] = vp9_highbd_convolve8_avg_horiz;
-        sf->highbd_predict[0][1][0] = vp9_highbd_convolve8;
-        sf->highbd_predict[0][1][1] = vp9_highbd_convolve8_avg;
-        sf->highbd_predict[1][0][0] = vp9_highbd_convolve8_horiz;
-        sf->highbd_predict[1][0][1] = vp9_highbd_convolve8_avg_horiz;
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_horiz;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_horiz;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz;
       } else {
         // Must always scale in both directions.
-        sf->highbd_predict[0][0][0] = vp9_highbd_convolve8;
-        sf->highbd_predict[0][0][1] = vp9_highbd_convolve8_avg;
-        sf->highbd_predict[0][1][0] = vp9_highbd_convolve8;
-        sf->highbd_predict[0][1][1] = vp9_highbd_convolve8_avg;
-        sf->highbd_predict[1][0][0] = vp9_highbd_convolve8;
-        sf->highbd_predict[1][0][1] = vp9_highbd_convolve8_avg;
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg;
       }
     }
     // 2D subpel motion always gets filtered in both directions.
-    sf->highbd_predict[1][1][0] = vp9_highbd_convolve8;
-    sf->highbd_predict[1][1][1] = vp9_highbd_convolve8_avg;
+    sf->highbd_predict[1][1][0] = vpx_highbd_convolve8;
+    sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg;
   }
 #endif
 }
index a1601a72fbebbecd4730bb3dfaef6811a3be70b5..5e910410797990c6f155e6ce8a54197260c1c820 100644 (file)
@@ -12,7 +12,7 @@
 #define VP9_COMMON_VP9_SCALE_H_
 
 #include "vp9/common/vp9_mv.h"
-#include "vp9/common/vp9_convolve.h"
+#include "vpx_dsp/vpx_convolve.h"
 
 #ifdef __cplusplus
 extern "C" {
index c6d3bf19ea3331ed4c92311144b87472c4d45ae9..ecebe1efb1fc28041f8472df368f1d7e8e824d16 100644 (file)
@@ -12,6 +12,7 @@
 #include <stdlib.h>  // qsort()
 
 #include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "./vpx_scale_rtcd.h"
 
 #include "vpx_dsp/bitreader_buffer.h"
index b8629bd3bb5780265a2942a1cb34953bb055d0be..fc3eac6c7b328e5c1db764ed932c1390102e9b11 100644 (file)
@@ -8,12 +8,14 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./vpx_config.h"
 #include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_convolve.h"
 #include "vp9/common/vp9_filter.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_convolve.h"
+#include "vpx_dsp/vpx_filter.h"
 #include "vpx_ports/mem.h"
 
 static int horizontal_filter(const uint8_t *s) {
index 08134e152aa64258e4a82f3941209340a1cbf326..f1d73790a773146dc757daee4d9db97b7bf6ab65 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <assert.h>
 #include <limits.h>
+#include "./vpx_dsp_rtcd.h"
 #include "vpx_scale/yv12config.h"
 #include "vpx/vpx_integer.h"
 #include "vp9/common/vp9_reconinter.h"
@@ -336,12 +337,12 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
   }
 
   if (decision == FILTER_BLOCK) {
-    vp9_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride,
+    vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride,
                       NULL, 0, NULL, 0,
                       num_4x4_blocks_wide_lookup[bs] << 2,
                       num_4x4_blocks_high_lookup[bs] << 2);
   } else {  // COPY_BLOCK
-    vp9_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride,
+    vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride,
                       NULL, 0, NULL, 0,
                       num_4x4_blocks_wide_lookup[bs] << 2,
                       num_4x4_blocks_high_lookup[bs] << 2);
index 53e747c260078232c1e6756b563114e66a54b6d0..8aee22756b21614d62f4d0f37ff0ab6ded7cdb64 100644 (file)
 #include <stdio.h>
 #include <limits.h>
 
-#include "./vpx_config.h"
 #include "./vp9_rtcd.h"
+#include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "./vpx_scale_rtcd.h"
 #include "vpx/internal/vpx_psnr.h"
+#include "vpx_dsp/vpx_filter.h"
 #include "vpx_ports/mem.h"
 #include "vpx_ports/vpx_timer.h"
 #include "vpx_scale/vpx_scale.h"
@@ -2580,18 +2581,18 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
 
 #if CONFIG_VP9_HIGHBITDEPTH
         if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
-          vp9_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+          vpx_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
                                kernel[x_q4 & 0xf], 16 * src_w / dst_w,
                                kernel[y_q4 & 0xf], 16 * src_h / dst_h,
                                16 / factor, 16 / factor, bd);
         } else {
-          vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+          vpx_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
                         kernel[x_q4 & 0xf], 16 * src_w / dst_w,
                         kernel[y_q4 & 0xf], 16 * src_h / dst_h,
                         16 / factor, 16 / factor);
         }
 #else
-        vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+        vpx_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
                       kernel[x_q4 & 0xf], 16 * src_w / dst_w,
                       kernel[y_q4 & 0xf], 16 * src_h / dst_h,
                       16 / factor, 16 / factor);
index 34bf8a6a1544ae80e78ae82126388cd1f095651d..00f7dcdf3a0ea3c65170aa166522693663dadede 100644 (file)
@@ -1504,15 +1504,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
 #if CONFIG_VP9_HIGHBITDEPTH
         if (cm->use_highbitdepth)
-          vp9_highbd_convolve_copy(best_pred->data, best_pred->stride,
+          vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
                                    this_mode_pred->data, this_mode_pred->stride,
                                    NULL, 0, NULL, 0, bw, bh, xd->bd);
         else
-          vp9_convolve_copy(best_pred->data, best_pred->stride,
+          vpx_convolve_copy(best_pred->data, best_pred->stride,
                           this_mode_pred->data, this_mode_pred->stride,
                           NULL, 0, NULL, 0, bw, bh);
 #else
-        vp9_convolve_copy(best_pred->data, best_pred->stride,
+        vpx_convolve_copy(best_pred->data, best_pred->stride,
                           this_mode_pred->data, this_mode_pred->stride,
                           NULL, 0, NULL, 0, bw, bh);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -1577,15 +1577,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     if (best_pred->data != orig_dst.buf && is_inter_mode(mbmi->mode)) {
 #if CONFIG_VP9_HIGHBITDEPTH
       if (cm->use_highbitdepth)
-        vp9_highbd_convolve_copy(best_pred->data, best_pred->stride,
+        vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
                                  pd->dst.buf, pd->dst.stride, NULL, 0,
                                  NULL, 0, bw, bh, xd->bd);
       else
-        vp9_convolve_copy(best_pred->data, best_pred->stride,
+        vpx_convolve_copy(best_pred->data, best_pred->stride,
                           pd->dst.buf, pd->dst.stride, NULL, 0,
                           NULL, 0, bw, bh);
 #else
-      vp9_convolve_copy(best_pred->data, best_pred->stride,
+      vpx_convolve_copy(best_pred->data, best_pred->stride,
                         pd->dst.buf, pd->dst.stride, NULL, 0,
                         NULL, 0, bw, bh);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
index f46cad80491a83ba0ac77bedff05ac789c85edde..e5ae9594cf46c53905d134c4b29032ffeee25db7 100644 (file)
@@ -15,6 +15,9 @@
 #include <stdlib.h>
 #include <string.h>
 
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "vpx_dsp/vpx_dsp_common.h"
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #include "vpx_ports/mem.h"
 #include "vp9/common/vp9_common.h"
 #include "vp9/encoder/vp9_resize.h"
index 78ea63fa1fb7b1eda200eee35fb07888ee74d284..339bc8b6e28cc754fd42b7a695ec17988a8f2ba0 100644 (file)
@@ -13,14 +13,10 @@ VP9_COMMON_SRCS-yes += vp9_iface_common.h
 VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
 VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
 VP9_COMMON_SRCS-yes += common/vp9_blockd.c
-VP9_COMMON_SRCS-yes += common/vp9_convolve.c
-VP9_COMMON_SRCS-yes += common/vp9_convolve.h
 VP9_COMMON_SRCS-yes += common/vp9_debugmodes.c
 VP9_COMMON_SRCS-yes += common/vp9_entropy.c
 VP9_COMMON_SRCS-yes += common/vp9_entropymode.c
 VP9_COMMON_SRCS-yes += common/vp9_entropymv.c
-VP9_COMMON_SRCS-yes += common/vp9_filter.c
-VP9_COMMON_SRCS-yes += common/vp9_filter.h
 VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.c
 VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.h
 VP9_COMMON_SRCS-yes += common/vp9_idct.c
@@ -31,6 +27,8 @@ VP9_COMMON_SRCS-yes += common/vp9_entropy.h
 VP9_COMMON_SRCS-yes += common/vp9_entropymode.h
 VP9_COMMON_SRCS-yes += common/vp9_entropymv.h
 VP9_COMMON_SRCS-yes += common/vp9_enums.h
+VP9_COMMON_SRCS-yes += common/vp9_filter.h
+VP9_COMMON_SRCS-yes += common/vp9_filter.c
 VP9_COMMON_SRCS-yes += common/vp9_idct.h
 VP9_COMMON_SRCS-yes += common/vp9_loopfilter.h
 VP9_COMMON_SRCS-yes += common/vp9_thread_common.h
@@ -64,33 +62,16 @@ VP9_COMMON_SRCS-yes += common/vp9_common_data.h
 VP9_COMMON_SRCS-yes += common/vp9_scan.c
 VP9_COMMON_SRCS-yes += common/vp9_scan.h
 
-VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/convolve.h
-VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
 VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h
 VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
 VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
 VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
-VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_bilinear_ssse3.asm
-VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_subpixel_8t_intrin_avx2.c
-VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_intrin_ssse3.c
 ifeq ($(CONFIG_VP9_POSTPROC),yes)
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_mfqe_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
 endif
 
-ifeq ($(CONFIG_USE_X86INC),yes)
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm
-endif
-
-ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_8t_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_bilinear_sse2.asm
-endif
-
 # common (c)
 VP9_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/vp9_common_dspr2.h
 VP9_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/vp9_convolve2_avg_dspr2.c
@@ -113,15 +94,6 @@ VP9_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/vp9_itrans32_dspr2.c
 endif
 
 # common (msa)
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_horiz_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_vert_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_horiz_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_vert_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_avg_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_copy_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_msa.h
 VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
 VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
 VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
@@ -151,11 +123,6 @@ endif
 # neon with assembly and intrinsics implementations. If both are available
 # prefer assembly.
 ifeq ($(HAVE_NEON_ASM), yes)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_avg_neon_asm$(ASM)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_neon_asm$(ASM)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_avg_neon_asm$(ASM)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_copy_neon_asm$(ASM)
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon_asm$(ASM)
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_add_neon_asm$(ASM)
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_neon.c
@@ -167,11 +134,6 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon_asm$(ASM)
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon_asm$(ASM)
 else
 ifeq ($(HAVE_NEON), yes)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_avg_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_avg_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_copy_neon.c
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon.c
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_add_neon.c
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_neon.c
diff --git a/vpx_dsp/arm/vpx_convolve8_avg_neon.c b/vpx_dsp/arm/vpx_convolve8_avg_neon.c
new file mode 100644 (file)
index 0000000..5464250
--- /dev/null
@@ -0,0 +1,393 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+                               uint8_t *dst, ptrdiff_t dst_stride,
+                               const int16_t *filter_x, int x_step_q4,
+                               const int16_t *filter_y, int y_step_q4,
+                               int w, int h);
+void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+                              uint8_t *dst, ptrdiff_t dst_stride,
+                              const int16_t *filter_x, int x_step_q4,
+                              const int16_t *filter_y, int y_step_q4,
+                              int w, int h);
+
+static INLINE int32x4_t MULTIPLY_BY_Q0(
+    int16x4_t dsrc0,
+    int16x4_t dsrc1,
+    int16x4_t dsrc2,
+    int16x4_t dsrc3,
+    int16x4_t dsrc4,
+    int16x4_t dsrc5,
+    int16x4_t dsrc6,
+    int16x4_t dsrc7,
+    int16x8_t q0s16) {
+  int32x4_t qdst;
+  int16x4_t d0s16, d1s16;
+
+  d0s16 = vget_low_s16(q0s16);
+  d1s16 = vget_high_s16(q0s16);
+
+  qdst = vmull_lane_s16(dsrc0, d0s16, 0);
+  qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1);
+  qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2);
+  qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3);
+  qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0);
+  qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1);
+  qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2);
+  qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3);
+  return qdst;
+}
+
+void vpx_convolve8_avg_horiz_neon(
+    const uint8_t *src,
+    ptrdiff_t src_stride,
+    uint8_t *dst,
+    ptrdiff_t dst_stride,
+    const int16_t *filter_x,
+    int x_step_q4,
+    const int16_t *filter_y,  // unused
+    int y_step_q4,            // unused
+    int w,
+    int h) {
+  int width;
+  const uint8_t *s;
+  uint8_t *d;
+  uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8;
+  uint32x2_t d2u32, d3u32, d6u32, d7u32, d28u32, d29u32, d30u32, d31u32;
+  uint8x16_t q1u8, q3u8, q12u8, q13u8, q14u8, q15u8;
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16;
+  uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16;
+  int16x8_t q0s16;
+  uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
+  int32x4_t q1s32, q2s32, q14s32, q15s32;
+  uint16x8x2_t q0x2u16;
+  uint8x8x2_t d0x2u8, d1x2u8;
+  uint32x2x2_t d0x2u32;
+  uint16x4x2_t d0x2u16, d1x2u16;
+  uint32x4x2_t q0x2u32;
+
+  if (x_step_q4 != 16) {
+    vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+                              filter_x, x_step_q4,
+                              filter_y, y_step_q4, w, h);
+    return;
+  }
+
+  q0s16 = vld1q_s16(filter_x);
+
+  src -= 3;  // adjust for taps
+  for (; h > 0; h -= 4) {  // loop_horiz_v
+    s = src;
+    d24u8 = vld1_u8(s);
+    s += src_stride;
+    d25u8 = vld1_u8(s);
+    s += src_stride;
+    d26u8 = vld1_u8(s);
+    s += src_stride;
+    d27u8 = vld1_u8(s);
+
+    q12u8 = vcombine_u8(d24u8, d25u8);
+    q13u8 = vcombine_u8(d26u8, d27u8);
+
+    q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8),
+                        vreinterpretq_u16_u8(q13u8));
+    d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0]));
+    d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0]));
+    d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1]));
+    d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1]));
+    d0x2u8 = vtrn_u8(d24u8, d25u8);
+    d1x2u8 = vtrn_u8(d26u8, d27u8);
+
+    __builtin_prefetch(src + src_stride * 4);
+    __builtin_prefetch(src + src_stride * 5);
+
+    q8u16 = vmovl_u8(d0x2u8.val[0]);
+    q9u16 = vmovl_u8(d0x2u8.val[1]);
+    q10u16 = vmovl_u8(d1x2u8.val[0]);
+    q11u16 = vmovl_u8(d1x2u8.val[1]);
+
+    src += 7;
+    d16u16 = vget_low_u16(q8u16);
+    d17u16 = vget_high_u16(q8u16);
+    d18u16 = vget_low_u16(q9u16);
+    d19u16 = vget_high_u16(q9u16);
+    q8u16 = vcombine_u16(d16u16, d18u16);  // vswp 17 18
+    q9u16 = vcombine_u16(d17u16, d19u16);
+
+    d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
+    d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));  // vmov 23 21
+    for (width = w;
+         width > 0;
+         width -= 4, src += 4, dst += 4) {  // loop_horiz
+      s = src;
+      d28u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d29u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d31u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d30u32 = vld1_dup_u32((const uint32_t *)s);
+
+      __builtin_prefetch(src + 64);
+
+      d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32),
+                         vreinterpret_u16_u32(d31u32));
+      d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32),
+                         vreinterpret_u16_u32(d30u32));
+      d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]),   // d28
+                       vreinterpret_u8_u16(d1x2u16.val[0]));  // d29
+      d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]),   // d31
+                       vreinterpret_u8_u16(d1x2u16.val[1]));  // d30
+
+      __builtin_prefetch(src + 64 + src_stride);
+
+      q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
+      q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]);
+      q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8),
+                          vreinterpretq_u32_u8(q15u8));
+
+      d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0]));
+      d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0]));
+      q12u16 = vmovl_u8(d28u8);
+      q13u16 = vmovl_u8(d29u8);
+
+      __builtin_prefetch(src + 64 + src_stride * 2);
+
+      d = dst;
+      d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0);
+      d += dst_stride;
+      d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0);
+      d += dst_stride;
+      d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1);
+      d += dst_stride;
+      d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1);
+
+      d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
+      d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
+      d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
+      d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
+      d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+      d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+      d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+      d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+      d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+
+      q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16,
+                              d18s16, d19s16, d23s16, d24s16, q0s16);
+      q2s32  = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16,
+                              d19s16, d23s16, d24s16, d26s16, q0s16);
+      q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16,
+                              d23s16, d24s16, d26s16, d27s16, q0s16);
+      q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16,
+                              d24s16, d26s16, d27s16, d25s16, q0s16);
+
+      __builtin_prefetch(src + 64 + src_stride * 3);
+
+      d2u16 = vqrshrun_n_s32(q1s32, 7);
+      d3u16 = vqrshrun_n_s32(q2s32, 7);
+      d4u16 = vqrshrun_n_s32(q14s32, 7);
+      d5u16 = vqrshrun_n_s32(q15s32, 7);
+
+      q1u16 = vcombine_u16(d2u16, d3u16);
+      q2u16 = vcombine_u16(d4u16, d5u16);
+
+      d2u8 = vqmovn_u16(q1u16);
+      d3u8 = vqmovn_u16(q2u16);
+
+      d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8),
+                         vreinterpret_u16_u8(d3u8));
+      d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]),
+                         vreinterpret_u32_u16(d0x2u16.val[1]));
+      d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]),
+                       vreinterpret_u8_u32(d0x2u32.val[1]));
+
+      q1u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
+      q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32));
+
+      q1u8 = vrhaddq_u8(q1u8, q3u8);
+
+      d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8));
+      d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8));
+
+      d = dst;
+      vst1_lane_u32((uint32_t *)d, d2u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d2u32, 1);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 1);
+
+      q8u16 = q9u16;
+      d20s16 = d23s16;
+      q11u16 = q12u16;
+      q9u16 = q13u16;
+      d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+    }
+    src += src_stride * 4 - w - 7;
+    dst += dst_stride * 4 - w;
+  }
+  return;
+}
+
+void vpx_convolve8_avg_vert_neon(
+    const uint8_t *src,
+    ptrdiff_t src_stride,
+    uint8_t *dst,
+    ptrdiff_t dst_stride,
+    const int16_t *filter_x,  // unused
+    int x_step_q4,            // unused
+    const int16_t *filter_y,
+    int y_step_q4,
+    int w,
+    int h) {
+  int height;
+  const uint8_t *s;
+  uint8_t *d;
+  uint8x8_t d2u8, d3u8;
+  uint32x2_t d2u32, d3u32, d6u32, d7u32;
+  uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32;
+  uint8x16_t q1u8, q3u8;
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16;
+  uint16x4_t d2u16, d3u16, d4u16, d5u16;
+  int16x8_t q0s16;
+  uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
+  int32x4_t q1s32, q2s32, q14s32, q15s32;
+
+  if (y_step_q4 != 16) {
+    vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+                             filter_x, x_step_q4,
+                             filter_y, y_step_q4, w, h);
+    return;
+  }
+
+  src -= src_stride * 3;
+  q0s16 = vld1q_s16(filter_y);
+  for (; w > 0; w -= 4, src += 4, dst += 4) {  // loop_vert_h
+    s = src;
+    d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0);
+    s += src_stride;
+    d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1);
+    s += src_stride;
+    d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0);
+    s += src_stride;
+    d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1);
+    s += src_stride;
+    d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0);
+    s += src_stride;
+    d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1);
+    s += src_stride;
+    d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0);
+    s += src_stride;
+
+    q8u16  = vmovl_u8(vreinterpret_u8_u32(d16u32));
+    q9u16  = vmovl_u8(vreinterpret_u8_u32(d18u32));
+    q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32));
+    q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32));
+
+    d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
+    d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
+    d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+    d = dst;
+    for (height = h; height > 0; height -= 4) {  // loop_vert
+      d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0);
+      s += src_stride;
+      d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0);
+      s += src_stride;
+      d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1);
+      s += src_stride;
+      d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1);
+      s += src_stride;
+
+      q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32));
+      q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32));
+
+      d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0);
+      d += dst_stride;
+      d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1);
+      d += dst_stride;
+      d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0);
+      d += dst_stride;
+      d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1);
+      d -= dst_stride * 3;
+
+      d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
+      d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
+      d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
+      d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));
+      d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+      d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+      d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+      d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+
+      __builtin_prefetch(s);
+      __builtin_prefetch(s + src_stride);
+      q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16,
+                              d20s16, d21s16, d22s16, d24s16, q0s16);
+      __builtin_prefetch(s + src_stride * 2);
+      __builtin_prefetch(s + src_stride * 3);
+      q2s32  = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16,
+                              d21s16, d22s16, d24s16, d26s16, q0s16);
+      __builtin_prefetch(d);
+      __builtin_prefetch(d + dst_stride);
+      q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16,
+                              d22s16, d24s16, d26s16, d27s16, q0s16);
+      __builtin_prefetch(d + dst_stride * 2);
+      __builtin_prefetch(d + dst_stride * 3);
+      q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16,
+                              d24s16, d26s16, d27s16, d25s16, q0s16);
+
+      d2u16 = vqrshrun_n_s32(q1s32, 7);
+      d3u16 = vqrshrun_n_s32(q2s32, 7);
+      d4u16 = vqrshrun_n_s32(q14s32, 7);
+      d5u16 = vqrshrun_n_s32(q15s32, 7);
+
+      q1u16 = vcombine_u16(d2u16, d3u16);
+      q2u16 = vcombine_u16(d4u16, d5u16);
+
+      d2u8 = vqmovn_u16(q1u16);
+      d3u8 = vqmovn_u16(q2u16);
+
+      q1u8 = vcombine_u8(d2u8, d3u8);
+      q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32));
+
+      q1u8 = vrhaddq_u8(q1u8, q3u8);
+
+      d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8));
+      d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8));
+
+      vst1_lane_u32((uint32_t *)d, d2u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d2u32, 1);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 1);
+      d += dst_stride;
+
+      q8u16 = q10u16;
+      d18s16 = d22s16;
+      d19s16 = d24s16;
+      q10u16 = q13u16;
+      d22s16 = d25s16;
+    }
+  }
+  return;
+}
similarity index 92%
rename from vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm
rename to vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm
index 4d85846f0a9cbf31d567ba7ac754f08455bb9598..a19f97db7af2504e9f8ce7c03011b9c8e5d6998e 100644 (file)
     ; VP9_FILTER_WEIGHT == 128
     ; VP9_FILTER_SHIFT == 7
 
-    EXPORT  |vp9_convolve8_avg_horiz_neon|
-    EXPORT  |vp9_convolve8_avg_vert_neon|
-    IMPORT  |vp9_convolve8_avg_horiz_c|
-    IMPORT  |vp9_convolve8_avg_vert_c|
+    EXPORT  |vpx_convolve8_avg_horiz_neon|
+    EXPORT  |vpx_convolve8_avg_vert_neon|
+    IMPORT  |vpx_convolve8_avg_horiz_c|
+    IMPORT  |vpx_convolve8_avg_vert_c|
     ARM
     REQUIRE8
     PRESERVE8
 ; sp[]int w
 ; sp[]int h
 
-|vp9_convolve8_avg_horiz_neon| PROC
+|vpx_convolve8_avg_horiz_neon| PROC
     ldr             r12, [sp, #4]           ; x_step_q4
     cmp             r12, #16
-    bne             vp9_convolve8_avg_horiz_c
+    bne             vpx_convolve8_avg_horiz_c
 
     push            {r4-r10, lr}
 
@@ -78,7 +78,7 @@
 
     mov             r10, r6                 ; w loop counter
 
-vp9_convolve8_avg_loop_horiz_v
+vpx_convolve8_avg_loop_horiz_v
     vld1.8          {d24}, [r0], r1
     vld1.8          {d25}, [r0], r1
     vld1.8          {d26}, [r0], r1
@@ -101,7 +101,7 @@ vp9_convolve8_avg_loop_horiz_v
 
     add             r0, r0, #3
 
-vp9_convolve8_avg_loop_horiz
+vpx_convolve8_avg_loop_horiz
     add             r5, r0, #64
 
     vld1.32         {d28[]}, [r0], r1
@@ -170,23 +170,23 @@ vp9_convolve8_avg_loop_horiz
     vmov            q9,  q13
 
     subs            r6, r6, #4              ; w -= 4
-    bgt             vp9_convolve8_avg_loop_horiz
+    bgt             vpx_convolve8_avg_loop_horiz
 
     ; outer loop
     mov             r6, r10                 ; restore w counter
     add             r0, r0, r9              ; src += src_stride * 4 - w
     add             r2, r2, r12             ; dst += dst_stride * 4 - w
     subs            r7, r7, #4              ; h -= 4
-    bgt vp9_convolve8_avg_loop_horiz_v
+    bgt vpx_convolve8_avg_loop_horiz_v
 
     pop             {r4-r10, pc}
 
     ENDP
 
-|vp9_convolve8_avg_vert_neon| PROC
+|vpx_convolve8_avg_vert_neon| PROC
     ldr             r12, [sp, #12]
     cmp             r12, #16
-    bne             vp9_convolve8_avg_vert_c
+    bne             vpx_convolve8_avg_vert_c
 
     push            {r4-r8, lr}
 
@@ -203,7 +203,7 @@ vp9_convolve8_avg_loop_horiz
     lsl             r1, r1, #1
     lsl             r3, r3, #1
 
-vp9_convolve8_avg_loop_vert_h
+vpx_convolve8_avg_loop_vert_h
     mov             r4, r0
     add             r7, r0, r1, asr #1
     mov             r5, r2
@@ -223,7 +223,7 @@ vp9_convolve8_avg_loop_vert_h
     vmovl.u8        q10, d20
     vmovl.u8        q11, d22
 
-vp9_convolve8_avg_loop_vert
+vpx_convolve8_avg_loop_vert
     ; always process a 4x4 block at a time
     vld1.u32        {d24[0]}, [r7], r1
     vld1.u32        {d26[0]}, [r4], r1
@@ -288,13 +288,13 @@ vp9_convolve8_avg_loop_vert
     vmov            d22, d25
 
     subs            r12, r12, #4            ; h -= 4
-    bgt             vp9_convolve8_avg_loop_vert
+    bgt             vpx_convolve8_avg_loop_vert
 
     ; outer loop
     add             r0, r0, #4
     add             r2, r2, #4
     subs            r6, r6, #4              ; w -= 4
-    bgt             vp9_convolve8_avg_loop_vert_h
+    bgt             vpx_convolve8_avg_loop_vert_h
 
     pop             {r4-r8, pc}
 
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.c b/vpx_dsp/arm/vpx_convolve8_neon.c
new file mode 100644 (file)
index 0000000..6f634b3
--- /dev/null
@@ -0,0 +1,360 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+                           uint8_t *dst, ptrdiff_t dst_stride,
+                           const int16_t *filter_x, int x_step_q4,
+                           const int16_t *filter_y, int y_step_q4,
+                           int w, int h);
+void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+                          uint8_t *dst, ptrdiff_t dst_stride,
+                          const int16_t *filter_x, int x_step_q4,
+                          const int16_t *filter_y, int y_step_q4,
+                          int w, int h);
+
+static INLINE int32x4_t MULTIPLY_BY_Q0(
+    int16x4_t dsrc0,
+    int16x4_t dsrc1,
+    int16x4_t dsrc2,
+    int16x4_t dsrc3,
+    int16x4_t dsrc4,
+    int16x4_t dsrc5,
+    int16x4_t dsrc6,
+    int16x4_t dsrc7,
+    int16x8_t q0s16) {
+  int32x4_t qdst;
+  int16x4_t d0s16, d1s16;
+
+  d0s16 = vget_low_s16(q0s16);
+  d1s16 = vget_high_s16(q0s16);
+
+  qdst = vmull_lane_s16(dsrc0, d0s16, 0);
+  qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1);
+  qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2);
+  qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3);
+  qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0);
+  qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1);
+  qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2);
+  qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3);
+  return qdst;
+}
+
+void vpx_convolve8_horiz_neon(
+    const uint8_t *src,
+    ptrdiff_t src_stride,
+    uint8_t *dst,
+    ptrdiff_t dst_stride,
+    const int16_t *filter_x,
+    int x_step_q4,
+    const int16_t *filter_y,  // unused
+    int y_step_q4,            // unused
+    int w,
+    int h) {
+  int width;
+  const uint8_t *s, *psrc;
+  uint8_t *d, *pdst;
+  uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8;
+  uint32x2_t d2u32, d3u32, d28u32, d29u32, d30u32, d31u32;
+  uint8x16_t q12u8, q13u8, q14u8, q15u8;
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16;
+  uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16;
+  int16x8_t q0s16;
+  uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
+  int32x4_t q1s32, q2s32, q14s32, q15s32;
+  uint16x8x2_t q0x2u16;
+  uint8x8x2_t d0x2u8, d1x2u8;
+  uint32x2x2_t d0x2u32;
+  uint16x4x2_t d0x2u16, d1x2u16;
+  uint32x4x2_t q0x2u32;
+
+  if (x_step_q4 != 16) {
+    vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+                          filter_x, x_step_q4,
+                          filter_y, y_step_q4, w, h);
+    return;
+  }
+
+  q0s16 = vld1q_s16(filter_x);
+
+  src -= 3;  // adjust for taps
+  for (; h > 0; h -= 4,
+    src += src_stride * 4,
+    dst += dst_stride * 4) {  // loop_horiz_v
+    s = src;
+    d24u8 = vld1_u8(s);
+    s += src_stride;
+    d25u8 = vld1_u8(s);
+    s += src_stride;
+    d26u8 = vld1_u8(s);
+    s += src_stride;
+    d27u8 = vld1_u8(s);
+
+    q12u8 = vcombine_u8(d24u8, d25u8);
+    q13u8 = vcombine_u8(d26u8, d27u8);
+
+    q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8),
+                        vreinterpretq_u16_u8(q13u8));
+    d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0]));
+    d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0]));
+    d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1]));
+    d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1]));
+    d0x2u8 = vtrn_u8(d24u8, d25u8);
+    d1x2u8 = vtrn_u8(d26u8, d27u8);
+
+    __builtin_prefetch(src + src_stride * 4);
+    __builtin_prefetch(src + src_stride * 5);
+    __builtin_prefetch(src + src_stride * 6);
+
+    q8u16  = vmovl_u8(d0x2u8.val[0]);
+    q9u16  = vmovl_u8(d0x2u8.val[1]);
+    q10u16 = vmovl_u8(d1x2u8.val[0]);
+    q11u16 = vmovl_u8(d1x2u8.val[1]);
+
+    d16u16 = vget_low_u16(q8u16);
+    d17u16 = vget_high_u16(q8u16);
+    d18u16 = vget_low_u16(q9u16);
+    d19u16 = vget_high_u16(q9u16);
+    q8u16 = vcombine_u16(d16u16, d18u16);  // vswp 17 18
+    q9u16 = vcombine_u16(d17u16, d19u16);
+
+    d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
+    d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));  // vmov 23 21
+    for (width = w, psrc = src + 7, pdst = dst;
+         width > 0;
+         width -= 4, psrc += 4, pdst += 4) {  // loop_horiz
+      s = psrc;
+      d28u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d29u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d31u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d30u32 = vld1_dup_u32((const uint32_t *)s);
+
+      __builtin_prefetch(psrc + 64);
+
+      d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32),
+                         vreinterpret_u16_u32(d31u32));
+      d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32),
+                         vreinterpret_u16_u32(d30u32));
+      d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]),   // d28
+                       vreinterpret_u8_u16(d1x2u16.val[0]));  // d29
+      d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]),   // d31
+                       vreinterpret_u8_u16(d1x2u16.val[1]));  // d30
+
+      __builtin_prefetch(psrc + 64 + src_stride);
+
+      q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
+      q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]);
+      q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8),
+                          vreinterpretq_u32_u8(q15u8));
+
+      d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0]));
+      d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0]));
+      q12u16 = vmovl_u8(d28u8);
+      q13u16 = vmovl_u8(d29u8);
+
+      __builtin_prefetch(psrc + 64 + src_stride * 2);
+
+      d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
+      d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
+      d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
+      d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
+      d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+      d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+      d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+      d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+      d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+
+      q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16,
+                              d18s16, d19s16, d23s16, d24s16, q0s16);
+      q2s32  = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16,
+                              d19s16, d23s16, d24s16, d26s16, q0s16);
+      q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16,
+                              d23s16, d24s16, d26s16, d27s16, q0s16);
+      q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16,
+                              d24s16, d26s16, d27s16, d25s16, q0s16);
+
+      __builtin_prefetch(psrc + 60 + src_stride * 3);
+
+      d2u16 = vqrshrun_n_s32(q1s32, 7);
+      d3u16 = vqrshrun_n_s32(q2s32, 7);
+      d4u16 = vqrshrun_n_s32(q14s32, 7);
+      d5u16 = vqrshrun_n_s32(q15s32, 7);
+
+      q1u16 = vcombine_u16(d2u16, d3u16);
+      q2u16 = vcombine_u16(d4u16, d5u16);
+
+      d2u8 = vqmovn_u16(q1u16);
+      d3u8 = vqmovn_u16(q2u16);
+
+      d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8),
+                         vreinterpret_u16_u8(d3u8));
+      d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]),
+                         vreinterpret_u32_u16(d0x2u16.val[1]));
+      d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]),
+                       vreinterpret_u8_u32(d0x2u32.val[1]));
+
+      d2u32 = vreinterpret_u32_u8(d0x2u8.val[0]);
+      d3u32 = vreinterpret_u32_u8(d0x2u8.val[1]);
+
+      d = pdst;
+      vst1_lane_u32((uint32_t *)d, d2u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d2u32, 1);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 1);
+
+      q8u16 = q9u16;
+      d20s16 = d23s16;
+      q11u16 = q12u16;
+      q9u16 = q13u16;
+      d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+    }
+  }
+  return;
+}
+
+void vpx_convolve8_vert_neon(
+    const uint8_t *src,
+    ptrdiff_t src_stride,
+    uint8_t *dst,
+    ptrdiff_t dst_stride,
+    const int16_t *filter_x,  // unused
+    int x_step_q4,            // unused
+    const int16_t *filter_y,
+    int y_step_q4,
+    int w,
+    int h) {
+  int height;
+  const uint8_t *s;
+  uint8_t *d;
+  uint32x2_t d2u32, d3u32;
+  uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32;
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16;
+  uint16x4_t d2u16, d3u16, d4u16, d5u16;
+  int16x8_t q0s16;
+  uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
+  int32x4_t q1s32, q2s32, q14s32, q15s32;
+
+  if (y_step_q4 != 16) {
+    vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
+                         filter_x, x_step_q4,
+                         filter_y, y_step_q4, w, h);
+    return;
+  }
+
+  src -= src_stride * 3;
+  q0s16 = vld1q_s16(filter_y);
+  for (; w > 0; w -= 4, src += 4, dst += 4) {  // loop_vert_h
+    s = src;
+    d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0);
+    s += src_stride;
+    d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1);
+    s += src_stride;
+    d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0);
+    s += src_stride;
+    d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1);
+    s += src_stride;
+    d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0);
+    s += src_stride;
+    d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1);
+    s += src_stride;
+    d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0);
+    s += src_stride;
+
+    q8u16  = vmovl_u8(vreinterpret_u8_u32(d16u32));
+    q9u16  = vmovl_u8(vreinterpret_u8_u32(d18u32));
+    q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32));
+    q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32));
+
+    d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
+    d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
+    d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+    d = dst;
+    for (height = h; height > 0; height -= 4) {  // loop_vert
+      d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0);
+      s += src_stride;
+      d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0);
+      s += src_stride;
+      d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1);
+      s += src_stride;
+      d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1);
+      s += src_stride;
+
+      q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32));
+      q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32));
+
+      d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
+      d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
+      d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
+      d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));
+      d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+      d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+      d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+      d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+
+      __builtin_prefetch(d);
+      __builtin_prefetch(d + dst_stride);
+      q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16,
+                              d20s16, d21s16, d22s16, d24s16, q0s16);
+      __builtin_prefetch(d + dst_stride * 2);
+      __builtin_prefetch(d + dst_stride * 3);
+      q2s32  = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16,
+                              d21s16, d22s16, d24s16, d26s16, q0s16);
+      __builtin_prefetch(s);
+      __builtin_prefetch(s + src_stride);
+      q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16,
+                              d22s16, d24s16, d26s16, d27s16, q0s16);
+      __builtin_prefetch(s + src_stride * 2);
+      __builtin_prefetch(s + src_stride * 3);
+      q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16,
+                              d24s16, d26s16, d27s16, d25s16, q0s16);
+
+      d2u16 = vqrshrun_n_s32(q1s32, 7);
+      d3u16 = vqrshrun_n_s32(q2s32, 7);
+      d4u16 = vqrshrun_n_s32(q14s32, 7);
+      d5u16 = vqrshrun_n_s32(q15s32, 7);
+
+      q1u16 = vcombine_u16(d2u16, d3u16);
+      q2u16 = vcombine_u16(d4u16, d5u16);
+
+      d2u32 = vreinterpret_u32_u8(vqmovn_u16(q1u16));
+      d3u32 = vreinterpret_u32_u8(vqmovn_u16(q2u16));
+
+      vst1_lane_u32((uint32_t *)d, d2u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d2u32, 1);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 1);
+      d += dst_stride;
+
+      q8u16 = q10u16;
+      d18s16 = d22s16;
+      d19s16 = d24s16;
+      q10u16 = q13u16;
+      d22s16 = d25s16;
+    }
+  }
+  return;
+}
similarity index 92%
rename from vp9/common/arm/neon/vp9_convolve8_neon_asm.asm
rename to vpx_dsp/arm/vpx_convolve8_neon_asm.asm
index 184c3ad679cff98c956ce25c12b191a9f0f5b43e..bc530de41a673d50662eb127da367b35bc1aeb23 100644 (file)
     ; VP9_FILTER_WEIGHT == 128
     ; VP9_FILTER_SHIFT == 7
 
-    EXPORT  |vp9_convolve8_horiz_neon|
-    EXPORT  |vp9_convolve8_vert_neon|
-    IMPORT  |vp9_convolve8_horiz_c|
-    IMPORT  |vp9_convolve8_vert_c|
+    EXPORT  |vpx_convolve8_horiz_neon|
+    EXPORT  |vpx_convolve8_vert_neon|
+    IMPORT  |vpx_convolve8_horiz_c|
+    IMPORT  |vpx_convolve8_vert_c|
     ARM
     REQUIRE8
     PRESERVE8
 ; sp[]int w
 ; sp[]int h
 
-|vp9_convolve8_horiz_neon| PROC
+|vpx_convolve8_horiz_neon| PROC
     ldr             r12, [sp, #4]           ; x_step_q4
     cmp             r12, #16
-    bne             vp9_convolve8_horiz_c
+    bne             vpx_convolve8_horiz_c
 
     push            {r4-r10, lr}
 
@@ -78,7 +78,7 @@
 
     mov             r10, r6                 ; w loop counter
 
-vp9_convolve8_loop_horiz_v
+vpx_convolve8_loop_horiz_v
     vld1.8          {d24}, [r0], r1
     vld1.8          {d25}, [r0], r1
     vld1.8          {d26}, [r0], r1
@@ -101,7 +101,7 @@ vp9_convolve8_loop_horiz_v
 
     add             r0, r0, #3
 
-vp9_convolve8_loop_horiz
+vpx_convolve8_loop_horiz
     add             r5, r0, #64
 
     vld1.32         {d28[]}, [r0], r1
@@ -159,23 +159,23 @@ vp9_convolve8_loop_horiz
     vmov            q9,  q13
 
     subs            r6, r6, #4              ; w -= 4
-    bgt             vp9_convolve8_loop_horiz
+    bgt             vpx_convolve8_loop_horiz
 
     ; outer loop
     mov             r6, r10                 ; restore w counter
     add             r0, r0, r9              ; src += src_stride * 4 - w
     add             r2, r2, r12             ; dst += dst_stride * 4 - w
     subs            r7, r7, #4              ; h -= 4
-    bgt vp9_convolve8_loop_horiz_v
+    bgt vpx_convolve8_loop_horiz_v
 
     pop             {r4-r10, pc}
 
     ENDP
 
-|vp9_convolve8_vert_neon| PROC
+|vpx_convolve8_vert_neon| PROC
     ldr             r12, [sp, #12]
     cmp             r12, #16
-    bne             vp9_convolve8_vert_c
+    bne             vpx_convolve8_vert_c
 
     push            {r4-r8, lr}
 
@@ -192,7 +192,7 @@ vp9_convolve8_loop_horiz
     lsl             r1, r1, #1
     lsl             r3, r3, #1
 
-vp9_convolve8_loop_vert_h
+vpx_convolve8_loop_vert_h
     mov             r4, r0
     add             r7, r0, r1, asr #1
     mov             r5, r2
@@ -212,7 +212,7 @@ vp9_convolve8_loop_vert_h
     vmovl.u8        q10, d20
     vmovl.u8        q11, d22
 
-vp9_convolve8_loop_vert
+vpx_convolve8_loop_vert
     ; always process a 4x4 block at a time
     vld1.u32        {d24[0]}, [r7], r1
     vld1.u32        {d26[0]}, [r4], r1
@@ -266,13 +266,13 @@ vp9_convolve8_loop_vert
     vmov            d22, d25
 
     subs            r12, r12, #4            ; h -= 4
-    bgt             vp9_convolve8_loop_vert
+    bgt             vpx_convolve8_loop_vert
 
     ; outer loop
     add             r0, r0, #4
     add             r2, r2, #4
     subs            r6, r6, #4              ; w -= 4
-    bgt             vp9_convolve8_loop_vert_h
+    bgt             vpx_convolve8_loop_vert_h
 
     pop             {r4-r8, pc}
 
diff --git a/vpx_dsp/arm/vpx_convolve_avg_neon.c b/vpx_dsp/arm/vpx_convolve_avg_neon.c
new file mode 100644 (file)
index 0000000..dc58a33
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+void vpx_convolve_avg_neon(
+    const uint8_t *src,    // r0
+    ptrdiff_t src_stride,  // r1
+    uint8_t *dst,          // r2
+    ptrdiff_t dst_stride,  // r3
+    const int16_t *filter_x,
+    int filter_x_stride,
+    const int16_t *filter_y,
+    int filter_y_stride,
+    int w,
+    int h) {
+  uint8_t *d;
+  uint8x8_t d0u8, d1u8, d2u8, d3u8;
+  uint32x2_t d0u32, d2u32;
+  uint8x16_t q0u8, q1u8, q2u8, q3u8, q8u8, q9u8, q10u8, q11u8;
+  (void)filter_x;  (void)filter_x_stride;
+  (void)filter_y;  (void)filter_y_stride;
+
+  d = dst;
+  if (w > 32) {  // avg64
+    for (; h > 0; h -= 1) {
+      q0u8  = vld1q_u8(src);
+      q1u8  = vld1q_u8(src + 16);
+      q2u8  = vld1q_u8(src + 32);
+      q3u8  = vld1q_u8(src + 48);
+      src += src_stride;
+      q8u8  = vld1q_u8(d);
+      q9u8  = vld1q_u8(d + 16);
+      q10u8 = vld1q_u8(d + 32);
+      q11u8 = vld1q_u8(d + 48);
+      d += dst_stride;
+
+      q0u8 = vrhaddq_u8(q0u8, q8u8);
+      q1u8 = vrhaddq_u8(q1u8, q9u8);
+      q2u8 = vrhaddq_u8(q2u8, q10u8);
+      q3u8 = vrhaddq_u8(q3u8, q11u8);
+
+      vst1q_u8(dst, q0u8);
+      vst1q_u8(dst + 16, q1u8);
+      vst1q_u8(dst + 32, q2u8);
+      vst1q_u8(dst + 48, q3u8);
+      dst += dst_stride;
+    }
+  } else if (w == 32) {  // avg32
+    for (; h > 0; h -= 2) {
+      q0u8 = vld1q_u8(src);
+      q1u8 = vld1q_u8(src + 16);
+      src += src_stride;
+      q2u8 = vld1q_u8(src);
+      q3u8 = vld1q_u8(src + 16);
+      src += src_stride;
+      q8u8 = vld1q_u8(d);
+      q9u8 = vld1q_u8(d + 16);
+      d += dst_stride;
+      q10u8 = vld1q_u8(d);
+      q11u8 = vld1q_u8(d + 16);
+      d += dst_stride;
+
+      q0u8 = vrhaddq_u8(q0u8, q8u8);
+      q1u8 = vrhaddq_u8(q1u8, q9u8);
+      q2u8 = vrhaddq_u8(q2u8, q10u8);
+      q3u8 = vrhaddq_u8(q3u8, q11u8);
+
+      vst1q_u8(dst, q0u8);
+      vst1q_u8(dst + 16, q1u8);
+      dst += dst_stride;
+      vst1q_u8(dst, q2u8);
+      vst1q_u8(dst + 16, q3u8);
+      dst += dst_stride;
+    }
+  } else if (w > 8) {  // avg16
+    for (; h > 0; h -= 2) {
+      q0u8 = vld1q_u8(src);
+      src += src_stride;
+      q1u8 = vld1q_u8(src);
+      src += src_stride;
+      q2u8 = vld1q_u8(d);
+      d += dst_stride;
+      q3u8 = vld1q_u8(d);
+      d += dst_stride;
+
+      q0u8 = vrhaddq_u8(q0u8, q2u8);
+      q1u8 = vrhaddq_u8(q1u8, q3u8);
+
+      vst1q_u8(dst, q0u8);
+      dst += dst_stride;
+      vst1q_u8(dst, q1u8);
+      dst += dst_stride;
+    }
+  } else if (w == 8) {  // avg8
+    for (; h > 0; h -= 2) {
+      d0u8 = vld1_u8(src);
+      src += src_stride;
+      d1u8 = vld1_u8(src);
+      src += src_stride;
+      d2u8 = vld1_u8(d);
+      d += dst_stride;
+      d3u8 = vld1_u8(d);
+      d += dst_stride;
+
+      q0u8 = vcombine_u8(d0u8, d1u8);
+      q1u8 = vcombine_u8(d2u8, d3u8);
+      q0u8 = vrhaddq_u8(q0u8, q1u8);
+
+      vst1_u8(dst, vget_low_u8(q0u8));
+      dst += dst_stride;
+      vst1_u8(dst, vget_high_u8(q0u8));
+      dst += dst_stride;
+    }
+  } else {  // avg4
+    for (; h > 0; h -= 2) {
+      d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 0);
+      src += src_stride;
+      d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 1);
+      src += src_stride;
+      d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 0);
+      d += dst_stride;
+      d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 1);
+      d += dst_stride;
+
+      d0u8 = vrhadd_u8(vreinterpret_u8_u32(d0u32),
+                       vreinterpret_u8_u32(d2u32));
+
+      d0u32 = vreinterpret_u32_u8(d0u8);
+      vst1_lane_u32((uint32_t *)dst, d0u32, 0);
+      dst += dst_stride;
+      vst1_lane_u32((uint32_t *)dst, d0u32, 1);
+      dst += dst_stride;
+    }
+  }
+  return;
+}
similarity index 98%
rename from vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm
rename to vpx_dsp/arm/vpx_convolve_avg_neon_asm.asm
index 7d24530217720873846a2970873cd487c46ac33a..97e6189fda1632fcec4eeb3803575d6159c00f11 100644 (file)
@@ -8,14 +8,14 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-    EXPORT  |vp9_convolve_avg_neon|
+    EXPORT  |vpx_convolve_avg_neon|
     ARM
     REQUIRE8
     PRESERVE8
 
     AREA ||.text||, CODE, READONLY, ALIGN=2
 
-|vp9_convolve_avg_neon| PROC
+|vpx_convolve_avg_neon| PROC
     push                {r4-r6, lr}
     ldrd                r4, r5, [sp, #32]
     mov                 r6, r2
diff --git a/vpx_dsp/arm/vpx_convolve_copy_neon.c b/vpx_dsp/arm/vpx_convolve_copy_neon.c
new file mode 100644 (file)
index 0000000..d8fb97a
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+void vpx_convolve_copy_neon(
+    const uint8_t *src,    // r0
+    ptrdiff_t src_stride,  // r1
+    uint8_t *dst,          // r2
+    ptrdiff_t dst_stride,  // r3
+    const int16_t *filter_x,
+    int filter_x_stride,
+    const int16_t *filter_y,
+    int filter_y_stride,
+    int w,
+    int h) {
+  uint8x8_t d0u8, d2u8;
+  uint8x16_t q0u8, q1u8, q2u8, q3u8;
+  (void)filter_x;  (void)filter_x_stride;
+  (void)filter_y;  (void)filter_y_stride;
+
+  if (w > 32) {  // copy64
+    for (; h > 0; h--) {
+      q0u8 = vld1q_u8(src);
+      q1u8 = vld1q_u8(src + 16);
+      q2u8 = vld1q_u8(src + 32);
+      q3u8 = vld1q_u8(src + 48);
+      src += src_stride;
+
+      vst1q_u8(dst, q0u8);
+      vst1q_u8(dst + 16, q1u8);
+      vst1q_u8(dst + 32, q2u8);
+      vst1q_u8(dst + 48, q3u8);
+      dst += dst_stride;
+    }
+  } else if (w == 32) {  // copy32
+    for (; h > 0; h -= 2) {
+      q0u8 = vld1q_u8(src);
+      q1u8 = vld1q_u8(src + 16);
+      src += src_stride;
+      q2u8 = vld1q_u8(src);
+      q3u8 = vld1q_u8(src + 16);
+      src += src_stride;
+
+      vst1q_u8(dst, q0u8);
+      vst1q_u8(dst + 16, q1u8);
+      dst += dst_stride;
+      vst1q_u8(dst, q2u8);
+      vst1q_u8(dst + 16, q3u8);
+      dst += dst_stride;
+    }
+  } else if (w > 8) {  // copy16
+    for (; h > 0; h -= 2) {
+      q0u8 = vld1q_u8(src);
+      src += src_stride;
+      q1u8 = vld1q_u8(src);
+      src += src_stride;
+
+      vst1q_u8(dst, q0u8);
+      dst += dst_stride;
+      vst1q_u8(dst, q1u8);
+      dst += dst_stride;
+    }
+  } else if (w == 8) {  // copy8
+    for (; h > 0; h -= 2) {
+      d0u8 = vld1_u8(src);
+      src += src_stride;
+      d2u8 = vld1_u8(src);
+      src += src_stride;
+
+      vst1_u8(dst, d0u8);
+      dst += dst_stride;
+      vst1_u8(dst, d2u8);
+      dst += dst_stride;
+    }
+  } else {  // copy4
+    for (; h > 0; h--) {
+      *(uint32_t *)dst = *(const uint32_t *)src;
+      src += src_stride;
+      dst += dst_stride;
+    }
+  }
+  return;
+}
similarity index 97%
rename from vp9/common/arm/neon/vp9_copy_neon_asm.asm
rename to vpx_dsp/arm/vpx_convolve_copy_neon_asm.asm
index a0bd04a35f0172d185f57e665d1f15be137f6757..89164ad48ba0b622e35c438ac6da64ab9963f946 100644 (file)
@@ -8,14 +8,14 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-    EXPORT  |vp9_convolve_copy_neon|
+    EXPORT  |vpx_convolve_copy_neon|
     ARM
     REQUIRE8
     PRESERVE8
 
     AREA ||.text||, CODE, READONLY, ALIGN=2
 
-|vp9_convolve_copy_neon| PROC
+|vpx_convolve_copy_neon| PROC
     push                {r4-r5, lr}
     ldrd                r4, r5, [sp, #28]
 
similarity index 85%
rename from vp9/common/arm/neon/vp9_convolve_neon.c
rename to vpx_dsp/arm/vpx_convolve_neon.c
index 2e28cb20ebdeafd9609449399edd7b0abe4db58a..2c0f7e9e7e1f8f3fa8782860a0d9be666de94600 100644 (file)
@@ -8,11 +8,11 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
 #include "vpx_ports/mem.h"
 
-void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int x_step_q4,
                         const int16_t *filter_y, int y_step_q4,
@@ -26,7 +26,7 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
   int intermediate_height = h + 7;
 
   if (x_step_q4 != 16 || y_step_q4 != 16) {
-    vp9_convolve8_c(src, src_stride,
+    vpx_convolve8_c(src, src_stride,
                     dst, dst_stride,
                     filter_x, x_step_q4,
                     filter_y, y_step_q4,
@@ -39,19 +39,19 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
    * the temp buffer which has lots of extra room and is subsequently discarded
    * this is safe if somewhat less than ideal.
    */
-  vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
+  vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride,
                            temp, 64,
                            filter_x, x_step_q4, filter_y, y_step_q4,
                            w, intermediate_height);
 
   /* Step into the temp buffer 3 lines to get the actual frame data */
-  vp9_convolve8_vert_neon(temp + 64 * 3, 64,
+  vpx_convolve8_vert_neon(temp + 64 * 3, 64,
                           dst, dst_stride,
                           filter_x, x_step_q4, filter_y, y_step_q4,
                           w, h);
 }
 
-void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int x_step_q4,
                             const int16_t *filter_y, int y_step_q4,
@@ -60,7 +60,7 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
   int intermediate_height = h + 7;
 
   if (x_step_q4 != 16 || y_step_q4 != 16) {
-    vp9_convolve8_avg_c(src, src_stride,
+    vpx_convolve8_avg_c(src, src_stride,
                         dst, dst_stride,
                         filter_x, x_step_q4,
                         filter_y, y_step_q4,
@@ -71,11 +71,11 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
   /* This implementation has the same issues as above. In addition, we only want
    * to average the values after both passes.
    */
-  vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
+  vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride,
                            temp, 64,
                            filter_x, x_step_q4, filter_y, y_step_q4,
                            w, intermediate_height);
-  vp9_convolve8_avg_vert_neon(temp + 64 * 3,
+  vpx_convolve8_avg_vert_neon(temp + 64 * 3,
                               64, dst, dst_stride,
                               filter_x, x_step_q4, filter_y, y_step_q4,
                               w, h);
index dc8aca5c36677820f047544b34a3cd74e29f732d..66f4d9576c91e0cd0b5001c09173d4aac7f1fa3a 100644 (file)
@@ -8,6 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <stdlib.h>
+
 #include "./vpx_config.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 #include "vpx_ports/mem.h"
similarity index 98%
rename from vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c
rename to vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
index 89364cb95cba2ab100728b5ccddb793e9871afc9..c5ad08cc9ff506998d87675e7ea2e9b8ec239391 100644 (file)
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
 
 static void common_hz_8t_and_aver_dst_4x4_msa(const uint8_t *src,
                                               int32_t src_stride,
@@ -687,7 +687,7 @@ static void common_hz_2t_and_aver_dst_64w_msa(const uint8_t *src,
   }
 }
 
-void vp9_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4,
@@ -695,14 +695,14 @@ void vp9_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
   int8_t cnt, filt_hor[8];
 
   if (16 != x_step_q4) {
-    vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                               filter_x, x_step_q4, filter_y, y_step_q4,
                               w, h);
     return;
   }
 
   if (((const int32_t *)filter_x)[1] == 0x800000) {
-    vp9_convolve_avg(src, src_stride, dst, dst_stride,
+    vpx_convolve_avg(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4,
                      w, h);
     return;
@@ -740,7 +740,7 @@ void vp9_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                                           &filt_hor[3], h);
         break;
       default:
-        vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                   filter_x, x_step_q4, filter_y, y_step_q4,
                                   w, h);
         break;
@@ -773,7 +773,7 @@ void vp9_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                                           filt_hor, h);
         break;
       default:
-        vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                   filter_x, x_step_q4, filter_y, y_step_q4,
                                   w, h);
         break;
similarity index 98%
rename from vp9/common/mips/msa/vp9_convolve8_avg_msa.c
rename to vpx_dsp/mips/vpx_convolve8_avg_msa.c
index e9f3a9dc3f2f421ceddf62e64a2f6b8194212947..f46df6782c5be107a031ab75b5859f484f3d0d77 100644 (file)
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
 
 static void common_hv_8ht_8vt_and_aver_dst_4w_msa(const uint8_t *src,
                                                   int32_t src_stride,
@@ -576,7 +576,7 @@ static void common_hv_2ht_2vt_and_aver_dst_64w_msa(const uint8_t *src,
   }
 }
 
-void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4,
@@ -584,7 +584,7 @@ void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
   int8_t cnt, filt_hor[8], filt_ver[8];
 
   if (16 != x_step_q4 || 16 != y_step_q4) {
-    vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_avg_c(src, src_stride, dst, dst_stride,
                         filter_x, x_step_q4, filter_y, y_step_q4,
                         w, h);
     return;
@@ -592,7 +592,7 @@ void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
 
   if (((const int32_t *)filter_x)[1] == 0x800000 &&
       ((const int32_t *)filter_y)[1] == 0x800000) {
-    vp9_convolve_avg(src, src_stride, dst, dst_stride,
+    vpx_convolve_avg(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4,
                      w, h);
     return;
@@ -632,14 +632,14 @@ void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                                                &filt_hor[3], &filt_ver[3], h);
         break;
       default:
-        vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_c(src, src_stride, dst, dst_stride,
                             filter_x, x_step_q4, filter_y, y_step_q4,
                             w, h);
         break;
     }
   } else if (((const int32_t *)filter_x)[0] == 0 ||
              ((const int32_t *)filter_y)[0] == 0) {
-    vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_avg_c(src, src_stride, dst, dst_stride,
                         filter_x, x_step_q4, filter_y, y_step_q4,
                         w, h);
   } else {
@@ -670,7 +670,7 @@ void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                                                filt_hor, filt_ver, h);
         break;
       default:
-        vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_c(src, src_stride, dst, dst_stride,
                             filter_x, x_step_q4, filter_y, y_step_q4,
                             w, h);
         break;
similarity index 98%
rename from vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c
rename to vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
index 85dfd30c7debb10365e2120f1490eb84b5ddbe85..4d4b3d9e353864f9e897ce681fcb7639e71d69a1 100644 (file)
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
 
 static void common_vt_8t_and_aver_dst_4w_msa(const uint8_t *src,
                                              int32_t src_stride,
@@ -657,7 +657,7 @@ static void common_vt_2t_and_aver_dst_64w_msa(const uint8_t *src,
   }
 }
 
-void vp9_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4,
@@ -665,14 +665,14 @@ void vp9_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
   int8_t cnt, filt_ver[8];
 
   if (16 != y_step_q4) {
-    vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                              filter_x, x_step_q4, filter_y, y_step_q4,
                              w, h);
     return;
   }
 
   if (((const int32_t *)filter_y)[1] == 0x800000) {
-    vp9_convolve_avg(src, src_stride, dst, dst_stride,
+    vpx_convolve_avg(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4,
                      w, h);
     return;
@@ -710,7 +710,7 @@ void vp9_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                                           &filt_ver[3], h);
         break;
       default:
-        vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                  filter_x, x_step_q4, filter_y, y_step_q4,
                                  w, h);
         break;
@@ -744,7 +744,7 @@ void vp9_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                                           filt_ver, h);
         break;
       default:
-        vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                  filter_x, x_step_q4, filter_y, y_step_q4,
                                  w, h);
         break;
similarity index 98%
rename from vp9/common/mips/msa/vp9_convolve8_horiz_msa.c
rename to vpx_dsp/mips/vpx_convolve8_horiz_msa.c
index f175bf9b66398aa1af5475a860bf02f1bae9a8e7..2bb314d7f5624b7b2fa57446e18c83af3f1f9db8 100644 (file)
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
 
 static void common_hz_8t_4x4_msa(const uint8_t *src, int32_t src_stride,
                                  uint8_t *dst, int32_t dst_stride,
@@ -647,7 +647,7 @@ static void common_hz_2t_64w_msa(const uint8_t *src, int32_t src_stride,
   }
 }
 
-void vp9_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
@@ -655,14 +655,14 @@ void vp9_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
   int8_t cnt, filt_hor[8];
 
   if (16 != x_step_q4) {
-    vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                           filter_x, x_step_q4, filter_y, y_step_q4,
                           w, h);
     return;
   }
 
   if (((const int32_t *)filter_x)[1] == 0x800000) {
-    vp9_convolve_copy(src, src_stride, dst, dst_stride,
+    vpx_convolve_copy(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4,
                       w, h);
     return;
@@ -700,7 +700,7 @@ void vp9_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                              &filt_hor[3], h);
         break;
       default:
-        vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                               filter_x, x_step_q4, filter_y, y_step_q4,
                               w, h);
         break;
@@ -733,7 +733,7 @@ void vp9_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                              filt_hor, h);
         break;
       default:
-        vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                               filter_x, x_step_q4, filter_y, y_step_q4,
                               w, h);
         break;
similarity index 98%
rename from vp9/common/mips/msa/vp9_convolve8_msa.c
rename to vpx_dsp/mips/vpx_convolve8_msa.c
index b1279d97ce58c9a86813cf6f2741b84a9dab6268..52f2028cd23401f601a5cb4bf2812b91d60d8973 100644 (file)
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
 
 const uint8_t mc_filt_mask_arr[16 * 3] = {
   /* 8 width cases */
@@ -551,7 +551,7 @@ static void common_hv_2ht_2vt_64w_msa(const uint8_t *src, int32_t src_stride,
   }
 }
 
-void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int32_t x_step_q4,
                        const int16_t *filter_y, int32_t y_step_q4,
@@ -559,7 +559,7 @@ void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
   int8_t cnt, filt_hor[8], filt_ver[8];
 
   if (16 != x_step_q4 || 16 != y_step_q4) {
-    vp9_convolve8_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_c(src, src_stride, dst, dst_stride,
                     filter_x, x_step_q4, filter_y, y_step_q4,
                     w, h);
     return;
@@ -567,7 +567,7 @@ void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
 
   if (((const int32_t *)filter_x)[1] == 0x800000 &&
       ((const int32_t *)filter_y)[1] == 0x800000) {
-    vp9_convolve_copy(src, src_stride, dst, dst_stride,
+    vpx_convolve_copy(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4,
                       w, h);
     return;
@@ -607,14 +607,14 @@ void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
                                   &filt_hor[3], &filt_ver[3], (int32_t)h);
         break;
       default:
-        vp9_convolve8_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_c(src, src_stride, dst, dst_stride,
                         filter_x, x_step_q4, filter_y, y_step_q4,
                         w, h);
         break;
     }
   } else if (((const int32_t *)filter_x)[0] == 0 ||
              ((const int32_t *)filter_y)[0] == 0) {
-    vp9_convolve8_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_c(src, src_stride, dst, dst_stride,
                     filter_x, x_step_q4, filter_y, y_step_q4,
                     w, h);
   } else {
@@ -645,7 +645,7 @@ void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
                                   filt_hor, filt_ver, (int32_t)h);
         break;
       default:
-        vp9_convolve8_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_c(src, src_stride, dst, dst_stride,
                         filter_x, x_step_q4, filter_y, y_step_q4,
                         w, h);
         break;
similarity index 98%
rename from vp9/common/mips/msa/vp9_convolve8_vert_msa.c
rename to vpx_dsp/mips/vpx_convolve8_vert_msa.c
index e9ec2507a0a3b8ca5d20a95ef1ef795737a5d158..85f175760bbcbeb2d82e6e2c30c9d0e32b05f80d 100644 (file)
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
 
 static void common_vt_8t_4w_msa(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
@@ -650,7 +650,7 @@ static void common_vt_2t_64w_msa(const uint8_t *src, int32_t src_stride,
   }
 }
 
-void vp9_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int x_step_q4,
                             const int16_t *filter_y, int y_step_q4,
@@ -658,14 +658,14 @@ void vp9_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
   int8_t cnt, filt_ver[8];
 
   if (16 != y_step_q4) {
-    vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
                          filter_x, x_step_q4, filter_y, y_step_q4,
                          w, h);
     return;
   }
 
   if (((const int32_t *)filter_y)[1] == 0x800000) {
-    vp9_convolve_copy(src, src_stride, dst, dst_stride,
+    vpx_convolve_copy(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4,
                       w, h);
     return;
@@ -703,7 +703,7 @@ void vp9_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                              &filt_ver[3], h);
         break;
       default:
-        vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
                              filter_x, x_step_q4, filter_y, y_step_q4,
                              w, h);
         break;
@@ -736,7 +736,7 @@ void vp9_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                              filt_ver, h);
         break;
       default:
-        vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
                              filter_x, x_step_q4, filter_y, y_step_q4,
                              w, h);
         break;
similarity index 99%
rename from vp9/common/mips/msa/vp9_convolve_avg_msa.c
rename to vpx_dsp/mips/vpx_convolve_avg_msa.c
index 7c11e4065d37cd186595a3048a573bbdd2bd827e..4c3d978031a57e008e5903e471e9ebd86b227035 100644 (file)
@@ -186,7 +186,7 @@ static void avg_width64_msa(const uint8_t *src, int32_t src_stride,
   }
 }
 
-void vp9_convolve_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int32_t filter_x_stride,
                           const int16_t *filter_y, int32_t filter_y_stride,
similarity index 99%
rename from vp9/common/mips/msa/vp9_convolve_copy_msa.c
rename to vpx_dsp/mips/vpx_convolve_copy_msa.c
index 39a0b24d55ed577235da5b99b060d648ebf878c1..ba4012281e84ae9402cff599fabc65b65f81de18 100644 (file)
@@ -196,7 +196,7 @@ static void copy_width64_msa(const uint8_t *src, int32_t src_stride,
   copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64);
 }
 
-void vp9_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int32_t filter_x_stride,
                            const int16_t *filter_y, int32_t filter_y_stride,
similarity index 97%
rename from vp9/common/mips/msa/vp9_convolve_msa.h
rename to vpx_dsp/mips/vpx_convolve_msa.h
index 71c616b67c1b6fe136968e5a6affee9f1bd4f93f..e0013983ae6ae4c0635819753def290e94917dac 100644 (file)
@@ -8,11 +8,11 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_
-#define VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_
+#ifndef VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_
+#define VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_
 
-#include "vp9/common/vp9_filter.h"
 #include "vpx_dsp/mips/macros_msa.h"
+#include "vpx_dsp/vpx_filter.h"
 
 extern const uint8_t mc_filt_mask_arr[16 * 3];
 
@@ -116,4 +116,4 @@ extern const uint8_t mc_filt_mask_arr[16 * 3];
   AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m);          \
   ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride);                             \
 }
-#endif  /* VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_ */
+#endif  /* VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ */
similarity index 93%
rename from vp9/common/vp9_convolve.c
rename to vpx_dsp/vpx_convolve.c
index 90e337fd66e13b8f649dd232e4a64a0c3a2341ae..f06da3d341d8af89bf7caaa430274f767ea339c4 100644 (file)
@@ -9,13 +9,14 @@
  */
 
 #include <assert.h>
+#include <string.h>
 
 #include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_convolve.h"
-#include "vp9/common/vp9_filter.h"
+#include "./vpx_dsp_rtcd.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_convolve.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
 #include "vpx_ports/mem.h"
 
 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
@@ -154,7 +155,7 @@ static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
   return (int)((const InterpKernel *)(intptr_t)f - base);
 }
 
-void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4,
@@ -169,7 +170,7 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                  x0_q4, x_step_q4, w, h);
 }
 
-void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y, int y_step_q4,
@@ -184,7 +185,7 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                      x0_q4, x_step_q4, w, h);
 }
 
-void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
@@ -199,7 +200,7 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                 y0_q4, y_step_q4, w, h);
 }
 
-void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
@@ -214,7 +215,7 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                     y0_q4, y_step_q4, w, h);
 }
 
-void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                      uint8_t *dst, ptrdiff_t dst_stride,
                      const int16_t *filter_x, int x_step_q4,
                      const int16_t *filter_y, int y_step_q4,
@@ -230,7 +231,7 @@ void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
            filters_y, y0_q4, y_step_q4, w, h);
 }
 
-void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
@@ -240,12 +241,12 @@ void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
   assert(w <= 64);
   assert(h <= 64);
 
-  vp9_convolve8_c(src, src_stride, temp, 64,
+  vpx_convolve8_c(src, src_stride, temp, 64,
                   filter_x, x_step_q4, filter_y, y_step_q4, w, h);
-  vp9_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
+  vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
 }
 
-void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int filter_x_stride,
                          const int16_t *filter_y, int filter_y_stride,
@@ -262,7 +263,7 @@ void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
   }
 }
 
-void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
@@ -423,7 +424,7 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
 }
 
 
-void vp9_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
@@ -437,7 +438,7 @@ void vp9_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                         x0_q4, x_step_q4, w, h, bd);
 }
 
-void vp9_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                       uint8_t *dst, ptrdiff_t dst_stride,
                                       const int16_t *filter_x, int x_step_q4,
                                       const int16_t *filter_y, int y_step_q4,
@@ -451,7 +452,7 @@ void vp9_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                             x0_q4, x_step_q4, w, h, bd);
 }
 
-void vp9_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4,
@@ -465,7 +466,7 @@ void vp9_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                        y0_q4, y_step_q4, w, h, bd);
 }
 
-void vp9_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const int16_t *filter_x, int x_step_q4,
                                      const int16_t *filter_y, int y_step_q4,
@@ -479,7 +480,7 @@ void vp9_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                            y0_q4, y_step_q4, w, h, bd);
 }
 
-void vp9_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int x_step_q4,
                             const int16_t *filter_y, int y_step_q4,
@@ -495,7 +496,7 @@ void vp9_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                   filters_y, y0_q4, y_step_q4, w, h, bd);
 }
 
-void vp9_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4,
@@ -505,13 +506,13 @@ void vp9_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
   assert(w <= 64);
   assert(h <= 64);
 
-  vp9_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
+  vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
                          filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
-  vp9_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
                             NULL, 0, NULL, 0, w, h, bd);
 }
 
-void vp9_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
+void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                                 uint8_t *dst8, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int filter_x_stride,
                                 const int16_t *filter_y, int filter_y_stride,
@@ -532,7 +533,7 @@ void vp9_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
   }
 }
 
-void vp9_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
+void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                                uint8_t *dst8, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int filter_x_stride,
                                const int16_t *filter_y, int filter_y_stride,
similarity index 92%
rename from vp9/common/vp9_convolve.h
rename to vpx_dsp/vpx_convolve.h
index 8b044c897dd1013a9bfc745e0b6e89a95cb52bc0..9ed3f1750f2d627e774250f66b9f30f0f2f9492e 100644 (file)
@@ -7,8 +7,8 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
-#ifndef VP9_COMMON_VP9_CONVOLVE_H_
-#define VP9_COMMON_VP9_CONVOLVE_H_
+#ifndef VPX_DSP_VPX_CONVOLVE_H_
+#define VPX_DSP_VPX_CONVOLVE_H_
 
 #include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
@@ -35,4 +35,4 @@ typedef void (*highbd_convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
 }  // extern "C"
 #endif
 
-#endif  // VP9_COMMON_VP9_CONVOLVE_H_
+#endif  // VPX_DSP_VPX_CONVOLVE_H_
index ff2bb52366557a8e693330a6260ddebfbf8c3e50..de4578257b93b00acf18fbe12b57be0791b8c45f 100644 (file)
@@ -54,6 +54,54 @@ DSP_SRCS-$(HAVE_DSPR2)  += mips/intrapred8_dspr2.c
 DSP_SRCS-$(HAVE_DSPR2)  += mips/intrapred16_dspr2.c
 endif  # CONFIG_VP9
 
+# interpolation filters
+DSP_SRCS-yes += vpx_convolve.c
+DSP_SRCS-yes += vpx_convolve.h
+DSP_SRCS-yes += vpx_filter.h
+
+DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/convolve.h
+DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/vpx_asm_stubs.c
+DSP_SRCS-$(HAVE_SSE2)  += x86/vpx_subpixel_8t_sse2.asm
+DSP_SRCS-$(HAVE_SSE2)  += x86/vpx_subpixel_bilinear_sse2.asm
+DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_ssse3.asm
+DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_bilinear_ssse3.asm
+DSP_SRCS-$(HAVE_AVX2)  += x86/vpx_subpixel_8t_intrin_avx2.c
+DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_intrin_ssse3.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+DSP_SRCS-$(HAVE_SSE2)  += x86/vpx_high_subpixel_8t_sse2.asm
+DSP_SRCS-$(HAVE_SSE2)  += x86/vpx_high_subpixel_bilinear_sse2.asm
+endif
+ifeq ($(CONFIG_USE_X86INC),yes)
+DSP_SRCS-$(HAVE_SSE2)  += x86/vpx_convolve_copy_sse2.asm
+endif
+
+ifeq ($(HAVE_NEON_ASM),yes)
+DSP_SRCS-yes += arm/vpx_convolve_copy_neon_asm$(ASM)
+DSP_SRCS-yes += arm/vpx_convolve8_avg_neon_asm$(ASM)
+DSP_SRCS-yes += arm/vpx_convolve8_neon_asm$(ASM)
+DSP_SRCS-yes += arm/vpx_convolve_avg_neon_asm$(ASM)
+DSP_SRCS-yes += arm/vpx_convolve_neon.c
+else
+ifeq ($(HAVE_NEON),yes)
+DSP_SRCS-yes += arm/vpx_convolve_copy_neon.c
+DSP_SRCS-yes += arm/vpx_convolve8_avg_neon.c
+DSP_SRCS-yes += arm/vpx_convolve8_neon.c
+DSP_SRCS-yes += arm/vpx_convolve_avg_neon.c
+DSP_SRCS-yes += arm/vpx_convolve_neon.c
+endif  # HAVE_NEON
+endif  # HAVE_NEON_ASM
+
+# common (msa)
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_avg_horiz_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_avg_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_avg_vert_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_horiz_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_vert_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_avg_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_copy_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_msa.h
+
 # loop filters
 DSP_SRCS-yes += loopfilter.c
 
index 1fd7c153a782b3ea2c526aa5ded167bcff5648b0..ccb818959436abdef563ff5b01cbf8c9b2d4efeb 100644 (file)
@@ -11,8 +11,6 @@
 #ifndef VPX_DSP_COMMON_H_
 #define VPX_DSP_COMMON_H_
 
-#include <stdlib.h>
-
 #include "./vpx_config.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/mem.h"
index 85b25229a8f8695a55c9fae8876f9384e2543728..6e63271eb11548cfa68e2821ce5409a17856d1d3 100644 (file)
@@ -34,6 +34,12 @@ if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
   }
 }
 
+# optimizations which depend on multiple features
+$avx2_ssse3 = '';
+if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
+  $avx2_ssse3 = 'avx2';
+}
+
 # functions that are 64 bit only.
 $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
 if ($opts{arch} eq "x86_64") {
@@ -365,6 +371,62 @@ if (vpx_config("CONFIG_VP9") eq "yes") {
   }  # CONFIG_VP9_HIGHBITDEPTH
 }  # CONFIG_VP9
 
+#
+# Sub Pixel Filters
+#
+add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve_copy neon msa/, "$sse2_x86inc";
+
+add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve_avg neon msa/, "$sse2_x86inc";
+
+add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8 sse2 ssse3 neon msa/, "$avx2_ssse3";
+
+add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8_horiz sse2 ssse3 neon msa/, "$avx2_ssse3";
+
+add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8_vert sse2 ssse3 neon msa/, "$avx2_ssse3";
+
+add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8_avg sse2 ssse3 neon msa/;
+
+add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 neon msa/;
+
+add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8_avg_vert sse2 ssse3 neon msa/;
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+  #
+  # Sub Pixel Filters
+  #
+  add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve_copy/;
+
+  add_proto qw/void vpx_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve_avg/;
+
+  add_proto qw/void vpx_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8/, "$sse2_x86_64";
+
+  add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8_horiz/, "$sse2_x86_64";
+
+  add_proto qw/void vpx_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8_vert/, "$sse2_x86_64";
+
+  add_proto qw/void vpx_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8_avg/, "$sse2_x86_64";
+
+  add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8_avg_horiz/, "$sse2_x86_64";
+
+  add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8_avg_vert/, "$sse2_x86_64";
+}  # CONFIG_VP9_HIGHBITDEPTH
+
 #
 # Loopfilter
 #
diff --git a/vpx_dsp/vpx_filter.h b/vpx_dsp/vpx_filter.h
new file mode 100644 (file)
index 0000000..2617feb
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_VPX_FILTER_H_
+#define VPX_DSP_VPX_FILTER_H_
+
+#include "vpx/vpx_integer.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FILTER_BITS 7
+
+#define SUBPEL_BITS 4
+#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
+#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
+#define SUBPEL_TAPS 8
+
+typedef int16_t InterpKernel[SUBPEL_TAPS];
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VPX_DSP_VPX_FILTER_H_
similarity index 85%
rename from vp9/common/x86/convolve.h
rename to vpx_dsp/x86/convolve.h
index de2df47e5e58e437b8cee9b58be9beffa3a6e2e0..c0144981b493526072200103f411bdb8f0c2cd3b 100644 (file)
@@ -7,8 +7,8 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
-#ifndef VP9_COMMON_X86_CONVOLVE_H_
-#define VP9_COMMON_X86_CONVOLVE_H_
+#ifndef VPX_DSP_X86_CONVOLVE_H_
+#define VPX_DSP_X86_CONVOLVE_H_
 
 #include <assert.h>
 
@@ -26,7 +26,7 @@ typedef void filter8_1dfunction (
 );
 
 #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
-  void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
+  void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                     uint8_t *dst, ptrdiff_t dst_stride, \
                                     const int16_t *filter_x, int x_step_q4, \
                                     const int16_t *filter_y, int y_step_q4, \
@@ -34,7 +34,7 @@ typedef void filter8_1dfunction (
   if (step_q4 == 16 && filter[3] != 128) { \
     if (filter[0] || filter[1] || filter[2]) { \
       while (w >= 16) { \
-        vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \
+        vpx_filter_block1d16_##dir##8_##avg##opt(src_start, \
                                                  src_stride, \
                                                  dst, \
                                                  dst_stride, \
@@ -45,7 +45,7 @@ typedef void filter8_1dfunction (
         w -= 16; \
       } \
       while (w >= 8) { \
-        vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \
+        vpx_filter_block1d8_##dir##8_##avg##opt(src_start, \
                                                 src_stride, \
                                                 dst, \
                                                 dst_stride, \
@@ -56,7 +56,7 @@ typedef void filter8_1dfunction (
         w -= 8; \
       } \
       while (w >= 4) { \
-        vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \
+        vpx_filter_block1d4_##dir##8_##avg##opt(src_start, \
                                                 src_stride, \
                                                 dst, \
                                                 dst_stride, \
@@ -68,7 +68,7 @@ typedef void filter8_1dfunction (
       } \
     } else { \
       while (w >= 16) { \
-        vp9_filter_block1d16_##dir##2_##avg##opt(src, \
+        vpx_filter_block1d16_##dir##2_##avg##opt(src, \
                                                  src_stride, \
                                                  dst, \
                                                  dst_stride, \
@@ -79,7 +79,7 @@ typedef void filter8_1dfunction (
         w -= 16; \
       } \
       while (w >= 8) { \
-        vp9_filter_block1d8_##dir##2_##avg##opt(src, \
+        vpx_filter_block1d8_##dir##2_##avg##opt(src, \
                                                 src_stride, \
                                                 dst, \
                                                 dst_stride, \
@@ -90,7 +90,7 @@ typedef void filter8_1dfunction (
         w -= 8; \
       } \
       while (w >= 4) { \
-        vp9_filter_block1d4_##dir##2_##avg##opt(src, \
+        vpx_filter_block1d4_##dir##2_##avg##opt(src, \
                                                 src_stride, \
                                                 dst, \
                                                 dst_stride, \
@@ -103,14 +103,14 @@ typedef void filter8_1dfunction (
     } \
   } \
   if (w) { \
-    vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
+    vpx_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
                              filter_x, x_step_q4, filter_y, y_step_q4, \
                              w, h); \
   } \
 }
 
 #define FUN_CONV_2D(avg, opt) \
-void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
+void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                               uint8_t *dst, ptrdiff_t dst_stride, \
                               const int16_t *filter_x, int x_step_q4, \
                               const int16_t *filter_y, int y_step_q4, \
@@ -121,23 +121,23 @@ void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
     if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
         filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
       DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
-      vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
+      vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
                                 filter_x, x_step_q4, filter_y, y_step_q4, \
                                 w, h + 7); \
-      vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
+      vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
                                       filter_x, x_step_q4, filter_y, \
                                       y_step_q4, w, h); \
     } else { \
       DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
-      vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
+      vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
                                 filter_x, x_step_q4, filter_y, y_step_q4, \
                                 w, h + 1); \
-      vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
+      vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
                                       filter_x, x_step_q4, filter_y, \
                                       y_step_q4, w, h); \
     } \
   } else { \
-    vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+    vpx_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
                            filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
   } \
 }
@@ -155,7 +155,7 @@ typedef void highbd_filter8_1dfunction (
 );
 
 #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
-  void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \
+  void vpx_highbd_convolve8_##name##_##opt(const uint8_t *src8, \
                                            ptrdiff_t src_stride, \
                                            uint8_t *dst8, \
                                            ptrdiff_t dst_stride, \
@@ -169,7 +169,7 @@ typedef void highbd_filter8_1dfunction (
     uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
     if (filter[0] || filter[1] || filter[2]) { \
       while (w >= 16) { \
-        vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \
+        vpx_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \
                                                         src_stride, \
                                                         dst, \
                                                         dst_stride, \
@@ -181,7 +181,7 @@ typedef void highbd_filter8_1dfunction (
         w -= 16; \
       } \
       while (w >= 8) { \
-        vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \
+        vpx_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \
                                                        src_stride, \
                                                        dst, \
                                                        dst_stride, \
@@ -193,7 +193,7 @@ typedef void highbd_filter8_1dfunction (
         w -= 8; \
       } \
       while (w >= 4) { \
-        vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \
+        vpx_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \
                                                        src_stride, \
                                                        dst, \
                                                        dst_stride, \
@@ -206,7 +206,7 @@ typedef void highbd_filter8_1dfunction (
       } \
     } else { \
       while (w >= 16) { \
-        vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \
+        vpx_highbd_filter_block1d16_##dir##2_##avg##opt(src, \
                                                         src_stride, \
                                                         dst, \
                                                         dst_stride, \
@@ -218,7 +218,7 @@ typedef void highbd_filter8_1dfunction (
         w -= 16; \
       } \
       while (w >= 8) { \
-        vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \
+        vpx_highbd_filter_block1d8_##dir##2_##avg##opt(src, \
                                                        src_stride, \
                                                        dst, \
                                                        dst_stride, \
@@ -230,7 +230,7 @@ typedef void highbd_filter8_1dfunction (
         w -= 8; \
       } \
       while (w >= 4) { \
-        vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \
+        vpx_highbd_filter_block1d4_##dir##2_##avg##opt(src, \
                                                        src_stride, \
                                                        dst, \
                                                        dst_stride, \
@@ -244,14 +244,14 @@ typedef void highbd_filter8_1dfunction (
     } \
   } \
   if (w) { \
-    vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \
+    vpx_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \
                                     filter_x, x_step_q4, filter_y, y_step_q4, \
                                     w, h, bd); \
   } \
 }
 
 #define HIGH_FUN_CONV_2D(avg, opt) \
-void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
+void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                      uint8_t *dst, ptrdiff_t dst_stride, \
                                      const int16_t *filter_x, int x_step_q4, \
                                      const int16_t *filter_y, int y_step_q4, \
@@ -262,35 +262,35 @@ void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
     if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
         filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
       DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
-      vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
+      vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
                                        CONVERT_TO_BYTEPTR(fdata2), 64, \
                                        filter_x, x_step_q4, \
                                        filter_y, y_step_q4, \
                                        w, h + 7, bd); \
-      vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \
+      vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \
                                              64, dst, dst_stride, \
                                              filter_x, x_step_q4, \
                                              filter_y, y_step_q4, \
                                              w, h, bd); \
     } else { \
       DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
-      vp9_highbd_convolve8_horiz_##opt(src, src_stride, \
+      vpx_highbd_convolve8_horiz_##opt(src, src_stride, \
                                        CONVERT_TO_BYTEPTR(fdata2), 64, \
                                        filter_x, x_step_q4, \
                                        filter_y, y_step_q4, \
                                        w, h + 1, bd); \
-      vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \
+      vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \
                                              dst, dst_stride, \
                                              filter_x, x_step_q4, \
                                              filter_y, y_step_q4, \
                                              w, h, bd); \
     } \
   } else { \
-    vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+    vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
                                   filter_x, x_step_q4, filter_y, y_step_q4, w, \
                                   h, bd); \
   } \
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-#endif  // VP9_COMMON_X86_CONVOLVE_H_
+#endif  // VPX_DSP_X86_CONVOLVE_H_
similarity index 60%
rename from vp9/common/x86/vp9_asm_stubs.c
rename to vpx_dsp/x86/vpx_asm_stubs.c
index fd55fb8c664cee29f228f9a6efbae4a8e6be5795..422b0fc422d68e0a22d81aec174a4ae5750169de 100644 (file)
@@ -8,53 +8,53 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "./vp9_rtcd.h"
 #include "./vpx_config.h"
-#include "vp9/common/x86/convolve.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/x86/convolve.h"
 
 #if HAVE_SSE2
-filter8_1dfunction vp9_filter_block1d16_v8_sse2;
-filter8_1dfunction vp9_filter_block1d16_h8_sse2;
-filter8_1dfunction vp9_filter_block1d8_v8_sse2;
-filter8_1dfunction vp9_filter_block1d8_h8_sse2;
-filter8_1dfunction vp9_filter_block1d4_v8_sse2;
-filter8_1dfunction vp9_filter_block1d4_h8_sse2;
-filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2;
-filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_v8_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d16_v8_sse2;
+filter8_1dfunction vpx_filter_block1d16_h8_sse2;
+filter8_1dfunction vpx_filter_block1d8_v8_sse2;
+filter8_1dfunction vpx_filter_block1d8_h8_sse2;
+filter8_1dfunction vpx_filter_block1d4_v8_sse2;
+filter8_1dfunction vpx_filter_block1d4_h8_sse2;
+filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2;
 
-filter8_1dfunction vp9_filter_block1d16_v2_sse2;
-filter8_1dfunction vp9_filter_block1d16_h2_sse2;
-filter8_1dfunction vp9_filter_block1d8_v2_sse2;
-filter8_1dfunction vp9_filter_block1d8_h2_sse2;
-filter8_1dfunction vp9_filter_block1d4_v2_sse2;
-filter8_1dfunction vp9_filter_block1d4_h2_sse2;
-filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d16_v2_sse2;
+filter8_1dfunction vpx_filter_block1d16_h2_sse2;
+filter8_1dfunction vpx_filter_block1d8_v2_sse2;
+filter8_1dfunction vpx_filter_block1d8_h2_sse2;
+filter8_1dfunction vpx_filter_block1d4_v2_sse2;
+filter8_1dfunction vpx_filter_block1d4_h2_sse2;
+filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2;
 
-// void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
 //                               uint8_t *dst, ptrdiff_t dst_stride,
 //                               const int16_t *filter_x, int x_step_q4,
 //                               const int16_t *filter_y, int y_step_q4,
 //                               int w, int h);
-// void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
 //                              uint8_t *dst, ptrdiff_t dst_stride,
 //                              const int16_t *filter_x, int x_step_q4,
 //                              const int16_t *filter_y, int y_step_q4,
 //                              int w, int h);
-// void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
 //                                   uint8_t *dst, ptrdiff_t dst_stride,
 //                                   const int16_t *filter_x, int x_step_q4,
 //                                   const int16_t *filter_y, int y_step_q4,
 //                                   int w, int h);
-// void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
 //                                  uint8_t *dst, ptrdiff_t dst_stride,
 //                                  const int16_t *filter_x, int x_step_q4,
 //                                  const int16_t *filter_y, int y_step_q4,
@@ -64,12 +64,12 @@ FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
 FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
 FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
 
-// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
 //                         uint8_t *dst, ptrdiff_t dst_stride,
 //                         const int16_t *filter_x, int x_step_q4,
 //                         const int16_t *filter_y, int y_step_q4,
 //                         int w, int h);
-// void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
 //                             uint8_t *dst, ptrdiff_t dst_stride,
 //                             const int16_t *filter_x, int x_step_q4,
 //                             const int16_t *filter_y, int y_step_q4,
@@ -78,33 +78,33 @@ FUN_CONV_2D(, sse2);
 FUN_CONV_2D(avg_ , sse2);
 
 #if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_v8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_h8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_v8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_h8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_v8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_v8_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_h8_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_v8_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_h8_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_v8_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2;
 
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_v2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_h2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_v2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_h2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_v2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_v2_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_h2_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_v2_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_h2_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_v2_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2;
 
-// void vp9_highbd_convolve8_horiz_sse2(const uint8_t *src,
+// void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src,
 //                                      ptrdiff_t src_stride,
 //                                      uint8_t *dst,
 //                                      ptrdiff_t dst_stride,
@@ -113,7 +113,7 @@ highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h2_avg_sse2;
 //                                      const int16_t *filter_y,
 //                                      int y_step_q4,
 //                                      int w, int h, int bd);
-// void vp9_highbd_convolve8_vert_sse2(const uint8_t *src,
+// void vpx_highbd_convolve8_vert_sse2(const uint8_t *src,
 //                                     ptrdiff_t src_stride,
 //                                     uint8_t *dst,
 //                                     ptrdiff_t dst_stride,
@@ -122,7 +122,7 @@ highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h2_avg_sse2;
 //                                     const int16_t *filter_y,
 //                                     int y_step_q4,
 //                                     int w, int h, int bd);
-// void vp9_highbd_convolve8_avg_horiz_sse2(const uint8_t *src,
+// void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src,
 //                                          ptrdiff_t src_stride,
 //                                          uint8_t *dst,
 //                                          ptrdiff_t dst_stride,
@@ -131,7 +131,7 @@ highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h2_avg_sse2;
 //                                          const int16_t *filter_y,
 //                                          int y_step_q4,
 //                                          int w, int h, int bd);
-// void vp9_highbd_convolve8_avg_vert_sse2(const uint8_t *src,
+// void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src,
 //                                         ptrdiff_t src_stride,
 //                                         uint8_t *dst,
 //                                         ptrdiff_t dst_stride,
@@ -146,12 +146,12 @@ HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
 HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
                  sse2);
 
-// void vp9_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
 //                                uint8_t *dst, ptrdiff_t dst_stride,
 //                                const int16_t *filter_x, int x_step_q4,
 //                                const int16_t *filter_y, int y_step_q4,
 //                                int w, int h, int bd);
-// void vp9_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
 //                                    uint8_t *dst, ptrdiff_t dst_stride,
 //                                    const int16_t *filter_x, int x_step_q4,
 //                                    const int16_t *filter_y, int y_step_q4,
similarity index 99%
rename from vp9/common/x86/vp9_copy_sse2.asm
rename to vpx_dsp/x86/vpx_convolve_copy_sse2.asm
index b26383708f7da272f0a6def7a475210a4c248c6a..6cd620a597f24aab6846d4f99087c4bd8f227efd 100644 (file)
@@ -8,6 +8,8 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
+%define program_name vpx
+
 %include "third_party/x86inc/x86inc.asm"
 
 SECTION .text
similarity index 94%
rename from vp9/common/x86/vp9_high_subpixel_8t_sse2.asm
rename to vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm
index 29ec151ed1bb2ded4e1ae52220458007550f29c3..bfc816f235354d3e9326aafadb14f48bc55e848d 100644 (file)
     movdqu      [rdi + %2], xmm0
 %endm
 
-;void vp9_filter_block1d4_v8_sse2
+;void vpx_filter_block1d4_v8_sse2
 ;(
 ;    unsigned char *src_ptr,
 ;    unsigned int   src_pitch,
 ;    unsigned int   output_height,
 ;    short *filter
 ;)
-global sym(vp9_highbd_filter_block1d4_v8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_v8_sse2):
+global sym(vpx_highbd_filter_block1d4_v8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_v8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -267,7 +267,7 @@ sym(vp9_highbd_filter_block1d4_v8_sse2):
     pop         rbp
     ret
 
-;void vp9_filter_block1d8_v8_sse2
+;void vpx_filter_block1d8_v8_sse2
 ;(
 ;    unsigned char *src_ptr,
 ;    unsigned int   src_pitch,
@@ -276,8 +276,8 @@ sym(vp9_highbd_filter_block1d4_v8_sse2):
 ;    unsigned int   output_height,
 ;    short *filter
 ;)
-global sym(vp9_highbd_filter_block1d8_v8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_v8_sse2):
+global sym(vpx_highbd_filter_block1d8_v8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_v8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -326,7 +326,7 @@ sym(vp9_highbd_filter_block1d8_v8_sse2):
     pop         rbp
     ret
 
-;void vp9_filter_block1d16_v8_sse2
+;void vpx_filter_block1d16_v8_sse2
 ;(
 ;    unsigned char *src_ptr,
 ;    unsigned int   src_pitch,
@@ -335,8 +335,8 @@ sym(vp9_highbd_filter_block1d8_v8_sse2):
 ;    unsigned int   output_height,
 ;    short *filter
 ;)
-global sym(vp9_highbd_filter_block1d16_v8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_v8_sse2):
+global sym(vpx_highbd_filter_block1d16_v8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_v8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -389,8 +389,8 @@ sym(vp9_highbd_filter_block1d16_v8_sse2):
     pop         rbp
     ret
 
-global sym(vp9_highbd_filter_block1d4_v8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_v8_avg_sse2):
+global sym(vpx_highbd_filter_block1d4_v8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_v8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -450,8 +450,8 @@ sym(vp9_highbd_filter_block1d4_v8_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_highbd_filter_block1d8_v8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_v8_avg_sse2):
+global sym(vpx_highbd_filter_block1d8_v8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_v8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -499,8 +499,8 @@ sym(vp9_highbd_filter_block1d8_v8_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_highbd_filter_block1d16_v8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_v8_avg_sse2):
+global sym(vpx_highbd_filter_block1d16_v8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_v8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -552,7 +552,7 @@ sym(vp9_highbd_filter_block1d16_v8_avg_sse2):
     pop         rbp
     ret
 
-;void vp9_filter_block1d4_h8_sse2
+;void vpx_filter_block1d4_h8_sse2
 ;(
 ;    unsigned char  *src_ptr,
 ;    unsigned int    src_pixels_per_line,
@@ -561,8 +561,8 @@ sym(vp9_highbd_filter_block1d16_v8_avg_sse2):
 ;    unsigned int    output_height,
 ;    short *filter
 ;)
-global sym(vp9_highbd_filter_block1d4_h8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_h8_sse2):
+global sym(vpx_highbd_filter_block1d4_h8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_h8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -627,7 +627,7 @@ sym(vp9_highbd_filter_block1d4_h8_sse2):
     pop         rbp
     ret
 
-;void vp9_filter_block1d8_h8_sse2
+;void vpx_filter_block1d8_h8_sse2
 ;(
 ;    unsigned char  *src_ptr,
 ;    unsigned int    src_pixels_per_line,
@@ -636,8 +636,8 @@ sym(vp9_highbd_filter_block1d4_h8_sse2):
 ;    unsigned int    output_height,
 ;    short *filter
 ;)
-global sym(vp9_highbd_filter_block1d8_h8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_h8_sse2):
+global sym(vpx_highbd_filter_block1d8_h8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_h8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -693,7 +693,7 @@ sym(vp9_highbd_filter_block1d8_h8_sse2):
     pop         rbp
     ret
 
-;void vp9_filter_block1d16_h8_sse2
+;void vpx_filter_block1d16_h8_sse2
 ;(
 ;    unsigned char  *src_ptr,
 ;    unsigned int    src_pixels_per_line,
@@ -702,8 +702,8 @@ sym(vp9_highbd_filter_block1d8_h8_sse2):
 ;    unsigned int    output_height,
 ;    short *filter
 ;)
-global sym(vp9_highbd_filter_block1d16_h8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_h8_sse2):
+global sym(vpx_highbd_filter_block1d16_h8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_h8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -770,8 +770,8 @@ sym(vp9_highbd_filter_block1d16_h8_sse2):
     pop         rbp
     ret
 
-global sym(vp9_highbd_filter_block1d4_h8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_h8_avg_sse2):
+global sym(vpx_highbd_filter_block1d4_h8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_h8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -836,8 +836,8 @@ sym(vp9_highbd_filter_block1d4_h8_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_highbd_filter_block1d8_h8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_h8_avg_sse2):
+global sym(vpx_highbd_filter_block1d8_h8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_h8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -893,8 +893,8 @@ sym(vp9_highbd_filter_block1d8_h8_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_highbd_filter_block1d16_h8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_h8_avg_sse2):
+global sym(vpx_highbd_filter_block1d16_h8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_h8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
similarity index 89%
rename from vp9/common/x86/vp9_high_subpixel_bilinear_sse2.asm
rename to vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm
index 93784121c1eb2f390235f6c081f250ada361cbbb..72f2ff71da27cbde81c428f466b36e6ae22209a4 100644 (file)
 %endm
 %endif
 
-global sym(vp9_highbd_filter_block1d4_v2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_v2_sse2):
+global sym(vpx_highbd_filter_block1d4_v2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_v2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -196,8 +196,8 @@ sym(vp9_highbd_filter_block1d4_v2_sse2):
     ret
 
 %if ARCH_X86_64
-global sym(vp9_highbd_filter_block1d8_v2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_v2_sse2):
+global sym(vpx_highbd_filter_block1d8_v2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_v2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -222,8 +222,8 @@ sym(vp9_highbd_filter_block1d8_v2_sse2):
     pop         rbp
     ret
 
-global sym(vp9_highbd_filter_block1d16_v2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_v2_sse2):
+global sym(vpx_highbd_filter_block1d16_v2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_v2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -251,8 +251,8 @@ sym(vp9_highbd_filter_block1d16_v2_sse2):
     ret
 %endif
 
-global sym(vp9_highbd_filter_block1d4_v2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_v2_avg_sse2):
+global sym(vpx_highbd_filter_block1d4_v2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_v2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -276,8 +276,8 @@ sym(vp9_highbd_filter_block1d4_v2_avg_sse2):
     ret
 
 %if ARCH_X86_64
-global sym(vp9_highbd_filter_block1d8_v2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_v2_avg_sse2):
+global sym(vpx_highbd_filter_block1d8_v2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_v2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -302,8 +302,8 @@ sym(vp9_highbd_filter_block1d8_v2_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_highbd_filter_block1d16_v2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_v2_avg_sse2):
+global sym(vpx_highbd_filter_block1d16_v2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_v2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -331,8 +331,8 @@ sym(vp9_highbd_filter_block1d16_v2_avg_sse2):
     ret
 %endif
 
-global sym(vp9_highbd_filter_block1d4_h2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_h2_sse2):
+global sym(vpx_highbd_filter_block1d4_h2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_h2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -357,8 +357,8 @@ sym(vp9_highbd_filter_block1d4_h2_sse2):
     ret
 
 %if ARCH_X86_64
-global sym(vp9_highbd_filter_block1d8_h2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_h2_sse2):
+global sym(vpx_highbd_filter_block1d8_h2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_h2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -383,8 +383,8 @@ sym(vp9_highbd_filter_block1d8_h2_sse2):
     pop         rbp
     ret
 
-global sym(vp9_highbd_filter_block1d16_h2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_h2_sse2):
+global sym(vpx_highbd_filter_block1d16_h2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_h2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -412,8 +412,8 @@ sym(vp9_highbd_filter_block1d16_h2_sse2):
     ret
 %endif
 
-global sym(vp9_highbd_filter_block1d4_h2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_h2_avg_sse2):
+global sym(vpx_highbd_filter_block1d4_h2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_h2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -438,8 +438,8 @@ sym(vp9_highbd_filter_block1d4_h2_avg_sse2):
     ret
 
 %if ARCH_X86_64
-global sym(vp9_highbd_filter_block1d8_h2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_h2_avg_sse2):
+global sym(vpx_highbd_filter_block1d8_h2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_h2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
@@ -464,8 +464,8 @@ sym(vp9_highbd_filter_block1d8_h2_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_highbd_filter_block1d16_h2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_h2_avg_sse2):
+global sym(vpx_highbd_filter_block1d16_h2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_h2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
similarity index 92%
rename from vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
rename to vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
index cee8d1e76acbfc0cf16f8f2a7c756f888e15c1c8..29ede19f76094337ebf191c7181b0f143e6072d0 100644 (file)
 // Due to a header conflict between math.h and intrinsics includes with ceil()
 // in certain configurations under vs9 this include needs to precede
 // immintrin.h.
-#include "./vp9_rtcd.h"
 
 #include <immintrin.h>
 
-#include "vp9/common/x86/convolve.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/x86/convolve.h"
 #include "vpx_ports/mem.h"
 
 // filters for 16_h8 and 16_v8
@@ -60,7 +60,7 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
 # define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
 #endif  // __clang__
 
-static void vp9_filter_block1d16_h8_avx2(const uint8_t *src_ptr,
+static void vpx_filter_block1d16_h8_avx2(const uint8_t *src_ptr,
                                          ptrdiff_t src_pixels_per_line,
                                          uint8_t *output_ptr,
                                          ptrdiff_t output_pitch,
@@ -304,7 +304,7 @@ static void vp9_filter_block1d16_h8_avx2(const uint8_t *src_ptr,
   }
 }
 
-static void vp9_filter_block1d16_v8_avx2(const uint8_t *src_ptr,
+static void vpx_filter_block1d16_v8_avx2(const uint8_t *src_ptr,
                                          ptrdiff_t src_pitch,
                                          uint8_t *output_ptr,
                                          ptrdiff_t out_pitch,
@@ -551,41 +551,41 @@ static void vp9_filter_block1d16_v8_avx2(const uint8_t *src_ptr,
 }
 
 #if HAVE_AVX2 && HAVE_SSSE3
-filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
 #if ARCH_X86_64
-filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
-#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3
-#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3
-#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3
+filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
+#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_intrin_ssse3
+#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_intrin_ssse3
+#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_intrin_ssse3
 #else  // ARCH_X86
-filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
-#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
-#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
-#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
+filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
+#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_ssse3
+#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_ssse3
+#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_ssse3
 #endif  // ARCH_X86_64
-filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
-#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3
-#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3
-#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3
-#define vp9_filter_block1d8_v2_avx2  vp9_filter_block1d8_v2_ssse3
-#define vp9_filter_block1d8_h2_avx2  vp9_filter_block1d8_h2_ssse3
-#define vp9_filter_block1d4_v2_avx2  vp9_filter_block1d4_v2_ssse3
-#define vp9_filter_block1d4_h2_avx2  vp9_filter_block1d4_h2_ssse3
-// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
+filter8_1dfunction vpx_filter_block1d16_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h2_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h2_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h2_ssse3;
+#define vpx_filter_block1d4_v8_avx2 vpx_filter_block1d4_v8_ssse3
+#define vpx_filter_block1d16_v2_avx2 vpx_filter_block1d16_v2_ssse3
+#define vpx_filter_block1d16_h2_avx2 vpx_filter_block1d16_h2_ssse3
+#define vpx_filter_block1d8_v2_avx2  vpx_filter_block1d8_v2_ssse3
+#define vpx_filter_block1d8_h2_avx2  vpx_filter_block1d8_h2_ssse3
+#define vpx_filter_block1d4_v2_avx2  vpx_filter_block1d4_v2_ssse3
+#define vpx_filter_block1d4_h2_avx2  vpx_filter_block1d4_h2_ssse3
+// void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
 //                                uint8_t *dst, ptrdiff_t dst_stride,
 //                                const int16_t *filter_x, int x_step_q4,
 //                                const int16_t *filter_y, int y_step_q4,
 //                                int w, int h);
-// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
 //                               uint8_t *dst, ptrdiff_t dst_stride,
 //                               const int16_t *filter_x, int x_step_q4,
 //                               const int16_t *filter_y, int y_step_q4,
@@ -593,7 +593,7 @@ filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
 
-// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
 //                          uint8_t *dst, ptrdiff_t dst_stride,
 //                          const int16_t *filter_x, int x_step_q4,
 //                          const int16_t *filter_y, int y_step_q4,
similarity index 89%
rename from vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
rename to vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
index 5fd2857e14036710bf8006d746636c5a450df13d..01771dec95a098056c40bda43a82d84f91cca9f8 100644 (file)
 // Due to a header conflict between math.h and intrinsics includes with ceil()
 // in certain configurations under vs9 this include needs to precede
 // tmmintrin.h.
-#include "./vp9_rtcd.h"
 
 #include <tmmintrin.h>
 
-#include "vp9/common/x86/convolve.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/x86/convolve.h"
 #include "vpx_ports/mem.h"
 #include "vpx_ports/emmintrin_compat.h"
 
@@ -46,11 +46,11 @@ DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = {
 };
 
 // These are reused by the avx2 intrinsics.
-filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
 
-void vp9_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr,
+void vpx_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr,
                                          ptrdiff_t src_pixels_per_line,
                                          uint8_t *output_ptr,
                                          ptrdiff_t output_pitch,
@@ -121,7 +121,7 @@ void vp9_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr,
   }
 }
 
-void vp9_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr,
+void vpx_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr,
                                          ptrdiff_t src_pixels_per_line,
                                          uint8_t *output_ptr,
                                          ptrdiff_t output_pitch,
@@ -201,7 +201,7 @@ void vp9_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr,
   }
 }
 
-static void vp9_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr,
+static void vpx_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr,
                                                  ptrdiff_t src_pixels_per_line,
                                                  uint8_t *output_ptr,
                                                  ptrdiff_t output_pitch,
@@ -318,7 +318,7 @@ static void vp9_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr,
   }
 }
 
-void vp9_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr,
+void vpx_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr,
                                          ptrdiff_t src_pitch,
                                          uint8_t *output_ptr,
                                          ptrdiff_t out_pitch,
@@ -406,7 +406,7 @@ void vp9_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr,
   }
 }
 
-static void vp9_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr,
+static void vpx_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr,
                                                  ptrdiff_t src_pitch,
                                                  uint8_t *output_ptr,
                                                  ptrdiff_t out_pitch,
@@ -522,61 +522,61 @@ static void vp9_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr,
 }
 
 #if ARCH_X86_64
-filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
-#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3
-#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3
-#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3
-#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3
-#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3
+filter8_1dfunction vpx_filter_block1d16_v8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
+#define vpx_filter_block1d16_v8_ssse3 vpx_filter_block1d16_v8_intrin_ssse3
+#define vpx_filter_block1d16_h8_ssse3 vpx_filter_block1d16_h8_intrin_ssse3
+#define vpx_filter_block1d8_v8_ssse3 vpx_filter_block1d8_v8_intrin_ssse3
+#define vpx_filter_block1d8_h8_ssse3 vpx_filter_block1d8_h8_intrin_ssse3
+#define vpx_filter_block1d4_h8_ssse3 vpx_filter_block1d4_h8_intrin_ssse3
 #else  // ARCH_X86
-filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
+filter8_1dfunction vpx_filter_block1d16_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h8_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
 #endif  // ARCH_X86_64
-filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
-
-filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3;
-
-// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+filter8_1dfunction vpx_filter_block1d16_v8_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h8_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_avg_ssse3;
+
+filter8_1dfunction vpx_filter_block1d16_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h2_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h2_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h2_ssse3;
+filter8_1dfunction vpx_filter_block1d16_v2_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h2_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v2_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h2_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v2_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3;
+
+// void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 //                                uint8_t *dst, ptrdiff_t dst_stride,
 //                                const int16_t *filter_x, int x_step_q4,
 //                                const int16_t *filter_y, int y_step_q4,
 //                                int w, int h);
-// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 //                               uint8_t *dst, ptrdiff_t dst_stride,
 //                               const int16_t *filter_x, int x_step_q4,
 //                               const int16_t *filter_y, int y_step_q4,
 //                               int w, int h);
-// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 //                                    uint8_t *dst, ptrdiff_t dst_stride,
 //                                    const int16_t *filter_x, int x_step_q4,
 //                                    const int16_t *filter_y, int y_step_q4,
 //                                    int w, int h);
-// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 //                                   uint8_t *dst, ptrdiff_t dst_stride,
 //                                   const int16_t *filter_x, int x_step_q4,
 //                                   const int16_t *filter_y, int y_step_q4,
@@ -587,12 +587,12 @@ FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
 FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
             ssse3);
 
-// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 //                          uint8_t *dst, ptrdiff_t dst_stride,
 //                          const int16_t *filter_x, int x_step_q4,
 //                          const int16_t *filter_y, int y_step_q4,
 //                          int w, int h);
-// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 //                              uint8_t *dst, ptrdiff_t dst_stride,
 //                              const int16_t *filter_x, int x_step_q4,
 //                              const int16_t *filter_y, int y_step_q4,
similarity index 94%
rename from vp9/common/x86/vp9_subpixel_8t_sse2.asm
rename to vpx_dsp/x86/vpx_subpixel_8t_sse2.asm
index 9dc8d0abb543d18e10cec57f84a2e4ff5b3d950e..08f3d6a6cf3588965bca9e165421132a41947cf7 100644 (file)
     movq        [rdi + %2], xmm0
 %endm
 
-;void vp9_filter_block1d4_v8_sse2
+;void vpx_filter_block1d4_v8_sse2
 ;(
 ;    unsigned char *src_ptr,
 ;    unsigned int   src_pitch,
 ;    unsigned int   output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d4_v8_sse2) PRIVATE
-sym(vp9_filter_block1d4_v8_sse2):
+global sym(vpx_filter_block1d4_v8_sse2) PRIVATE
+sym(vpx_filter_block1d4_v8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -243,7 +243,7 @@ sym(vp9_filter_block1d4_v8_sse2):
     pop         rbp
     ret
 
-;void vp9_filter_block1d8_v8_sse2
+;void vpx_filter_block1d8_v8_sse2
 ;(
 ;    unsigned char *src_ptr,
 ;    unsigned int   src_pitch,
@@ -252,8 +252,8 @@ sym(vp9_filter_block1d4_v8_sse2):
 ;    unsigned int   output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d8_v8_sse2) PRIVATE
-sym(vp9_filter_block1d8_v8_sse2):
+global sym(vpx_filter_block1d8_v8_sse2) PRIVATE
+sym(vpx_filter_block1d8_v8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -302,7 +302,7 @@ sym(vp9_filter_block1d8_v8_sse2):
     pop         rbp
     ret
 
-;void vp9_filter_block1d16_v8_sse2
+;void vpx_filter_block1d16_v8_sse2
 ;(
 ;    unsigned char *src_ptr,
 ;    unsigned int   src_pitch,
@@ -311,8 +311,8 @@ sym(vp9_filter_block1d8_v8_sse2):
 ;    unsigned int   output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d16_v8_sse2) PRIVATE
-sym(vp9_filter_block1d16_v8_sse2):
+global sym(vpx_filter_block1d16_v8_sse2) PRIVATE
+sym(vpx_filter_block1d16_v8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -365,8 +365,8 @@ sym(vp9_filter_block1d16_v8_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d4_v8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d4_v8_avg_sse2):
+global sym(vpx_filter_block1d4_v8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d4_v8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -423,8 +423,8 @@ sym(vp9_filter_block1d4_v8_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_v8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d8_v8_avg_sse2):
+global sym(vpx_filter_block1d8_v8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d8_v8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -472,8 +472,8 @@ sym(vp9_filter_block1d8_v8_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_v8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d16_v8_avg_sse2):
+global sym(vpx_filter_block1d16_v8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d16_v8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -525,7 +525,7 @@ sym(vp9_filter_block1d16_v8_avg_sse2):
     pop         rbp
     ret
 
-;void vp9_filter_block1d4_h8_sse2
+;void vpx_filter_block1d4_h8_sse2
 ;(
 ;    unsigned char  *src_ptr,
 ;    unsigned int    src_pixels_per_line,
@@ -534,8 +534,8 @@ sym(vp9_filter_block1d16_v8_avg_sse2):
 ;    unsigned int    output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d4_h8_sse2) PRIVATE
-sym(vp9_filter_block1d4_h8_sse2):
+global sym(vpx_filter_block1d4_h8_sse2) PRIVATE
+sym(vpx_filter_block1d4_h8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -599,7 +599,7 @@ sym(vp9_filter_block1d4_h8_sse2):
     pop         rbp
     ret
 
-;void vp9_filter_block1d8_h8_sse2
+;void vpx_filter_block1d8_h8_sse2
 ;(
 ;    unsigned char  *src_ptr,
 ;    unsigned int    src_pixels_per_line,
@@ -608,8 +608,8 @@ sym(vp9_filter_block1d4_h8_sse2):
 ;    unsigned int    output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d8_h8_sse2) PRIVATE
-sym(vp9_filter_block1d8_h8_sse2):
+global sym(vpx_filter_block1d8_h8_sse2) PRIVATE
+sym(vpx_filter_block1d8_h8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -674,7 +674,7 @@ sym(vp9_filter_block1d8_h8_sse2):
     pop         rbp
     ret
 
-;void vp9_filter_block1d16_h8_sse2
+;void vpx_filter_block1d16_h8_sse2
 ;(
 ;    unsigned char  *src_ptr,
 ;    unsigned int    src_pixels_per_line,
@@ -683,8 +683,8 @@ sym(vp9_filter_block1d8_h8_sse2):
 ;    unsigned int    output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d16_h8_sse2) PRIVATE
-sym(vp9_filter_block1d16_h8_sse2):
+global sym(vpx_filter_block1d16_h8_sse2) PRIVATE
+sym(vpx_filter_block1d16_h8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -769,8 +769,8 @@ sym(vp9_filter_block1d16_h8_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d4_h8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d4_h8_avg_sse2):
+global sym(vpx_filter_block1d4_h8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d4_h8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -834,8 +834,8 @@ sym(vp9_filter_block1d4_h8_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_h8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d8_h8_avg_sse2):
+global sym(vpx_filter_block1d8_h8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d8_h8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -900,8 +900,8 @@ sym(vp9_filter_block1d8_h8_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_h8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d16_h8_avg_sse2):
+global sym(vpx_filter_block1d16_h8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d16_h8_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
similarity index 95%
rename from vp9/common/x86/vp9_subpixel_8t_ssse3.asm
rename to vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
index 4a5bf1b6003a6c17205783195f43ea4b5f177dcb..68acc03cec417665e3dfa1f85333788243ebfd45 100644 (file)
     jnz         .loop
 %endm
 
-;void vp9_filter_block1d8_v8_ssse3
+;void vpx_filter_block1d8_v8_ssse3
 ;(
 ;    unsigned char *src_ptr,
 ;    unsigned int   src_pitch,
 ;    unsigned int   output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d4_v8_ssse3) PRIVATE
-sym(vp9_filter_block1d4_v8_ssse3):
+global sym(vpx_filter_block1d4_v8_ssse3) PRIVATE
+sym(vpx_filter_block1d4_v8_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -351,7 +351,7 @@ sym(vp9_filter_block1d4_v8_ssse3):
     pop         rbp
     ret
 
-;void vp9_filter_block1d8_v8_ssse3
+;void vpx_filter_block1d8_v8_ssse3
 ;(
 ;    unsigned char *src_ptr,
 ;    unsigned int   src_pitch,
@@ -360,8 +360,8 @@ sym(vp9_filter_block1d4_v8_ssse3):
 ;    unsigned int   output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE
-sym(vp9_filter_block1d8_v8_ssse3):
+global sym(vpx_filter_block1d8_v8_ssse3) PRIVATE
+sym(vpx_filter_block1d8_v8_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -392,7 +392,7 @@ sym(vp9_filter_block1d8_v8_ssse3):
     pop         rbp
     ret
 
-;void vp9_filter_block1d16_v8_ssse3
+;void vpx_filter_block1d16_v8_ssse3
 ;(
 ;    unsigned char *src_ptr,
 ;    unsigned int   src_pitch,
@@ -401,8 +401,8 @@ sym(vp9_filter_block1d8_v8_ssse3):
 ;    unsigned int   output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d16_v8_ssse3) PRIVATE
-sym(vp9_filter_block1d16_v8_ssse3):
+global sym(vpx_filter_block1d16_v8_ssse3) PRIVATE
+sym(vpx_filter_block1d16_v8_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -436,8 +436,8 @@ sym(vp9_filter_block1d16_v8_ssse3):
 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 
-global sym(vp9_filter_block1d4_v8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d4_v8_avg_ssse3):
+global sym(vpx_filter_block1d4_v8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d4_v8_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -468,8 +468,8 @@ sym(vp9_filter_block1d4_v8_avg_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_v8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d8_v8_avg_ssse3):
+global sym(vpx_filter_block1d8_v8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d8_v8_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -500,8 +500,8 @@ sym(vp9_filter_block1d8_v8_avg_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_v8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d16_v8_avg_ssse3):
+global sym(vpx_filter_block1d16_v8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d16_v8_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -838,7 +838,7 @@ sym(vp9_filter_block1d16_v8_avg_ssse3):
     jnz         .loop
 %endm
 
-;void vp9_filter_block1d4_h8_ssse3
+;void vpx_filter_block1d4_h8_ssse3
 ;(
 ;    unsigned char  *src_ptr,
 ;    unsigned int    src_pixels_per_line,
@@ -847,8 +847,8 @@ sym(vp9_filter_block1d16_v8_avg_ssse3):
 ;    unsigned int    output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d4_h8_ssse3) PRIVATE
-sym(vp9_filter_block1d4_h8_ssse3):
+global sym(vpx_filter_block1d4_h8_ssse3) PRIVATE
+sym(vpx_filter_block1d4_h8_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -877,7 +877,7 @@ sym(vp9_filter_block1d4_h8_ssse3):
     pop         rbp
     ret
 
-;void vp9_filter_block1d8_h8_ssse3
+;void vpx_filter_block1d8_h8_ssse3
 ;(
 ;    unsigned char  *src_ptr,
 ;    unsigned int    src_pixels_per_line,
@@ -886,8 +886,8 @@ sym(vp9_filter_block1d4_h8_ssse3):
 ;    unsigned int    output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d8_h8_ssse3) PRIVATE
-sym(vp9_filter_block1d8_h8_ssse3):
+global sym(vpx_filter_block1d8_h8_ssse3) PRIVATE
+sym(vpx_filter_block1d8_h8_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -919,7 +919,7 @@ sym(vp9_filter_block1d8_h8_ssse3):
     pop         rbp
     ret
 
-;void vp9_filter_block1d16_h8_ssse3
+;void vpx_filter_block1d16_h8_ssse3
 ;(
 ;    unsigned char  *src_ptr,
 ;    unsigned int    src_pixels_per_line,
@@ -928,8 +928,8 @@ sym(vp9_filter_block1d8_h8_ssse3):
 ;    unsigned int    output_height,
 ;    short *filter
 ;)
-global sym(vp9_filter_block1d16_h8_ssse3) PRIVATE
-sym(vp9_filter_block1d16_h8_ssse3):
+global sym(vpx_filter_block1d16_h8_ssse3) PRIVATE
+sym(vpx_filter_block1d16_h8_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -961,8 +961,8 @@ sym(vp9_filter_block1d16_h8_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d4_h8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d4_h8_avg_ssse3):
+global sym(vpx_filter_block1d4_h8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d4_h8_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -991,8 +991,8 @@ sym(vp9_filter_block1d4_h8_avg_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_h8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d8_h8_avg_ssse3):
+global sym(vpx_filter_block1d8_h8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d8_h8_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -1024,8 +1024,8 @@ sym(vp9_filter_block1d8_h8_avg_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_h8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d16_h8_avg_ssse3):
+global sym(vpx_filter_block1d16_h8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d16_h8_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
similarity index 89%
rename from vp9/common/x86/vp9_subpixel_bilinear_sse2.asm
rename to vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm
index d94ccf2e9b79d6e1cc011243143a9bd919572b93..a378dd0402822b5ad6902e1b15383c67cedb1dd4 100644 (file)
     dec         rcx
 %endm
 
-global sym(vp9_filter_block1d4_v2_sse2) PRIVATE
-sym(vp9_filter_block1d4_v2_sse2):
+global sym(vpx_filter_block1d4_v2_sse2) PRIVATE
+sym(vpx_filter_block1d4_v2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -155,8 +155,8 @@ sym(vp9_filter_block1d4_v2_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_v2_sse2) PRIVATE
-sym(vp9_filter_block1d8_v2_sse2):
+global sym(vpx_filter_block1d8_v2_sse2) PRIVATE
+sym(vpx_filter_block1d8_v2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -181,8 +181,8 @@ sym(vp9_filter_block1d8_v2_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_v2_sse2) PRIVATE
-sym(vp9_filter_block1d16_v2_sse2):
+global sym(vpx_filter_block1d16_v2_sse2) PRIVATE
+sym(vpx_filter_block1d16_v2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -209,8 +209,8 @@ sym(vp9_filter_block1d16_v2_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d4_v2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d4_v2_avg_sse2):
+global sym(vpx_filter_block1d4_v2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d4_v2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -233,8 +233,8 @@ sym(vp9_filter_block1d4_v2_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_v2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d8_v2_avg_sse2):
+global sym(vpx_filter_block1d8_v2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d8_v2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -259,8 +259,8 @@ sym(vp9_filter_block1d8_v2_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_v2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d16_v2_avg_sse2):
+global sym(vpx_filter_block1d16_v2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d16_v2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -287,8 +287,8 @@ sym(vp9_filter_block1d16_v2_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d4_h2_sse2) PRIVATE
-sym(vp9_filter_block1d4_h2_sse2):
+global sym(vpx_filter_block1d4_h2_sse2) PRIVATE
+sym(vpx_filter_block1d4_h2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -312,8 +312,8 @@ sym(vp9_filter_block1d4_h2_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_h2_sse2) PRIVATE
-sym(vp9_filter_block1d8_h2_sse2):
+global sym(vpx_filter_block1d8_h2_sse2) PRIVATE
+sym(vpx_filter_block1d8_h2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -339,8 +339,8 @@ sym(vp9_filter_block1d8_h2_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_h2_sse2) PRIVATE
-sym(vp9_filter_block1d16_h2_sse2):
+global sym(vpx_filter_block1d16_h2_sse2) PRIVATE
+sym(vpx_filter_block1d16_h2_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -367,8 +367,8 @@ sym(vp9_filter_block1d16_h2_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d4_h2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d4_h2_avg_sse2):
+global sym(vpx_filter_block1d4_h2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d4_h2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -392,8 +392,8 @@ sym(vp9_filter_block1d4_h2_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_h2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d8_h2_avg_sse2):
+global sym(vpx_filter_block1d8_h2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d8_h2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -419,8 +419,8 @@ sym(vp9_filter_block1d8_h2_avg_sse2):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_h2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d16_h2_avg_sse2):
+global sym(vpx_filter_block1d16_h2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d16_h2_avg_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
similarity index 88%
rename from vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm
rename to vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm
index b5e18fe6d4a798b08bedd9d2db42aae613c8e2c2..3c8cfd225378f50826c73dfe46ce98d88bf34359 100644 (file)
     dec         rcx
 %endm
 
-global sym(vp9_filter_block1d4_v2_ssse3) PRIVATE
-sym(vp9_filter_block1d4_v2_ssse3):
+global sym(vpx_filter_block1d4_v2_ssse3) PRIVATE
+sym(vpx_filter_block1d4_v2_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -133,8 +133,8 @@ sym(vp9_filter_block1d4_v2_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_v2_ssse3) PRIVATE
-sym(vp9_filter_block1d8_v2_ssse3):
+global sym(vpx_filter_block1d8_v2_ssse3) PRIVATE
+sym(vpx_filter_block1d8_v2_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -159,8 +159,8 @@ sym(vp9_filter_block1d8_v2_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_v2_ssse3) PRIVATE
-sym(vp9_filter_block1d16_v2_ssse3):
+global sym(vpx_filter_block1d16_v2_ssse3) PRIVATE
+sym(vpx_filter_block1d16_v2_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -186,8 +186,8 @@ sym(vp9_filter_block1d16_v2_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d4_v2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d4_v2_avg_ssse3):
+global sym(vpx_filter_block1d4_v2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d4_v2_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -210,8 +210,8 @@ sym(vp9_filter_block1d4_v2_avg_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_v2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d8_v2_avg_ssse3):
+global sym(vpx_filter_block1d8_v2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d8_v2_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -236,8 +236,8 @@ sym(vp9_filter_block1d8_v2_avg_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_v2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d16_v2_avg_ssse3):
+global sym(vpx_filter_block1d16_v2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d16_v2_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -263,8 +263,8 @@ sym(vp9_filter_block1d16_v2_avg_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d4_h2_ssse3) PRIVATE
-sym(vp9_filter_block1d4_h2_ssse3):
+global sym(vpx_filter_block1d4_h2_ssse3) PRIVATE
+sym(vpx_filter_block1d4_h2_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -288,8 +288,8 @@ sym(vp9_filter_block1d4_h2_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_h2_ssse3) PRIVATE
-sym(vp9_filter_block1d8_h2_ssse3):
+global sym(vpx_filter_block1d8_h2_ssse3) PRIVATE
+sym(vpx_filter_block1d8_h2_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -315,8 +315,8 @@ sym(vp9_filter_block1d8_h2_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_h2_ssse3) PRIVATE
-sym(vp9_filter_block1d16_h2_ssse3):
+global sym(vpx_filter_block1d16_h2_ssse3) PRIVATE
+sym(vpx_filter_block1d16_h2_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -342,8 +342,8 @@ sym(vp9_filter_block1d16_h2_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d4_h2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d4_h2_avg_ssse3):
+global sym(vpx_filter_block1d4_h2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d4_h2_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -367,8 +367,8 @@ sym(vp9_filter_block1d4_h2_avg_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d8_h2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d8_h2_avg_ssse3):
+global sym(vpx_filter_block1d8_h2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d8_h2_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -394,8 +394,8 @@ sym(vp9_filter_block1d8_h2_avg_ssse3):
     pop         rbp
     ret
 
-global sym(vp9_filter_block1d16_h2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d16_h2_avg_ssse3):
+global sym(vpx_filter_block1d16_h2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d16_h2_avg_ssse3):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6