+++ /dev/null
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vpx_config.h"
-#include "./vpx_dsp_rtcd.h"
-#include "vpx_dsp/x86/convolve.h"
-
-#if HAVE_SSE2
-filter8_1dfunction vpx_filter_block1d16_v8_sse2;
-filter8_1dfunction vpx_filter_block1d16_h8_sse2;
-filter8_1dfunction vpx_filter_block1d8_v8_sse2;
-filter8_1dfunction vpx_filter_block1d8_h8_sse2;
-filter8_1dfunction vpx_filter_block1d4_v8_sse2;
-filter8_1dfunction vpx_filter_block1d4_h8_sse2;
-filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2;
-filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2;
-filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2;
-filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2;
-filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2;
-filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2;
-
-filter8_1dfunction vpx_filter_block1d16_h4_sse2;
-filter8_1dfunction vpx_filter_block1d16_v4_sse2;
-filter8_1dfunction vpx_filter_block1d8_h4_sse2;
-filter8_1dfunction vpx_filter_block1d8_v4_sse2;
-filter8_1dfunction vpx_filter_block1d4_h4_sse2;
-filter8_1dfunction vpx_filter_block1d4_v4_sse2;
-#define vpx_filter_block1d16_v4_avg_sse2 vpx_filter_block1d16_v8_avg_sse2
-#define vpx_filter_block1d16_h4_avg_sse2 vpx_filter_block1d16_h8_avg_sse2
-#define vpx_filter_block1d8_v4_avg_sse2 vpx_filter_block1d8_v8_avg_sse2
-#define vpx_filter_block1d8_h4_avg_sse2 vpx_filter_block1d8_h8_avg_sse2
-#define vpx_filter_block1d4_v4_avg_sse2 vpx_filter_block1d4_v8_avg_sse2
-#define vpx_filter_block1d4_h4_avg_sse2 vpx_filter_block1d4_h8_avg_sse2
-
-filter8_1dfunction vpx_filter_block1d16_v2_sse2;
-filter8_1dfunction vpx_filter_block1d16_h2_sse2;
-filter8_1dfunction vpx_filter_block1d8_v2_sse2;
-filter8_1dfunction vpx_filter_block1d8_h2_sse2;
-filter8_1dfunction vpx_filter_block1d4_v2_sse2;
-filter8_1dfunction vpx_filter_block1d4_h2_sse2;
-filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2;
-filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2;
-filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2;
-filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2;
-filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2;
-filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2;
-
-// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const InterpKernel *filter, int x0_q4,
-// int32_t x_step_q4, int y0_q4, int y_step_q4,
-// int w, int h);
-// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const InterpKernel *filter, int x0_q4,
-// int32_t x_step_q4, int y0_q4, int y_step_q4,
-// int w, int h);
-// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const InterpKernel *filter, int x0_q4,
-// int32_t x_step_q4, int y0_q4,
-// int y_step_q4, int w, int h);
-// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const InterpKernel *filter, int x0_q4,
-// int32_t x_step_q4, int y0_q4, int y_step_q4,
-// int w, int h);
-FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2);
-FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2);
-FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2);
-FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, sse2);
-
-// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const InterpKernel *filter, int x0_q4,
-// int32_t x_step_q4, int y0_q4, int y_step_q4,
-// int w, int h);
-// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const InterpKernel *filter, int x0_q4,
-// int32_t x_step_q4, int y0_q4, int y_step_q4,
-// int w, int h);
-FUN_CONV_2D(, sse2);
-FUN_CONV_2D(avg_, sse2);
-
-#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
-highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2;
-
-highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v4_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h4_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v4_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h4_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v4_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h4_sse2;
-#define vpx_highbd_filter_block1d16_v4_avg_sse2 \
- vpx_highbd_filter_block1d16_v8_avg_sse2
-#define vpx_highbd_filter_block1d16_h4_avg_sse2 \
- vpx_highbd_filter_block1d16_h8_avg_sse2
-#define vpx_highbd_filter_block1d8_v4_avg_sse2 \
- vpx_highbd_filter_block1d8_v8_avg_sse2
-#define vpx_highbd_filter_block1d8_h4_avg_sse2 \
- vpx_highbd_filter_block1d8_h8_avg_sse2
-#define vpx_highbd_filter_block1d4_v4_avg_sse2 \
- vpx_highbd_filter_block1d4_v8_avg_sse2
-#define vpx_highbd_filter_block1d4_h4_avg_sse2 \
- vpx_highbd_filter_block1d4_h8_avg_sse2
-
-highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2;
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2;
-
-// void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src,
-// ptrdiff_t src_stride,
-// uint8_t *dst,
-// ptrdiff_t dst_stride,
-// const int16_t *filter_x,
-// int x_step_q4,
-// const int16_t *filter_y,
-// int y_step_q4,
-// int w, int h, int bd);
-// void vpx_highbd_convolve8_vert_sse2(const uint8_t *src,
-// ptrdiff_t src_stride,
-// uint8_t *dst,
-// ptrdiff_t dst_stride,
-// const int16_t *filter_x,
-// int x_step_q4,
-// const int16_t *filter_y,
-// int y_step_q4,
-// int w, int h, int bd);
-// void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src,
-// ptrdiff_t src_stride,
-// uint8_t *dst,
-// ptrdiff_t dst_stride,
-// const int16_t *filter_x,
-// int x_step_q4,
-// const int16_t *filter_y,
-// int y_step_q4,
-// int w, int h, int bd);
-// void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src,
-// ptrdiff_t src_stride,
-// uint8_t *dst,
-// ptrdiff_t dst_stride,
-// const int16_t *filter_x,
-// int x_step_q4,
-// const int16_t *filter_y,
-// int y_step_q4,
-// int w, int h, int bd);
-HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2);
-HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2);
-HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2);
-HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_,
- sse2);
-
-// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const InterpKernel *filter, int x0_q4,
-// int32_t x_step_q4, int y0_q4, int y_step_q4,
-// int w, int h, int bd);
-// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const InterpKernel *filter, int x0_q4,
-// int32_t x_step_q4, int y0_q4,
-// int y_step_q4, int w, int h, int bd);
-HIGH_FUN_CONV_2D(, sse2);
-HIGH_FUN_CONV_2D(avg_, sse2);
-#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
-#endif // HAVE_SSE2
#define CONV8_ROUNDING_BITS (7)
#define CONV8_ROUNDING_NUM (1 << (CONV8_ROUNDING_BITS - 1))
-void vpx_filter_block1d16_h4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride,
- uint8_t *dst_ptr, ptrdiff_t dst_stride,
- uint32_t height, const int16_t *kernel) {
+static void vpx_filter_block1d16_h4_sse2(const uint8_t *src_ptr,
+ ptrdiff_t src_stride, uint8_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height,
+ const int16_t *kernel) {
__m128i kernel_reg; // Kernel
__m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used
const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding
/* The macro used to generate functions shifts the src_ptr up by 3 rows already
* */
-void vpx_filter_block1d16_v4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride,
- uint8_t *dst_ptr, ptrdiff_t dst_stride,
- uint32_t height, const int16_t *kernel) {
+static void vpx_filter_block1d16_v4_sse2(const uint8_t *src_ptr,
+ ptrdiff_t src_stride, uint8_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height,
+ const int16_t *kernel) {
// Register for source s[-1:3, :]
__m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3;
// Interleaved rows of the source. lo is first half, hi second
}
}
-void vpx_filter_block1d8_h4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride,
- uint8_t *dst_ptr, ptrdiff_t dst_stride,
- uint32_t height, const int16_t *kernel) {
+static void vpx_filter_block1d8_h4_sse2(const uint8_t *src_ptr,
+ ptrdiff_t src_stride, uint8_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height,
+ const int16_t *kernel) {
__m128i kernel_reg; // Kernel
__m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used
const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding
}
}
-void vpx_filter_block1d8_v4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride,
- uint8_t *dst_ptr, ptrdiff_t dst_stride,
- uint32_t height, const int16_t *kernel) {
+static void vpx_filter_block1d8_v4_sse2(const uint8_t *src_ptr,
+ ptrdiff_t src_stride, uint8_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height,
+ const int16_t *kernel) {
// Register for source s[-1:3, :]
__m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3;
// Interleaved rows of the source. lo is first half, hi second
}
}
-void vpx_filter_block1d4_h4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride,
- uint8_t *dst_ptr, ptrdiff_t dst_stride,
- uint32_t height, const int16_t *kernel) {
+static void vpx_filter_block1d4_h4_sse2(const uint8_t *src_ptr,
+ ptrdiff_t src_stride, uint8_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height,
+ const int16_t *kernel) {
__m128i kernel_reg; // Kernel
__m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used
const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding
}
}
-void vpx_filter_block1d4_v4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride,
- uint8_t *dst_ptr, ptrdiff_t dst_stride,
- uint32_t height, const int16_t *kernel) {
+static void vpx_filter_block1d4_v4_sse2(const uint8_t *src_ptr,
+ ptrdiff_t src_stride, uint8_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height,
+ const int16_t *kernel) {
// Register for source s[-1:3, :]
__m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3;
// Interleaved rows of the source. lo is first half, hi second
}
}
-void vpx_highbd_filter_block1d4_h4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_stride, uint16_t *dst_ptr,
- ptrdiff_t dst_stride, uint32_t height,
- const int16_t *kernel, int bd) {
+#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
+static void vpx_highbd_filter_block1d4_h4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) {
// We will load multiple shifted versions of the row and shuffle them into
// 16-bit words of the form
// ... s[2] s[1] s[0] s[-1]
}
}
-void vpx_highbd_filter_block1d4_v4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_stride, uint16_t *dst_ptr,
- ptrdiff_t dst_stride, uint32_t height,
- const int16_t *kernel, int bd) {
+static void vpx_highbd_filter_block1d4_v4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) {
// We will load two rows of pixels as 16-bit words, and shuffle them into the
// form
// ... s[0,1] s[-1,1] s[0,0] s[-1,0]
}
}
-void vpx_highbd_filter_block1d8_h4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_stride, uint16_t *dst_ptr,
- ptrdiff_t dst_stride, uint32_t height,
- const int16_t *kernel, int bd) {
+static void vpx_highbd_filter_block1d8_h4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) {
// We will load multiple shifted versions of the row and shuffle them into
// 16-bit words of the form
// ... s[2] s[1] s[0] s[-1]
}
}
-void vpx_highbd_filter_block1d8_v4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_stride, uint16_t *dst_ptr,
- ptrdiff_t dst_stride, uint32_t height,
- const int16_t *kernel, int bd) {
+static void vpx_highbd_filter_block1d8_v4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) {
// We will load two rows of pixels as 16-bit words, and shuffle them into the
// form
// ... s[0,1] s[-1,1] s[0,0] s[-1,0]
}
}
-void vpx_highbd_filter_block1d16_h4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_stride,
- uint16_t *dst_ptr,
- ptrdiff_t dst_stride, uint32_t height,
- const int16_t *kernel, int bd) {
+// Handles the 16-wide h4 case by calling vpx_highbd_filter_block1d8_h4_sse2
+// twice: once at src_ptr/dst_ptr and once with both pointers advanced by 8
+// elements. Strides, height, kernel and bd are forwarded unchanged.
+// NOTE(review): presumably each call covers 8 columns (per the block1d8
+// name), so the two calls together cover 16 -- confirm against the 8-wide
+// implementation.
+static void vpx_highbd_filter_block1d16_h4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) {
vpx_highbd_filter_block1d8_h4_sse2(src_ptr, src_stride, dst_ptr, dst_stride,
height, kernel, bd);
vpx_highbd_filter_block1d8_h4_sse2(src_ptr + 8, src_stride, dst_ptr + 8,
dst_stride, height, kernel, bd);
}
-void vpx_highbd_filter_block1d16_v4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_stride,
- uint16_t *dst_ptr,
- ptrdiff_t dst_stride, uint32_t height,
- const int16_t *kernel, int bd) {
+// Handles the 16-wide v4 case by calling vpx_highbd_filter_block1d8_v4_sse2
+// twice: once at src_ptr/dst_ptr and once with both pointers advanced by 8
+// elements. Strides, height, kernel and bd are forwarded unchanged.
+// NOTE(review): presumably each call covers 8 columns (per the block1d8
+// name), so the two calls together cover 16 -- confirm against the 8-wide
+// implementation.
+static void vpx_highbd_filter_block1d16_v4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
+ ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) {
vpx_highbd_filter_block1d8_v4_sse2(src_ptr, src_stride, dst_ptr, dst_stride,
height, kernel, bd);
vpx_highbd_filter_block1d8_v4_sse2(src_ptr + 8, src_stride, dst_ptr + 8,
dst_stride, height, kernel, bd);
}
+#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
+
+// From vpx_dsp/x86/vpx_subpixel_8t_sse2.asm.
+filter8_1dfunction vpx_filter_block1d16_v8_sse2;
+filter8_1dfunction vpx_filter_block1d16_h8_sse2;
+filter8_1dfunction vpx_filter_block1d8_v8_sse2;
+filter8_1dfunction vpx_filter_block1d8_h8_sse2;
+filter8_1dfunction vpx_filter_block1d4_v8_sse2;
+filter8_1dfunction vpx_filter_block1d4_h8_sse2;
+filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2;
+
+// Use the [vh]8 avg version because there is no [vh]4 avg implementation.
+#define vpx_filter_block1d16_v4_avg_sse2 vpx_filter_block1d16_v8_avg_sse2
+#define vpx_filter_block1d16_h4_avg_sse2 vpx_filter_block1d16_h8_avg_sse2
+#define vpx_filter_block1d8_v4_avg_sse2 vpx_filter_block1d8_v8_avg_sse2
+#define vpx_filter_block1d8_h4_avg_sse2 vpx_filter_block1d8_h8_avg_sse2
+#define vpx_filter_block1d4_v4_avg_sse2 vpx_filter_block1d4_v8_avg_sse2
+#define vpx_filter_block1d4_h4_avg_sse2 vpx_filter_block1d4_h8_avg_sse2
+
+// From vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm.
+filter8_1dfunction vpx_filter_block1d16_v2_sse2;
+filter8_1dfunction vpx_filter_block1d16_h2_sse2;
+filter8_1dfunction vpx_filter_block1d8_v2_sse2;
+filter8_1dfunction vpx_filter_block1d8_h2_sse2;
+filter8_1dfunction vpx_filter_block1d4_v2_sse2;
+filter8_1dfunction vpx_filter_block1d4_h2_sse2;
+filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2;
+
+// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
+// int w, int h);
+// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
+// int w, int h);
+// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4,
+// int y_step_q4, int w, int h);
+// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
+// int w, int h);
+FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2);
+FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2);
+FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2);
+FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, sse2);
+
+// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
+// int w, int h);
+// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
+// int w, int h);
+FUN_CONV_2D(, sse2);
+FUN_CONV_2D(avg_, sse2);
+
+#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
+// From vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm.
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2;
+
+// Use the [vh]8 avg version because there is no [vh]4 avg implementation.
+#define vpx_highbd_filter_block1d16_v4_avg_sse2 \
+ vpx_highbd_filter_block1d16_v8_avg_sse2
+#define vpx_highbd_filter_block1d16_h4_avg_sse2 \
+ vpx_highbd_filter_block1d16_h8_avg_sse2
+#define vpx_highbd_filter_block1d8_v4_avg_sse2 \
+ vpx_highbd_filter_block1d8_v8_avg_sse2
+#define vpx_highbd_filter_block1d8_h4_avg_sse2 \
+ vpx_highbd_filter_block1d8_h8_avg_sse2
+#define vpx_highbd_filter_block1d4_v4_avg_sse2 \
+ vpx_highbd_filter_block1d4_v8_avg_sse2
+#define vpx_highbd_filter_block1d4_h4_avg_sse2 \
+ vpx_highbd_filter_block1d4_h8_avg_sse2
+
+// From vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm.
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2;
+
+// void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src,
+//                                      ptrdiff_t src_stride,
+//                                      uint8_t *dst,
+//                                      ptrdiff_t dst_stride,
+//                                      const InterpKernel *filter,
+//                                      int x0_q4, int32_t x_step_q4,
+//                                      int y0_q4, int y_step_q4,
+//                                      int w, int h, int bd);
+// void vpx_highbd_convolve8_vert_sse2(const uint8_t *src,
+//                                     ptrdiff_t src_stride,
+//                                     uint8_t *dst,
+//                                     ptrdiff_t dst_stride,
+//                                     const InterpKernel *filter,
+//                                     int x0_q4, int32_t x_step_q4,
+//                                     int y0_q4, int y_step_q4,
+//                                     int w, int h, int bd);
+// void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src,
+//                                          ptrdiff_t src_stride,
+//                                          uint8_t *dst,
+//                                          ptrdiff_t dst_stride,
+//                                          const InterpKernel *filter,
+//                                          int x0_q4, int32_t x_step_q4,
+//                                          int y0_q4, int y_step_q4,
+//                                          int w, int h, int bd);
+// void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src,
+//                                         ptrdiff_t src_stride,
+//                                         uint8_t *dst,
+//                                         ptrdiff_t dst_stride,
+//                                         const InterpKernel *filter,
+//                                         int x0_q4, int32_t x_step_q4,
+//                                         int y0_q4, int y_step_q4,
+//                                         int w, int h, int bd);
+HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2);
+HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2);
+HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2);
+HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_,
+ sse2);
+
+// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
+// int w, int h, int bd);
+// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4,
+// int y_step_q4, int w, int h, int bd);
+HIGH_FUN_CONV_2D(, sse2);
+HIGH_FUN_CONV_2D(avg_, sse2);
+#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64