AV1_COMMON_SRCS-yes += common/av1_fwd_txfm2d_cfg.h
AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d.c
AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d_cfg.h
-AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/convolve_filter_ssse3.h
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c
-AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_filters_ssse3.c
+AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_filters_ssse3.h
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
-AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_convolve_filter_sse4.h
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c
-AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_filters_sse4.c
+AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_filters_sse4.h
endif
AV1_COMMON_SRCS-yes += common/av1_convolve.c
AV1_COMMON_SRCS-yes += common/av1_convolve.h
};
#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(16, const int16_t,
- av1_sub_pel_filters_temporalfilter_12[SUBPEL_SHIFTS][12]) = {
+DECLARE_ALIGNED(16, static const int16_t,
+ sub_pel_filters_temporalfilter_12[SUBPEL_SHIFTS][12]) = {
// intfilt 0.8
{ 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 },
{ 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0 },
};
#endif // CONFIG_EXT_INTRA
-DECLARE_ALIGNED(256, const int16_t,
- av1_sub_pel_filters_10sharp[SUBPEL_SHIFTS][10]) = {
+DECLARE_ALIGNED(256, static const int16_t,
+ sub_pel_filters_10sharp[SUBPEL_SHIFTS][10]) = {
// intfilt 0.77
{ 0, 0, 0, 0, 128, 0, 0, 0, 0, 0 },
{ 0, -1, 3, -6, 127, 8, -4, 2, -1, 0 },
{ 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -11, 31, 95, 19, -10, 2 },
};
-DECLARE_ALIGNED(16, const int16_t,
- av1_sub_pel_filters_12sharp[SUBPEL_SHIFTS][12]) = {
+DECLARE_ALIGNED(16, static const int16_t,
+ sub_pel_filters_12sharp[SUBPEL_SHIFTS][12]) = {
// intfilt 0.85
{ 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 },
{ 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0 },
#if CONFIG_EXT_INTERP
static const InterpFilterParams
av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
- { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS },
- { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS },
- { (const int16_t *)av1_sub_pel_filters_10sharp, 10, SUBPEL_SHIFTS },
- { (const int16_t *)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS },
- { (const int16_t *)av1_sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS },
- { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS }
+ { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
+ EIGHTTAP_REGULAR },  // 4th initializer fills the new interp_filter field
+ { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
+ EIGHTTAP_SMOOTH },
+ { (const int16_t *)sub_pel_filters_10sharp, 10, SUBPEL_SHIFTS,
+ MULTITAP_SHARP },  // in this variant the tag marks the 10-tap table
+ { (const int16_t *)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS,
+ EIGHTTAP_SMOOTH2 },
+ { (const int16_t *)sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS,
+ MULTITAP_SHARP2 },  // tag for the 12-tap table
+ { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
+ BILINEAR }
};
#else
static const InterpFilterParams
av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
- { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS },
- { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS },
- { (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS },
- { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS }
+ { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
+ EIGHTTAP_REGULAR },  // 4th initializer fills the new interp_filter field
+ { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
+ EIGHTTAP_SMOOTH },
+ { (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
+ MULTITAP_SHARP },  // here the same tag marks sub_pel_filters_8sharp, not a 10-tap table
+ { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
+ BILINEAR }
};
#endif // CONFIG_EXT_INTERP
#if USE_TEMPORALFILTER_12TAP
static const InterpFilterParams av1_interp_temporalfilter_12tap = {
- (const int16_t *)av1_sub_pel_filters_temporalfilter_12, 12, SUBPEL_SHIFTS
+ (const int16_t *)sub_pel_filters_temporalfilter_12, 12, SUBPEL_SHIFTS,
+ TEMPORALFILTER_12TAP  // interp_filter tag; the SIMD table lookups compare p.interp_filter against it
};
#endif // USE_TEMPORALFILTER_12TAP
const int16_t *filter_ptr;
uint16_t taps;
uint16_t subpel_shifts;
+ InterpFilter interp_filter;
} InterpFilterParams;
InterpFilterParams av1_get_interp_filter_params(
return (ip.filter_ptr[ip.taps / 2 - 1] == 128);
}
-#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(16, extern const int16_t,
- av1_sub_pel_filters_temporalfilter_12[SUBPEL_SHIFTS][12]);
-#endif
-
-#if CONFIG_EXT_INTERP
-DECLARE_ALIGNED(256, extern const int16_t,
- av1_sub_pel_filters_10sharp[SUBPEL_SHIFTS][10]);
-DECLARE_ALIGNED(16, extern const int16_t,
- av1_sub_pel_filters_12sharp[SUBPEL_SHIFTS][12]);
-#endif
-
-typedef const int8_t (*SubpelFilterCoeffs)[16];
-#if CONFIG_AOM_HIGHBITDEPTH
-typedef const int16_t (*HbdSubpelFilterCoeffs)[8];
-#endif
-
-SubpelFilterCoeffs av1_get_subpel_filter_signal_dir(const InterpFilterParams p,
- int index);
-
-SubpelFilterCoeffs av1_get_subpel_filter_ver_signal_dir(
- const InterpFilterParams p, int index);
-#if CONFIG_AOM_HIGHBITDEPTH
-HbdSubpelFilterCoeffs av1_hbd_get_subpel_filter_ver_signal_dir(
- const InterpFilterParams p, int index);
-#endif
-
#ifdef __cplusplus
} // extern "C"
#endif
/*
- * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+
+#ifndef AV1_COMMON_X86_AV1_CONVOLVE_FILTERS_SSSE3_H_
+#define AV1_COMMON_X86_AV1_CONVOLVE_FILTERS_SSSE3_H_
+
#include "./aom_config.h"
-#include "av1/common/filter.h"
#if CONFIG_EXT_INTERP
-DECLARE_ALIGNED(16, const int8_t,
- av1_sub_pel_filters_10sharp_signal_dir[15][2][16]) = {
+DECLARE_ALIGNED(16, static const int8_t,
+ sub_pel_filters_10sharp_signal_dir[15][2][16]) = {
{
{ 0, 0, -1, 3, -6, 127, 8, -4, 2, -1, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, -1, 3, -6, 127, 8, -4, 2, -1, 0, 0, 0, 0 },
};
#endif
#if CONFIG_EXT_INTERP
-DECLARE_ALIGNED(16, const int8_t,
- av1_sub_pel_filters_10sharp_ver_signal_dir[15][6][16]) = {
+DECLARE_ALIGNED(16, static const int8_t,
+ sub_pel_filters_10sharp_ver_signal_dir[15][6][16]) = {
{
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
};
#endif
#if CONFIG_EXT_INTERP
-DECLARE_ALIGNED(16, const int8_t,
- av1_sub_pel_filters_12sharp_signal_dir[15][2][16]) = {
+DECLARE_ALIGNED(16, static const int8_t,
+ sub_pel_filters_12sharp_signal_dir[15][2][16]) = {
{
{ 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0, 0, 0 },
};
#endif
#if CONFIG_EXT_INTERP
-DECLARE_ALIGNED(16, const int8_t,
- av1_sub_pel_filters_12sharp_ver_signal_dir[15][6][16]) = {
+DECLARE_ALIGNED(16, static const int8_t,
+ sub_pel_filters_12sharp_ver_signal_dir[15][6][16]) = {
{
{ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
{ -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3 },
};
#endif
#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(16, const int8_t,
- av1_sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]) = {
+DECLARE_ALIGNED(16, static const int8_t,
+ sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]) = {
{
{ 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0, 0, 0 },
};
#endif
#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(16, const int8_t,
- av1_sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6]
- [16]) = {
+DECLARE_ALIGNED(16, static const int8_t,
+ sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16]) = {
{
{ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
{ -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
},
};
#endif
+#endif // AV1_COMMON_X86_AV1_CONVOLVE_FILTERS_SSSE3_H_
#include <assert.h>
#include <tmmintrin.h>
+#include "./aom_config.h"
#include "./av1_rtcd.h"
-#include "av1/common/x86/convolve_filter_ssse3.h"
+#include "av1/common/filter.h"
+#include "av1/common/x86/av1_convolve_filters_ssse3.h"
#define WIDTH_BOUND (16)
#define HEIGHT_BOUND (16)
-static SubpelFilterCoeffs get_subpel_filter_signal_dir(
- const InterpFilterParams p, int index) {
+typedef const int8_t (*SubpelFilterCoeffs)[16];  // points at rows of 16 int8_t values
+
+static INLINE SubpelFilterCoeffs  // picks the *_signal_dir table for p's filter and returns its entry `index`
+get_subpel_filter_signal_dir(const InterpFilterParams p, int index) {  // yields NULL when no tag below matches
#if CONFIG_EXT_INTERP
- if (p.filter_ptr == (const int16_t *)av1_sub_pel_filters_12sharp) {
- return &av1_sub_pel_filters_12sharp_signal_dir[index][0];
+ if (p.interp_filter == MULTITAP_SHARP2) {  // keyed on the filter tag instead of the coefficient table's address
+ return &sub_pel_filters_12sharp_signal_dir[index][0];  // first of the 2 rows stored per index ([15][2][16] table)
}
- if (p.filter_ptr == (const int16_t *)av1_sub_pel_filters_10sharp) {
- return &av1_sub_pel_filters_10sharp_signal_dir[index][0];
+ if (p.interp_filter == MULTITAP_SHARP) {  // tag of the 10-tap entry when CONFIG_EXT_INTERP is on
+ return &sub_pel_filters_10sharp_signal_dir[index][0];
}
#endif
#if USE_TEMPORALFILTER_12TAP
- if (p.filter_ptr == (const int16_t *)av1_sub_pel_filters_temporalfilter_12) {
- return &av1_sub_pel_filters_temporalfilter_12_signal_dir[index][0];
+ if (p.interp_filter == TEMPORALFILTER_12TAP) {  // tag carried by av1_interp_temporalfilter_12tap
+ return &sub_pel_filters_temporalfilter_12_signal_dir[index][0];
}
#endif
(void)p;
return NULL;
}
-static SubpelFilterCoeffs get_subpel_filter_ver_signal_dir(
- const InterpFilterParams p, int index) {
+static INLINE SubpelFilterCoeffs
+get_subpel_filter_ver_signal_dir(const InterpFilterParams p, int index) {
#if CONFIG_EXT_INTERP
- if (p.filter_ptr == (const int16_t *)av1_sub_pel_filters_12sharp) {
- return &av1_sub_pel_filters_12sharp_ver_signal_dir[index][0];
+ if (p.interp_filter == MULTITAP_SHARP2) {
+ return &sub_pel_filters_12sharp_ver_signal_dir[index][0];
}
- if (p.filter_ptr == (const int16_t *)av1_sub_pel_filters_10sharp) {
- return &av1_sub_pel_filters_10sharp_ver_signal_dir[index][0];
+ if (p.interp_filter == MULTITAP_SHARP) {
+ return &sub_pel_filters_10sharp_ver_signal_dir[index][0];
}
#endif
#if USE_TEMPORALFILTER_12TAP
- if (p.filter_ptr == (const int16_t *)av1_sub_pel_filters_temporalfilter_12) {
- return &av1_sub_pel_filters_temporalfilter_12_ver_signal_dir[index][0];
+ if (p.interp_filter == TEMPORALFILTER_12TAP) {
+ return &sub_pel_filters_temporalfilter_12_ver_signal_dir[index][0];
}
#endif
(void)p;
/*
- * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+
+#ifndef AV1_COMMON_X86_AV1_HIGHBD_CONVOLVE_FILTERS_SSE4_H_
+#define AV1_COMMON_X86_AV1_HIGHBD_CONVOLVE_FILTERS_SSE4_H_
+
#include "./aom_config.h"
-#include "av1/common/filter.h"
#if CONFIG_AOM_HIGHBITDEPTH
#if CONFIG_EXT_INTERP
-DECLARE_ALIGNED(16, const int16_t,
- av1_sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8]) = {
+DECLARE_ALIGNED(16, static const int16_t,
+ sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8]) = {
{
{ 0, 0, 0, 0, 0, 0, 0, 0 },
{ -1, 3, -1, 3, -1, 3, -1, 3 },
#endif
#if CONFIG_AOM_HIGHBITDEPTH
#if CONFIG_EXT_INTERP
-DECLARE_ALIGNED(16, const int16_t,
- av1_sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8]) = {
+DECLARE_ALIGNED(16, static const int16_t,
+ sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8]) = {
{
{ 0, 1, 0, 1, 0, 1, 0, 1 },
{ -2, 3, -2, 3, -2, 3, -2, 3 },
#endif
#if CONFIG_AOM_HIGHBITDEPTH
#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(
- 16, const int16_t,
- av1_sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8]) = {
+DECLARE_ALIGNED(16, static const int16_t,
+ sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6]
+ [8]) = {
{
{ 0, 1, 0, 1, 0, 1, 0, 1 },
{ -1, 3, -1, 3, -1, 3, -1, 3 },
};
#endif
#endif
+#endif // AV1_COMMON_X86_AV1_HIGHBD_CONVOLVE_FILTERS_SSE4_H_
#include <smmintrin.h>
#include "./av1_rtcd.h"
-#include "av1/common/x86/highbd_convolve_filter_sse4.h"
+#include "av1/common/filter.h"
+#include "av1/common/x86/av1_highbd_convolve_filters_sse4.h"
+
+typedef const int16_t (*HbdSubpelFilterCoeffs)[8];  // points at rows of 8 int16_t values; returned by hbd_get_subpel_filter_ver_signal_dir()
typedef void (*TransposeSave)(const int width, int pixelsNum, uint32_t *src,
int src_stride, uint16_t *dst, int dst_stride,
int bd);
-static HbdSubpelFilterCoeffs hbd_get_subpel_filter_ver_signal_dir(
- const InterpFilterParams p, int index) {
+static INLINE HbdSubpelFilterCoeffs
+hbd_get_subpel_filter_ver_signal_dir(const InterpFilterParams p, int index) {
#if CONFIG_EXT_INTERP
- if (p.filter_ptr == (const int16_t *)av1_sub_pel_filters_12sharp) {
- return &av1_sub_pel_filters_12sharp_highbd_ver_signal_dir[index][0];
+ if (p.interp_filter == MULTITAP_SHARP2) {
+ return &sub_pel_filters_12sharp_highbd_ver_signal_dir[index][0];
}
- if (p.filter_ptr == (const int16_t *)av1_sub_pel_filters_10sharp) {
- return &av1_sub_pel_filters_10sharp_highbd_ver_signal_dir[index][0];
+ if (p.interp_filter == MULTITAP_SHARP) {
+ return &sub_pel_filters_10sharp_highbd_ver_signal_dir[index][0];
}
#endif
#if USE_TEMPORALFILTER_12TAP
- if (p.filter_ptr == (const int16_t *)av1_sub_pel_filters_temporalfilter_12) {
- return &av1_sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[index]
- [0];
+ if (p.interp_filter == TEMPORALFILTER_12TAP) {
+ return &sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[index][0];
}
#endif
(void)p;
+++ /dev/null
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AV1_COMMON_X86_CONVOLVE_FILTER_SSSE3_H_
-#define AV1_COMMON_X86_CONVOLVE_FILTER_SSSE3_H_
-
-#include "./aom_config.h"
-#include "av1/common/filter.h"
-
-#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(16, extern const int8_t,
- av1_sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]);
-DECLARE_ALIGNED(16, extern const int8_t,
- av1_sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6]
- [16]);
-#endif
-
-#if CONFIG_EXT_INTERP
-DECLARE_ALIGNED(16, extern const int8_t,
- av1_sub_pel_filters_12sharp_signal_dir[15][2][16]);
-DECLARE_ALIGNED(16, extern const int8_t,
- av1_sub_pel_filters_10sharp_signal_dir[15][2][16]);
-DECLARE_ALIGNED(16, extern const int8_t,
- av1_sub_pel_filters_12sharp_ver_signal_dir[15][6][16]);
-DECLARE_ALIGNED(16, extern const int8_t,
- av1_sub_pel_filters_10sharp_ver_signal_dir[15][6][16]);
-#endif
-
-#endif // AV1_COMMON_X86_CONVOLVE_FILTER_SSSE3_H_
+++ /dev/null
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AV1_COMMON_X86_HIGHBD_CONVOLVE_FILTER_SSE4_H_
-#define AV1_COMMON_X86_HIGHBD_CONVOLVE_FILTER_SSE4_H_
-
-#include "./aom_config.h"
-#include "av1/common/filter.h"
-
-#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(16, extern const int16_t,
- av1_sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15]
- [6]
- [8]);
-#endif
-
-#if CONFIG_EXT_INTERP
-DECLARE_ALIGNED(16, extern const int16_t,
- av1_sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8]);
-DECLARE_ALIGNED(16, extern const int16_t,
- av1_sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8]);
-#endif
-
-#endif // AV1_COMMON_X86_HIGHBD_CONVOLVE_FILTER_SSE4_H_