From: Erik Niemeyer Date: Tue, 29 Oct 2013 15:48:12 +0000 (-0600) Subject: CL for adding AVX-AVX2 support in libvpx. X-Git-Tag: v1.3.0~117^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e6863ef3185faf90e7be2c7f2c90bce0c08da1eb;p=libvpx CL for adding AVX-AVX2 support in libvpx. Change-Id: Idc03f3fca4bf2d0afd33631ea1d3caf8fc34ec29 --- diff --git a/build/make/Makefile b/build/make/Makefile index 7a2523960..030c1b57f 100644 --- a/build/make/Makefile +++ b/build/make/Makefile @@ -114,6 +114,10 @@ $(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3 $(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3 $(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1 $(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1 +$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx +$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx +$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 +$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(BUILD_PFX)%.c.d: %.c $(if $(quiet),@echo " [DEP] $@") diff --git a/build/make/configure.sh b/build/make/configure.sh index c6c866079..83f480a42 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -1108,6 +1108,18 @@ EOF soft_enable sse4_1 fi + if enabled gcc && ! disabled avx && ! check_cflags -mavx; then + RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx " + else + soft_enable avx + fi + + if enabled gcc && ! disabled avx2 && ! check_cflags -mavx2; then + RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx2 " + else + soft_enable avx2 + fi + case "${AS}" in auto|"") which nasm >/dev/null 2>&1 && AS=nasm diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh index c531e95a1..2967b5aed 100755 --- a/build/make/rtcd.sh +++ b/build/make/rtcd.sh @@ -327,11 +327,11 @@ EOF require c case $arch in x86) - ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1) + ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2) x86 ;; x86_64) - ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1) + ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2) REQUIRES=${REQUIRES:-mmx sse sse2} require $(filter $REQUIRES) x86 diff --git a/configure b/configure index 297cec463..45d5c1204 100755 --- a/configure +++ b/configure @@ -234,6 +234,8 @@ ARCH_EXT_LIST=" sse3 ssse3 sse4_1 + avx + avx2 altivec " @@ -422,7 +424,7 @@ process_targets() { fi # The write_common_config (config.mk) logic is deferred until after the - # recursive calls to configure complete, becuase we want our universal + # recursive calls to configure complete, because we want our universal # targets to be executed last. write_common_config_targets enabled universal && echo "FAT_ARCHS=${fat_bin_archs}" >> config.mk diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc index a4dbca4cd..80aca98b3 100644 --- a/test/test_libvpx.cc +++ b/test/test_libvpx.cc @@ -45,6 +45,10 @@ int main(int argc, char **argv) { append_gtest_filter(":-SSSE3/*"); if (!(simd_caps & HAS_SSE4_1)) append_gtest_filter(":-SSE4_1/*"); + if (!(simd_caps & HAS_AVX)) + append_gtest_filter(":-AVX/*"); + if (!(simd_caps & HAS_AVX2)) + append_gtest_filter(":-AVX2/*"); #endif #if !CONFIG_SHARED diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 2d9fbff97..862933826 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -22,10 +22,11 @@ forward_decls vp9_common_forward_decls # x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly. [ "$CONFIG_USE_X86INC" = "yes" ] && mmx_x86inc=mmx && sse_x86inc=sse && - sse2_x86inc=sse2 && ssse3_x86inc=ssse3 + sse2_x86inc=sse2 && ssse3_x86inc=ssse3 && avx_x86inc=avx && avx2_x86inc=avx2 # this variable is for functions that are 64 bit only. -[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && ssse3_x86_64=ssse3 +[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && + ssse3_x86_64=ssse3 && avx_x86_64=avx && avx2_x86_64=avx2 # # RECON diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h index b009c3560..299058347 100644 --- a/vpx_ports/x86.h +++ b/vpx_ports/x86.h @@ -88,12 +88,14 @@ void __cpuid(int CPUInfo[4], int info_type); #endif #endif /* end others */ -#define HAS_MMX 0x01 -#define HAS_SSE 0x02 -#define HAS_SSE2 0x04 -#define HAS_SSE3 0x08 -#define HAS_SSSE3 0x10 -#define HAS_SSE4_1 0x20 +#define HAS_MMX 0x01 +#define HAS_SSE 0x02 +#define HAS_SSE2 0x04 +#define HAS_SSE3 0x08 +#define HAS_SSSE3 0x10 +#define HAS_SSE4_1 0x20 +#define HAS_AVX 0x40 +#define HAS_AVX2 0x80 #ifndef BIT #define BIT(n) (1<