From fdea2530bd4afb3d512cb9700b1d8cd603fab0e7 Mon Sep 17 00:00:00 2001 From: Simon Riggs Date: Tue, 30 Apr 2013 06:59:26 +0100 Subject: [PATCH] Compiler optimizations for page checksum code. Ants Aasma and Jeff Davis --- config/c-compiler.m4 | 25 ++++++ configure | 130 ++++++++++++++++++++++++++++++ configure.in | 10 +++ src/Makefile.global.in | 1 + src/backend/storage/page/Makefile | 3 + 5 files changed, 169 insertions(+) diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 29db5b16b0..4ba3236ef4 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -242,6 +242,31 @@ undefine([Ac_cachevar])dnl +# PGAC_PROG_CC_VAR_OPT +# ----------------------- +# Given a variable name and a string, check if the compiler supports +# the string as a command-line option. If it does, add the string to +# the given variable. +AC_DEFUN([PGAC_PROG_CC_VAR_OPT], +[define([Ac_cachevar], [AS_TR_SH([pgac_cv_prog_cc_cflags_$2])])dnl +AC_CACHE_CHECK([whether $CC supports $2], [Ac_cachevar], +[pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS $2" +ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +_AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], + [Ac_cachevar=yes], + [Ac_cachevar=no]) +ac_c_werror_flag=$ac_save_c_werror_flag +CFLAGS="$pgac_save_CFLAGS"]) +if test x"$Ac_cachevar" = x"yes"; then + $1="${$1} $2" +fi +undefine([Ac_cachevar])dnl +])# PGAC_PROG_CC_VAR_OPT + + + # PGAC_PROG_CC_LDFLAGS_OPT # ------------------------ # Given a string, check if the compiler supports the string as a diff --git a/configure b/configure index b391308d81..826f3e183c 100755 --- a/configure +++ b/configure @@ -731,6 +731,7 @@ autodepend TAS GCC CPP +CFLAGS_VECTOR SUN_STUDIO_CC OBJEXT EXEEXT @@ -3944,6 +3945,11 @@ else fi fi +# set CFLAGS_VECTOR from the environment, if available +if test "$ac_env_CFLAGS_VECTOR_set" = set; then + CFLAGS_VECTOR=$ac_env_CFLAGS_VECTOR_value +fi + # Some versions of GCC support some additional useful warning flags. 
# Check whether they are supported, and add them to CFLAGS if so. # ICC pretends to be GCC but it's lying; it doesn't support these flags, @@ -4376,6 +4382,127 @@ if test x"$pgac_cv_prog_cc_cflags__fexcess_precision_standard" = x"yes"; then CFLAGS="$CFLAGS -fexcess-precision=standard" fi + # Optimization flags for specific files that benefit from vectorization + { $as_echo "$as_me:$LINENO: checking whether $CC supports -funroll-loops" >&5 +$as_echo_n "checking whether $CC supports -funroll-loops... " >&6; } +if test "${pgac_cv_prog_cc_cflags__funroll_loops+set}" = set; then + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS -funroll-loops" +ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + pgac_cv_prog_cc_cflags__funroll_loops=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + pgac_cv_prog_cc_cflags__funroll_loops=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_c_werror_flag=$ac_save_c_werror_flag +CFLAGS="$pgac_save_CFLAGS" +fi +{ $as_echo "$as_me:$LINENO: result: $pgac_cv_prog_cc_cflags__funroll_loops" >&5 +$as_echo "$pgac_cv_prog_cc_cflags__funroll_loops" >&6; } +if test x"$pgac_cv_prog_cc_cflags__funroll_loops" = x"yes"; then + CFLAGS_VECTOR="${CFLAGS_VECTOR} -funroll-loops" +fi + + { $as_echo "$as_me:$LINENO: checking whether $CC supports -ftree-vectorize" >&5 +$as_echo_n "checking whether $CC supports -ftree-vectorize... " >&6; } +if test "${pgac_cv_prog_cc_cflags__ftree_vectorize+set}" = set; then + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS -ftree-vectorize" +ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + pgac_cv_prog_cc_cflags__ftree_vectorize=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + pgac_cv_prog_cc_cflags__ftree_vectorize=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_c_werror_flag=$ac_save_c_werror_flag +CFLAGS="$pgac_save_CFLAGS" +fi +{ $as_echo "$as_me:$LINENO: result: $pgac_cv_prog_cc_cflags__ftree_vectorize" >&5 +$as_echo "$pgac_cv_prog_cc_cflags__ftree_vectorize" >&6; } +if test x"$pgac_cv_prog_cc_cflags__ftree_vectorize" = x"yes"; then + CFLAGS_VECTOR="${CFLAGS_VECTOR} -ftree-vectorize" +fi + elif test "$ICC" = yes; then # Intel's compiler has a bug/misoptimization in checking for # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS. @@ -4627,6 +4754,9 @@ fi fi +CFLAGS_VECTOR=$CFLAGS_VECTOR + + # supply -g if --enable-debug if test "$enable_debug" = yes && test "$ac_cv_prog_cc_g" = yes; then CFLAGS="$CFLAGS -g" diff --git a/configure.in b/configure.in index f81fda7564..ea3da2640d 100644 --- a/configure.in +++ b/configure.in @@ -400,6 +400,11 @@ else fi fi +# set CFLAGS_VECTOR from the environment, if available +if test "$ac_env_CFLAGS_VECTOR_set" = set; then + CFLAGS_VECTOR=$ac_env_CFLAGS_VECTOR_value +fi + # Some versions of GCC support some additional useful warning flags. # Check whether they are supported, and add them to CFLAGS if so. 
# ICC pretends to be GCC but it's lying; it doesn't support these flags, @@ -419,6 +424,9 @@ if test "$GCC" = yes -a "$ICC" = no; then PGAC_PROG_CC_CFLAGS_OPT([-fwrapv]) # Disable FP optimizations that cause various errors on gcc 4.5+ or maybe 4.6+ PGAC_PROG_CC_CFLAGS_OPT([-fexcess-precision=standard]) + # Optimization flags for specific files that benefit from vectorization + PGAC_PROG_CC_VAR_OPT(CFLAGS_VECTOR, [-funroll-loops]) + PGAC_PROG_CC_VAR_OPT(CFLAGS_VECTOR, [-ftree-vectorize]) elif test "$ICC" = yes; then # Intel's compiler has a bug/misoptimization in checking for # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS. @@ -434,6 +442,8 @@ elif test "$PORTNAME" = "hpux"; then PGAC_PROG_CC_CFLAGS_OPT([+Olibmerrno]) fi +AC_SUBST(CFLAGS_VECTOR, $CFLAGS_VECTOR) + # supply -g if --enable-debug if test "$enable_debug" = yes && test "$ac_cv_prog_cc_g" = yes; then CFLAGS="$CFLAGS -g" diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 80f509fa87..89e39d2fa0 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -219,6 +219,7 @@ CC = @CC@ GCC = @GCC@ SUN_STUDIO_CC = @SUN_STUDIO_CC@ CFLAGS = @CFLAGS@ +CFLAGS_VECTOR = @CFLAGS_VECTOR@ # Kind-of compilers diff --git a/src/backend/storage/page/Makefile b/src/backend/storage/page/Makefile index 82d9c37fda..49ab40740a 100644 --- a/src/backend/storage/page/Makefile +++ b/src/backend/storage/page/Makefile @@ -15,3 +15,6 @@ include $(top_builddir)/src/Makefile.global OBJS = bufpage.o checksum.o itemptr.o include $(top_srcdir)/src/backend/common.mk + +# important optimization flags for checksum.c +checksum.o: CFLAGS += ${CFLAGS_VECTOR} -- 2.40.0