]> granicus.if.org Git - zfs/commitdiff
Add support for AVX-512 family of instruction sets
authorGvozden Neskovic <neskovic@gmail.com>
Fri, 1 Jul 2016 16:33:04 +0000 (18:33 +0200)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 16 Aug 2016 21:10:33 +0000 (14:10 -0700)
This patch adds compiler and runtime tests (user and kernel) for the following
instruction sets: avx512f, avx512cd, avx512er, avx512pf, avx512bw, avx512dq,
avx512vl, avx512ifma, avx512vbmi.

Note: Linux support for the AVX-512F (Foundation) instruction set started with
Linux v3.15.

Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #4952

config/toolchain-simd.m4
include/linux/simd_x86.h

index 0f8c1f2d9fe809be3b373338fc57dcd5bc9cdc34..29abbbb5b6a34cea7dec6656892b209407c6bb54 100644 (file)
@@ -12,6 +12,15 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [
                        ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2
                        ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX
                        ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2
+                       ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F
+                       ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD
+                       ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ
+                       ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW
+                       ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA
+                       ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI
+                       ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF
+                       ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER
+                       ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL
                        ;;
        esac
 ])
@@ -170,3 +179,183 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2], [
                AC_MSG_RESULT([no])
        ])
 ])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F
+dnl #
+dnl # Link-test one AVX512F instruction (vpandd on zmm registers) and
+dnl # define HAVE_AVX512F when the host toolchain accepts it.  The probe
+dnl # uses a conforming main so that strict warning flags do not turn a
+dnl # supported instruction set into a false negative.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F], [
+       AC_MSG_CHECKING([whether host toolchain supports AVX512F])
+
+       AC_LINK_IFELSE([AC_LANG_SOURCE([
+       [
+               int main(void)
+               {
+                       __asm__ __volatile__("vpandd %zmm0,%zmm1,%zmm2");
+                       return (0);
+               }
+       ]])], [
+               AC_MSG_RESULT([yes])
+               AC_DEFINE([HAVE_AVX512F], 1, [Define if host toolchain supports AVX512F])
+       ], [
+               AC_MSG_RESULT([no])
+       ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD
+dnl #
+dnl # Link-test one AVX512CD instruction (vplzcntd on zmm registers) and
+dnl # define HAVE_AVX512CD when the host toolchain accepts it.  The probe
+dnl # uses a conforming main so that strict warning flags do not turn a
+dnl # supported instruction set into a false negative.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD], [
+       AC_MSG_CHECKING([whether host toolchain supports AVX512CD])
+
+       AC_LINK_IFELSE([AC_LANG_SOURCE([
+       [
+               int main(void)
+               {
+                       __asm__ __volatile__("vplzcntd %zmm0,%zmm1");
+                       return (0);
+               }
+       ]])], [
+               AC_MSG_RESULT([yes])
+               AC_DEFINE([HAVE_AVX512CD], 1, [Define if host toolchain supports AVX512CD])
+       ], [
+               AC_MSG_RESULT([no])
+       ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ
+dnl #
+dnl # Link-test one AVX512DQ instruction (vandpd on zmm registers) and
+dnl # define HAVE_AVX512DQ when the host toolchain accepts it.  The probe
+dnl # uses a conforming main so that strict warning flags do not turn a
+dnl # supported instruction set into a false negative.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ], [
+       AC_MSG_CHECKING([whether host toolchain supports AVX512DQ])
+
+       AC_LINK_IFELSE([AC_LANG_SOURCE([
+       [
+               int main(void)
+               {
+                       __asm__ __volatile__("vandpd %zmm0,%zmm1,%zmm2");
+                       return (0);
+               }
+       ]])], [
+               AC_MSG_RESULT([yes])
+               AC_DEFINE([HAVE_AVX512DQ], 1, [Define if host toolchain supports AVX512DQ])
+       ], [
+               AC_MSG_RESULT([no])
+       ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW
+dnl #
+dnl # Link-test one AVX512BW instruction (vpshufb on zmm registers) and
+dnl # define HAVE_AVX512BW when the host toolchain accepts it.  The probe
+dnl # uses a conforming main so that strict warning flags do not turn a
+dnl # supported instruction set into a false negative.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW], [
+       AC_MSG_CHECKING([whether host toolchain supports AVX512BW])
+
+       AC_LINK_IFELSE([AC_LANG_SOURCE([
+       [
+               int main(void)
+               {
+                       __asm__ __volatile__("vpshufb %zmm0,%zmm1,%zmm2");
+                       return (0);
+               }
+       ]])], [
+               AC_MSG_RESULT([yes])
+               AC_DEFINE([HAVE_AVX512BW], 1, [Define if host toolchain supports AVX512BW])
+       ], [
+               AC_MSG_RESULT([no])
+       ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA
+dnl #
+dnl # Link-test one AVX512IFMA instruction (vpmadd52luq on zmm registers)
+dnl # and define HAVE_AVX512IFMA when the host toolchain accepts it.  The
+dnl # probe uses a conforming main so that strict warning flags do not
+dnl # turn a supported instruction set into a false negative.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA], [
+       AC_MSG_CHECKING([whether host toolchain supports AVX512IFMA])
+
+       AC_LINK_IFELSE([AC_LANG_SOURCE([
+       [
+               int main(void)
+               {
+                       __asm__ __volatile__("vpmadd52luq %zmm0,%zmm1,%zmm2");
+                       return (0);
+               }
+       ]])], [
+               AC_MSG_RESULT([yes])
+               AC_DEFINE([HAVE_AVX512IFMA], 1, [Define if host toolchain supports AVX512IFMA])
+       ], [
+               AC_MSG_RESULT([no])
+       ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI
+dnl #
+dnl # Link-test one AVX512VBMI instruction (vpermb on zmm registers) and
+dnl # define HAVE_AVX512VBMI when the host toolchain accepts it.  The probe
+dnl # uses a conforming main so that strict warning flags do not turn a
+dnl # supported instruction set into a false negative.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI], [
+       AC_MSG_CHECKING([whether host toolchain supports AVX512VBMI])
+
+       AC_LINK_IFELSE([AC_LANG_SOURCE([
+       [
+               int main(void)
+               {
+                       __asm__ __volatile__("vpermb %zmm0,%zmm1,%zmm2");
+                       return (0);
+               }
+       ]])], [
+               AC_MSG_RESULT([yes])
+               AC_DEFINE([HAVE_AVX512VBMI], 1, [Define if host toolchain supports AVX512VBMI])
+       ], [
+               AC_MSG_RESULT([no])
+       ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF
+dnl #
+dnl # Link-test one AVX512PF instruction (vgatherpf0dps with a zmm index
+dnl # and an opmask) and define HAVE_AVX512PF when the host toolchain
+dnl # accepts it.  The probe uses a conforming main so that strict warning
+dnl # flags do not turn a supported instruction set into a false negative.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF], [
+       AC_MSG_CHECKING([whether host toolchain supports AVX512PF])
+
+       AC_LINK_IFELSE([AC_LANG_SOURCE([
+       [
+               int main(void)
+               {
+                       __asm__ __volatile__("vgatherpf0dps (%rsi,%zmm0,4){%k1}");
+                       return (0);
+               }
+       ]])], [
+               AC_MSG_RESULT([yes])
+               AC_DEFINE([HAVE_AVX512PF], 1, [Define if host toolchain supports AVX512PF])
+       ], [
+               AC_MSG_RESULT([no])
+       ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER
+dnl #
+dnl # Link-test one AVX512ER instruction (vexp2pd on zmm registers) and
+dnl # define HAVE_AVX512ER when the host toolchain accepts it.  The probe
+dnl # uses a conforming main so that strict warning flags do not turn a
+dnl # supported instruction set into a false negative.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER], [
+       AC_MSG_CHECKING([whether host toolchain supports AVX512ER])
+
+       AC_LINK_IFELSE([AC_LANG_SOURCE([
+       [
+               int main(void)
+               {
+                       __asm__ __volatile__("vexp2pd %zmm0,%zmm1");
+                       return (0);
+               }
+       ]])], [
+               AC_MSG_RESULT([yes])
+               AC_DEFINE([HAVE_AVX512ER], 1, [Define if host toolchain supports AVX512ER])
+       ], [
+               AC_MSG_RESULT([no])
+       ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL
+dnl #
+dnl # Link-test one AVX512VL instruction and define HAVE_AVX512VL when the
+dnl # host toolchain accepts it.  The probe uses vpabsq on xmm registers:
+dnl # the zmm form of vpabsq is plain AVX512F and would not exercise the
+dnl # vector length extension.  The probe uses a conforming main so that
+dnl # strict warning flags do not cause a false negative.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL], [
+       AC_MSG_CHECKING([whether host toolchain supports AVX512VL])
+
+       AC_LINK_IFELSE([AC_LANG_SOURCE([
+       [
+               int main(void)
+               {
+                       __asm__ __volatile__("vpabsq %xmm0,%xmm1");
+                       return (0);
+               }
+       ]])], [
+               AC_MSG_RESULT([yes])
+               AC_DEFINE([HAVE_AVX512VL], 1, [Define if host toolchain supports AVX512VL])
+       ], [
+               AC_MSG_RESULT([no])
+       ])
+])
index 6aa51144c546eb5253a4b941376c91e66930d5e6..216dbed62f18a1e4a7e28f101124592699af417b 100644 (file)
  *     zfs_ssse3_available()
  *     zfs_sse4_1_available()
  *     zfs_sse4_2_available()
+ *
  *     zfs_avx_available()
  *     zfs_avx2_available()
+ *
  *     zfs_bmi1_available()
  *     zfs_bmi2_available()
+ *
+ *     zfs_avx512f_available()
+ *     zfs_avx512cd_available()
+ *     zfs_avx512er_available()
+ *     zfs_avx512pf_available()
+ *     zfs_avx512bw_available()
+ *     zfs_avx512dq_available()
+ *     zfs_avx512vl_available()
+ *     zfs_avx512ifma_available()
+ *     zfs_avx512vbmi_available()
+ *
+ * NOTE(AVX-512VL):    If using AVX-512 instructions with 128-bit (xmm) or
+ *                     256-bit (ymm) registers, also add
+ *                     zfs_avx512vl_available() to the feature check.
  */
 
 #ifndef _SIMD_X86_H
@@ -124,7 +139,16 @@ typedef enum cpuid_inst_sets {
        AVX,
        AVX2,
        BMI1,
-       BMI2
+       BMI2,
+       AVX512F,
+       AVX512CD,
+       AVX512DQ,
+       AVX512BW,
+       AVX512IFMA,
+       AVX512VBMI,
+       AVX512PF,
+       AVX512ER,
+       AVX512VL
 } cpuid_inst_sets_t;
 
 /*
@@ -132,11 +156,21 @@ typedef enum cpuid_inst_sets {
  */
 typedef struct cpuid_feature_desc {
        uint32_t leaf;          /* CPUID leaf */
-       uint32_t subleaf;       /* CPUID subleaf */
+       uint32_t subleaf;       /* CPUID sub-leaf */
        uint32_t flag;          /* feature bit mask; all bits must be set */
        cpuid_regs_t reg;       /* which CPUID return register to test */
 } cpuid_feature_desc_t;
 
+#define        _AVX512F_BIT            (1U << 16) /* OR-ed into dependent masks */
+#define        _AVX512CD_BIT           (_AVX512F_BIT | (1U << 28))
+#define        _AVX512DQ_BIT           (_AVX512F_BIT | (1U << 17))
+#define        _AVX512BW_BIT           (_AVX512F_BIT | (1U << 30))
+#define        _AVX512IFMA_BIT         (_AVX512F_BIT | (1U << 21))
+#define        _AVX512VBMI_BIT         (1U << 1) /* F bit is in EBX, this one in ECX */
+#define        _AVX512PF_BIT           (_AVX512F_BIT | (1U << 26))
+#define        _AVX512ER_BIT           (_AVX512F_BIT | (1U << 27))
+#define        _AVX512VL_BIT           (1U << 31) /* excludes F bit; check AVX512F too */
+
 /*
  * Descriptions of supported instruction sets
  */
@@ -151,7 +185,16 @@ static const cpuid_feature_desc_t cpuid_features[] = {
        [AVX]           = {1U, 0U,      1U << 28,       ECX     },
        [AVX2]          = {7U, 0U,      1U << 5,        EBX     },
        [BMI1]          = {7U, 0U,      1U << 3,        EBX     },
-       [BMI2]          = {7U, 0U,      1U << 8,        EBX     }
+       [BMI2]          = {7U, 0U,      1U << 8,        EBX     },
+       [AVX512F]       = {7U, 0U, _AVX512F_BIT,        EBX     },
+       [AVX512CD]      = {7U, 0U, _AVX512CD_BIT,       EBX     },
+       [AVX512DQ]      = {7U, 0U, _AVX512DQ_BIT,       EBX     },
+       [AVX512BW]      = {7U, 0U, _AVX512BW_BIT,       EBX     },
+       [AVX512IFMA]    = {7U, 0U, _AVX512IFMA_BIT,     EBX     },
+       [AVX512VBMI]    = {7U, 0U, _AVX512VBMI_BIT,     ECX     },
+       [AVX512PF]      = {7U, 0U, _AVX512PF_BIT,       EBX     },
+       [AVX512ER]      = {7U, 0U, _AVX512ER_BIT,       EBX     },
+       /* _AVX512VL_BIT omits the F bit; callers must check AVX512F too */
+       [AVX512VL]      = {7U, 0U, _AVX512VL_BIT,       EBX     }
 };
 
 /*
@@ -187,15 +230,15 @@ __cpuid_check_feature(const cpuid_feature_desc_t *desc)
                 */
                __cpuid_count(desc->leaf, desc->subleaf,
                        r[EAX], r[EBX], r[ECX], r[EDX]);
-               return (!!(r[desc->reg] & desc->flag));
+               return ((r[desc->reg] & desc->flag) == desc->flag);
        }
        return (B_FALSE);
 }
 
-#define        CPUID_FEATURE_CHECK(name, id) \
-static inline boolean_t        \
-__cpuid_has_ ## name(void)\
-{      \
+#define        CPUID_FEATURE_CHECK(name, id)                           \
+static inline boolean_t                                                \
+__cpuid_has_ ## name(void)                                     \
+{                                                              \
        return (__cpuid_check_feature(&cpuid_features[id]));    \
 }
 
@@ -213,16 +256,25 @@ CPUID_FEATURE_CHECK(avx2, AVX2);
 CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
 CPUID_FEATURE_CHECK(bmi1, BMI1);
 CPUID_FEATURE_CHECK(bmi2, BMI2);
+CPUID_FEATURE_CHECK(avx512f, AVX512F);
+CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
+CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
+CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
+CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
+CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
+CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
+CPUID_FEATURE_CHECK(avx512er, AVX512ER);
+CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
 
 #endif /* !defined(_KERNEL) */
 
+
 /*
- * Detect ymm register set support
+ * Detect register set support
  */
 static inline boolean_t
-__ymm_enabled(void)
+__simd_state_enabled(const uint64_t state)
 {
-       static const uint64_t XSTATE_SSE_AVX = 0x2 | 0x4;
        boolean_t has_osxsave;
        uint64_t xcr0;
 
@@ -238,9 +290,16 @@ __ymm_enabled(void)
                return (B_FALSE);
 
        xcr0 = xgetbv(0);
-       return ((xcr0 & XSTATE_SSE_AVX) == XSTATE_SSE_AVX);
+       return ((xcr0 & state) == state);
 }
 
+#define        _XSTATE_SSE_AVX         (0x2 | 0x4) /* xgetbv(0) bits 1 and 2 */
+#define        _XSTATE_AVX512          (0xE0 | _XSTATE_SSE_AVX) /* plus bits 5-7 */
+
+#define        __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX) /* ymm state */
+#define        __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512) /* zmm state */
+
+
 /*
  * Check if SSE instruction set is available
  */
@@ -383,6 +442,168 @@ zfs_bmi2_available(void)
 #endif
 }
 
+
+/*
+ * AVX-512 family of instruction sets:
+ *
+ * AVX512F     Foundation
+ * AVX512CD    Conflict Detection Instructions
+ * AVX512ER    Exponential and Reciprocal Instructions
+ * AVX512PF    Prefetch Instructions
+ *
+ * AVX512BW    Byte and Word Instructions
+ * AVX512DQ    Double-word and Quadword Instructions
+ * AVX512VL    Vector Length Extensions
+ *
+ * AVX512IFMA  Integer Fused Multiply Add (Not supported by kernel 4.4)
+ * AVX512VBMI  Vector Byte Manipulation Instructions
+ */
+
+
+/*
+ * Report whether AVX512F (Foundation) may be used: the CPU has to advertise
+ * the feature and __zmm_enabled() has to confirm the zmm register state.
+ */
+static inline boolean_t
+zfs_avx512f_available(void)
+{
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512F)
+       const boolean_t cpu_ok = !!boot_cpu_has(X86_FEATURE_AVX512F);
+#elif !defined(_KERNEL)
+       const boolean_t cpu_ok = __cpuid_has_avx512f();
+#else
+       /* Kernel build without X86_FEATURE_AVX512F: never available */
+       const boolean_t cpu_ok = B_FALSE;
+#endif
+
+       if (!cpu_ok)
+               return (B_FALSE);
+
+       return (__zmm_enabled() != B_FALSE);
+}
+
+/*
+ * Report whether AVX512CD (Conflict Detection) may be used: the CPU has to
+ * advertise AVX512F and AVX512CD, and __zmm_enabled() has to confirm the
+ * zmm register state.
+ */
+static inline boolean_t
+zfs_avx512cd_available(void)
+{
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512CD)
+       const boolean_t cpu_ok = boot_cpu_has(X86_FEATURE_AVX512F) &&
+           boot_cpu_has(X86_FEATURE_AVX512CD);
+#elif !defined(_KERNEL)
+       /* The userspace CD mask already contains the Foundation bit */
+       const boolean_t cpu_ok = __cpuid_has_avx512cd();
+#else
+       /* Kernel build without X86_FEATURE_AVX512CD: never available */
+       const boolean_t cpu_ok = B_FALSE;
+#endif
+
+       if (!cpu_ok)
+               return (B_FALSE);
+
+       return (__zmm_enabled() != B_FALSE);
+}
+
+/*
+ * Report whether AVX512ER (Exponential and Reciprocal) may be used: the CPU
+ * has to advertise AVX512F and AVX512ER, and __zmm_enabled() has to confirm
+ * the zmm register state.
+ */
+static inline boolean_t
+zfs_avx512er_available(void)
+{
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512ER)
+       const boolean_t cpu_ok = boot_cpu_has(X86_FEATURE_AVX512F) &&
+           boot_cpu_has(X86_FEATURE_AVX512ER);
+#elif !defined(_KERNEL)
+       /* The userspace ER mask already contains the Foundation bit */
+       const boolean_t cpu_ok = __cpuid_has_avx512er();
+#else
+       /* Kernel build without X86_FEATURE_AVX512ER: never available */
+       const boolean_t cpu_ok = B_FALSE;
+#endif
+
+       if (!cpu_ok)
+               return (B_FALSE);
+
+       return (__zmm_enabled() != B_FALSE);
+}
+
+/*
+ * Report whether AVX512PF (Prefetch) may be used: the CPU has to advertise
+ * AVX512F and AVX512PF, and __zmm_enabled() has to confirm the zmm register
+ * state.
+ */
+static inline boolean_t
+zfs_avx512pf_available(void)
+{
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512PF)
+       const boolean_t cpu_ok = boot_cpu_has(X86_FEATURE_AVX512F) &&
+           boot_cpu_has(X86_FEATURE_AVX512PF);
+#elif !defined(_KERNEL)
+       /* The userspace PF mask already contains the Foundation bit */
+       const boolean_t cpu_ok = __cpuid_has_avx512pf();
+#else
+       /* Kernel build without X86_FEATURE_AVX512PF: never available */
+       const boolean_t cpu_ok = B_FALSE;
+#endif
+
+       if (!cpu_ok)
+               return (B_FALSE);
+
+       return (__zmm_enabled() != B_FALSE);
+}
+
+/*
+ * Report whether AVX512BW (Byte and Word) may be used: the CPU has to
+ * advertise AVX512F and AVX512BW, and __zmm_enabled() has to confirm the
+ * zmm register state.
+ */
+static inline boolean_t
+zfs_avx512bw_available(void)
+{
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512BW)
+       const boolean_t cpu_ok = boot_cpu_has(X86_FEATURE_AVX512F) &&
+           boot_cpu_has(X86_FEATURE_AVX512BW);
+#elif !defined(_KERNEL)
+       /* The userspace BW mask already contains the Foundation bit */
+       const boolean_t cpu_ok = __cpuid_has_avx512bw();
+#else
+       /* Kernel build without X86_FEATURE_AVX512BW: never available */
+       const boolean_t cpu_ok = B_FALSE;
+#endif
+
+       if (!cpu_ok)
+               return (B_FALSE);
+
+       return (__zmm_enabled() != B_FALSE);
+}
+
+/*
+ * Report whether AVX512DQ (Double-word and Quadword) may be used: the CPU
+ * has to advertise AVX512F and AVX512DQ, and __zmm_enabled() has to confirm
+ * the zmm register state.
+ */
+static inline boolean_t
+zfs_avx512dq_available(void)
+{
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512DQ)
+       const boolean_t cpu_ok = boot_cpu_has(X86_FEATURE_AVX512F) &&
+           boot_cpu_has(X86_FEATURE_AVX512DQ);
+#elif !defined(_KERNEL)
+       /* The userspace DQ mask already contains the Foundation bit */
+       const boolean_t cpu_ok = __cpuid_has_avx512dq();
+#else
+       /* Kernel build without X86_FEATURE_AVX512DQ: never available */
+       const boolean_t cpu_ok = B_FALSE;
+#endif
+
+       if (!cpu_ok)
+               return (B_FALSE);
+
+       return (__zmm_enabled() != B_FALSE);
+}
+
+/*
+ * Check if AVX512VL (Vector Length Extensions) instruction set is available.
+ *
+ * Both the kernel and the userspace path require AVX512F in addition to
+ * AVX512VL, and __zmm_enabled() has to confirm the zmm register state.
+ * Returns B_FALSE in kernel builds that lack X86_FEATURE_AVX512VL.
+ */
+static inline boolean_t
+zfs_avx512vl_available(void)
+{
+       boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512VL)
+       has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+           boot_cpu_has(X86_FEATURE_AVX512VL);
+#elif !defined(_KERNEL)
+       /*
+        * _AVX512VL_BIT does not include the Foundation bit, so AVX512F
+        * has to be tested explicitly, as the kernel branch does.
+        */
+       has_avx512 = __cpuid_has_avx512f() &&
+           __cpuid_has_avx512vl();
+#endif
+
+       return (has_avx512 && __zmm_enabled());
+}
+
+/*
+ * Report whether AVX512IFMA (Integer Fused Multiply Add) may be used: the
+ * CPU has to advertise AVX512F and AVX512IFMA, and __zmm_enabled() has to
+ * confirm the zmm register state.
+ */
+static inline boolean_t
+zfs_avx512ifma_available(void)
+{
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512IFMA)
+       const boolean_t cpu_ok = boot_cpu_has(X86_FEATURE_AVX512F) &&
+           boot_cpu_has(X86_FEATURE_AVX512IFMA);
+#elif !defined(_KERNEL)
+       /* The userspace IFMA mask already contains the Foundation bit */
+       const boolean_t cpu_ok = __cpuid_has_avx512ifma();
+#else
+       /* Kernel build without X86_FEATURE_AVX512IFMA: never available */
+       const boolean_t cpu_ok = B_FALSE;
+#endif
+
+       if (!cpu_ok)
+               return (B_FALSE);
+
+       return (__zmm_enabled() != B_FALSE);
+}
+
+/*
+ * Report whether AVX512VBMI (Vector Byte Manipulation) may be used: the CPU
+ * has to advertise AVX512F and AVX512VBMI, and __zmm_enabled() has to
+ * confirm the zmm register state.
+ */
+static inline boolean_t
+zfs_avx512vbmi_available(void)
+{
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512VBMI)
+       const boolean_t cpu_ok = boot_cpu_has(X86_FEATURE_AVX512F) &&
+           boot_cpu_has(X86_FEATURE_AVX512VBMI);
+#elif !defined(_KERNEL)
+       /* The VBMI mask lives in ECX, so AVX512F is tested separately */
+       const boolean_t cpu_ok = __cpuid_has_avx512f() &&
+           __cpuid_has_avx512vbmi();
+#else
+       /* Kernel build without X86_FEATURE_AVX512VBMI: never available */
+       const boolean_t cpu_ok = B_FALSE;
+#endif
+
+       if (!cpu_ok)
+               return (B_FALSE);
+
+       return (__zmm_enabled() != B_FALSE);
+}
+
 #endif /* defined(__x86) */
 
 #endif /* _SIMD_X86_H */