/// __m128 _mm_ceil_ps(__m128 X);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDPS / ROUNDPS </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
///
/// \param X
/// A 128-bit vector of [4 x float] values to be rounded up.
/// __m128d _mm_ceil_pd(__m128d X);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDPD / ROUNDPD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
///
/// \param X
/// A 128-bit vector of [2 x double] values to be rounded up.
/// __m128 _mm_ceil_ss(__m128 X, __m128 Y);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDSS / ROUNDSS </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
///
/// \param X
/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
/// __m128d _mm_ceil_sd(__m128d X, __m128d Y);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDSD / ROUNDSD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
///
/// \param X
/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
/// __m128 _mm_floor_ps(__m128 X);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDPS / ROUNDPS </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
///
/// \param X
/// A 128-bit vector of [4 x float] values to be rounded down.
/// __m128d _mm_floor_pd(__m128d X);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDPD / ROUNDPD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
///
/// \param X
/// A 128-bit vector of [2 x double].
/// __m128 _mm_floor_ss(__m128 X, __m128 Y);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDSS / ROUNDSS </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
///
/// \param X
/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
/// __m128d _mm_floor_sd(__m128d X, __m128d Y);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDSD / ROUNDSD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
///
/// \param X
/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
/// __m128 _mm_round_ps(__m128 X, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDPS / ROUNDPS </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
///
/// \param X
/// A 128-bit vector of [4 x float].
/// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDSS / ROUNDSS </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
///
/// \param X
/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
/// __m128d _mm_round_pd(__m128d X, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDPD / ROUNDPD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
///
/// \param X
/// A 128-bit vector of [2 x double].
#define _mm_round_pd(X, M) __extension__ ({ \
(__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); })
-
/// \brief Copies the upper element of the first 128-bit vector operand to the
/// corresponding upper element of the 128-bit result vector of [2 x double].
/// Rounds the lower element of the second 128-bit vector operand to an
/// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VROUNDSD / ROUNDSD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
///
/// \param X
/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
/// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VBLENDPD / BLENDPD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.
///
/// \param V1
/// A 128-bit vector of [2 x double].
/// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VBLENDPS / BLENDPS </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS </c> instruction.
///
/// \param V1
/// A 128-bit vector of [4 x float].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VBLENDVPD / BLENDVPD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VBLENDVPD / BLENDVPD </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VBLENDVPS / BLENDVPS </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VBLENDVPS / BLENDVPS </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [4 x float].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPBLENDVB / PBLENDVB </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPBLENDVB / PBLENDVB </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [16 x i8].
/// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPBLENDW / PBLENDW </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPBLENDW / PBLENDW </c> instruction.
///
/// \param V1
/// A 128-bit vector of [8 x i16].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMULLD / PMULLD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMULLD / PMULLD </c> instruction.
///
/// \param __V1
/// A 128-bit integer vector.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMULDQ / PMULDQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMULDQ / PMULDQ </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [4 x i32].
/// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VDPPS / DPPS </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VDPPS / DPPS </c> instruction.
///
/// \param X
/// A 128-bit vector of [4 x float].
/// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VDPPD / DPPD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VDPPD / DPPD </c> instruction.
///
/// \param X
/// A 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VMOVNTDQA / MOVNTDQA </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VMOVNTDQA / MOVNTDQA </c> instruction.
///
/// \param __V
/// A pointer to a 128-bit aligned memory location that contains the integer
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMINSB / PMINSB </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMINSB / PMINSB </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [16 x i8].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMAXSB / PMAXSB </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMAXSB / PMAXSB </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [16 x i8].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMINUW / PMINUW </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMINUW / PMINUW </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [8 x u16].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMAXUW / PMAXUW </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMAXUW / PMAXUW </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [8 x u16].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMINSD / PMINSD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMINSD / PMINSD </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [4 x i32].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMAXSD / PMAXSD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMAXSD / PMAXSD </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [4 x i32].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMINUD / PMINUD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMINUD / PMINUD </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [4 x u32].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMAXUD / PMAXUD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMAXUD / PMAXUD </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [4 x u32].
/// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VINSERTPS </i> </c> instruction.
+/// This intrinsic corresponds to the <c> VINSERTPS / INSERTPS </c> instruction.
///
/// \param X
/// A 128-bit vector source operand of [4 x float]. With the exception of
/// int _mm_extract_ps(__m128 X, const int N);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VEXTRACTPS / EXTRACTPS </i> </c>
+/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c>
/// instruction.
///
/// \param X
/// __m128i _mm_insert_epi8(__m128i X, int I, const int N);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPINSRB / PINSRB </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPINSRB / PINSRB </c> instruction.
///
/// \param X
/// A 128-bit integer vector of [16 x i8]. This vector is copied to the
/// __m128i _mm_insert_epi32(__m128i X, int I, const int N);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPINSRD / PINSRD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPINSRD / PINSRD </c> instruction.
///
/// \param X
/// A 128-bit integer vector of [4 x i32]. This vector is copied to the
/// specified by \a N.
/// \param N
/// An immediate value. Bits [1:0] specify the bit offset in the result at
-/// which the integer \a I is written.
+/// which the integer \a I is written. \n
/// 00: Bits [31:0] of the result are used for insertion. \n
/// 01: Bits [63:32] of the result are used for insertion. \n
/// 10: Bits [95:64] of the result are used for insertion. \n
({ __v4si __a = (__v4si)(__m128i)(X); \
__a[(N) & 3] = (I); \
(__m128i)__a;}))
+
#ifdef __x86_64__
/// \brief Constructs a 128-bit vector of [2 x i64] by first making a copy of
/// the 128-bit integer vector parameter, and then inserting the 64-bit
/// __m128i _mm_insert_epi64(__m128i X, long long I, const int N);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPINSRQ / PINSRQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPINSRQ / PINSRQ </c> instruction.
///
/// \param X
/// A 128-bit integer vector of [2 x i64]. This vector is copied to the
/// specified by \a N.
/// \param N
/// An immediate value. Bit [0] specifies the bit offset in the result at
-/// which the integer \a I is written.
+/// which the integer \a I is written. \n
/// 0: Bits [63:0] of the result are used for insertion. \n
/// 1: Bits [127:64] of the result are used for insertion. \n
/// \returns A 128-bit integer vector containing the constructed values.
/// int _mm_extract_epi8(__m128i X, const int N);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPEXTRB / PEXTRB </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPEXTRB / PEXTRB </c> instruction.
///
/// \param X
/// A 128-bit integer vector.
/// \param N
-/// An immediate value. Bits [3:0] specify which 8-bit vector element
-/// from the argument \a X to extract and copy to the result. \n
+/// An immediate value. Bits [3:0] specify which 8-bit vector element from
+/// the argument \a X to extract and copy to the result. \n
/// 0000: Bits [7:0] of parameter \a X are extracted. \n
/// 0001: Bits [15:8] of the parameter \a X are extracted. \n
/// 0010: Bits [23:16] of the parameter \a X are extracted. \n
/// int _mm_extract_epi32(__m128i X, const int N);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPEXTRD / PEXTRD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPEXTRD / PEXTRD </c> instruction.
///
/// \param X
/// A 128-bit integer vector.
/// \param N
-/// An immediate value. Bits [1:0] specify which 32-bit vector element
-/// from the argument \a X to extract and copy to the result. \n
+/// An immediate value. Bits [1:0] specify which 32-bit vector element from
+/// the argument \a X to extract and copy to the result. \n
/// 00: Bits [31:0] of the parameter \a X are extracted. \n
/// 01: Bits [63:32] of the parameter \a X are extracted. \n
/// 10: Bits [95:64] of the parameter \a X are extracted. \n
#define _mm_extract_epi32(X, N) (__extension__ \
({ __v4si __a = (__v4si)(__m128i)(X); \
(int)__a[(N) & 3];}))
+
#ifdef __x86_64__
/// \brief Extracts a 64-bit element from the 128-bit integer vector of
/// [2 x i64], using the immediate value parameter \a N as a selector.
/// long long _mm_extract_epi64(__m128i X, const int N);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPEXTRQ / PEXTRQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.
///
/// \param X
/// A 128-bit integer vector.
/// \param N
-/// An immediate value. Bit [0] specifies which 64-bit vector element
-/// from the argument \a X to return. \n
+/// An immediate value. Bit [0] specifies which 64-bit vector element from
+/// the argument \a X to return. \n
/// 0: Bits [63:0] are returned. \n
/// 1: Bits [127:64] are returned. \n
/// \returns A 64-bit integer.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPTEST / PTEST </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param __M
/// A 128-bit integer vector containing the bits to be tested.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPTEST / PTEST </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param __M
/// A 128-bit integer vector containing the bits to be tested.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPTEST / PTEST </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param __M
/// A 128-bit integer vector containing the bits to be tested.
/// int _mm_test_all_ones(__m128i V);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPTEST / PTEST </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param V
/// A 128-bit integer vector containing the bits to be tested.
/// int _mm_test_mix_ones_zeros(__m128i M, __m128i V);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPTEST / PTEST </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param M
/// A 128-bit integer vector containing the bits to be tested.
/// int _mm_test_all_zeros(__m128i M, __m128i V);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPTEST / PTEST </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param M
/// A 128-bit integer vector containing the bits to be tested.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPCMPEQQ / PCMPEQQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction.
///
/// \param __V1
/// A 128-bit integer vector.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVSXBW / PMOVSXBW </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVSXBD / PMOVSXBD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are sign-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVSXBQ / PMOVSXBQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are sign-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVSXWD / PMOVSXWD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.
///
/// \param __V
/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are sign-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVSXWQ / PMOVSXWQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are sign-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVSXDQ / PMOVSXDQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are sign-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVZXBW / PMOVZXBW </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are zero-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVZXBD / PMOVZXBD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are zero-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVZXBQ / PMOVZXBQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are zero-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVZXWD / PMOVZXWD </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.
///
/// \param __V
/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are zero-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVZXWQ / PMOVZXWQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are zero-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPMOVZXDQ / PMOVZXDQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are zero-
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPACKUSDW / PACKUSDW </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
/// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VMPSADBW / MPSADBW </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VMPSADBW / MPSADBW </c> instruction.
///
/// \param X
/// A 128-bit vector of [16 x i8].
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPHMINPOSUW / PHMINPOSUW </i> </c>
+/// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>
/// instruction.
///
/// \param __V
/// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPISTRM / PCMPISTRM </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPISTRM / PCMPISTRM </c>
/// instruction.
///
/// \param A
/// int _mm_cmpistri(__m128i A, __m128i B, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPISTRI / PCMPISTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
/// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPESTRM / PCMPESTRM </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPESTRM / PCMPESTRM </c>
/// instruction.
///
/// \param A
/// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPESTRI / PCMPESTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
/// int _mm_cmpistra(__m128i A, __m128i B, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPISTRI / PCMPISTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
/// int _mm_cmpistrc(__m128i A, __m128i B, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPISTRI / PCMPISTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
/// int _mm_cmpistro(__m128i A, __m128i B, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPISTRI / PCMPISTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
/// int _mm_cmpistrs(__m128i A, __m128i B, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPISTRI / PCMPISTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
/// int _mm_cmpistrz(__m128i A, __m128i B, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPISTRI / PCMPISTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
/// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPESTRI / PCMPESTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
/// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPESTRI / PCMPESTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
(int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
+
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns bit 0 of the resulting bit mask.
/// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPESTRI / PCMPESTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
/// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPESTRI / PCMPESTRI </i> </c>
+/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
/// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
-/// This intrinsic corresponds to the <c> <i> VPCMPESTRI </i> </c> instruction.
+/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
+/// instruction.
///
/// \param A
/// A 128-bit integer vector containing one of the source operands to be
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> VPCMPGTQ / PCMPGTQ </i> </c>
-/// instruction.
+/// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction.
///
/// \param __V1
/// A 128-bit integer vector.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> CRC32B </i> </c> instruction.
+/// This intrinsic corresponds to the <c> CRC32B </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> CRC32W </i> </c> instruction.
+/// This intrinsic corresponds to the <c> CRC32W </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> CRC32L </i> </c> instruction.
+/// This intrinsic corresponds to the <c> CRC32L </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> <i> CRC32Q </i> </c> instruction.
+/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.
///
/// \param __C
/// An unsigned integer operand to add to the CRC-32C checksum of operand