/// \brief Calculates the square root of the lower double-precision value of
/// the second operand and returns it in the lower 64 bits of the result.
-/// The upper 64 bits of the result are copied from the upper double-
-/// precision value of the first operand.
+/// The upper 64 bits of the result are copied from the upper
+/// double-precision value of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \brief Compares lower 64-bit double-precision values of both operands, and
/// returns the lesser of the pair of values in the lower 64-bits of the
-/// result. The upper 64 bits of the result are copied from the upper double-
-/// precision value of the first operand.
+/// result. The upper 64 bits of the result are copied from the upper
+/// double-precision value of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \brief Compares lower 64-bit double-precision values of both operands, and
/// returns the greater of the pair of values in the lower 64-bits of the
-/// result. The upper 64 bits of the result are copied from the upper double-
-/// precision value of the first operand.
+/// result. The upper 64 bits of the result are copied from the upper
+/// double-precision value of the first operand.
///
/// \headerfile <x86intrin.h>
///
}
/// \brief Compares the lower double-precision floating-point values in each of
-/// the two 128-bit floating-point vectors of [2 x double] for equality. The
-/// comparison yields 0 for false, 1 for true.
+/// the two 128-bit floating-point vectors of [2 x double] for equality.
+///
+/// The comparison yields 0 for false, 1 for true. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comieq_sd(__m128d __a, __m128d __b)
{
/// the value in the first parameter is less than the corresponding value in
/// the second parameter.
///
-/// The comparison yields 0 for false, 1 for true.
+/// The comparison yields 0 for false, 1 for true. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comilt_sd(__m128d __a, __m128d __b)
{
/// the value in the first parameter is less than or equal to the
/// corresponding value in the second parameter.
///
-/// The comparison yields 0 for false, 1 for true.
+/// The comparison yields 0 for false, 1 for true. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comile_sd(__m128d __a, __m128d __b)
{
/// the value in the first parameter is greater than the corresponding value
/// in the second parameter.
///
-/// The comparison yields 0 for false, 1 for true.
+/// The comparison yields 0 for false, 1 for true. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comigt_sd(__m128d __a, __m128d __b)
{
/// the value in the first parameter is greater than or equal to the
/// corresponding value in the second parameter.
///
-/// The comparison yields 0 for false, 1 for true.
+/// The comparison yields 0 for false, 1 for true. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comige_sd(__m128d __a, __m128d __b)
{
/// the value in the first parameter is unequal to the corresponding value in
/// the second parameter.
///
-/// The comparison yields 0 for false, 1 for true.
+/// The comparison yields 0 for false, 1 for true. If either of the two
+/// lower double-precision values is NaN, 1 is returned.
///
/// \headerfile <x86intrin.h>
///
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
-/// \returns An integer containing the comparison results.
+/// \returns An integer containing the comparison results. If either of the two
+/// lower double-precision values is NaN, 1 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comineq_sd(__m128d __a, __m128d __b)
{
/// the two 128-bit floating-point vectors of [2 x double] for equality. The
/// comparison yields 0 for false, 1 for true.
///
-/// If either of the two lower double-precision values is NaN, 1 is returned.
+/// If either of the two lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
-/// lower double-precision values is NaN, 1 is returned.
+/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomieq_sd(__m128d __a, __m128d __b)
{
/// the second parameter.
///
/// The comparison yields 0 for false, 1 for true. If either of the two lower
-/// double-precision values is NaN, 1 is returned.
+/// double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
-/// lower double-precision values is NaN, 1 is returned.
+/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomilt_sd(__m128d __a, __m128d __b)
{
/// corresponding value in the second parameter.
///
/// The comparison yields 0 for false, 1 for true. If either of the two lower
-/// double-precision values is NaN, 1 is returned.
+/// double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
-/// lower double-precision values is NaN, 1 is returned.
+/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomile_sd(__m128d __a, __m128d __b)
{
/// the second parameter.
///
/// The comparison yields 0 for false, 1 for true. If either of the two lower
-/// double-precision values is NaN, 0 is returned.
+/// double-precision values is NaN, 1 is returned.
///
/// \headerfile <x86intrin.h>
///
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result. If either of the two
-/// lower double-precision values is NaN, 0 is returned.
+/// lower double-precision values is NaN, 1 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomineq_sd(__m128d __a, __m128d __b)
{
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c>VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
+/// This intrinsic corresponds to the
+/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
///
/// \param __dp
/// A pointer to a memory location that can store two double-precision
/// values.
/// \param __a
/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
-/// of the values in \a dp.
+/// of the values in \a __dp.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_store1_pd(double *__dp, __m128d __a)
{
_mm_store_pd(__dp, __a);
}
-/// \brief Stores a 128-bit vector of [2 x double] into an aligned memory
-/// location.
+/// \brief Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
+/// the upper and lower 64 bits of a memory location.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
+/// This intrinsic corresponds to the
+/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
///
/// \param __dp
-/// A pointer to a 128-bit memory location. The address of the memory
-/// location has to be 16-byte aligned.
+/// A pointer to a memory location that can store two double-precision
+/// values.
/// \param __a
-/// A 128-bit vector of [2 x double] containing the values to be stored.
+/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
+/// of the values in \a __dp.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_store_pd1(double *__dp, __m128d __a)
{
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
-/// instruction.
+/// This intrinsic does not correspond to a specific instruction.
///
/// \param __q0
/// A 64-bit integral value used to initialize the lower 64 bits of the
/// specified unaligned memory location. When a mask bit is 1, the
/// corresponding byte is written, otherwise it is not written.
///
-/// To minimize caching, the date is flagged as non-temporal (unlikely to be
+/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon). Exception and trap behavior for elements not selected
/// for storage to memory are implementation dependent.
///
return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
}
-/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
-/// of [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
+/// \brief Unpacks the high-order 64-bit elements from two 128-bit vectors of
+/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
///
/// \headerfile <x86intrin.h>
///
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic has no corresponding instruction.
+/// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.
///
/// \param __a
/// A 128-bit integer vector operand. The lower 64 bits are moved to the
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> VMOVQ / MOVQ / MOVD </c> instruction.
+/// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.
///
/// \param __a
/// A 64-bit value.
return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2);
}
-/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
-/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
+/// \brief Unpacks the high-order 64-bit elements from two 128-bit vectors of
+/// [2 x double] and interleaves them into a 128-bit vector of [2 x
/// double].
///
/// \headerfile <x86intrin.h>
return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
}
-/// \brief Unpacks the low-order (even-indexed) values from two 128-bit vectors
+/// \brief Unpacks the low-order 64-bit elements from two 128-bit vectors
/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
/// double].
///
/// A 128-bit vector of [2 x double].
/// \param i
/// An 8-bit immediate value. The least significant two bits specify which
-/// elements to copy from a and b: \n
-/// Bit[0] = 0: lower element of a copied to lower element of result. \n
-/// Bit[0] = 1: upper element of a copied to lower element of result. \n
+/// elements to copy from \a a and \a b: \n
+/// Bit[0] = 0: lower element of \a a copied to lower element of result. \n
+/// Bit[0] = 1: upper element of \a a copied to lower element of result. \n
/// Bit[1] = 0: lower element of \a b copied to upper element of result. \n
/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.