_mm_add_epi32(hg_fe_dc_ba, _mm_srli_si128(hg_fe_dc_ba, 8));
const __m128i hgfedcba =
_mm_add_epi32(hgfe_dcba, _mm_srli_si128(hgfe_dcba, 4));
- unsigned int sum_diff = abs(_mm_cvtsi128_si32(hgfedcba));
+ unsigned int sum_diff = (unsigned int)abs(_mm_cvtsi128_si32(hgfedcba));
return sum_diff;
}
s0 = _mm_add_epi32(s0, s1);
s0 = _mm_add_epi32(s0, _mm_srli_si128(s0, 8));
s0 = _mm_add_epi32(s0, _mm_srli_si128(s0, 4));
- avg = _mm_cvtsi128_si32(s0);
+ avg = (unsigned int)_mm_cvtsi128_si32(s0);
return (avg + 32) >> 6;
}
static INLINE void mm256_storeu2_epi32(__m128i *const dst_ptr_1,
__m128i *const dst_ptr_2,
const __m256i *const src) {
- *((uint32_t *)(dst_ptr_1)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*src));
- *((uint32_t *)(dst_ptr_2)) =
- _mm_cvtsi128_si32(_mm256_extractf128_si256(*src, 1));
+ *((int *)(dst_ptr_1)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*src));
+ *((int *)(dst_ptr_2)) = _mm_cvtsi128_si32(_mm256_extractf128_si256(*src, 1));
}
static INLINE __m256i mm256_round_epi32(const __m256i *const src,
}
static INLINE void store_unaligned_u32(void *const a, const __m128i v) {
- const uint32_t val = _mm_cvtsi128_si32(v);
+ const int val = _mm_cvtsi128_si32(v);
memcpy(a, &val, sizeof(val));
}
#define FSAD64_H(h) \
unsigned int vpx_sad64x##h##_avx2(const uint8_t *src_ptr, int src_stride, \
const uint8_t *ref_ptr, int ref_stride) { \
- int i, res; \
+ int i; \
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
__m256i sum_sad = _mm256_setzero_si256(); \
__m256i sum_sad_h; \
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- res = _mm_cvtsi128_si32(sum_sad128); \
- return res; \
+ return (unsigned int)_mm_cvtsi128_si32(sum_sad128); \
}
#define FSAD32_H(h) \
unsigned int vpx_sad64x##h##_avg_avx2( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, const uint8_t *second_pred) { \
- int i, res; \
+ int i; \
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
__m256i sum_sad = _mm256_setzero_si256(); \
__m256i sum_sad_h; \
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- res = _mm_cvtsi128_si32(sum_sad128); \
- return res; \
+ return (unsigned int)_mm_cvtsi128_si32(sum_sad128); \
}
#define FSADAVG32_H(h) \
unsigned int vpx_sad32x##h##_avg_avx2( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, const uint8_t *second_pred) { \
- int i, res; \
+ int i; \
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
__m256i sum_sad = _mm256_setzero_si256(); \
__m256i sum_sad_h; \
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- res = _mm_cvtsi128_si32(sum_sad128); \
- return res; \
+ return (unsigned int)_mm_cvtsi128_si32(sum_sad128); \
}
#define FSADAVG64 \
static INLINE unsigned int add32x4_sse2(__m128i val) {
val = _mm_add_epi32(val, _mm_srli_si128(val, 8));
val = _mm_add_epi32(val, _mm_srli_si128(val, 4));
- return _mm_cvtsi128_si32(val);
+ return (unsigned int)_mm_cvtsi128_si32(val);
}
unsigned int vpx_get_mb_ss_sse2(const int16_t *src_ptr) {
// Saturate and convert to 8-bit words
dst_first = _mm_packus_epi16(dst_first, _mm_setzero_si128());
- *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(dst_first);
+ *((int *)(dst_ptr)) = _mm_cvtsi128_si32(dst_first);
src_ptr += src_stride;
dst_ptr += dst_stride;
res_reg_0123 = _mm_packus_epi16(res_reg_0123_lo, reg_zero);
// Save only half of the register (8 words)
- *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(res_reg_m1012);
- *((uint32_t *)(dst_ptr + dst_stride)) = _mm_cvtsi128_si32(res_reg_0123);
+ *((int *)(dst_ptr)) = _mm_cvtsi128_si32(res_reg_m1012);
+ *((int *)(dst_ptr + dst_stride)) = _mm_cvtsi128_si32(res_reg_0123);
// Update the source by two rows
src_ptr += src_stride_unrolled;
// Pack to 8-bits
dst = _mm_packus_epi16(dst, _mm_setzero_si128());
- *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(dst);
+ *((int *)(dst_ptr)) = _mm_cvtsi128_si32(dst);
}
}
// Pack to 8-bits
dst_first = _mm_packus_epi16(dst_first, _mm_setzero_si128());
- *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(dst_first);
+ *((int *)(dst_ptr)) = _mm_cvtsi128_si32(dst_first);
src_ptr += src_stride;
dst_ptr += dst_stride;
reg_1 = _mm_packus_epi16(reg_1, reg_1);
// Save the result
- *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(reg_0);
- *((uint32_t *)(dst_ptr + dst_stride)) = _mm_cvtsi128_si32(reg_1);
+ *((int *)(dst_ptr)) = _mm_cvtsi128_si32(reg_0);
+ *((int *)(dst_ptr + dst_stride)) = _mm_cvtsi128_si32(reg_1);
// Update the source by two rows
src_ptr += src_stride_unrolled;