From 2c4fee17bcd3e64d8f831c637b34caab98e965d5 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Wed, 5 Nov 2014 11:29:21 -0800 Subject: [PATCH] Fix visual studio 2013 compiler warnings For configured with --enable-vp9-highbitdepth Change-Id: I2b181519d7192f8d7a241ad5760c3578255f24e6 --- vp9/common/vp9_idct.c | 42 +++--- vp9/common/vp9_idct.h | 6 +- vp9/common/x86/vp9_idct_intrin_sse2.c | 10 +- vp9/common/x86/vp9_idct_intrin_ssse3.c | 2 +- vp9/encoder/vp9_dct.c | 193 +++++++++++++------------ vp9/encoder/vp9_quantize.c | 12 +- vp9/encoder/vp9_variance.c | 12 +- vp9/encoder/x86/vp9_dct32x32_avx2.c | 13 +- vp9/encoder/x86/vp9_dct32x32_sse2.c | 4 +- vp9/encoder/x86/vp9_dct_sse2.c | 82 +++++------ 10 files changed, 190 insertions(+), 186 deletions(-) diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index d5b6f39b3..cc261abe7 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -34,7 +34,7 @@ // bd of x uses trans_low with 8+x bits, need to remove 24-x bits #define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd)) #else -#define WRAPLOW(x, bd) (x) +#define WRAPLOW(x, bd) ((int32_t)(x)) #endif // CONFIG_EMULATE_HARDWARE #if CONFIG_VP9_HIGHBITDEPTH @@ -367,14 +367,14 @@ static void iadst8(const tran_low_t *input, tran_low_t *output) { } // stage 1 - s0 = cospi_2_64 * x0 + cospi_30_64 * x1; - s1 = cospi_30_64 * x0 - cospi_2_64 * x1; - s2 = cospi_10_64 * x2 + cospi_22_64 * x3; - s3 = cospi_22_64 * x2 - cospi_10_64 * x3; - s4 = cospi_18_64 * x4 + cospi_14_64 * x5; - s5 = cospi_14_64 * x4 - cospi_18_64 * x5; - s6 = cospi_26_64 * x6 + cospi_6_64 * x7; - s7 = cospi_6_64 * x6 - cospi_26_64 * x7; + s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1); + s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1); + s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3); + s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3); + s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5); + s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5); + s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); + s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8); x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8); @@ -386,14 +386,14 @@ static void iadst8(const tran_low_t *input, tran_low_t *output) { x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8); // stage 2 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = cospi_8_64 * x4 + cospi_24_64 * x5; - s5 = cospi_24_64 * x4 - cospi_8_64 * x5; - s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; - s7 = cospi_8_64 * x6 + cospi_24_64 * x7; + s0 = (int)x0; + s1 = (int)x1; + s2 = (int)x2; + s3 = (int)x3; + s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5); + s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5); + s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7); + s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7); x0 = WRAPLOW(s0 + s2, 8); x1 = WRAPLOW(s1 + s3, 8); @@ -405,10 +405,10 @@ static void iadst8(const tran_low_t *input, tran_low_t *output) { x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8); // stage 3 - s2 = cospi_16_64 * (x2 + x3); - s3 = cospi_16_64 * (x2 - x3); - s6 = cospi_16_64 * (x6 + x7); - s7 = cospi_16_64 * (x6 - x7); + s2 = (int)(cospi_16_64 * (x2 + x3)); + s3 = (int)(cospi_16_64 * (x2 - x3)); + s6 = (int)(cospi_16_64 * (x6 + x7)); + s7 = (int)(cospi_16_64 * (x6 - x7)); x2 = WRAPLOW(dct_const_round_shift(s2), 8); x3 = WRAPLOW(dct_const_round_shift(s3), 8); diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index 12569b9de..fcabaa34f 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -29,10 +29,12 @@ extern "C" { #define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT) #define pair_set_epi16(a, b) \ - _mm_set_epi16(b, a, b, a, b, a, b, a) + _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ + (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a)) #define dual_set_epi16(a, b) \ - _mm_set_epi16(b, b, b, b, a, a, a, a) + _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \ + (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a)) // Constants: // for (int i = 1; i< 32; ++i) diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index df609872b..8e73074fe 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -216,7 +216,7 @@ static void iadst4_sse2(__m128i *in) { const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9); const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9); const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9); - const __m128i k__sinpi_p03_p03 = _mm_set1_epi16(sinpi_3_9); + const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9); const __m128i kZero = _mm_set1_epi16(0); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); __m128i u[8], v[8], in7; @@ -641,7 +641,7 @@ static void iadst8_sse2(__m128i *in) { const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__const_0 = _mm_set1_epi16(0); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); @@ -1530,8 +1530,8 @@ static void iadst16_8col(__m128i *in) { const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); - const __m128i k__cospi_m16_m16 = _mm_set1_epi16(-cospi_16_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); @@ -1985,7 +1985,7 @@ static void idct16_8col(__m128i *in) { const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); diff --git a/vp9/common/x86/vp9_idct_intrin_ssse3.c b/vp9/common/x86/vp9_idct_intrin_ssse3.c index 73bf5d1d7..b9d2ef22b 100644 --- a/vp9/common/x86/vp9_idct_intrin_ssse3.c +++ b/vp9/common/x86/vp9_idct_intrin_ssse3.c @@ -36,7 +36,7 @@ static void idct16_8col(__m128i *in, int round) { const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i k__cospi_p16_p16_x2 = pair_set_epi16(23170, 23170); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); __m128i v[16], u[16], s[16], t[16]; diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 1090d04bb..2e01c3cef 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -37,12 +37,12 @@ static void fdct4(const tran_low_t *input, tran_low_t *output) { temp1 = (step[0] + step[1]) * cospi_16_64; temp2 = (step[0] - step[1]) * cospi_16_64; - output[0] = fdct_round_shift(temp1); - output[2] = fdct_round_shift(temp2); + output[0] = (tran_low_t)fdct_round_shift(temp1); + output[2] = (tran_low_t)fdct_round_shift(temp2); temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; - output[1] = fdct_round_shift(temp1); - output[3] = fdct_round_shift(temp2); + output[1] = (tran_low_t)fdct_round_shift(temp1); + output[3] = (tran_low_t)fdct_round_shift(temp2); } void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) { @@ -98,12 +98,12 @@ void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) { step[3] = input[0] - input[3]; temp1 = (step[0] + step[1]) * cospi_16_64; temp2 = (step[0] - step[1]) * cospi_16_64; - out[0] = fdct_round_shift(temp1); - out[2] = fdct_round_shift(temp2); + out[0] = (tran_low_t)fdct_round_shift(temp1); + out[2] = (tran_low_t)fdct_round_shift(temp2); temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; - out[1] = fdct_round_shift(temp1); - out[3] = fdct_round_shift(temp2); + out[1] = (tran_low_t)fdct_round_shift(temp1); + out[3] = (tran_low_t)fdct_round_shift(temp2); // Do next column (which is a transposed row in second/horizontal pass) in_pass0++; in++; @@ -157,10 +157,10 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) { s3 = x2 - x0 + x3; // 1-D transform scaling factor is sqrt(2). - output[0] = fdct_round_shift(s0); - output[1] = fdct_round_shift(s1); - output[2] = fdct_round_shift(s2); - output[3] = fdct_round_shift(s3); + output[0] = (tran_low_t)fdct_round_shift(s0); + output[1] = (tran_low_t)fdct_round_shift(s1); + output[2] = (tran_low_t)fdct_round_shift(s2); + output[3] = (tran_low_t)fdct_round_shift(s3); } static const transform_2d FHT_4[] = { @@ -227,16 +227,16 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) { t1 = (x0 - x1) * cospi_16_64; t2 = x2 * cospi_24_64 + x3 * cospi_8_64; t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; - output[0] = fdct_round_shift(t0); - output[2] = fdct_round_shift(t2); - output[4] = fdct_round_shift(t1); - output[6] = fdct_round_shift(t3); + output[0] = (tran_low_t)fdct_round_shift(t0); + output[2] = (tran_low_t)fdct_round_shift(t2); + output[4] = (tran_low_t)fdct_round_shift(t1); + output[6] = (tran_low_t)fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; t1 = (s6 + s5) * cospi_16_64; - t2 = fdct_round_shift(t0); - t3 = fdct_round_shift(t1); + t2 = (tran_low_t)fdct_round_shift(t0); + t3 = (tran_low_t)fdct_round_shift(t1); // Stage 3 x0 = s4 + t2; @@ -249,10 +249,10 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) { t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - output[1] = fdct_round_shift(t0); - output[3] = fdct_round_shift(t2); - output[5] = fdct_round_shift(t1); - output[7] = fdct_round_shift(t3); + output[1] = (tran_low_t)fdct_round_shift(t0); + output[3] = (tran_low_t)fdct_round_shift(t2); + output[5] = (tran_low_t)fdct_round_shift(t1); + output[7] = (tran_low_t)fdct_round_shift(t3); } void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) { @@ -298,10 +298,10 @@ void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) { t1 = (x0 - x1) * cospi_16_64; t2 = x2 * cospi_24_64 + x3 * cospi_8_64; t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; - output[0 * 8] = fdct_round_shift(t0); - output[2 * 8] = fdct_round_shift(t2); - output[4 * 8] = fdct_round_shift(t1); - output[6 * 8] = fdct_round_shift(t3); + output[0 * 8] = (tran_low_t)fdct_round_shift(t0); + output[2 * 8] = (tran_low_t)fdct_round_shift(t2); + output[4 * 8] = (tran_low_t)fdct_round_shift(t1); + output[6 * 8] = (tran_low_t)fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; @@ -320,10 +320,10 @@ void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) { t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - output[1 * 8] = fdct_round_shift(t0); - output[3 * 8] = fdct_round_shift(t2); - output[5 * 8] = fdct_round_shift(t1); - output[7 * 8] = fdct_round_shift(t3); + output[1 * 8] = (tran_low_t)fdct_round_shift(t0); + output[3 * 8] = (tran_low_t)fdct_round_shift(t2); + output[5 * 8] = (tran_low_t)fdct_round_shift(t1); + output[7 * 8] = (tran_low_t)fdct_round_shift(t3); input++; output++; } @@ -434,10 +434,10 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { t1 = (x0 - x1) * cospi_16_64; t2 = x3 * cospi_8_64 + x2 * cospi_24_64; t3 = x3 * cospi_24_64 - x2 * cospi_8_64; - out[0] = fdct_round_shift(t0); - out[4] = fdct_round_shift(t2); - out[8] = fdct_round_shift(t1); - out[12] = fdct_round_shift(t3); + out[0] = (tran_low_t)fdct_round_shift(t0); + out[4] = (tran_low_t)fdct_round_shift(t2); + out[8] = (tran_low_t)fdct_round_shift(t1); + out[12] = (tran_low_t)fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; @@ -456,10 +456,10 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - out[2] = fdct_round_shift(t0); - out[6] = fdct_round_shift(t2); - out[10] = fdct_round_shift(t1); - out[14] = fdct_round_shift(t3); + out[2] = (tran_low_t)fdct_round_shift(t0); + out[6] = (tran_low_t)fdct_round_shift(t2); + out[10] = (tran_low_t)fdct_round_shift(t1); + out[14] = (tran_low_t)fdct_round_shift(t3); } // Work on the next eight values; step1 -> odd_results { @@ -502,20 +502,20 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { // step 6 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; - out[1] = fdct_round_shift(temp1); - out[9] = fdct_round_shift(temp2); + out[1] = (tran_low_t)fdct_round_shift(temp1); + out[9] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; - out[5] = fdct_round_shift(temp1); - out[13] = fdct_round_shift(temp2); + out[5] = (tran_low_t)fdct_round_shift(temp1); + out[13] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; - out[3] = fdct_round_shift(temp1); - out[11] = fdct_round_shift(temp2); + out[3] = (tran_low_t)fdct_round_shift(temp1); + out[11] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; - out[7] = fdct_round_shift(temp1); - out[15] = fdct_round_shift(temp2); + out[7] = (tran_low_t)fdct_round_shift(temp1); + out[15] = (tran_low_t)fdct_round_shift(temp2); } // Do next column (which is a transposed row in second/horizontal pass) in++; @@ -589,14 +589,14 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) { x6 = fdct_round_shift(s6); x7 = fdct_round_shift(s7); - output[0] = x0; - output[1] = - x4; - output[2] = x6; - output[3] = - x2; - output[4] = x3; - output[5] = - x7; - output[6] = x5; - output[7] = - x1; + output[0] = (tran_low_t)x0; + output[1] = (tran_low_t)-x4; + output[2] = (tran_low_t)x6; + output[3] = (tran_low_t)-x2; + output[4] = (tran_low_t)x3; + output[5] = (tran_low_t)-x7; + output[6] = (tran_low_t)x5; + output[7] = (tran_low_t)-x1; } static const transform_2d FHT_8[] = { @@ -659,10 +659,10 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { c1 = e1 - c1; a1 -= c1; d1 += b1; - op[0] = a1; - op[4] = c1; - op[8] = d1; - op[12] = b1; + op[0] = (tran_low_t)a1; + op[4] = (tran_low_t)c1; + op[8] = (tran_low_t)d1; + op[12] = (tran_low_t)b1; ip_pass0++; op++; @@ -683,10 +683,10 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { c1 = e1 - c1; a1 -= c1; d1 += b1; - op[0] = a1 * UNIT_QUANT_FACTOR; - op[1] = c1 * UNIT_QUANT_FACTOR; - op[2] = d1 * UNIT_QUANT_FACTOR; - op[3] = b1 * UNIT_QUANT_FACTOR; + op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR); + op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR); + op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR); + op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR); ip += 4; op += 4; @@ -745,10 +745,10 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { t1 = (x0 - x1) * cospi_16_64; t2 = x3 * cospi_8_64 + x2 * cospi_24_64; t3 = x3 * cospi_24_64 - x2 * cospi_8_64; - out[0] = fdct_round_shift(t0); - out[4] = fdct_round_shift(t2); - out[8] = fdct_round_shift(t1); - out[12] = fdct_round_shift(t3); + out[0] = (tran_low_t)fdct_round_shift(t0); + out[4] = (tran_low_t)fdct_round_shift(t2); + out[8] = (tran_low_t)fdct_round_shift(t1); + out[12] = (tran_low_t)fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; @@ -767,10 +767,10 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - out[2] = fdct_round_shift(t0); - out[6] = fdct_round_shift(t2); - out[10] = fdct_round_shift(t1); - out[14] = fdct_round_shift(t3); + out[2] = (tran_low_t)fdct_round_shift(t0); + out[6] = (tran_low_t)fdct_round_shift(t2); + out[10] = (tran_low_t)fdct_round_shift(t1); + out[14] = (tran_low_t)fdct_round_shift(t3); } // step 2 @@ -816,23 +816,23 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { // step 6 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; - out[1] = fdct_round_shift(temp1); - out[9] = fdct_round_shift(temp2); + out[1] = (tran_low_t)fdct_round_shift(temp1); + out[9] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; - out[5] = fdct_round_shift(temp1); - out[13] = fdct_round_shift(temp2); + out[5] = (tran_low_t)fdct_round_shift(temp1); + out[13] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; - out[3] = fdct_round_shift(temp1); - out[11] = fdct_round_shift(temp2); + out[3] = (tran_low_t)fdct_round_shift(temp1); + out[11] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; - out[7] = fdct_round_shift(temp1); - out[15] = fdct_round_shift(temp2); + out[7] = (tran_low_t)fdct_round_shift(temp1); + out[15] = (tran_low_t)fdct_round_shift(temp2); } static void fadst16(const tran_low_t *input, tran_low_t *output) { @@ -980,22 +980,22 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) { x14 = fdct_round_shift(s14); x15 = fdct_round_shift(s15); - output[0] = x0; - output[1] = - x8; - output[2] = x12; - output[3] = - x4; - output[4] = x6; - output[5] = x14; - output[6] = x10; - output[7] = x2; - output[8] = x3; - output[9] = x11; - output[10] = x15; - output[11] = x7; - output[12] = x5; - output[13] = - x13; - output[14] = x9; - output[15] = - x1; + output[0] = (tran_low_t)x0; + output[1] = (tran_low_t)-x8; + output[2] = (tran_low_t)x12; + output[3] = (tran_low_t)-x4; + output[4] = (tran_low_t)x6; + output[5] = (tran_low_t)x14; + output[6] = (tran_low_t)x10; + output[7] = (tran_low_t)x2; + output[8] = (tran_low_t)x3; + output[9] = (tran_low_t)x11; + output[10] = (tran_low_t)x15; + output[11] = (tran_low_t)x7; + output[12] = (tran_low_t)x5; + output[13] = (tran_low_t)-x13; + output[14] = (tran_low_t)x9; + output[15] = (tran_low_t)-x1; } static const transform_2d FHT_16[] = { @@ -1404,7 +1404,8 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { temp_in[j] = output[j + i * 32]; fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) - out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; + out[j + i * 32] = + (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); } } @@ -1435,7 +1436,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { temp_in[j] = output[j + i * 32]; fdct32(temp_in, temp_out, 1); for (j = 0; j < 32; ++j) - out[j + i * 32] = temp_out[j]; + out[j + i * 32] = (tran_low_t)temp_out[j]; } } diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 2ba1f922b..ce0ae79a0 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -56,7 +56,7 @@ void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block, const int64_t tmp = (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) * quant) >> 16; - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr; if (tmp) eob = 0; @@ -107,7 +107,7 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, const int64_t tmp = (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) * quant) >> 15; - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2; if (tmp) eob = 0; @@ -197,7 +197,7 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) * quant_ptr[rc != 0]) >> 16; - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; if (tmp) @@ -284,7 +284,7 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), INT32_MIN, INT32_MAX); tmp = (tmp * quant_ptr[rc != 0]) >> 15; - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; } @@ -393,7 +393,7 @@ void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, INT32_MIN, INT32_MAX); tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * quant_shift_ptr[rc != 0]) >> 16; // quantization - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; if (tmp) @@ -510,7 +510,7 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr, tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * quant_shift_ptr[rc != 0]) >> 15; - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; if (tmp) diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 4555bde1e..fea5f3351 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -298,8 +298,8 @@ void highbd_variance(const uint8_t *a8, int a_stride, uint64_t sse_long = 0; uint64_t sum_long = 0; highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sse = sse_long; - *sum = sum_long; + *sse = (unsigned int)sse_long; + *sum = (int)sum_long; } void highbd_10_variance(const uint8_t *a8, int a_stride, @@ -309,8 +309,8 @@ void highbd_10_variance(const uint8_t *a8, int a_stride, uint64_t sse_long = 0; uint64_t sum_long = 0; highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sum = ROUND_POWER_OF_TWO(sum_long, 2); - *sse = ROUND_POWER_OF_TWO(sse_long, 4); + *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2); + *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4); } void highbd_12_variance(const uint8_t *a8, int a_stride, @@ -320,8 +320,8 @@ void highbd_12_variance(const uint8_t *a8, int a_stride, uint64_t sse_long = 0; uint64_t sum_long = 0; highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sum = ROUND_POWER_OF_TWO(sum_long, 4); - *sse = ROUND_POWER_OF_TWO(sse_long, 8); + *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); + *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8); } static void highbd_var_filter_block2d_bil_first_pass( diff --git a/vp9/encoder/x86/vp9_dct32x32_avx2.c b/vp9/encoder/x86/vp9_dct32x32_avx2.c index 9ea22fed2..66827ad80 100644 --- a/vp9/encoder/x86/vp9_dct32x32_avx2.c +++ b/vp9/encoder/x86/vp9_dct32x32_avx2.c @@ -13,13 +13,14 @@ #include "vpx_ports/mem.h" #define pair256_set_epi16(a, b) \ - _mm256_set_epi16(b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a) + _mm256_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ + (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ + (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ + (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a)) #define pair256_set_epi32(a, b) \ - _mm256_set_epi32(b, a, b, a, b, a, b, a) - - - + _mm256_set_epi32((int)(b), (int)(a), (int)(b), (int)(a), \ + (int)(b), (int)(a), (int)(b), (int)(a)) #if FDCT32x32_HIGH_PRECISION static INLINE __m256i k_madd_epi32_avx2(__m256i a, __m256i b) { @@ -50,7 +51,7 @@ void FDCT32x32_2D_AVX2(const int16_t *input, // When we use them, in one case, they are all the same. In all others // it's a pair of them that we need to repeat four times. This is done // by constructing the 32 bit constant corresponding to that pair. - const __m256i k__cospi_p16_p16 = _mm256_set1_epi16(+cospi_16_64); + const __m256i k__cospi_p16_p16 = _mm256_set1_epi16((int16_t)cospi_16_64); const __m256i k__cospi_p16_m16 = pair256_set_epi16(+cospi_16_64, -cospi_16_64); const __m256i k__cospi_m08_p24 = pair256_set_epi16(-cospi_8_64, cospi_24_64); const __m256i k__cospi_m24_m08 = pair256_set_epi16(-cospi_24_64, -cospi_8_64); diff --git a/vp9/encoder/x86/vp9_dct32x32_sse2.c b/vp9/encoder/x86/vp9_dct32x32_sse2.c index 42fdbbdc5..dc36cc471 100644 --- a/vp9/encoder/x86/vp9_dct32x32_sse2.c +++ b/vp9/encoder/x86/vp9_dct32x32_sse2.c @@ -13,7 +13,7 @@ #include "vpx_ports/mem.h" #define pair_set_epi32(a, b) \ - _mm_set_epi32(b, a, b, a) + _mm_set_epi32((int)(b), (int)(a), (int)(b), (int)(a)) #if FDCT32x32_HIGH_PRECISION static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) { @@ -44,7 +44,7 @@ void FDCT32x32_2D(const int16_t *input, // When we use them, in one case, they are all the same. In all others // it's a pair of them that we need to repeat four times. This is done // by constructing the 32 bit constant corresponding to that pair. - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(+cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(+cospi_16_64, -cospi_16_64); const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index e799951c2..676a996f0 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -58,38 +58,38 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) { // These are the coefficients used for the multiplies. // In the comments, pN means cos(N pi /64) and mN is -cos(N pi /64), // where cospi_N_64 = cos(N pi /64) - const __m128i k__cospi_A = _mm_setr_epi16(cospi_16_64, cospi_16_64, - cospi_16_64, cospi_16_64, - cospi_16_64, -cospi_16_64, - cospi_16_64, -cospi_16_64); - const __m128i k__cospi_B = _mm_setr_epi16(cospi_16_64, -cospi_16_64, - cospi_16_64, -cospi_16_64, - cospi_16_64, cospi_16_64, - cospi_16_64, cospi_16_64); - const __m128i k__cospi_C = _mm_setr_epi16(cospi_8_64, cospi_24_64, - cospi_8_64, cospi_24_64, - cospi_24_64, -cospi_8_64, - cospi_24_64, -cospi_8_64); - const __m128i k__cospi_D = _mm_setr_epi16(cospi_24_64, -cospi_8_64, - cospi_24_64, -cospi_8_64, - cospi_8_64, cospi_24_64, - cospi_8_64, cospi_24_64); - const __m128i k__cospi_E = _mm_setr_epi16(cospi_16_64, cospi_16_64, - cospi_16_64, cospi_16_64, - cospi_16_64, cospi_16_64, - cospi_16_64, cospi_16_64); - const __m128i k__cospi_F = _mm_setr_epi16(cospi_16_64, -cospi_16_64, - cospi_16_64, -cospi_16_64, - cospi_16_64, -cospi_16_64, - cospi_16_64, -cospi_16_64); - const __m128i k__cospi_G = _mm_setr_epi16(cospi_8_64, cospi_24_64, - cospi_8_64, cospi_24_64, - -cospi_8_64, -cospi_24_64, - -cospi_8_64, -cospi_24_64); - const __m128i k__cospi_H = _mm_setr_epi16(cospi_24_64, -cospi_8_64, - cospi_24_64, -cospi_8_64, - -cospi_24_64, cospi_8_64, - -cospi_24_64, cospi_8_64); + const __m128i k__cospi_A = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64, + (int16_t)cospi_16_64, (int16_t)cospi_16_64, + (int16_t)cospi_16_64, (int16_t)-cospi_16_64, + (int16_t)cospi_16_64, (int16_t)-cospi_16_64); + const __m128i k__cospi_B = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)-cospi_16_64, + (int16_t)cospi_16_64, (int16_t)-cospi_16_64, + (int16_t)cospi_16_64, (int16_t)cospi_16_64, + (int16_t)cospi_16_64, (int16_t)cospi_16_64); + const __m128i k__cospi_C = _mm_setr_epi16((int16_t)cospi_8_64, (int16_t)cospi_24_64, + (int16_t)cospi_8_64, (int16_t)cospi_24_64, + (int16_t)cospi_24_64, (int16_t)-cospi_8_64, + (int16_t)cospi_24_64, (int16_t)-cospi_8_64); + const __m128i k__cospi_D = _mm_setr_epi16((int16_t)cospi_24_64, (int16_t)-cospi_8_64, + (int16_t)cospi_24_64, (int16_t)-cospi_8_64, + (int16_t)cospi_8_64, (int16_t)cospi_24_64, + (int16_t)cospi_8_64, (int16_t)cospi_24_64); + const __m128i k__cospi_E = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64, + (int16_t)cospi_16_64, (int16_t)cospi_16_64, + (int16_t)cospi_16_64, (int16_t)cospi_16_64, + (int16_t)cospi_16_64, (int16_t)cospi_16_64); + const __m128i k__cospi_F = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)-cospi_16_64, + (int16_t)cospi_16_64, (int16_t)-cospi_16_64, + (int16_t)cospi_16_64, (int16_t)-cospi_16_64, + (int16_t)cospi_16_64, (int16_t)-cospi_16_64); + const __m128i k__cospi_G = _mm_setr_epi16((int16_t)cospi_8_64, (int16_t)cospi_24_64, + (int16_t)cospi_8_64, (int16_t)cospi_24_64, + (int16_t)-cospi_8_64, (int16_t)-cospi_24_64, + (int16_t)-cospi_8_64, (int16_t)-cospi_24_64); + const __m128i k__cospi_H = _mm_setr_epi16((int16_t)cospi_24_64, (int16_t)-cospi_8_64, + (int16_t)cospi_24_64, (int16_t)-cospi_8_64, + (int16_t)-cospi_24_64, (int16_t)cospi_8_64, + (int16_t)-cospi_24_64, (int16_t)cospi_8_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); // This second rounding constant saves doing some extra adds at the end @@ -296,7 +296,7 @@ static INLINE void transpose_4x4(__m128i *res) { } void fdct4_sse2(__m128i *in) { - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); @@ -333,7 +333,7 @@ void fadst4_sse2(__m128i *in) { const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9); const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9); const __m128i k__sinpi_m03_p02 = pair_set_epi16(-sinpi_3_9, sinpi_2_9); - const __m128i k__sinpi_p03_p03 = _mm_set1_epi16(sinpi_3_9); + const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9); const __m128i kZero = _mm_set1_epi16(0); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); __m128i u[8], v[8]; @@ -454,7 +454,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) { // When we use them, in one case, they are all the same. In all others // it's a pair of them that we need to repeat four times. This is done // by constructing the 32 bit constant corresponding to that pair. - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); @@ -784,7 +784,7 @@ static INLINE void write_buffer_8x8(int16_t *output, __m128i *res, int stride) { void fdct8_sse2(__m128i *in) { // constants - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); @@ -936,7 +936,7 @@ void fadst8_sse2(__m128i *in) { const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__const_0 = _mm_set1_epi16(0); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); @@ -1271,7 +1271,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { // When we use them, in one case, they are all the same. In all others // it's a pair of them that we need to repeat four times. This is done // by constructing the 32 bit constant corresponding to that pair. - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64); @@ -1914,7 +1914,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) { void fdct16_8col(__m128i *in) { // perform 16x16 1-D DCT for 8 columns __m128i i[8], s[8], p[8], t[8], u[16], v[16]; - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); @@ -2261,8 +2261,8 @@ void fadst16_8col(__m128i *in) { const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); - const __m128i k__cospi_m16_m16 = _mm_set1_epi16(-cospi_16_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); + const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64); + const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); -- 2.40.0