From: Alex Converse
Date: Thu, 19 Nov 2015 23:57:57 +0000 (-0800)
Subject: Tweak casts on vpx_sub_pixel_variance to avoid implicit overflow.
X-Git-Tag: v1.6.0~184
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a68b24fdeeab207219ac4631a7ddd46e6f13fbbd;p=libvpx

Tweak casts on vpx_sub_pixel_variance to avoid implicit overflow.

Change-Id: I481eb271b082fa3497b0283f37d9b4d1f6de270c
---

diff --git a/vpx_dsp/x86/variance_sse2.c b/vpx_dsp/x86/variance_sse2.c
index b14c548ac..43f4603ca 100644
--- a/vpx_dsp/x86/variance_sse2.c
+++ b/vpx_dsp/x86/variance_sse2.c
@@ -329,7 +329,7 @@ DECLS(ssse3, ssse3);
 #undef DECLS
 #undef DECL
 
-#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
+#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
 unsigned int vpx_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \
                                                      int src_stride, \
                                                      int x_offset, \
@@ -365,23 +365,23 @@ unsigned int vpx_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \
     } \
   } \
   *sse_ptr = sse; \
-  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+  return sse - (cast_prod (cast se * se) >> (wlog2 + hlog2)); \
 }
 
 #define FNS(opt1, opt2) \
-FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
-FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
-FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
-FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
-FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
-FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
-FN(16, 16, 16, 4, 4, opt1, (uint32_t)); \
-FN(16, 8, 16, 4, 3, opt1, (uint32_t)); \
-FN(8, 16, 8, 3, 4, opt1, (uint32_t)); \
-FN(8, 8, 8, 3, 3, opt1, (uint32_t)); \
-FN(8, 4, 8, 3, 2, opt1, (uint32_t)); \
-FN(4, 8, 4, 2, 3, opt2, (uint32_t)); \
-FN(4, 4, 4, 2, 2, opt2, (uint32_t))
+FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \
+FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \
+FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \
+FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \
+FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \
+FN(16, 8, 16, 4, 3, opt1, (int32_t), (int32_t)); \
+FN(8, 16, 8, 3, 4, opt1, (int32_t), (int32_t)); \
+FN(8, 8, 8, 3, 3, opt1, (int32_t), (int32_t)); \
+FN(8, 4, 8, 3, 2, opt1, (int32_t), (int32_t)); \
+FN(4, 8, 4, 2, 3, opt2, (int32_t), (int32_t)); \
+FN(4, 4, 4, 2, 2, opt2, (int32_t), (int32_t))
 
 FNS(sse2, sse);
 FNS(ssse3, ssse3);
@@ -410,7 +410,7 @@ DECLS(ssse3, ssse3);
 #undef DECL
 #undef DECLS
 
-#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
+#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
 unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \
                                                          int src_stride, \
                                                          int x_offset, \
@@ -451,23 +451,23 @@ unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \
     } \
   } \
   *sseptr = sse; \
-  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+  return sse - (cast_prod (cast se * se) >> (wlog2 + hlog2)); \
 }
 
 #define FNS(opt1, opt2) \
-FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
-FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
-FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
-FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
-FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
-FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
-FN(16, 16, 16, 4, 4, opt1, (uint32_t)); \
-FN(16, 8, 16, 4, 3, opt1, (uint32_t)); \
-FN(8, 16, 8, 3, 4, opt1, (uint32_t)); \
-FN(8, 8, 8, 3, 3, opt1, (uint32_t)); \
-FN(8, 4, 8, 3, 2, opt1, (uint32_t)); \
-FN(4, 8, 4, 2, 3, opt2, (uint32_t)); \
-FN(4, 4, 4, 2, 2, opt2, (uint32_t))
+FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \
+FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \
+FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \
+FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \
+FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \
+FN(16, 8, 16, 4, 3, opt1, (uint32_t), (int32_t)); \
+FN(8, 16, 8, 3, 4, opt1, (uint32_t), (int32_t)); \
+FN(8, 8, 8, 3, 3, opt1, (uint32_t), (int32_t)); \
+FN(8, 4, 8, 3, 2, opt1, (uint32_t), (int32_t)); \
+FN(4, 8, 4, 2, 3, opt2, (uint32_t), (int32_t)); \
+FN(4, 4, 4, 2, 2, opt2, (uint32_t), (int32_t))
 
 FNS(sse2, sse);
 FNS(ssse3, ssse3);