From: DRC
Date: Mon, 29 Feb 2016 23:21:02 +0000 (-0600)
Subject: Win/x64: Fix improper callee save of xmm8-xmm11
X-Git-Tag: 1.4.90
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=056536f6605596d0246fd2b75a1ada2ac03b5bbe;p=libjpeg-turbo

Win/x64: Fix improper callee save of xmm8-xmm11

The x86-64 SIMD accelerations for Huffman encoding used incorrect
stack math to save xmm8-xmm11 on Windows.  This caused TJBench to
always report 1 Mpixel/sec for the compression performance, and it
likely would have caused other application issues as well.
---

diff --git a/simd/jchuff-sse2-64.asm b/simd/jchuff-sse2-64.asm
index d22efc3..84eaeeb 100644
--- a/simd/jchuff-sse2-64.asm
+++ b/simd/jchuff-sse2-64.asm
@@ -196,11 +196,11 @@ EXTN(jsimd_huff_encode_one_block_sse2):
         lea rsp, [t2]
         collect_args
 %ifdef WIN64
-        sub rsp, 4*SIZEOF_XMMWORD
-        movaps XMMWORD [rsp-3*SIZEOF_XMMWORD], xmm8
+        movaps XMMWORD [rsp-1*SIZEOF_XMMWORD], xmm8
         movaps XMMWORD [rsp-2*SIZEOF_XMMWORD], xmm9
-        movaps XMMWORD [rsp-1*SIZEOF_XMMWORD], xmm10
-        movaps XMMWORD [rsp-0*SIZEOF_XMMWORD], xmm11
+        movaps XMMWORD [rsp-3*SIZEOF_XMMWORD], xmm10
+        movaps XMMWORD [rsp-4*SIZEOF_XMMWORD], xmm11
+        sub rsp, 4*SIZEOF_XMMWORD
 %endif
         push rbx
 
@@ -344,10 +344,10 @@ EXTN(jsimd_huff_encode_one_block_sse2):
 
         pop rbx
 %ifdef WIN64
-        movaps xmm8, XMMWORD [rsp-3*SIZEOF_XMMWORD]
-        movaps xmm9, XMMWORD [rsp-2*SIZEOF_XMMWORD]
-        movaps xmm10, XMMWORD [rsp-1*SIZEOF_XMMWORD]
-        movaps xmm11, XMMWORD [rsp-0*SIZEOF_XMMWORD]
+        movaps xmm11, XMMWORD [rsp+0*SIZEOF_XMMWORD]
+        movaps xmm10, XMMWORD [rsp+1*SIZEOF_XMMWORD]
+        movaps xmm9, XMMWORD [rsp+2*SIZEOF_XMMWORD]
+        movaps xmm8, XMMWORD [rsp+3*SIZEOF_XMMWORD]
         add rsp, 4*SIZEOF_XMMWORD
 %endif
         uncollect_args