From: mayeut
Date: Sat, 24 Feb 2018 13:50:56 +0000 (+0100)
Subject: Fix Win64 ABI conformance when using xmm8-xmm11
X-Git-Tag: 1.5.90~27^2
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0dd9a2c1fd6c232021f66750d2b2846d5a646faf;p=libjpeg-turbo

Fix Win64 ABI conformance when using xmm8-xmm11

Referring to https://docs.microsoft.com/en-US/cpp/build/stack-usage:

"All memory beyond the current address of RSP is considered volatile: The OS, or a debugger, may overwrite this memory during a user debug session, or an interrupt handler. Thus, RSP must always be set before attempting to read or write values to a stack frame."

Basically, if -- under extremely rare circumstances -- a context switch were to occur between saving the values of xmm8-xmm11 and setting the new value of rsp, the O/S might not preserve that area of the stack. In general, libjpeg-turbo should not be using xmm8-xmm11 before or after the call to jsimd_huff_encode_one_block_sse2(), so this is probably a non-issue, but it's still a good idea to fix it.
Based on https://github.com/mayeut/libjpeg-turbo/commit/ff7d2030dd26c7b8c37ff540c594490198843f15 --- diff --git a/simd/jchuff-sse2-64.asm b/simd/jchuff-sse2-64.asm index b1144d1..0deb3e6 100644 --- a/simd/jchuff-sse2-64.asm +++ b/simd/jchuff-sse2-64.asm @@ -195,11 +195,11 @@ EXTN(jsimd_huff_encode_one_block_sse2): lea rsp, [t2] collect_args %ifdef WIN64 - movaps XMMWORD [rsp-1*SIZEOF_XMMWORD], xmm8 - movaps XMMWORD [rsp-2*SIZEOF_XMMWORD], xmm9 - movaps XMMWORD [rsp-3*SIZEOF_XMMWORD], xmm10 - movaps XMMWORD [rsp-4*SIZEOF_XMMWORD], xmm11 sub rsp, 4*SIZEOF_XMMWORD + movaps XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8 + movaps XMMWORD [rsp+1*SIZEOF_XMMWORD], xmm9 + movaps XMMWORD [rsp+2*SIZEOF_XMMWORD], xmm10 + movaps XMMWORD [rsp+3*SIZEOF_XMMWORD], xmm11 %endif push rbx @@ -343,10 +343,10 @@ EXTN(jsimd_huff_encode_one_block_sse2): pop rbx %ifdef WIN64 - movaps xmm11, XMMWORD [rsp+0*SIZEOF_XMMWORD] - movaps xmm10, XMMWORD [rsp+1*SIZEOF_XMMWORD] - movaps xmm9, XMMWORD [rsp+2*SIZEOF_XMMWORD] - movaps xmm8, XMMWORD [rsp+3*SIZEOF_XMMWORD] + movaps xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD] + movaps xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD] + movaps xmm10, XMMWORD [rsp+2*SIZEOF_XMMWORD] + movaps xmm11, XMMWORD [rsp+3*SIZEOF_XMMWORD] add rsp, 4*SIZEOF_XMMWORD %endif uncollect_args