granicus.if.org Git - libjpeg-turbo/commitdiff
Win/x64: Fix improper callee save of xmm8-xmm11 1.4.90
author DRC <information@libjpeg-turbo.org>
Mon, 29 Feb 2016 23:21:02 +0000 (17:21 -0600)
committer DRC <information@libjpeg-turbo.org>
Mon, 29 Feb 2016 23:46:34 +0000 (17:46 -0600)
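The x86-64 SIMD accelerations for Huffman encoding used incorrect
stack math to save xmm8-xmm11 on Windows: the prologue lowered rsp
first and then stored the registers at offsets of -3 to 0 XMMWORDs from
the new rsp, so three of the four save slots lay below the stack
pointer, and the "push rbx" that follows overwrote part of the xmm10
slot.  This caused TJBench to always report 1 Mpixel/sec for the
compression performance, and it likely would have caused other
application issues as well.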

simd/jchuff-sse2-64.asm

index d22efc36d60ebf5c03d9d9c714a3c611607d0169..84eaeebf097964b2675110c8bc944a1df416f596 100644 (file)
@@ -196,11 +196,11 @@ EXTN(jsimd_huff_encode_one_block_sse2):
         lea     rsp, [t2]
         collect_args
 %ifdef WIN64
-        sub     rsp, 4*SIZEOF_XMMWORD
-        movaps  XMMWORD [rsp-3*SIZEOF_XMMWORD], xmm8
+        movaps  XMMWORD [rsp-1*SIZEOF_XMMWORD], xmm8
         movaps  XMMWORD [rsp-2*SIZEOF_XMMWORD], xmm9
-        movaps  XMMWORD [rsp-1*SIZEOF_XMMWORD], xmm10
-        movaps  XMMWORD [rsp-0*SIZEOF_XMMWORD], xmm11
+        movaps  XMMWORD [rsp-3*SIZEOF_XMMWORD], xmm10
+        movaps  XMMWORD [rsp-4*SIZEOF_XMMWORD], xmm11
+        sub     rsp, 4*SIZEOF_XMMWORD
 %endif
         push rbx
 
@@ -344,10 +344,10 @@ EXTN(jsimd_huff_encode_one_block_sse2):
 
         pop rbx
 %ifdef WIN64
-        movaps  xmm8, XMMWORD [rsp-3*SIZEOF_XMMWORD]
-        movaps  xmm9, XMMWORD [rsp-2*SIZEOF_XMMWORD]
-        movaps  xmm10, XMMWORD [rsp-1*SIZEOF_XMMWORD]
-        movaps  xmm11, XMMWORD [rsp-0*SIZEOF_XMMWORD]
+        movaps  xmm11, XMMWORD [rsp+0*SIZEOF_XMMWORD]
+        movaps  xmm10, XMMWORD [rsp+1*SIZEOF_XMMWORD]
+        movaps  xmm9, XMMWORD [rsp+2*SIZEOF_XMMWORD]
+        movaps  xmm8, XMMWORD [rsp+3*SIZEOF_XMMWORD]
         add     rsp, 4*SIZEOF_XMMWORD
 %endif
         uncollect_args