Merge branch 'master' into dev

author DRC <information@libjpeg-turbo.org>

Thu, 24 Oct 2019 07:13:23 +0000 (02:13 -0500)

committer DRC <information@libjpeg-turbo.org>

Thu, 24 Oct 2019 07:13:23 +0000 (02:13 -0500)
author DRC <information@libjpeg-turbo.org>
Thu, 24 Oct 2019 07:13:23 +0000 (02:13 -0500)
committer DRC <information@libjpeg-turbo.org>
Thu, 24 Oct 2019 07:13:23 +0000 (02:13 -0500)
diff --cc ChangeLog.md

index e47b8746049582e0c2df566bba9f294d96dc7372,4d6960f3db3bc2e5eab97801619415df41cc218d..b4cca2ec233dba1a8dda704de37cc758b14f564c
--- 1/ChangeLog.md
--- 2/ChangeLog.md
+++ b/ChangeLog.md
@@@ -1,51 -1,14 +1,62 @@@
+ +2.1 pre-beta
+ +============
+ +
+ +### Significant changes relative to 2.0.1:
+ +
+ +1. The build system, x86-64 SIMD extensions, and accelerated Huffman codec now
+ +support the x32 ABI on Linux, which allows for using x86-64 instructions with
+ +32-bit pointers.  The x32 ABI is generally enabled by adding `-mx32` to the
+ +compiler flags.
+ +
+ +     Caveats:
+ +     - CMake 3.9.0 or later is required in order for the build system to
+ +automatically detect an x32 build.
+ +     - Java does not support the x32 ABI, and thus the TurboJPEG Java API will
+ +automatically be disabled with x32 builds.
+ +     - SIMD acceleration for progressive Huffman encoding does not (currently)
+ +work with the x32 ABI and will be disabled in x32 builds.
+ +
+ +2. Added Loongson MMI SIMD implementations of the RGB-to-grayscale, 4:2:2 fancy
+ +chroma upsampling, 4:2:2 and 4:2:0 merged chroma upsampling/color conversion,
+ +and fast integer DCT/IDCT algorithms.  Relative to libjpeg-turbo 2.0.x, this
+ +speeds up:
+ +
+ +     - the compression of RGB source images into grayscale JPEG images by
+ +approximately 20%
+ +     - the decompression of 4:2:2 JPEG images by approximately 40-60% when
+ +using fancy upsampling
+ +     - the decompression of 4:2:2 and 4:2:0 JPEG images by approximately
+ +15-20% when using merged upsampling
+ +     - the compression of RGB source images by approximately 30-45% when using
+ +the fast integer DCT
+ +     - the decompression of JPEG images into RGB destination images by
+ +approximately 2x when using the fast integer IDCT
+ +
+ +    The overall decompression speedup for RGB images is now approximately
+ +2.3-3.7x (compared to 2-3.5x with libjpeg-turbo 2.0.x.)
+ +
+ +3. 32-bit (ARMv7 or ARMv7s) iOS builds of libjpeg-turbo are no longer
+ +supported, and the libjpeg-turbo build system can no longer be used to package
+ +such builds.  32-bit iOS apps cannot run in iOS 11 and later, and the App Store
+ +no longer allows them.
+ +
+ +4. The TurboJPEG Java wrapper in the "official" libjpeg-turbo SDK for macOS no
+ +longer supports 32-bit Java virtual machines.  Oracle no longer provides a
+ +32-bit JVM for macOS, and Apple's implementation of Java 1.6 (Java for OS X
+ +systems) is long obsolete.
+ +
+ +
+ 2.0.4
+ =====
+ 
+ ### Significant changes relative to 2.0.3:
+ 
+ 1. Fixed a regression in the Windows packaging system (introduced by
+ 2.0 beta1[2]) whereby, if both the 64-bit libjpeg-turbo SDK for GCC and the
+ 64-bit libjpeg-turbo SDK for Visual C++ were installed on the same system, only
+ one of them could be uninstalled.
+ 
+ 
   2.0.3
   =====
   
diff --cc cmakescripts/BuildPackages.cmake
Simple merge
diff --cc jchuff.c
Simple merge
diff --cc simd/nasm/jsimdext.inc
Simple merge
diff --cc simd/x86_64/jccolext-avx2.asm
Simple merge
diff --cc simd/x86_64/jccolext-sse2.asm
Simple merge
diff --cc simd/x86_64/jcgryext-avx2.asm
Simple merge
diff --cc simd/x86_64/jcgryext-sse2.asm
Simple merge
diff --cc simd/x86_64/jchuff-sse2.asm

index 72333af0fa01b77dc17f5ff07b5d0c8b48c34720,aa78fd5cd5e02e7ae1f9b1eed46c90ae8f677d4b..267425e330445e2fd0abba03916a4dd6a7aeeab5
--- 1/simd/x86_64/jchuff-sse2.asm
--- 2/simd/x86_64/jchuff-sse2.asm
+++ b/simd/x86_64/jchuff-sse2.asm
@@@ -200,8 -197,8 +198,8 @@@ EXTN(jsimd_huff_encode_one_block_sse2)
   
       mov         buffer, r11                  ; r11 is now scratch
   
- -    mov         put_buffer, MMWORD [r10+16]  ; put_buffer = state->cur.put_buffer;
- -    mov         put_bits,    dword [r10+24]  ; put_bits = state->cur.put_bits;
+ +    mov         put_buffer, MMWORD [r10+SIZEOF_POINTER*2]    ; put_buffer = state->cur.put_buffer;
-     mov         put_bits,    DWORD [r10+SIZEOF_POINTER*2+8]  ; put_bits = state->cur.put_bits;
++    mov         put_bits,    dword [r10+SIZEOF_POINTER*2+8]  ; put_bits = state->cur.put_bits;
       push        r10                          ; r10 is now scratch
   
       ; Encode the DC coefficient difference per section F.1.2.1
@@@ -333,8 -330,8 +331,8 @@@
   .EFN:
       pop         r10
       ; Save put_buffer & put_bits
- -    mov         MMWORD [r10+16], put_buffer  ; state->cur.put_buffer = put_buffer;
- -    mov         dword  [r10+24], put_bits    ; state->cur.put_bits = put_bits;
+ +    mov         MMWORD [r10+SIZEOF_POINTER*2], put_buffer  ; state->cur.put_buffer = put_buffer;
-     mov         DWORD  [r10+SIZEOF_POINTER*2+8], put_bits  ; state->cur.put_bits = put_bits;
++    mov         dword  [r10+SIZEOF_POINTER*2+8], put_bits  ; state->cur.put_bits = put_bits;
   
       pop         rbx
       uncollect_args 6
diff --cc simd/x86_64/jcsample-avx2.asm
Simple merge
diff --cc simd/x86_64/jcsample-sse2.asm
Simple merge
diff --cc simd/x86_64/jdcolext-avx2.asm
Simple merge
diff --cc simd/x86_64/jdcolext-sse2.asm
Simple merge
diff --cc simd/x86_64/jdmrgext-avx2.asm
Simple merge
diff --cc simd/x86_64/jdmrgext-sse2.asm
Simple merge
diff --cc simd/x86_64/jdsample-avx2.asm
Simple merge
diff --cc simd/x86_64/jdsample-sse2.asm
Simple merge
diff --cc simd/x86_64/jidctflt-sse2.asm
Simple merge
diff --cc simd/x86_64/jidctfst-sse2.asm
Simple merge
diff --cc simd/x86_64/jidctint-avx2.asm
Simple merge
diff --cc simd/x86_64/jidctint-sse2.asm
Simple merge
diff --cc simd/x86_64/jidctred-sse2.asm

index d5a4782d993cb252b3650be38607629d5ab4879e,7fbfcc519dda23bf8696de11f6f77b81185aa8c7..4ece9d891cbd8b09518895164b8be5a3d321f7ac
--- 1/simd/x86_64/jidctred-sse2.asm
--- 2/simd/x86_64/jidctred-sse2.asm
+++ b/simd/x86_64/jidctred-sse2.asm
@@@ -561,10 -558,10 +559,10 @@@ EXTN(jsimd_idct_2x2_sse2)
       pextrw      ebx, xmm6, 0x00         ; ebx=(C0 D0 -- --)
       pextrw      ecx, xmm6, 0x01         ; ecx=(C1 D1 -- --)
   
- -    mov         rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
- -    mov         rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+ +    mov         rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+ +    mov         rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
-     mov         WORD [rdx+rax*SIZEOF_JSAMPLE], bx
-     mov         WORD [rsi+rax*SIZEOF_JSAMPLE], cx
+     mov         word [rdx+rax*SIZEOF_JSAMPLE], bx
+     mov         word [rsi+rax*SIZEOF_JSAMPLE], cx
   
       pop         rbx
       uncollect_args 4
diff --cc simd/x86_64/jquantf-sse2.asm
Simple merge
diff --cc simd/x86_64/jquanti-avx2.asm
Simple merge
diff --cc simd/x86_64/jquanti-sse2.asm
Simple merge
author	DRC <information@libjpeg-turbo.org>
	Thu, 24 Oct 2019 07:13:23 +0000 (02:13 -0500)
committer	DRC <information@libjpeg-turbo.org>
	Thu, 24 Oct 2019 07:13:23 +0000 (02:13 -0500)
		1	2
ChangeLog.md	patch \|	diff1 \|	diff2 \|	blob \| history
cmakescripts/BuildPackages.cmake	patch \|	diff1 \|	diff2 \|	blob \| history
jchuff.c	patch \|	diff1 \|	diff2 \|	blob \| history
simd/nasm/jsimdext.inc	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jccolext-avx2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jccolext-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jcgryext-avx2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jcgryext-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jchuff-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jcsample-avx2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jcsample-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jdcolext-avx2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jdcolext-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jdmrgext-avx2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jdmrgext-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jdsample-avx2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jdsample-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jidctflt-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jidctfst-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jidctint-avx2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jidctint-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jidctred-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jquantf-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jquanti-avx2.asm	patch \|	diff1 \|	diff2 \|	blob \| history
simd/x86_64/jquanti-sse2.asm	patch \|	diff1 \|	diff2 \|	blob \| history