From: DRC Date: Thu, 24 Oct 2019 07:13:23 +0000 (-0500) Subject: Merge branch 'master' into dev X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=95f4d6ef8b045d3e76d003ce7d23244f89927788;p=libjpeg-turbo Merge branch 'master' into dev --- 95f4d6ef8b045d3e76d003ce7d23244f89927788 diff --cc ChangeLog.md index e47b874,4d6960f..b4cca2e --- a/ChangeLog.md +++ b/ChangeLog.md @@@ -1,51 -1,14 +1,62 @@@ +2.1 pre-beta +============ + +### Significant changes relative to 2.0.1: + +1. The build system, x86-64 SIMD extensions, and accelerated Huffman codec now +support the x32 ABI on Linux, which allows for using x86-64 instructions with +32-bit pointers. The x32 ABI is generally enabled by adding `-mx32` to the +compiler flags. + + Caveats: + - CMake 3.9.0 or later is required in order for the build system to +automatically detect an x32 build. + - Java does not support the x32 ABI, and thus the TurboJPEG Java API will +automatically be disabled with x32 builds. + - SIMD acceleration for progressive Huffman encoding does not (currently) +work with the x32 ABI and will be disabled in x32 builds. + +2. Added Loongson MMI SIMD implementations of the RGB-to-grayscale, 4:2:2 fancy +chroma upsampling, 4:2:2 and 4:2:0 merged chroma upsampling/color conversion, +and fast integer DCT/IDCT algorithms. 
Relative to libjpeg-turbo 2.0.x, this +speeds up: + + - the compression of RGB source images into grayscale JPEG images by +approximately 20% + - the decompression of 4:2:2 JPEG images by approximately 40-60% when +using fancy upsampling + - the decompression of 4:2:2 and 4:2:0 JPEG images by approximately +15-20% when using merged upsampling + - the compression of RGB source images by approximately 30-45% when using +the fast integer DCT + - the decompression of JPEG images into RGB destination images by +approximately 2x when using the fast integer IDCT + + The overall decompression speedup for RGB images is now approximately +2.3-3.7x (compared to 2-3.5x with libjpeg-turbo 2.0.x.) + +3. 32-bit (ARMv7 or ARMv7s) iOS builds of libjpeg-turbo are no longer +supported, and the libjpeg-turbo build system can no longer be used to package +such builds. 32-bit iOS apps cannot run in iOS 11 and later, and the App Store +no longer allows them. + +4. The TurboJPEG Java wrapper in the "official" libjpeg-turbo SDK for macOS no +longer supports 32-bit Java virtual machines. Oracle no longer provides a +32-bit JVM for macOS, and Apple's implementation of Java 1.6 (Java for OS X +systems) is long obsolete. + + + 2.0.4 + ===== + + ### Significant changes relative to 2.0.3: + + 1. Fixed a regression in the Windows packaging system (introduced by + 2.0 beta1[2]) whereby, if both the 64-bit libjpeg-turbo SDK for GCC and the + 64-bit libjpeg-turbo SDK for Visual C++ were installed on the same system, only + one of them could be uninstalled. 
+ + 2.0.3 ===== diff --cc simd/x86_64/jchuff-sse2.asm index 72333af,aa78fd5..267425e --- a/simd/x86_64/jchuff-sse2.asm +++ b/simd/x86_64/jchuff-sse2.asm @@@ -200,8 -197,8 +198,8 @@@ EXTN(jsimd_huff_encode_one_block_sse2) mov buffer, r11 ; r11 is now scratch - mov put_buffer, MMWORD [r10+16] ; put_buffer = state->cur.put_buffer; - mov put_bits, dword [r10+24] ; put_bits = state->cur.put_bits; + mov put_buffer, MMWORD [r10+SIZEOF_POINTER*2] ; put_buffer = state->cur.put_buffer; - mov put_bits, DWORD [r10+SIZEOF_POINTER*2+8] ; put_bits = state->cur.put_bits; ++ mov put_bits, dword [r10+SIZEOF_POINTER*2+8] ; put_bits = state->cur.put_bits; push r10 ; r10 is now scratch ; Encode the DC coefficient difference per section F.1.2.1 @@@ -333,8 -330,8 +331,8 @@@ .EFN: pop r10 ; Save put_buffer & put_bits - mov MMWORD [r10+16], put_buffer ; state->cur.put_buffer = put_buffer; - mov dword [r10+24], put_bits ; state->cur.put_bits = put_bits; + mov MMWORD [r10+SIZEOF_POINTER*2], put_buffer ; state->cur.put_buffer = put_buffer; - mov DWORD [r10+SIZEOF_POINTER*2+8], put_bits ; state->cur.put_bits = put_bits; ++ mov dword [r10+SIZEOF_POINTER*2+8], put_bits ; state->cur.put_bits = put_bits; pop rbx uncollect_args 6 diff --cc simd/x86_64/jidctred-sse2.asm index d5a4782,7fbfcc5..4ece9d8 --- a/simd/x86_64/jidctred-sse2.asm +++ b/simd/x86_64/jidctred-sse2.asm @@@ -561,10 -558,10 +559,10 @@@ EXTN(jsimd_idct_2x2_sse2) pextrw ebx, xmm6, 0x00 ; ebx=(C0 D0 -- --) pextrw ecx, xmm6, 0x01 ; ecx=(C1 D1 -- --) - mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] - mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx - mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx + mov word [rdx+rax*SIZEOF_JSAMPLE], bx + mov word [rsi+rax*SIZEOF_JSAMPLE], cx pop rbx uncollect_args 4