CMAKE_SYSTEM_PROCESSOR_LC MATCHES "i[0-9]86" OR
CMAKE_SYSTEM_PROCESSOR_LC MATCHES "x86" OR
CMAKE_SYSTEM_PROCESSOR_LC MATCHES "ia32")
- if(BITS EQUAL 64)
+ if(BITS EQUAL 64 OR CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
set(CPU_TYPE x86_64)
else()
set(CPU_TYPE i386)
if(UNIX AND NOT APPLE)
if(BITS EQUAL 64)
set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib64")
+ elseif(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
+ set(CMAKE_INSTALL_DEFAULT_LIBDIR "libx32")
else()
set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib32")
endif()
boolean_number(WITH_ARITH_DEC)
option(WITH_ARITH_ENC "Include arithmetic encoding support when emulating the libjpeg v6b API/ABI" TRUE)
boolean_number(WITH_ARITH_ENC)
-option(WITH_JAVA "Build Java wrapper for the TurboJPEG API library (implies ENABLE_SHARED=1)" FALSE)
-boolean_number(WITH_JAVA)
+if(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
+ set(WITH_JAVA 0) # Java does not support the x32 ABI, so the Java wrapper is never built for x32
+else()
+ option(WITH_JAVA "Build Java wrapper for the TurboJPEG API library (implies ENABLE_SHARED=1)" FALSE)
+ boolean_number(WITH_JAVA)
+endif()
option(WITH_JPEG7 "Emulate libjpeg v7 API/ABI (this makes ${CMAKE_PROJECT_NAME} backward-incompatible with libjpeg v6b)" FALSE)
boolean_number(WITH_JPEG7)
option(WITH_JPEG8 "Emulate libjpeg v8 API/ABI (this makes ${CMAKE_PROJECT_NAME} backward-incompatible with libjpeg v6b)" FALSE)
+2.1 pre-beta
+============
+
+### Significant changes relative to 2.0.1:
+
+1. The build system, x86-64 SIMD extensions, and accelerated Huffman codec now
+support the x32 ABI on Linux, which allows for using x86-64 instructions with
+32-bit pointers. The x32 ABI is generally enabled by adding `-mx32` to the
+compiler flags.
+
+ Caveats:
+ - CMake 3.9.0 or later is required in order for the build system to
+automatically detect an x32 build.
+ - Java does not support the x32 ABI, and thus the TurboJPEG Java API will
+automatically be disabled with x32 builds.
+ - SIMD acceleration for progressive Huffman encoding does not (currently)
+work with the x32 ABI and will be disabled in x32 builds.
+
+
2.0.1
=====
# absolute paths where necessary, using the same logic.
#=============================================================================
+# Copyright 2018 Matthias Räncker
# Copyright 2016 D. R. Commander
# Copyright 2016 Dmitry Marakasov
# Copyright 2016 Roger Leigh
else()
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib64")
+ elseif(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
+ set(CMAKE_INSTALL_DEFAULT_LIBDIR "libx32")
endif()
endif()
endif()
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander.
* Copyright (C) 2015, Matthieu Darbois.
+ * Copyright (C) 2018, Matthias Räncker.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
* but must not be updated permanently until we complete the MCU.
*/
+#if defined(__x86_64__) && defined(__ILP32__)
+typedef unsigned long long bit_buf_type; /* x32 (x86-64 with 32-bit pointers): same condition selects the 64-bit EMIT_BITS path, so use a 64-bit type */
+#else
+typedef size_t bit_buf_type;
+#endif
+
typedef struct {
- size_t put_buffer; /* current bit-accumulation buffer */
+ bit_buf_type put_buffer; /* current bit-accumulation buffer */
int put_bits; /* # of bits now in it */
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
} savable_state;
#error Cannot determine word size
#endif
-#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
+#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
#define EMIT_BITS(code, size) { \
CHECKBUF47() \
flush_bits(working_state *state)
{
JOCTET _buffer[BUFSIZE], *buffer;
- size_t put_buffer; int put_bits;
+ bit_buf_type put_buffer; int put_bits;
size_t bytes, bytestocopy; int localbuf = 0;
put_buffer = state->cur.put_buffer;
int nbits;
int r, code, size;
JOCTET _buffer[BUFSIZE], *buffer;
- size_t put_buffer; int put_bits;
+ bit_buf_type put_buffer; int put_bits;
int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
size_t bytes, bytestocopy; int localbuf = 0;
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2016, 2018, D. R. Commander.
+ * Copyright (C) 2018, Matthias Räncker.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
} \
}
-#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
+#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
/* Pre-fetch 48 bytes, because the holding register is 64-bit */
#define FILL_BIT_BUFFER_FAST \
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2010-2011, 2015-2016, D. R. Commander.
+ * Copyright (C) 2018, Matthias Räncker.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
typedef size_t bit_buf_type; /* type of bit-extraction buffer */
#define BIT_BUF_SIZE 64 /* size of buffer in bits */
+#elif defined(__x86_64__) && defined(__ILP32__)
+
+typedef unsigned long long bit_buf_type; /* type of bit-extraction buffer */
+#define BIT_BUF_SIZE 64 /* size of buffer in bits */
+
#else
typedef unsigned long bit_buf_type; /* type of bit-extraction buffer */
if(CYGWIN)
set(CMAKE_ASM_NASM_OBJECT_FORMAT win64)
endif()
+ if(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
+ set(CMAKE_ASM_NASM_OBJECT_FORMAT elfx32)
+ endif()
elseif(CPU_TYPE STREQUAL "i386")
if(BORLAND)
set(CMAKE_ASM_NASM_OBJECT_FORMAT obj)
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2010, 2016, D. R. Commander.
+; Copyright (C) 2010, 2016, 2018, D. R. Commander.
; Copyright (C) 2018, Matthieu Darbois.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
;
; Common types
;
%ifdef __x86_64__
+%ifnidn __OUTPUT_FORMAT__, elfx32
%define POINTER qword ; general pointer type
%define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER)
%define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT
-%else
+%define raxp rax
+%define rbxp rbx
+%define rcxp rcx
+%define rdxp rdx
+%define rsip rsi
+%define rdip rdi
+%define rbpp rbp
+%define rspp rsp
+%define r8p r8
+%define r9p r9
+%define r10p r10
+%define r11p r11
+%define r12p r12
+%define r13p r13
+%define r14p r14
+%define r15p r15
+%endif
+%endif
+%ifndef raxp
%define POINTER dword ; general pointer type
%define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER)
%define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT
+; x86_64 ILP32 ABI (x32)
+%define raxp eax
+%define rbxp ebx
+%define rcxp ecx
+%define rdxp edx
+%define rsip esi
+%define rdip edi
+%define rbpp ebp
+%define rspp esp
+%define r8p r8d
+%define r9p r9d
+%define r10p r10d
+%define r11p r11d
+%define r12p r12d
+%define r13p r13d
+%define r14p r14d
+%define r15p r15d
%endif
%define INT dword ; signed integer type
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rsi, r12
mov ecx, r13d
- mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
- mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
- mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
+ mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
push rsi
push rcx ; col
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rdi, JSAMPROW [rdi] ; outptr0
- mov rbx, JSAMPROW [rbx] ; outptr1
- mov rdx, JSAMPROW [rdx] ; outptr2
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rdip, JSAMPROW [rdi] ; outptr0
+ mov rbxp, JSAMPROW [rbx] ; outptr1
+ mov rdxp, JSAMPROW [rdx] ; outptr2
cmp rcx, byte SIZEOF_YMMWORD
jae near .columnloop
; jccolext.asm - colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rsi, r12
mov ecx, r13d
- mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
- mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
- mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
+ mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
push rsi
push rcx ; col
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rdi, JSAMPROW [rdi] ; outptr0
- mov rbx, JSAMPROW [rbx] ; outptr1
- mov rdx, JSAMPROW [rdx] ; outptr2
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rdip, JSAMPROW [rdi] ; outptr0
+ mov rbxp, JSAMPROW [rbx] ; outptr1
+ mov rdxp, JSAMPROW [rdx] ; outptr2
cmp rcx, byte SIZEOF_XMMWORD
jae near .columnloop
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rsi, r12
mov ecx, r13d
- mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
+ mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
pop rcx
push rsi
push rcx ; col
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rdi, JSAMPROW [rdi] ; outptr0
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rdip, JSAMPROW [rdi] ; outptr0
cmp rcx, byte SIZEOF_YMMWORD
jae near .columnloop
; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rsi, r12
mov ecx, r13d
- mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
+ mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
pop rcx
push rsi
push rcx ; col
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rdi, JSAMPROW [rdi] ; outptr0
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rdip, JSAMPROW [rdi] ; outptr0
cmp rcx, byte SIZEOF_XMMWORD
jae near .columnloop
;
; Copyright (C) 2009-2011, 2014-2016, D. R. Commander.
; Copyright (C) 2015, Matthieu Darbois.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov buffer, r11 ; r11 is now sratch
- mov put_buffer, MMWORD [r10+16] ; put_buffer = state->cur.put_buffer;
- mov put_bits, DWORD [r10+24] ; put_bits = state->cur.put_bits;
+ mov put_buffer, MMWORD [r10+SIZEOF_POINTER*2] ; put_buffer = state->cur.put_buffer;
+ mov put_bits, DWORD [r10+SIZEOF_POINTER*2+8] ; put_bits = state->cur.put_bits;
push r10 ; r10 is now scratch
; Encode the DC coefficient difference per section F.1.2.1
.EFN:
pop r10
; Save put_buffer & put_bits
- mov MMWORD [r10+16], put_buffer ; state->cur.put_buffer = put_buffer;
- mov DWORD [r10+24], put_bits ; state->cur.put_bits = put_bits;
+ mov MMWORD [r10+SIZEOF_POINTER*2], put_buffer ; state->cur.put_buffer = put_buffer;
+ mov DWORD [r10+SIZEOF_POINTER*2+8], put_bits ; state->cur.put_bits = put_bits;
pop rbx
uncollect_args 6
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
push rax
push rcx
- mov rdi, JSAMPROW [rsi]
+ mov rdip, JSAMPROW [rsi]
add rdi, rdx
mov al, JSAMPLE [rdi-1]
push rdi
push rsi
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rdip, JSAMPROW [rdi] ; outptr
cmp rcx, byte SIZEOF_YMMWORD
jae short .columnloop
push rax
push rcx
- mov rdi, JSAMPROW [rsi]
+ mov rdip, JSAMPROW [rsi]
add rdi, rdx
mov al, JSAMPLE [rdi-1]
push rdi
push rsi
- mov rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
- mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rdxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
+ mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
+ mov rdip, JSAMPROW [rdi] ; outptr
cmp rcx, byte SIZEOF_YMMWORD
jae short .columnloop
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
push rax
push rcx
- mov rdi, JSAMPROW [rsi]
+ mov rdip, JSAMPROW [rsi]
add rdi, rdx
mov al, JSAMPLE [rdi-1]
push rdi
push rsi
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rdip, JSAMPROW [rdi] ; outptr
cmp rcx, byte SIZEOF_XMMWORD
jae short .columnloop
push rax
push rcx
- mov rdi, JSAMPROW [rsi]
+ mov rdip, JSAMPROW [rsi]
add rdi, rdx
mov al, JSAMPLE [rdi-1]
push rdi
push rsi
- mov rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
- mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rdxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
+ mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
+ mov rdip, JSAMPROW [rdi] ; outptr
cmp rcx, byte SIZEOF_XMMWORD
jae short .columnloop
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rdi, r11
mov ecx, r12d
- mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
- mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
- mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
push rsi
push rcx ; col
- mov rsi, JSAMPROW [rsi] ; inptr0
- mov rbx, JSAMPROW [rbx] ; inptr1
- mov rdx, JSAMPROW [rdx] ; inptr2
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rsip, JSAMPROW [rsi] ; inptr0
+ mov rbxp, JSAMPROW [rbx] ; inptr1
+ mov rdxp, JSAMPROW [rdx] ; inptr2
+ mov rdip, JSAMPROW [rdi] ; outptr
.columnloop:
vmovdqu ymm5, YMMWORD [rbx] ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rdi, r11
mov ecx, r12d
- mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
- mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
- mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
push rsi
push rcx ; col
- mov rsi, JSAMPROW [rsi] ; inptr0
- mov rbx, JSAMPROW [rbx] ; inptr1
- mov rdx, JSAMPROW [rdx] ; inptr2
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rsip, JSAMPROW [rsi] ; inptr0
+ mov rbxp, JSAMPROW [rbx] ; inptr1
+ mov rdxp, JSAMPROW [rdx] ; inptr2
+ mov rdip, JSAMPROW [rdi] ; outptr
.columnloop:
movdqa xmm5, XMMWORD [rbx] ; xmm5=Cb(0123456789ABCDEF)
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rdi, r11
mov ecx, r12d
- mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
- mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
- mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rdi, r13
- mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
- mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
- mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rsip, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
+ mov rbxp, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
+ mov rdxp, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
+ mov rdip, JSAMPROW [rdi] ; outptr
pop rcx ; col
mov rdi, r11
mov ecx, r12d
- mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
- mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
- mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rdi, r13
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
- push rdx ; inptr2
- push rbx ; inptr1
- push rsi ; inptr00
+ sub rsp, SIZEOF_JSAMPARRAY*4
+ mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
+ mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
+ mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
mov rbx, rsp
push rdi
pop rax
pop rcx
pop rdi
- pop rsi
- pop rbx
- pop rdx
+ mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
add rdi, byte SIZEOF_JSAMPROW ; outptr1
add rsi, byte SIZEOF_JSAMPROW ; inptr01
- push rdx ; inptr2
- push rbx ; inptr1
- push rsi ; inptr00
+ mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr01
+ mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
+ mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
mov rbx, rsp
push rdi
pop rax
pop rcx
pop rdi
- pop rsi
- pop rbx
- pop rdx
+ mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
+ add rsp, SIZEOF_JSAMPARRAY*4
pop rbx
uncollect_args 4
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rdi, r11
mov ecx, r12d
- mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
- mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
- mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rdi, r13
- mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
- mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
- mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rsip, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
+ mov rbxp, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
+ mov rdxp, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
+ mov rdip, JSAMPROW [rdi] ; outptr
pop rcx ; col
mov rdi, r11
mov ecx, r12d
- mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
- mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
- mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rdi, r13
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
- push rdx ; inptr2
- push rbx ; inptr1
- push rsi ; inptr00
+ sub rsp, SIZEOF_JSAMPARRAY*4
+ mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
+ mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
+ mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
mov rbx, rsp
push rdi
pop rax
pop rcx
pop rdi
- pop rsi
- pop rbx
- pop rdx
+ mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
add rdi, byte SIZEOF_JSAMPROW ; outptr1
add rsi, byte SIZEOF_JSAMPROW ; inptr01
- push rdx ; inptr2
- push rbx ; inptr1
- push rsi ; inptr00
+ mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr01
+ mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
+ mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
mov rbx, rsp
push rdi
pop rax
pop rcx
pop rdi
- pop rsi
- pop rbx
- pop rdx
+ mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
+ mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
+ mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
+ add rsp, SIZEOF_JSAMPARRAY*4
pop rbx
uncollect_args 4
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rsi, r12 ; input_data
mov rdi, r13
- mov rdi, JSAMPARRAY [rdi] ; output_data
+ mov rdip, JSAMPARRAY [rdi] ; output_data
vpxor ymm0, ymm0, ymm0 ; ymm0=(all 0's)
vpcmpeqb xmm9, xmm9, xmm9
push rdi
push rsi
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rdip, JSAMPROW [rdi] ; outptr
test rax, SIZEOF_YMMWORD-1
jz short .skip
mov rsi, r12 ; input_data
mov rdi, r13
- mov rdi, JSAMPARRAY [rdi] ; output_data
+ mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rax ; colctr
push rcx
push rdi
push rsi
- mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
- mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
- mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
- mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
- mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
+ mov rcxp, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
+ mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
+ mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
+ mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
vpxor ymm8, ymm8, ymm8 ; ymm8=(all 0's)
vpcmpeqb xmm9, xmm9, xmm9
mov rsi, r12 ; input_data
mov rdi, r13
- mov rdi, JSAMPARRAY [rdi] ; output_data
+ mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rdi
push rsi
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rdip, JSAMPROW [rdi] ; outptr
mov rax, rdx ; colctr
.columnloop:
mov rsi, r12 ; input_data
mov rdi, r13
- mov rdi, JSAMPARRAY [rdi] ; output_data
+ mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rdi
push rsi
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
- mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rbxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
+ mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
mov rax, rdx ; colctr
.columnloop:
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rsi, r12 ; input_data
mov rdi, r13
- mov rdi, JSAMPARRAY [rdi] ; output_data
+ mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rax ; colctr
push rdi
push rsi
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rdip, JSAMPROW [rdi] ; outptr
test rax, SIZEOF_XMMWORD-1
jz short .skip
mov rsi, r12 ; input_data
mov rdi, r13
- mov rdi, JSAMPARRAY [rdi] ; output_data
+ mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rax ; colctr
push rcx
push rdi
push rsi
- mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
- mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
- mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
- mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
- mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
+ mov rcxp, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
+ mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
+ mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
+ mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
test rax, SIZEOF_XMMWORD-1
jz short .skip
mov rsi, r12 ; input_data
mov rdi, r13
- mov rdi, JSAMPARRAY [rdi] ; output_data
+ mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rdi
push rsi
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rdi, JSAMPROW [rdi] ; outptr
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rdip, JSAMPROW [rdi] ; outptr
mov rax, rdx ; colctr
.columnloop:
mov rsi, r12 ; input_data
mov rdi, r13
- mov rdi, JSAMPARRAY [rdi] ; output_data
+ mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rdi
push rsi
- mov rsi, JSAMPROW [rsi] ; inptr
- mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
- mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
+ mov rsip, JSAMPROW [rsi] ; inptr
+ mov rbxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
+ mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
mov rax, rdx ; colctr
.columnloop:
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
pshufd xmm5, xmm6, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
pshufd xmm3, xmm7, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
- mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
- mov rbx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+ mov rbxp, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7
- mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
- mov rbx, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+ mov rbxp, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
pshufd xmm6, xmm4, 0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
pshufd xmm2, xmm7, 0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
- mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
- mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
- mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
- mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov eax, r13d
- mov rdx, JSAMPROW [r12+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rsi, JSAMPROW [r12+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdxp, JSAMPROW [r12+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rsip, JSAMPROW [r12+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm0
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1
- mov rdx, JSAMPROW [r12+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rsi, JSAMPROW [r12+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdxp, JSAMPROW [r12+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rsip, JSAMPROW [r12+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
- mov rdx, JSAMPROW [r12+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rsi, JSAMPROW [r12+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdxp, JSAMPROW [r12+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rsip, JSAMPROW [r12+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
- mov rdx, JSAMPROW [r12+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rsi, JSAMPROW [r12+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdxp, JSAMPROW [r12+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rsip, JSAMPROW [r12+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
pshufd xmm2, xmm4, 0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
pshufd xmm5, xmm3, 0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
- mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm7
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1
- mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
- mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
- mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
pshufd xmm1, xmm4, 0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
pshufd xmm3, xmm4, 0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..)
- mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
- mov rdx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
pextrw ebx, xmm6, 0x00 ; ebx=(C0 D0 -- --)
pextrw ecx, xmm6, 0x01 ; ecx=(C1 D1 -- --)
- mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
- mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+ mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx
mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rdi, r12
mov rcx, DCTSIZE/2
.convloop:
- mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdxp, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]
movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, D. R. Commander.
; Copyright (C) 2016, Matthieu Darbois.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov eax, r11d
- mov rsi, JSAMPROW [r10+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rdi, JSAMPROW [r10+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rsip, JSAMPROW [r10+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdip, JSAMPROW [r10+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm0, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
pinsrq xmm0, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
- mov rsi, JSAMPROW [r10+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rdi, JSAMPROW [r10+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rsip, JSAMPROW [r10+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdip, JSAMPROW [r10+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm1, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
pinsrq xmm1, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
- mov rsi, JSAMPROW [r10+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rdi, JSAMPROW [r10+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rsip, JSAMPROW [r10+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdip, JSAMPROW [r10+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm2, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
pinsrq xmm2, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
- mov rsi, JSAMPROW [r10+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rdi, JSAMPROW [r10+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rsip, JSAMPROW [r10+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdip, JSAMPROW [r10+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm3, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
pinsrq xmm3, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
mov rdi, r12
mov rcx, DCTSIZE/4
.convloop:
- mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdxp, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567)
movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF)
- mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
- mov rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rbxp, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
+ mov rdxp, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN)
movq xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV)