granicus.if.org Git - libjpeg-turbo/commitdiff
AVX2: Introduce YMMBLOCK macro for readability
author DRC <information@libjpeg-turbo.org>
Sat, 17 Feb 2018 23:29:38 +0000 (17:29 -0600)
committer DRC <information@libjpeg-turbo.org>
Sat, 17 Feb 2018 23:29:38 +0000 (17:29 -0600)
simd/i386/jquanti-avx2.asm
simd/nasm/jdct.inc
simd/x86_64/jquanti-avx2.asm

index 0356185860ad13cbb03a7bae2decdd2706dc8e8b..a8f24f2986ddf3afa175217ffd6b875c65b7d221 100644 (file)
@@ -2,7 +2,7 @@
 ; jquanti.asm - sample quantization (AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2018, D. R. Commander.
 ; Copyright (C) 2016, Matthieu Darbois.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -37,9 +37,9 @@
 ;                      DCTELEM *workspace);
 ;
 
-%define RECIPROCAL(m,n,b)  XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
-%define CORRECTION(m,n,b)  XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
-%define SCALE(m,n,b)       XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
+%define RECIPROCAL(m,n,b)  YMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
+%define CORRECTION(m,n,b)  YMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
+%define SCALE(m,n,b)       YMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
 
 %define coef_block  ebp+8               ; JCOEFPTR coef_block
 %define divisors    ebp+12              ; DCTELEM *divisors
@@ -61,10 +61,10 @@ EXTN(jsimd_quantize_avx2):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
 
-    vmovdqu     ymm4, [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
-    vmovdqu     ymm5, [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
-    vmovdqu     ymm6, [XMMBLOCK(4,0,esi,SIZEOF_DCTELEM)]
-    vmovdqu     ymm7, [XMMBLOCK(6,0,esi,SIZEOF_DCTELEM)]
+    vmovdqu     ymm4, [YMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
+    vmovdqu     ymm5, [YMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
+    vmovdqu     ymm6, [YMMBLOCK(4,0,esi,SIZEOF_DCTELEM)]
+    vmovdqu     ymm7, [YMMBLOCK(6,0,esi,SIZEOF_DCTELEM)]
     vpabsw      ymm0, ymm4
     vpabsw      ymm1, ymm5
     vpabsw      ymm2, ymm6
@@ -88,10 +88,10 @@ EXTN(jsimd_quantize_avx2):
     vpsignw     ymm2, ymm2, ymm6
     vpsignw     ymm3, ymm3, ymm7
 
-    vmovdqu     [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], ymm0
-    vmovdqu     [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], ymm1
-    vmovdqu     [XMMBLOCK(4,0,edi,SIZEOF_DCTELEM)], ymm2
-    vmovdqu     [XMMBLOCK(6,0,edi,SIZEOF_DCTELEM)], ymm3
+    vmovdqu     [YMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], ymm0
+    vmovdqu     [YMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], ymm1
+    vmovdqu     [YMMBLOCK(4,0,edi,SIZEOF_DCTELEM)], ymm2
+    vmovdqu     [YMMBLOCK(6,0,edi,SIZEOF_DCTELEM)], ymm3
 
     vzeroupper
     pop         edi
index 7ae2ca4f637c211d9bad946598df504d55f43638..20c8f3d1bb9a3d8ffff7568307596752eb0730ef 100644 (file)
@@ -2,6 +2,7 @@
 ; jdct.inc - private declarations for forward & reverse DCT subsystems
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright (C) 2018, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -23,5 +24,6 @@
 %define DWBLOCK(m,n,b,s)   ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD)
 %define MMBLOCK(m,n,b,s)   ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD)
 %define XMMBLOCK(m,n,b,s)  ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD)
+%define YMMBLOCK(m,n,b,s)  ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_YMMWORD)
 
 ; --------------------------------------------------------------------------
index 60ce19f8d43d1224f3fc341c678b8991697701ad..5eadeaad7583275eaffe555861d9b9c42714ffb6 100644 (file)
@@ -2,7 +2,7 @@
 ; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, D. R. Commander.
 ; Copyright (C) 2016, Matthieu Darbois.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -37,9 +37,9 @@
 ;                      DCTELEM *workspace);
 ;
 
-%define RECIPROCAL(m,n,b)  XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
-%define CORRECTION(m,n,b)  XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
-%define SCALE(m,n,b)       XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
+%define RECIPROCAL(m,n,b)  YMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
+%define CORRECTION(m,n,b)  YMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
+%define SCALE(m,n,b)       YMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
 
 ; r10 = JCOEFPTR coef_block
 ; r11 = DCTELEM *divisors
@@ -54,10 +54,10 @@ EXTN(jsimd_quantize_avx2):
     mov         rbp, rsp
     collect_args 3
 
-    vmovdqu     ymm4, [XMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
-    vmovdqu     ymm5, [XMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
-    vmovdqu     ymm6, [XMMBLOCK(4,0,r12,SIZEOF_DCTELEM)]
-    vmovdqu     ymm7, [XMMBLOCK(6,0,r12,SIZEOF_DCTELEM)]
+    vmovdqu     ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
+    vmovdqu     ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
+    vmovdqu     ymm6, [YMMBLOCK(4,0,r12,SIZEOF_DCTELEM)]
+    vmovdqu     ymm7, [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)]
     vpabsw      ymm0, ymm4
     vpabsw      ymm1, ymm5
     vpabsw      ymm2, ymm6
@@ -81,10 +81,10 @@ EXTN(jsimd_quantize_avx2):
     vpsignw     ymm2, ymm2, ymm6
     vpsignw     ymm3, ymm3, ymm7
 
-    vmovdqu     [XMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm0
-    vmovdqu     [XMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm1
-    vmovdqu     [XMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm2
-    vmovdqu     [XMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
+    vmovdqu     [YMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm0
+    vmovdqu     [YMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm1
+    vmovdqu     [YMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm2
+    vmovdqu     [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
 
     vzeroupper
     uncollect_args 3