From: DRC Date: Fri, 18 Oct 2019 00:59:01 +0000 (-0500) Subject: x86 SIMD: Consistify capitalization of NASM types X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3a32d199dfa8910a1424250d150c466640032c81;p=libjpeg-turbo x86 SIMD: Consistify capitalization of NASM types byte, word, dword, qword, oword, and yword are all assembler keywords, so it makes sense to use lowercase for these so as not to mistake them for macros or constants. --- diff --git a/simd/i386/jccolext-avx2.asm b/simd/i386/jccolext-avx2.asm index 11674d5..c46d684 100644 --- a/simd/i386/jccolext-avx2.asm +++ b/simd/i386/jccolext-avx2.asm @@ -108,12 +108,12 @@ EXTN(jsimd_rgb_ycc_convert_avx2): test cl, SIZEOF_BYTE jz short .column_ld2 sub ecx, byte SIZEOF_BYTE - movzx eax, BYTE [esi+ecx] + movzx eax, byte [esi+ecx] .column_ld2: test cl, SIZEOF_WORD jz short .column_ld4 sub ecx, byte SIZEOF_WORD - movzx edx, WORD [esi+ecx] + movzx edx, word [esi+ecx] shl eax, WORD_BIT or eax, edx .column_ld4: diff --git a/simd/i386/jccolext-mmx.asm b/simd/i386/jccolext-mmx.asm index c18dbc4..6357a42 100644 --- a/simd/i386/jccolext-mmx.asm +++ b/simd/i386/jccolext-mmx.asm @@ -109,13 +109,13 @@ EXTN(jsimd_rgb_ycc_convert_mmx): jz short .column_ld2 sub ecx, byte SIZEOF_BYTE xor eax, eax - mov al, BYTE [esi+ecx] + mov al, byte [esi+ecx] .column_ld2: test cl, SIZEOF_WORD jz short .column_ld4 sub ecx, byte SIZEOF_WORD xor edx, edx - mov dx, WORD [esi+ecx] + mov dx, word [esi+ecx] shl eax, WORD_BIT or eax, edx .column_ld4: @@ -125,7 +125,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx): test cl, SIZEOF_DWORD jz short .column_ld8 sub ecx, byte SIZEOF_DWORD - movd mmG, DWORD [esi+ecx] + movd mmG, dword [esi+ecx] psllq mmA, DWORD_BIT por mmA, mmG .column_ld8: @@ -195,7 +195,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx): test cl, SIZEOF_MMWORD/8 jz short .column_ld2 sub ecx, byte SIZEOF_MMWORD/8 - movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] + movd mmA, dword [esi+ecx*RGB_PIXELSIZE] .column_ld2: test cl, SIZEOF_MMWORD/4 jz short .column_ld4 diff --git a/simd/i386/jccolext-sse2.asm b/simd/i386/jccolext-sse2.asm index 06b02d5..c6c8085 100644 --- a/simd/i386/jccolext-sse2.asm +++ b/simd/i386/jccolext-sse2.asm @@ -107,12 +107,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2): test cl, SIZEOF_BYTE jz short .column_ld2 sub ecx, byte SIZEOF_BYTE - movzx eax, BYTE [esi+ecx] + movzx eax, byte [esi+ecx] .column_ld2: test cl, SIZEOF_WORD jz short .column_ld4 sub ecx, byte SIZEOF_WORD - movzx edx, WORD [esi+ecx] + movzx edx, word [esi+ecx] shl eax, WORD_BIT or eax, edx .column_ld4: diff --git a/simd/i386/jcgryext-avx2.asm b/simd/i386/jcgryext-avx2.asm index 5601a6a..3fa7973 100644 --- a/simd/i386/jcgryext-avx2.asm +++ b/simd/i386/jcgryext-avx2.asm @@ -100,12 +100,12 @@ EXTN(jsimd_rgb_gray_convert_avx2): test cl, SIZEOF_BYTE jz short .column_ld2 sub ecx, byte SIZEOF_BYTE - movzx eax, BYTE [esi+ecx] + movzx eax, byte [esi+ecx] .column_ld2: test cl, SIZEOF_WORD jz short .column_ld4 sub ecx, byte SIZEOF_WORD - movzx edx, WORD [esi+ecx] + movzx edx, word [esi+ecx] shl eax, WORD_BIT or eax, edx .column_ld4: diff --git a/simd/i386/jcgryext-mmx.asm b/simd/i386/jcgryext-mmx.asm index 727894f..8af42e5 100644 --- a/simd/i386/jcgryext-mmx.asm +++ b/simd/i386/jcgryext-mmx.asm @@ -101,13 +101,13 @@ EXTN(jsimd_rgb_gray_convert_mmx): jz short .column_ld2 sub ecx, byte SIZEOF_BYTE xor eax, eax - mov al, BYTE [esi+ecx] + mov al, byte [esi+ecx] .column_ld2: test cl, SIZEOF_WORD jz short .column_ld4 sub ecx, byte SIZEOF_WORD xor edx, edx - mov dx, WORD [esi+ecx] + mov dx, word [esi+ecx] shl eax, WORD_BIT or eax, edx .column_ld4: @@ -117,7 +117,7 @@ EXTN(jsimd_rgb_gray_convert_mmx): test cl, SIZEOF_DWORD jz short .column_ld8 sub ecx, byte SIZEOF_DWORD - movd mmG, DWORD [esi+ecx] + movd mmG, dword [esi+ecx] psllq mmA, DWORD_BIT por mmA, mmG .column_ld8: @@ -187,7 +187,7 @@ EXTN(jsimd_rgb_gray_convert_mmx): test cl, SIZEOF_MMWORD/8 jz short .column_ld2 sub ecx, byte SIZEOF_MMWORD/8 - movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] + movd mmA, dword [esi+ecx*RGB_PIXELSIZE] .column_ld2: test cl, SIZEOF_MMWORD/4 jz short .column_ld4 diff --git a/simd/i386/jcgryext-sse2.asm b/simd/i386/jcgryext-sse2.asm index 5b4559f..c9d6ff1 100644 --- a/simd/i386/jcgryext-sse2.asm +++ b/simd/i386/jcgryext-sse2.asm @@ -99,12 +99,12 @@ EXTN(jsimd_rgb_gray_convert_sse2): test cl, SIZEOF_BYTE jz short .column_ld2 sub ecx, byte SIZEOF_BYTE - movzx eax, BYTE [esi+ecx] + movzx eax, byte [esi+ecx] .column_ld2: test cl, SIZEOF_WORD jz short .column_ld4 sub ecx, byte SIZEOF_WORD - movzx edx, WORD [esi+ecx] + movzx edx, word [esi+ecx] shl eax, WORD_BIT or eax, edx .column_ld4: diff --git a/simd/i386/jchuff-sse2.asm b/simd/i386/jchuff-sse2.asm index 2b0b469..79f0ca5 100644 --- a/simd/i386/jchuff-sse2.asm +++ b/simd/i386/jchuff-sse2.asm @@ -195,8 +195,8 @@ EXTN(jsimd_huff_encode_one_block_sse2): push ebp mov esi, POINTER [eax+8] ; (working_state *state) - mov put_buffer, DWORD [esi+8] ; put_buffer = state->cur.put_buffer; - mov put_bits, DWORD [esi+12] ; put_bits = state->cur.put_bits; + mov put_buffer, dword [esi+8] ; put_buffer = state->cur.put_buffer; + mov put_bits, dword [esi+12] ; put_bits = state->cur.put_bits; push esi ; esi is now scratch get_GOT edx ; get GOT address @@ -212,7 +212,7 @@ EXTN(jsimd_huff_encode_one_block_sse2): ; Encode the DC coefficient difference per section F.1.2.1 mov esi, POINTER [esp+block] ; block movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val; - sub ecx, DWORD [eax+20] + sub ecx, dword [eax+20] mov esi, ecx ; This is a well-known technique for obtaining the absolute value @@ -227,12 +227,12 @@ EXTN(jsimd_huff_encode_one_block_sse2): ; For a negative input, want temp2 = bitwise complement of abs(input) ; This code assumes we are on a two's complement machine add esi, edx ; temp2 += temp3; - mov DWORD [esp+temp], esi ; backup temp2 in temp + mov dword [esp+temp], esi ; backup temp2 in temp ; Find the number of bits needed for the magnitude of the coefficient movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp) movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp); - mov DWORD [esp+temp2], edx ; backup nbits in temp2 + mov dword [esp+temp2], edx ; backup nbits in temp2 ; Emit the Huffman-coded symbol for the number of bits mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore @@ -240,13 +240,13 @@ EXTN(jsimd_huff_encode_one_block_sse2): movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits]; EMIT_BITS eax ; EMIT_BITS(code, size) - mov ecx, DWORD [esp+temp2] ; restore nbits + mov ecx, dword [esp+temp2] ; restore nbits ; Mask off any extra bits in code mov eax, 1 shl eax, cl dec eax - and eax, DWORD [esp+temp] ; temp2 &= (((JLONG)1)<>= r; - mov DWORD [esp+temp3], edx + mov dword [esp+temp3], edx .BRLOOP: cmp ecx, 16 ; while (r > 15) { jl near .ERLOOP sub ecx, 16 ; r -= 16; - mov DWORD [esp+temp], ecx + mov dword [esp+temp], ecx mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0]; movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0) - mov ecx, DWORD [esp+temp] + mov ecx, dword [esp+temp] jmp .BRLOOP .ERLOOP: movsx eax, word [esi] ; temp = t1[k]; movpic edx, POINTER [esp+gotptr] ; load GOT address (edx) movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp); - mov DWORD [esp+temp2], eax + mov dword [esp+temp2], eax ; Emit Huffman symbol for run length / number of bits shl ecx, 4 ; temp3 = (r << 4) + nbits; add ecx, eax @@ -314,13 +314,13 @@ EXTN(jsimd_huff_encode_one_block_sse2): movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k]; ; Mask off any extra bits in code - mov ecx, DWORD [esp+temp2] + mov ecx, dword [esp+temp2] mov eax, 1 shl eax, cl dec eax and eax, edx ; temp2 &= (((JLONG)1)<>= 1; @@ -350,29 +350,29 @@ EXTN(jsimd_huff_encode_one_block_sse2): shr edx, cl ; index >>= r; add ecx, eax lea esi, [esi+ecx*2] ; k += r; - mov DWORD [esp+temp3], edx + mov dword [esp+temp3], edx jmp .BRLOOP2 .BLOOP2: bsf ecx, edx ; r = __builtin_ctzl(index); jz near .ELOOP2 lea esi, [esi+ecx*2] ; k += r; shr edx, cl ; index >>= r; - mov DWORD [esp+temp3], edx + mov dword [esp+temp3], edx .BRLOOP2: cmp ecx, 16 ; while (r > 15) { jl near .ERLOOP2 sub ecx, 16 ; r -= 16; - mov DWORD [esp+temp], ecx + mov dword [esp+temp], ecx mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0]; movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0) - mov ecx, DWORD [esp+temp] + mov ecx, dword [esp+temp] jmp .BRLOOP2 .ERLOOP2: movsx eax, word [esi] ; temp = t1[k]; bsr eax, eax ; nbits = 32 - __builtin_clz(temp); inc eax - mov DWORD [esp+temp2], eax + mov dword [esp+temp2], eax ; Emit Huffman symbol for run length / number of bits shl ecx, 4 ; temp3 = (r << 4) + nbits; add ecx, eax @@ -382,13 +382,13 @@ EXTN(jsimd_huff_encode_one_block_sse2): movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k]; ; Mask off any extra bits in code - mov ecx, DWORD [esp+temp2] + mov ecx, dword [esp+temp2] mov eax, 1 shl eax, cl dec eax and eax, edx ; temp2 &= (((JLONG)1)<>= 1; @@ -405,8 +405,8 @@ EXTN(jsimd_huff_encode_one_block_sse2): mov eax, [esp+buffer] pop esi ; Save put_buffer & put_bits - mov DWORD [esi+8], put_buffer ; state->cur.put_buffer = put_buffer; - mov DWORD [esi+12], put_bits ; state->cur.put_bits = put_bits; + mov dword [esi+8], put_buffer ; state->cur.put_buffer = put_buffer; + mov dword [esi+12], put_bits ; state->cur.put_bits = put_bits; pop ebp pop edi diff --git a/simd/i386/jdcolext-avx2.asm b/simd/i386/jdcolext-avx2.asm index 66e2683..015be04 100644 --- a/simd/i386/jdcolext-avx2.asm +++ b/simd/i386/jdcolext-avx2.asm @@ -346,7 +346,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2): vmovd eax, xmmA cmp ecx, byte SIZEOF_WORD jb short .column_st1 - mov WORD [edi], ax + mov word [edi], ax add edi, byte SIZEOF_WORD sub ecx, byte SIZEOF_WORD shr eax, 16 @@ -355,7 +355,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2): ; space. test ecx, ecx jz short .nextrow - mov BYTE [edi], al + mov byte [edi], al %else ; RGB_PIXELSIZE == 4 ; ----------- diff --git a/simd/i386/jdcolext-mmx.asm b/simd/i386/jdcolext-mmx.asm index 29b6dd4..5813cfc 100644 --- a/simd/i386/jdcolext-mmx.asm +++ b/simd/i386/jdcolext-mmx.asm @@ -278,7 +278,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx): movd eax, mmA cmp ecx, byte SIZEOF_DWORD jb short .column_st2 - mov DWORD [edi+0*SIZEOF_DWORD], eax + mov dword [edi+0*SIZEOF_DWORD], eax psrlq mmA, DWORD_BIT movd eax, mmA sub ecx, byte SIZEOF_DWORD @@ -286,14 +286,14 @@ EXTN(jsimd_ycc_rgb_convert_mmx): .column_st2: cmp ecx, byte SIZEOF_WORD jb short .column_st1 - mov WORD [edi+0*SIZEOF_WORD], ax + mov word [edi+0*SIZEOF_WORD], ax shr eax, WORD_BIT sub ecx, byte SIZEOF_WORD add edi, byte SIZEOF_WORD .column_st1: cmp ecx, byte SIZEOF_BYTE jb short .nextrow - mov BYTE [edi+0*SIZEOF_BYTE], al + mov byte [edi+0*SIZEOF_BYTE], al %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -365,7 +365,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx): .column_st4: cmp ecx, byte SIZEOF_MMWORD/8 jb short .nextrow - movd DWORD [edi+0*SIZEOF_DWORD], mmA + movd dword [edi+0*SIZEOF_DWORD], mmA %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/i386/jdcolext-sse2.asm b/simd/i386/jdcolext-sse2.asm index 73b37de..d5572b3 100644 --- a/simd/i386/jdcolext-sse2.asm +++ b/simd/i386/jdcolext-sse2.asm @@ -318,7 +318,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movd eax, xmmA cmp ecx, byte SIZEOF_WORD jb short .column_st1 - mov WORD [edi], ax + mov word [edi], ax add edi, byte SIZEOF_WORD sub ecx, byte SIZEOF_WORD shr eax, 16 @@ -327,7 +327,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): ; space. test ecx, ecx jz short .nextrow - mov BYTE [edi], al + mov byte [edi], al %else ; RGB_PIXELSIZE == 4 ; ----------- diff --git a/simd/i386/jdmrgext-avx2.asm b/simd/i386/jdmrgext-avx2.asm index 3512c50..e35f728 100644 --- a/simd/i386/jdmrgext-avx2.asm +++ b/simd/i386/jdmrgext-avx2.asm @@ -352,7 +352,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2): vmovd eax, xmmA cmp ecx, byte SIZEOF_WORD jb short .column_st1 - mov WORD [edi], ax + mov word [edi], ax add edi, byte SIZEOF_WORD sub ecx, byte SIZEOF_WORD shr eax, 16 @@ -361,7 +361,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2): ; space. test ecx, ecx jz short .endcolumn - mov BYTE [edi], al + mov byte [edi], al %else ; RGB_PIXELSIZE == 4 ; ----------- diff --git a/simd/i386/jdmrgext-mmx.asm b/simd/i386/jdmrgext-mmx.asm index dab5c32..eb3e36b 100644 --- a/simd/i386/jdmrgext-mmx.asm +++ b/simd/i386/jdmrgext-mmx.asm @@ -281,7 +281,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx): movd eax, mmA cmp ecx, byte SIZEOF_DWORD jb short .column_st2 - mov DWORD [edi+0*SIZEOF_DWORD], eax + mov dword [edi+0*SIZEOF_DWORD], eax psrlq mmA, DWORD_BIT movd eax, mmA sub ecx, byte SIZEOF_DWORD @@ -289,14 +289,14 @@ EXTN(jsimd_h2v1_merged_upsample_mmx): .column_st2: cmp ecx, byte SIZEOF_WORD jb short .column_st1 - mov WORD [edi+0*SIZEOF_WORD], ax + mov word [edi+0*SIZEOF_WORD], ax shr eax, WORD_BIT sub ecx, byte SIZEOF_WORD add edi, byte SIZEOF_WORD .column_st1: cmp ecx, byte SIZEOF_BYTE jb short .endcolumn - mov BYTE [edi+0*SIZEOF_BYTE], al + mov byte [edi+0*SIZEOF_BYTE], al %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -371,7 +371,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx): .column_st4: cmp ecx, byte SIZEOF_MMWORD/8 jb short .endcolumn - movd DWORD [edi+0*SIZEOF_DWORD], mmA + movd dword [edi+0*SIZEOF_DWORD], mmA %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/i386/jdmrgext-sse2.asm b/simd/i386/jdmrgext-sse2.asm index 91295ff..c113dc4 100644 --- a/simd/i386/jdmrgext-sse2.asm +++ b/simd/i386/jdmrgext-sse2.asm @@ -323,7 +323,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movd eax, xmmA cmp ecx, byte SIZEOF_WORD jb short .column_st1 - mov WORD [edi], ax + mov word [edi], ax add edi, byte SIZEOF_WORD sub ecx, byte SIZEOF_WORD shr eax, 16 @@ -332,7 +332,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): ; space. test ecx, ecx jz short .endcolumn - mov BYTE [edi], al + mov byte [edi], al %else ; RGB_PIXELSIZE == 4 ; ----------- diff --git a/simd/i386/jidctflt-3dn.asm b/simd/i386/jidctflt-3dn.asm index 396f36a..8795191 100644 --- a/simd/i386/jidctflt-3dn.asm +++ b/simd/i386/jidctflt-3dn.asm @@ -90,23 +90,23 @@ EXTN(jsimd_idct_float_3dnow): alignx 16, 7 .columnloop: %ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] jnz short .columnDCT pushpic ebx ; save GOT address - mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] - mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] - or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] - or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] + mov ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] + or ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] + or ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] or eax, ebx poppic ebx ; restore GOT address jnz short .columnDCT ; -- AC terms all zero - movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] + movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] punpcklwd mm0, mm0 psrad mm0, (DWORD_BIT-WORD_BIT) @@ -133,10 +133,10 @@ EXTN(jsimd_idct_float_3dnow): ; -- Even part - movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] - movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] - movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] + movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] + movd mm1, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + movd mm2, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] + movd mm3, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] punpcklwd mm0, mm0 punpcklwd mm1, mm1 @@ -180,10 +180,10 @@ EXTN(jsimd_idct_float_3dnow): ; -- Odd part - movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] - movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] - movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] + movd mm2, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + movd mm3, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] + movd mm5, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] + movd mm1, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] punpcklwd mm2, mm2 punpcklwd mm3, mm3 diff --git a/simd/i386/jidctflt-sse.asm b/simd/i386/jidctflt-sse.asm index 6236fd5..b27ecfd 100644 --- a/simd/i386/jidctflt-sse.asm +++ b/simd/i386/jidctflt-sse.asm @@ -100,8 +100,8 @@ EXTN(jsimd_idct_float_sse): alignx 16, 7 .columnloop: %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] jnz near .columnDCT movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] diff --git a/simd/i386/jidctflt-sse2.asm b/simd/i386/jidctflt-sse2.asm index 0b38937..c646eae 100644 --- a/simd/i386/jidctflt-sse2.asm +++ b/simd/i386/jidctflt-sse2.asm @@ -100,8 +100,8 @@ EXTN(jsimd_idct_float_sse2): alignx 16, 7 .columnloop: %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] jnz near .columnDCT movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] diff --git a/simd/i386/jidctfst-mmx.asm b/simd/i386/jidctfst-mmx.asm index c69bffa..24622d4 100644 --- a/simd/i386/jidctfst-mmx.asm +++ b/simd/i386/jidctfst-mmx.asm @@ -121,8 +121,8 @@ EXTN(jsimd_idct_ifast_mmx): alignx 16, 7 .columnloop: %ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] jnz short .columnDCT movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] diff --git a/simd/i386/jidctfst-sse2.asm b/simd/i386/jidctfst-sse2.asm index 03bac9d..19704ff 100644 --- a/simd/i386/jidctfst-sse2.asm +++ b/simd/i386/jidctfst-sse2.asm @@ -116,8 +116,8 @@ EXTN(jsimd_idct_ifast_sse2): mov esi, JCOEFPTR [coef_block(eax)] ; inptr %ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] jnz near .columnDCT movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] diff --git a/simd/i386/jidctint-avx2.asm b/simd/i386/jidctint-avx2.asm index 1e94541..c371985 100644 --- a/simd/i386/jidctint-avx2.asm +++ b/simd/i386/jidctint-avx2.asm @@ -318,8 +318,8 @@ EXTN(jsimd_idct_islow_avx2): mov esi, JCOEFPTR [coef_block(eax)] ; inptr %ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2 - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] jnz near .columnDCT movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] diff --git a/simd/i386/jidctint-mmx.asm b/simd/i386/jidctint-mmx.asm index 281e5b3..4f07f56 100644 --- a/simd/i386/jidctint-mmx.asm +++ b/simd/i386/jidctint-mmx.asm @@ -134,8 +134,8 @@ EXTN(jsimd_idct_islow_mmx): alignx 16, 7 .columnloop: %ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] jnz short .columnDCT movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] diff --git a/simd/i386/jidctint-sse2.asm b/simd/i386/jidctint-sse2.asm index acb1140..e442fdd 100644 --- a/simd/i386/jidctint-sse2.asm +++ b/simd/i386/jidctint-sse2.asm @@ -129,8 +129,8 @@ EXTN(jsimd_idct_islow_sse2): mov esi, JCOEFPTR [coef_block(eax)] ; inptr %ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] jnz near .columnDCT movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] diff --git a/simd/i386/jidctred-mmx.asm b/simd/i386/jidctred-mmx.asm index ad3fcb1..e2307e1 100644 --- a/simd/i386/jidctred-mmx.asm +++ b/simd/i386/jidctred-mmx.asm @@ -142,8 +142,8 @@ EXTN(jsimd_idct_4x4_mmx): alignx 16, 7 .columnloop: %ifndef NO_ZERO_COLUMN_TEST_4X4_MMX - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] jnz short .columnDCT movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] @@ -462,16 +462,16 @@ EXTN(jsimd_idct_4x4_mmx): mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] - movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 - movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 + movd dword [edx+eax*SIZEOF_JSAMPLE], mm1 + movd dword [esi+eax*SIZEOF_JSAMPLE], mm0 psrlq mm1, 4*BYTE_BIT psrlq mm0, 4*BYTE_BIT mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] - movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 - movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 + movd dword [edx+eax*SIZEOF_JSAMPLE], mm1 + movd dword [esi+eax*SIZEOF_JSAMPLE], mm0 emms ; empty MMX state @@ -686,8 +686,8 @@ EXTN(jsimd_idct_2x2_mmx): mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - mov WORD [edx+eax*SIZEOF_JSAMPLE], bx - mov WORD [esi+eax*SIZEOF_JSAMPLE], cx + mov word [edx+eax*SIZEOF_JSAMPLE], bx + mov word [esi+eax*SIZEOF_JSAMPLE], cx emms ; empty MMX state diff --git a/simd/i386/jidctred-sse2.asm b/simd/i386/jidctred-sse2.asm index c00d0e4..6e56494 100644 --- a/simd/i386/jidctred-sse2.asm +++ b/simd/i386/jidctred-sse2.asm @@ -137,8 +137,8 @@ EXTN(jsimd_idct_4x4_sse2): mov esi, JCOEFPTR [coef_block(eax)] ; inptr %ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2 - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] jnz short .columnDCT movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] @@ -576,8 +576,8 @@ EXTN(jsimd_idct_2x2_sse2): mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - mov WORD [edx+eax*SIZEOF_JSAMPLE], bx - mov WORD [esi+eax*SIZEOF_JSAMPLE], cx + mov word [edx+eax*SIZEOF_JSAMPLE], bx + mov word [esi+eax*SIZEOF_JSAMPLE], cx pop edi pop esi diff --git a/simd/nasm/jsimdext.inc b/simd/nasm/jsimdext.inc index 383b709..11fe8ef 100644 --- a/simd/nasm/jsimdext.inc +++ b/simd/nasm/jsimdext.inc @@ -165,19 +165,19 @@ section .note.GNU-stack noalloc noexec nowrite progbits %define XMM_DWORD %define XMM_MMWORD -%define SIZEOF_BYTE 1 ; sizeof(BYTE) -%define SIZEOF_WORD 2 ; sizeof(WORD) -%define SIZEOF_DWORD 4 ; sizeof(DWORD) -%define SIZEOF_QWORD 8 ; sizeof(QWORD) -%define SIZEOF_OWORD 16 ; sizeof(OWORD) -%define SIZEOF_YWORD 32 ; sizeof(YWORD) +%define SIZEOF_BYTE 1 ; sizeof(byte) +%define SIZEOF_WORD 2 ; sizeof(word) +%define SIZEOF_DWORD 4 ; sizeof(dword) +%define SIZEOF_QWORD 8 ; sizeof(qword) +%define SIZEOF_OWORD 16 ; sizeof(oword) +%define SIZEOF_YWORD 32 ; sizeof(yword) %define BYTE_BIT 8 ; CHAR_BIT in C -%define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT -%define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT -%define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT -%define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT -%define YWORD_BIT 256 ; sizeof(YWORD)*BYTE_BIT +%define WORD_BIT 16 ; sizeof(word)*BYTE_BIT +%define DWORD_BIT 32 ; sizeof(dword)*BYTE_BIT +%define QWORD_BIT 64 ; sizeof(qword)*BYTE_BIT +%define OWORD_BIT 128 ; sizeof(oword)*BYTE_BIT +%define YWORD_BIT 256 ; sizeof(yword)*BYTE_BIT ; -------------------------------------------------------------------------- ; External Symbol Name diff --git a/simd/x86_64/jccolext-avx2.asm b/simd/x86_64/jccolext-avx2.asm index 61f3b9e..10d2834 100644 --- a/simd/x86_64/jccolext-avx2.asm +++ b/simd/x86_64/jccolext-avx2.asm @@ -94,12 +94,12 @@ EXTN(jsimd_rgb_ycc_convert_avx2): test cl, SIZEOF_BYTE jz short .column_ld2 sub rcx, byte SIZEOF_BYTE - movzx rax, BYTE [rsi+rcx] + movzx rax, byte [rsi+rcx] .column_ld2: test cl, SIZEOF_WORD jz short .column_ld4 sub rcx, byte SIZEOF_WORD - movzx rdx, WORD [rsi+rcx] + movzx rdx, word [rsi+rcx] shl rax, WORD_BIT or rax, rdx .column_ld4: diff --git a/simd/x86_64/jccolext-sse2.asm b/simd/x86_64/jccolext-sse2.asm index 9df4f7f..2c914d3 100644 --- a/simd/x86_64/jccolext-sse2.asm +++ b/simd/x86_64/jccolext-sse2.asm @@ -93,12 +93,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2): test cl, SIZEOF_BYTE jz short .column_ld2 sub rcx, byte SIZEOF_BYTE - movzx rax, BYTE [rsi+rcx] + movzx rax, byte [rsi+rcx] .column_ld2: test cl, SIZEOF_WORD jz short .column_ld4 sub rcx, byte SIZEOF_WORD - movzx rdx, WORD [rsi+rcx] + movzx rdx, word [rsi+rcx] shl rax, WORD_BIT or rax, rdx .column_ld4: diff --git a/simd/x86_64/jcgryext-avx2.asm b/simd/x86_64/jcgryext-avx2.asm index 20c48c1..175b60d 100644 --- a/simd/x86_64/jcgryext-avx2.asm +++ b/simd/x86_64/jcgryext-avx2.asm @@ -86,12 +86,12 @@ EXTN(jsimd_rgb_gray_convert_avx2): test cl, SIZEOF_BYTE jz short .column_ld2 sub rcx, byte SIZEOF_BYTE - movzx rax, BYTE [rsi+rcx] + movzx rax, byte [rsi+rcx] .column_ld2: test cl, SIZEOF_WORD jz short .column_ld4 sub rcx, byte SIZEOF_WORD - movzx rdx, WORD [rsi+rcx] + movzx rdx, word [rsi+rcx] shl rax, WORD_BIT or rax, rdx .column_ld4: diff --git a/simd/x86_64/jcgryext-sse2.asm b/simd/x86_64/jcgryext-sse2.asm index 70e6891..873be80 100644 --- a/simd/x86_64/jcgryext-sse2.asm +++ b/simd/x86_64/jcgryext-sse2.asm @@ -85,12 +85,12 @@ EXTN(jsimd_rgb_gray_convert_sse2): test cl, SIZEOF_BYTE jz short .column_ld2 sub rcx, byte SIZEOF_BYTE - movzx rax, BYTE [rsi+rcx] + movzx rax, byte [rsi+rcx] .column_ld2: test cl, SIZEOF_WORD jz short .column_ld4 sub rcx, byte SIZEOF_WORD - movzx rdx, WORD [rsi+rcx] + movzx rdx, word [rsi+rcx] shl rax, WORD_BIT or rax, rdx .column_ld4: diff --git a/simd/x86_64/jchuff-sse2.asm b/simd/x86_64/jchuff-sse2.asm index d49be5b..aa78fd5 100644 --- a/simd/x86_64/jchuff-sse2.asm +++ b/simd/x86_64/jchuff-sse2.asm @@ -198,7 +198,7 @@ EXTN(jsimd_huff_encode_one_block_sse2): mov buffer, r11 ; r11 is now sratch mov put_buffer, MMWORD [r10+16] ; put_buffer = state->cur.put_buffer; - mov put_bits, DWORD [r10+24] ; put_bits = state->cur.put_bits; + mov put_bits, dword [r10+24] ; put_bits = state->cur.put_bits; push r10 ; r10 is now scratch ; Encode the DC coefficient difference per section F.1.2.1 @@ -331,7 +331,7 @@ EXTN(jsimd_huff_encode_one_block_sse2): pop r10 ; Save put_buffer & put_bits mov MMWORD [r10+16], put_buffer ; state->cur.put_buffer = put_buffer; - mov DWORD [r10+24], put_bits ; state->cur.put_bits = put_bits; + mov dword [r10+24], put_bits ; state->cur.put_bits = put_bits; pop rbx uncollect_args 6 diff --git a/simd/x86_64/jdcolext-avx2.asm b/simd/x86_64/jdcolext-avx2.asm index 4b43baa..677b8ed 100644 --- a/simd/x86_64/jdcolext-avx2.asm +++ b/simd/x86_64/jdcolext-avx2.asm @@ -332,7 +332,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2): vmovd eax, xmmA cmp rcx, byte SIZEOF_WORD jb short .column_st1 - mov WORD [rdi], ax + mov word [rdi], ax add rdi, byte SIZEOF_WORD sub rcx, byte SIZEOF_WORD shr rax, 16 @@ -341,7 +341,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2): ; space. test rcx, rcx jz short .nextrow - mov BYTE [rdi], al + mov byte [rdi], al %else ; RGB_PIXELSIZE == 4 ; ----------- diff --git a/simd/x86_64/jdcolext-sse2.asm b/simd/x86_64/jdcolext-sse2.asm index e5bd0ee..071aa62 100644 --- a/simd/x86_64/jdcolext-sse2.asm +++ b/simd/x86_64/jdcolext-sse2.asm @@ -304,7 +304,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movd eax, xmmA cmp rcx, byte SIZEOF_WORD jb short .column_st1 - mov WORD [rdi], ax + mov word [rdi], ax add rdi, byte SIZEOF_WORD sub rcx, byte SIZEOF_WORD shr rax, 16 @@ -313,7 +313,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): ; space. test rcx, rcx jz short .nextrow - mov BYTE [rdi], al + mov byte [rdi], al %else ; RGB_PIXELSIZE == 4 ; ----------- diff --git a/simd/x86_64/jdmrgext-avx2.asm b/simd/x86_64/jdmrgext-avx2.asm index 666d2ca..bb733c5 100644 --- a/simd/x86_64/jdmrgext-avx2.asm +++ b/simd/x86_64/jdmrgext-avx2.asm @@ -337,7 +337,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2): vmovd eax, xmmA cmp rcx, byte SIZEOF_WORD jb short .column_st1 - mov WORD [rdi], ax + mov word [rdi], ax add rdi, byte SIZEOF_WORD sub rcx, byte SIZEOF_WORD shr rax, 16 @@ -346,7 +346,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2): ; space. test rcx, rcx jz short .endcolumn - mov BYTE [rdi], al + mov byte [rdi], al %else ; RGB_PIXELSIZE == 4 ; ----------- diff --git a/simd/x86_64/jdmrgext-sse2.asm b/simd/x86_64/jdmrgext-sse2.asm index 4fa69af..b176a4c 100644 --- a/simd/x86_64/jdmrgext-sse2.asm +++ b/simd/x86_64/jdmrgext-sse2.asm @@ -308,7 +308,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movd eax, xmmA cmp rcx, byte SIZEOF_WORD jb short .column_st1 - mov WORD [rdi], ax + mov word [rdi], ax add rdi, byte SIZEOF_WORD sub rcx, byte SIZEOF_WORD shr rax, 16 @@ -317,7 +317,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): ; space. test rcx, rcx jz short .endcolumn - mov BYTE [rdi], al + mov byte [rdi], al %else ; RGB_PIXELSIZE == 4 ; ----------- diff --git a/simd/x86_64/jidctflt-sse2.asm b/simd/x86_64/jidctflt-sse2.asm index 95aff82..ab95e1a 100644 --- a/simd/x86_64/jidctflt-sse2.asm +++ b/simd/x86_64/jidctflt-sse2.asm @@ -93,8 +93,8 @@ EXTN(jsimd_idct_float_sse2): mov rcx, DCTSIZE/4 ; ctr .columnloop: %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE - mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] jnz near .columnDCT movq xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)] diff --git a/simd/x86_64/jidctfst-sse2.asm b/simd/x86_64/jidctfst-sse2.asm index 03ca13f..a66a681 100644 --- a/simd/x86_64/jidctfst-sse2.asm +++ b/simd/x86_64/jidctfst-sse2.asm @@ -109,8 +109,8 @@ EXTN(jsimd_idct_ifast_sse2): mov rsi, r11 ; inptr %ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 - mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] jnz near .columnDCT movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] diff --git a/simd/x86_64/jidctint-avx2.asm b/simd/x86_64/jidctint-avx2.asm index b36cb31..50270f4 100644 --- a/simd/x86_64/jidctint-avx2.asm +++ b/simd/x86_64/jidctint-avx2.asm @@ -290,8 +290,8 @@ EXTN(jsimd_idct_islow_avx2): ; ---- Pass 1: process columns. %ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2 - mov eax, DWORD [DWBLOCK(1,0,r11,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,r11,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,r11,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,r11,SIZEOF_JCOEF)] jnz near .columnDCT movdqa xmm0, XMMWORD [XMMBLOCK(1,0,r11,SIZEOF_JCOEF)] diff --git a/simd/x86_64/jidctint-sse2.asm b/simd/x86_64/jidctint-sse2.asm index 0f52a6e..034530c 100644 --- a/simd/x86_64/jidctint-sse2.asm +++ b/simd/x86_64/jidctint-sse2.asm @@ -122,8 +122,8 @@ EXTN(jsimd_idct_islow_sse2): mov rsi, r11 ; inptr %ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 - mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] jnz near .columnDCT movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] diff --git a/simd/x86_64/jidctred-sse2.asm b/simd/x86_64/jidctred-sse2.asm index 133a59a..7fbfcc5 100644 --- a/simd/x86_64/jidctred-sse2.asm +++ b/simd/x86_64/jidctred-sse2.asm @@ -130,8 +130,8 @@ EXTN(jsimd_idct_4x4_sse2): mov rsi, r11 ; inptr %ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2 - mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] jnz short .columnDCT movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] @@ -560,8 +560,8 @@ EXTN(jsimd_idct_2x2_sse2): mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] - mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx - mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx + mov word [rdx+rax*SIZEOF_JSAMPLE], bx + mov word [rsi+rax*SIZEOF_JSAMPLE], cx pop rbx uncollect_args 4