From 795e6ad334d86feb4ba8e509dd126a995a8f9972 Mon Sep 17 00:00:00 2001 From: DRC Date: Thu, 1 Dec 2011 11:14:18 +0000 Subject: [PATCH] Fixed non-fatal out-of-bounds read in SSE2 SIMD code reported by valgrind when decompressing a JPEG image to a bitmap buffer whose size was not a multiple of 16 bytes. git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.0.x@729 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 6 +++++ simd/jdclrss2-64.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++ simd/jdclrss2.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++ simd/jdmrgss2-64.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++ simd/jdmrgss2.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++ simd/jsimdext.inc | 2 ++ 6 files changed, 220 insertions(+) diff --git a/ChangeLog.txt b/ChangeLog.txt index a10f8e3..d482563 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -45,6 +45,12 @@ occurred when the application was invoked using I/O redirection fail to compile if the Windows system headers were included before jpeglib.h. This issue was caused by a conflict in the definition of the INT32 type. +[9] Fixed out-of-bounds read in SSE2 SIMD code that occurred when decompressing +a JPEG image to a bitmap buffer whose size was not a multiple of 16 bytes. +This was more of an annoyance than an actual bug, since it did not cause any +actual run-time problems, but the issue showed up when running libjpeg-turbo in +valgrind. See http://crbug.com/72399 for more information. + 1.0.1 ===== diff --git a/simd/jdclrss2-64.asm b/simd/jdclrss2-64.asm index 4282bd2..0acf188 100644 --- a/simd/jdclrss2-64.asm +++ b/simd/jdclrss2-64.asm @@ -292,6 +292,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. 
+ cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + mov BYTE [rdi], al +%else mov rax,rcx xor rcx, byte 0x0F shl rcx, 2 @@ -331,6 +366,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -415,6 +451,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. 
+ test rcx, rcx + jz short .nextrow + movd DWORD [rdi], xmmA +%else cmp rcx, byte SIZEOF_XMMWORD/16 jb near .nextrow mov rax,rcx @@ -454,6 +506,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdclrss2.asm b/simd/jdclrss2.asm index 865fa82..71547ba 100644 --- a/simd/jdclrss2.asm +++ b/simd/jdclrss2.asm @@ -304,6 +304,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. 
+ test ecx, ecx + jz short .nextrow + mov BYTE [edi], al +%else mov eax,ecx xor ecx, byte 0x0F shl ecx, 2 @@ -343,6 +378,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -428,6 +464,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/8*4 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + movd DWORD [edi], xmmA +%else cmp ecx, byte SIZEOF_XMMWORD/16 jb short .nextrow mov eax,ecx @@ -467,6 +519,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdmrgss2-64.asm b/simd/jdmrgss2-64.asm index 121bb82..36e2582 100644 --- a/simd/jdmrgss2-64.asm +++ b/simd/jdmrgss2-64.asm @@ -296,6 +296,41 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. 
+ cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .endcolumn + mov BYTE [rdi], al +%else mov rax,rcx xor rcx, byte 0x0F shl rcx, 2 @@ -335,6 +370,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -422,6 +458,22 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. 
+ test rcx, rcx + jz short .endcolumn + movd DWORD [rdi], xmmA +%else cmp rcx, byte SIZEOF_XMMWORD/16 jb near .endcolumn mov rax,rcx @@ -461,6 +513,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdmrgss2.asm b/simd/jdmrgss2.asm index 99b7eb9..6a0dbd9 100644 --- a/simd/jdmrgss2.asm +++ b/simd/jdmrgss2.asm @@ -309,6 +309,41 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. 
+ test ecx, ecx + jz short .endcolumn + mov BYTE [edi], al +%else mov eax,ecx xor ecx, byte 0x0F shl ecx, 2 @@ -348,6 +383,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -436,6 +472,22 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space, then shift xmmA right by 8 bytes (psrldq counts bytes, not bits). + cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/8*4 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .endcolumn + movd DWORD [edi], xmmA +%else cmp ecx, byte SIZEOF_XMMWORD/16 jb short .endcolumn mov eax,ecx @@ -475,6 +527,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jsimdext.inc b/simd/jsimdext.inc index c4297f9..12a04c2 100644 --- a/simd/jsimdext.inc +++ b/simd/jsimdext.inc @@ -76,6 +76,8 @@ section .note.GNU-stack noalloc noexec nowrite progbits %define SEG_CONST .rodata progbits alloc noexec nowrite align=16 %endif +%define STRICT_MEMORY_ACCESS 1 + ; To make the code position-independent, append -DPIC to the commandline ; %define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC -- 2.40.0