From 49eaa7572d490471e0417c10a79fea086d956f62 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 27 Sep 2013 17:39:57 +0000 Subject: [PATCH] SIMD-optimized RGB-to-grayscale conversion for MIPS DSPr2 git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1045 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 2 +- simd/jsimd.h | 31 +++++++++- simd/jsimd_mips.c | 48 ++++++++++++++- simd/jsimd_mips_dspr2.S | 128 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 206 insertions(+), 3 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index fde8c49..c8103e0 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -8,7 +8,7 @@ extended to support image scaling. [2] Added SIMD acceleration for performing color conversion, downsampling, and upsampling on DSPr2-capable MIPS platforms. This speeds up the compression -of full-color JPEGs by 6-18% on such platforms and decompression by 3-12%. +of full-color JPEGs by 6-21% on such platforms and decompression by 6-17%. [3] Added support for 4:1:1 subsampling to the TurboJPEG API. This is mainly included for compatibility, since 4:1:1 is not fully accelerated in diff --git a/simd/jsimd.h b/simd/jsimd.h index 907e852..807fd7f 100644 --- a/simd/jsimd.h +++ b/simd/jsimd.h @@ -4,7 +4,7 @@ * Copyright 2009 Pierre Ossman for Cendio AB * Copyright 2011 D. R. Commander * Copyright (C) 2013, MIPS Technologies, Inc., California - * + * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. * For conditions of distribution and use, see copyright notice in jsimdext.inc @@ -417,6 +417,35 @@ EXTERN(void) jsimd_extxrgb_ycc_convert_mips_dspr2 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)); +EXTERN(void) jsimd_rgb_gray_convert_mips_dspr2 + JPP((JDIMENSION img_width, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows)); +EXTERN(void) jsimd_extrgb_gray_convert_mips_dspr2 + JPP((JDIMENSION img_width, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows)); +EXTERN(void) jsimd_extrgbx_gray_convert_mips_dspr2 + JPP((JDIMENSION img_width, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows)); +EXTERN(void) jsimd_extbgr_gray_convert_mips_dspr2 + JPP((JDIMENSION img_width, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows)); +EXTERN(void) jsimd_extbgrx_gray_convert_mips_dspr2 + JPP((JDIMENSION img_width, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows)); +EXTERN(void) jsimd_extxbgr_gray_convert_mips_dspr2 + JPP((JDIMENSION img_width, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows)); +EXTERN(void) jsimd_extxrgb_gray_convert_mips_dspr2 + JPP((JDIMENSION img_width, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows)); + EXTERN (void) jsimd_ycc_rgb_convert_mips_dspr2 JPP((JDIMENSION img_width, JSAMPIMAGE input_buf, JDIMENSION input_row, diff --git a/simd/jsimd_mips.c b/simd/jsimd_mips.c index e0cefb0..63edcf6 100644 --- a/simd/jsimd_mips.c +++ b/simd/jsimd_mips.c @@ -4,7 +4,7 @@ * Copyright 2009 Pierre Ossman for Cendio AB * Copyright 2009-2011 D. R. Commander * Copyright (C) 2013, MIPS Technologies, Inc., California - * + * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. * For conditions of distribution and use, see copyright notice in jsimdext.inc @@ -100,6 +100,18 @@ jsimd_can_rgb_ycc (void) GLOBAL(int) jsimd_can_rgb_gray (void) { + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + return 0; } @@ -167,6 +179,40 @@ jsimd_rgb_gray_convert (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { + void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + switch(cinfo->in_color_space) + { + case JCS_EXT_RGB: + mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mipsdspr2fct=jsimd_extrgbx_gray_convert_mips_dspr2; + break; + case JCS_EXT_BGR: + mipsdspr2fct=jsimd_extbgr_gray_convert_mips_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mipsdspr2fct=jsimd_extbgrx_gray_convert_mips_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mipsdspr2fct=jsimd_extxbgr_gray_convert_mips_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mipsdspr2fct=jsimd_extxrgb_gray_convert_mips_dspr2; + break; + default: + mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2; + break; + } + + if (simd_support & JSIMD_MIPS_DSPR2) + mipsdspr2fct(cinfo->image_width, input_buf, + output_buf, output_row, num_rows); + } GLOBAL(void) diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S index b7136b5..55635b5 100644 --- a/simd/jsimd_mips_dspr2.S +++ b/simd/jsimd_mips_dspr2.S @@ -247,6 +247,134 @@ GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0, 3 GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1, 0 GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3, 0 +/*****************************************************************************/ +/* + * jsimd_extrgb_gray_convert_mips_dspr2 + * jsimd_extbgr_gray_convert_mips_dspr2 + * jsimd_extrgbx_gray_convert_mips_dspr2 + * jsimd_extbgrx_gray_convert_mips_dspr2 + * jsimd_extxbgr_gray_convert_mips_dspr2 + * jsimd_extxrgb_gray_convert_mips_dspr2 + * + * Colorspace conversion RGB -> GRAY + */ + +.macro GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs + +.macro DO_RGB_TO_GRAY r, \ + g, \ + b, \ + inptr + lbu \r, \r_offs(\inptr) + lbu \g, \g_offs(\inptr) + lbu \b, \b_offs(\inptr) + addiu \inptr, \pixel_size +.endm + +LEAF_MIPS_DSPR2(jsimd_\colorid\()_gray_convert_mips_dspr2) +/* + * a0 - cinfo->image_width + * a1 - input_buf + * a2 - output_buf + * a3 - output_row + * 16(sp) - num_rows + */ + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + li s0, 0x4c8b // s0 = FIX(0.29900) + li s1, 0x9646 // s1 = FIX(0.58700) + li s2, 0x1d2f // s2 = FIX(0.11400) + li s7, 0x8000 // s7 = FIX(0.50000) + lw s6, 48(sp) + andi t7, a0, 3 + +0: + addiu s6, -1 // s6 = num_rows + lw t0, 0(a1) + lw t1, 0(a2) + sll t3, a3, 2 + lwx t1, t3(t1) + addiu a3, 1 + addu t9, t1, a0 + subu t8, t9, t7 + beq t1, t8, 2f + nop + +1: + DO_RGB_TO_GRAY t3, t4, t5, t0 + DO_RGB_TO_GRAY s3, s4, s5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + maddu $ac0, s0, t3 + mtlo s7, $ac1 + maddu $ac1, s2, s5 + maddu $ac1, s1, s4 + maddu $ac1, s0, s3 + extr.w t6, $ac0, 16 + + DO_RGB_TO_GRAY t3, t4, t5, t0 + DO_RGB_TO_GRAY s3, s4, s5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + extr.w t2, $ac1, 16 + maddu $ac0, s0, t3 + mtlo s7, $ac1 + maddu $ac1, s2, s5 + maddu $ac1, s1, s4 + maddu $ac1, s0, s3 + extr.w t5, $ac0, 16 + sb t6, 0(t1) + sb t2, 1(t1) + extr.w t3, $ac1, 16 + addiu t1, 4 + sb t5, -2(t1) + sb t3, -1(t1) + bne t1, t8, 1b + nop + +2: + beqz t7, 4f + nop + +3: + DO_RGB_TO_GRAY t3, t4, t5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + maddu $ac0, s0, t3 + extr.w t6, $ac0, 16 + sb t6, 0(t1) + addiu t1, 1 + bne t1, t9, 3b + nop + +4: + bgtz s6, 0b + addiu a1, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_\colorid\()_gray_convert_mips_dspr2) + +.purgem DO_RGB_TO_GRAY + +.endm + +/*------------------------------------------id -- pix R G B */ +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1 +GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3 /*****************************************************************************/ /* * jsimd_h2v2_fancy_upsample_mips_dspr2 -- 2.40.0