From 5ef463056ae22f24c3915ba7ab03eefd5bb6fde7 Mon Sep 17 00:00:00 2001
From: DRC
Date: Sun, 18 May 2014 20:04:47 +0000
Subject: [PATCH] SIMD-accelerated int upsample routine for MIPS DSPr2

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1315 632fc199-4ca6-4c93-a231-07263d6284db
---
 jccolor.c               |  1 +
 jcsample.c              |  1 +
 jddctmgr.c              |  1 +
 jdsample.c              |  8 +++-
 jsimd.h                 |  4 ++
 jsimd_none.c            | 12 ++++++
 simd/jsimd.h            |  6 +++
 simd/jsimd_mips.c       | 49 +++++++++++++++++++++++
 simd/jsimd_mips_dspr2.S | 88 +++++++++++++++++++++++++++++++++++++++++
 9 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/jccolor.c b/jccolor.c
index 0af1224..4be75f7 100644
--- a/jccolor.c
+++ b/jccolor.c
@@ -6,6 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman for Cendio AB
  * Copyright (C) 2009-2012, D. R. Commander.
+ * Copyright (C) 2014, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains input colorspace conversion routines.
diff --git a/jcsample.c b/jcsample.c
index d8c376b..286f758 100644
--- a/jcsample.c
+++ b/jcsample.c
@@ -5,6 +5,7 @@
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman for Cendio AB
+ * Copyright (C) 2014, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains downsampling routines.
diff --git a/jddctmgr.c b/jddctmgr.c
index 93d5009..40e68f1 100644
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -7,6 +7,7 @@
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman for Cendio AB
  * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2013, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the inverse-DCT management logic.
diff --git a/jdsample.c b/jdsample.c
index 66f2c04..2752966 100644
--- a/jdsample.c
+++ b/jdsample.c
@@ -6,6 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman for Cendio AB
  * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2014, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains upsampling routines.
@@ -482,7 +483,12 @@ jinit_upsampler (j_decompress_ptr cinfo)
     } else if ((h_out_group % h_in_group) == 0 &&
                (v_out_group % v_in_group) == 0) {
       /* Generic integral-factors upsampling method */
-      upsample->methods[ci] = int_upsample;
+#if defined(__mips__)
+      if (jsimd_can_int_upsample())
+        upsample->methods[ci] = jsimd_int_upsample;
+      else
+#endif
+        upsample->methods[ci] = int_upsample;
       upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
       upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
     } else
diff --git a/jsimd.h b/jsimd.h
index 663c969..d45fd70 100644
--- a/jsimd.h
+++ b/jsimd.h
@@ -47,6 +47,7 @@ EXTERN(void) jsimd_h2v1_downsample
 
 EXTERN(int) jsimd_can_h2v2_upsample (void);
 EXTERN(int) jsimd_can_h2v1_upsample (void);
+EXTERN(int) jsimd_can_int_upsample (void);
 
 EXTERN(void) jsimd_h2v2_upsample
         (j_decompress_ptr cinfo, jpeg_component_info * compptr,
@@ -54,6 +55,9 @@ EXTERN(void) jsimd_h2v2_upsample
 EXTERN(void) jsimd_h2v1_upsample
         (j_decompress_ptr cinfo, jpeg_component_info * compptr,
          JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_int_upsample
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
 
 EXTERN(int) jsimd_can_h2v2_fancy_upsample (void);
 EXTERN(int) jsimd_can_h2v1_fancy_upsample (void);
diff --git a/jsimd_none.c b/jsimd_none.c
index bdba693..96a9842 100644
--- a/jsimd_none.c
+++ b/jsimd_none.c
@@ -118,6 +118,18 @@ jsimd_can_h2v1_upsample (void)
   return 0;
 }
 
+GLOBAL(int)
+jsimd_can_int_upsample (void)
+{
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                    JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+}
+
 GLOBAL(void)
 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
                      jpeg_component_info * compptr,
diff --git a/simd/jsimd.h b/simd/jsimd.h
index 776edbb..609b91f 100644
--- a/simd/jsimd.h
+++ b/simd/jsimd.h
@@ -339,6 +339,12 @@ EXTERN(void) jsimd_h2v2_upsample_mips_dspr2
         (int max_v_samp_factor, JDIMENSION output_width,
          JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
 
+EXTERN(void) jsimd_int_upsample_mips_dspr2
+        (UINT8 h_expand, UINT8 v_expand, JSAMPARRAY input_data,
+         JSAMPARRAY * output_data_ptr, JDIMENSION output_width,
+         int max_v_samp_factor);
+
+
 /* Fancy Upsampling */
 EXTERN(void) jsimd_h2v1_fancy_upsample_mmx
         (int max_v_samp_factor, JDIMENSION downsampled_width,
diff --git a/simd/jsimd_mips.c b/simd/jsimd_mips.c
index ae9d41e..2a1059e 100644
--- a/simd/jsimd_mips.c
+++ b/simd/jsimd_mips.c
@@ -85,6 +85,25 @@ static const int mips_idct_ifast_coefs[4] = {
   0xAC60AC60  // FIX(-2.613125930 / 4) = -21407 = 0xAC61
 };
 
+/* The following struct is borrowed from jdsample.c */
+typedef void (*upsample1_ptr) (j_decompress_ptr cinfo,
+                               jpeg_component_info * compptr,
+                               JSAMPARRAY input_data,
+                               JSAMPARRAY * output_data_ptr);
+
+typedef struct {
+  struct jpeg_upsampler pub;
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+  upsample1_ptr methods[MAX_COMPONENTS];
+  int next_row_out;
+  JDIMENSION rows_to_go;
+  int rowgroup_height[MAX_COMPONENTS];
+  UINT8 h_expand[MAX_COMPONENTS];
+  UINT8 v_expand[MAX_COMPONENTS];
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
 GLOBAL(int)
 jsimd_can_rgb_ycc (void)
 {
@@ -415,6 +434,23 @@ jsimd_can_h2v1_upsample (void)
   return 0;
 }
 
+GLOBAL(int)
+jsimd_can_int_upsample (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MIPS_DSPR2)
+    return 1;
+
+  return 0;
+}
+
 GLOBAL(void)
 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
                      jpeg_component_info * compptr,
@@ -439,6 +475,19 @@ jsimd_h2v1_upsample (j_decompress_ptr cinfo,
                                    output_data_ptr);
 }
 
+GLOBAL(void)
+jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                    JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  jsimd_int_upsample_mips_dspr2(upsample->h_expand[compptr->component_index],
+                                upsample->v_expand[compptr->component_index],
+                                input_data, output_data_ptr,
+                                cinfo->output_width,
+                                cinfo->max_v_samp_factor);
+}
+
 GLOBAL(int)
 jsimd_can_h2v2_fancy_upsample (void)
 {
diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S
index 532966a..4572a51 100644
--- a/simd/jsimd_mips_dspr2.S
+++ b/simd/jsimd_mips_dspr2.S
@@ -1613,6 +1613,94 @@ LEAF_MIPS_DSPR2(jsimd_h2v2_smooth_downsample_mips_dspr2)
      nop
 END(jsimd_h2v2_smooth_downsample_mips_dspr2)
 
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_int_upsample_mips_dspr2)
+/*
+ * a0     - upsample->h_expand[compptr->component_index]
+ * a1     - upsample->v_expand[compptr->component_index]
+ * a2     - input_data (advanced one row pointer per row group)
+ * a3     - output_data_ptr
+ * 16(sp) - cinfo->output_width
+ * 20(sp) - cinfo->max_v_samp_factor
+ */
+    .set at
+
+    SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
+
+    lw        s0, 0(a3)       // s0 = &output_data[outrow] (running pointer)
+    lw        s1, 32(sp)      // s1 = cinfo->output_width
+    lw        s2, 36(sp)      // s2 = cinfo->max_v_samp_factor
+    beqz      s1, 10f         // zero-width rows: nothing to expand or copy
+     li       s3, 0           // s3 = outrow
+    beqz      s2, 10f
+     nop
+0:
+    lw        t3, 0(a2)       // t3 = inptr = input_data[inrow]
+    lw        t8, 0(s0)       // t8 = outptr = output_data[outrow]
+    addu      t5, t8, s1      // t5 = outend
+1:
+    lbu       t2, 0(t3)       // t2 = invalue = *inptr++
+    addiu     t3, 1
+    beqz      a0, 3f
+     move     t0, a0          // t0 = h_expand
+2:
+    sb        t2, 0(t8)       // *outptr++ = invalue, h_expand times
+    addiu     t0, -1
+    bgtz      t0, 2b
+     addiu    t8, 1
+3:
+    bgt       t5, t8, 1b      // while (outptr < outend)
+     nop
+    addiu     t9, a1, -1      // t9 = v_expand - 1 = rows still to replicate
+    blez      t9, 9f
+     nop
+    // Copy row s0[0] into row s0[1]; s0 steps one row pointer (4 bytes) a pass
+5:
+    lw        t3, 0(s0)       // t3 = source row
+    lw        t4, 4(s0)       // t4 = destination row (the next one)
+    subu      t0, s1, 0xF
+    blez      t0, 7f          // fewer than 16 bytes: byte loop only
+     addu     t5, t3, s1      // t5 = end address
+    andi      t7, s1, 0xF     // t7 = residual
+    subu      t8, t5, t7      // t8 = end of the 16-byte chunks
+6:
+    ulw       t0, 0(t3)
+    ulw       t1, 4(t3)
+    ulw       t2, 8(t3)
+    usw       t0, 0(t4)
+    ulw       t0, 12(t3)
+    usw       t1, 4(t4)
+    usw       t2, 8(t4)
+    usw       t0, 12(t4)
+    addiu     t3, 16
+    bne       t3, t8, 6b
+     addiu    t4, 16
+    beqz      t7, 8f
+     nop
+7:
+    lbu       t0, 0(t3)
+    sb        t0, 0(t4)
+    addiu     t3, 1
+    bne       t3, t5, 7b
+     addiu    t4, 1
+8:
+    addiu     t9, -1
+    bgtz      t9, 5b
+     addiu    s0, 4           // the row just written is the next source
+9:
+    addiu     s0, 4           // step past the last row of this group
+    addu      s3, s3, a1      // outrow += v_expand
+    slt       t0, s3, s2
+    bnez      t0, 0b          // while (outrow < max_v_samp_factor)
+     addiu    a2, 4           // inrow++ (next input row pointer)
+10:
+    RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
+
+    j         ra
+     nop
+END(jsimd_int_upsample_mips_dspr2)
+
 /*****************************************************************************/
 LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2)
 /*
-- 
2.40.0