granicus.if.org Git - libjpeg-turbo/commitdiff
SIMD-accelerated int upsample routine for MIPS DSPr2
author DRC <dcommander@users.sourceforge.net>
Sun, 18 May 2014 20:04:47 +0000 (20:04 +0000)
committer DRC <dcommander@users.sourceforge.net>
Sun, 18 May 2014 20:04:47 +0000 (20:04 +0000)
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1315 632fc199-4ca6-4c93-a231-07263d6284db

jccolor.c
jcsample.c
jddctmgr.c
jdsample.c
jsimd.h
jsimd_none.c
simd/jsimd.h
simd/jsimd_mips.c
simd/jsimd_mips_dspr2.S

diff --git a/jccolor.c b/jccolor.c
index 0af122489ee722d837630eb287b09c160dc60727..4be75f714412631bf54b8d7e157731c5d77bb037 100644 (file)
--- a/jccolor.c
+++ b/jccolor.c
@@ -6,6 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2009-2012, D. R. Commander.
+ * Copyright (C) 2014, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains input colorspace conversion routines.
diff --git a/jcsample.c b/jcsample.c
index d8c376b60bdcfb559c45e9a51deecbe733553a98..286f7585dad7cd5b637119deb3e1d3eb00b216c3 100644 (file)
--- a/jcsample.c
+++ b/jcsample.c
@@ -5,6 +5,7 @@
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2014, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains downsampling routines.
diff --git a/jddctmgr.c b/jddctmgr.c
index 93d500902c61026c3289e5276d7a9c2a17e04ddf..40e68f1f299f244eab2c5f6d061cf5b0076df258 100644 (file)
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -7,6 +7,7 @@
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2013, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the inverse-DCT management logic.
diff --git a/jdsample.c b/jdsample.c
index 66f2c04a409981a0a9d92dcad3e372b16838bc4c..275296670ae4bab2de65a768b6a093db8e0c71ac 100644 (file)
--- a/jdsample.c
+++ b/jdsample.c
@@ -6,6 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2014, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains upsampling routines.
@@ -482,7 +483,12 @@ jinit_upsampler (j_decompress_ptr cinfo)
     } else if ((h_out_group % h_in_group) == 0 &&
                (v_out_group % v_in_group) == 0) {
       /* Generic integral-factors upsampling method */
-      upsample->methods[ci] = int_upsample;
+#if defined(__mips__)
+      if (jsimd_can_int_upsample())
+        upsample->methods[ci] = jsimd_int_upsample;
+      else
+#endif
+        upsample->methods[ci] = int_upsample;
       upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
       upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
     } else
diff --git a/jsimd.h b/jsimd.h
index 663c9693400b53c9cedc3f1e48642ffcc63ede13..d45fd700b60e81b13c9cbe28474b3afb2c7c8367 100644 (file)
--- a/jsimd.h
+++ b/jsimd.h
@@ -47,6 +47,7 @@ EXTERN(void) jsimd_h2v1_downsample
 
 EXTERN(int) jsimd_can_h2v2_upsample (void);
 EXTERN(int) jsimd_can_h2v1_upsample (void);
+EXTERN(int) jsimd_can_int_upsample (void);  /* nonzero when jsimd_int_upsample may be selected */
 
 EXTERN(void) jsimd_h2v2_upsample
         (j_decompress_ptr cinfo, jpeg_component_info * compptr,
@@ -54,6 +55,9 @@ EXTERN(void) jsimd_h2v2_upsample
 EXTERN(void) jsimd_h2v1_upsample
         (j_decompress_ptr cinfo, jpeg_component_info * compptr,
          JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_int_upsample  /* SIMD alternative to int_upsample in jdsample.c */
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
 
 EXTERN(int) jsimd_can_h2v2_fancy_upsample (void);
 EXTERN(int) jsimd_can_h2v1_fancy_upsample (void);
diff --git a/jsimd_none.c b/jsimd_none.c
index bdba693083bf54afa50d0e74ad242918f49f9395..96a9842a622d646674ff7b142c1f05e70203c970 100644 (file)
--- a/jsimd_none.c
+++ b/jsimd_none.c
@@ -118,6 +118,18 @@ jsimd_can_h2v1_upsample (void)
   return 0;
 }
 
+GLOBAL(int)  /* Stub capability query for the SIMD integral-factor upsampler */
+jsimd_can_int_upsample (void)
+{
+  return 0;  /* unconditionally reports "not available" */
+}
+
+GLOBAL(void)  /* Stub SIMD upsampler: does nothing; all four parameters are ignored */
+jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                      JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{  /* intentionally empty: jsimd_can_int_upsample() above returns 0, so callers that check it never pick this */
+}
+
 GLOBAL(void)
 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
                      jpeg_component_info * compptr,
diff --git a/simd/jsimd.h b/simd/jsimd.h
index 776edbbaf2f841c8040f5d807c40a06eed70ed7e..609b91f568b239980d20bad7ae02ab2e1ede29d4 100644 (file)
--- a/simd/jsimd.h
+++ b/simd/jsimd.h
@@ -339,6 +339,12 @@ EXTERN(void) jsimd_h2v2_upsample_mips_dspr2
         (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
          JSAMPARRAY * output_data_ptr);
 
+EXTERN(void) jsimd_int_upsample_mips_dspr2  /* assembly routine; see simd/jsimd_mips_dspr2.S */
+        (UINT8 h_expand, UINT8 v_expand, JSAMPARRAY input_data,  /* a0, a1, a2 in the assembly header */
+         JSAMPARRAY * output_data_ptr, JDIMENSION output_width,  /* a3, 16(sp) */
+         int max_v_samp_factor);                                 /* 20(sp) */
+
+
 /* Fancy Upsampling */
 EXTERN(void) jsimd_h2v1_fancy_upsample_mmx
         (int max_v_samp_factor, JDIMENSION downsampled_width,
diff --git a/simd/jsimd_mips.c b/simd/jsimd_mips.c
index ae9d41ee4ceeaafb43b49cf9e475ac260028c910..2a1059e0ea2834f7d9c99d1f7488e02338326dcf 100644 (file)
--- a/simd/jsimd_mips.c
+++ b/simd/jsimd_mips.c
@@ -85,6 +85,25 @@ static const int mips_idct_ifast_coefs[4] = {
   0xAC60AC60            // FIX(-2.613125930 / 4) = -21407 = 0xAC61
 };
 
+/* Copied from jdsample.c so jsimd_int_upsample() can read h_expand/v_expand; keep in sync. */
+typedef void (*upsample1_ptr) (j_decompress_ptr cinfo,  /* signature of a per-component upsample routine */
+                               jpeg_component_info * compptr,
+                               JSAMPARRAY input_data,
+                               JSAMPARRAY * output_data_ptr);
+
+typedef struct {
+  struct jpeg_upsampler pub;             /* first member; cinfo->upsample is cast to this struct */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+  upsample1_ptr methods[MAX_COMPONENTS]; /* per-component upsample routine */
+  int next_row_out;
+  JDIMENSION rows_to_go;
+  int rowgroup_height[MAX_COMPONENTS];
+  UINT8 h_expand[MAX_COMPONENTS];        /* per-component horizontal expansion factor */
+  UINT8 v_expand[MAX_COMPONENTS];        /* per-component vertical expansion factor */
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
 GLOBAL(int)
 jsimd_can_rgb_ycc (void)
 {
@@ -415,6 +434,23 @@ jsimd_can_h2v1_upsample (void)
   return 0;
 }
 
+GLOBAL(int)  /* Returns 1 when the DSPr2 integral-factor upsampler may be used, otherwise 0 */
+jsimd_can_int_upsample (void)
+{
+  init_simd();  /* presumably fills in simd_support; defined outside this hunk */
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)     /* only 8-bit samples are supported */
+    return 0;
+  if (sizeof(JDIMENSION) != 4)  /* the assembly reads output_width with a 32-bit load */
+    return 0;
+
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* DSPr2 flag present in simd_support */
+    return 1;
+
+  return 0;
+}
+
 GLOBAL(void)
 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
                      jpeg_component_info * compptr,
@@ -439,6 +475,19 @@ jsimd_h2v1_upsample (j_decompress_ptr cinfo,
                                    output_data_ptr);
 }
 
+GLOBAL(void)  /* Upsample method that forwards one component to the DSPr2 assembly routine */
+jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                    JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;  /* view through the local my_upsampler copy */
+
+  jsimd_int_upsample_mips_dspr2(upsample->h_expand[compptr->component_index],  /* factors for this component */
+                                upsample->v_expand[compptr->component_index],
+                                input_data, output_data_ptr,
+                                cinfo->output_width,         /* passed on the stack: 16(sp) */
+                                cinfo->max_v_samp_factor);   /* passed on the stack: 20(sp) */
+}
+
 GLOBAL(int)
 jsimd_can_h2v2_fancy_upsample (void)
 {
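diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S
index 532966a598fd11fec48cdf120e68f24366a2239c..4572a51fa5397f0124f15157bb5f4c7dcae7dd01 100644 (file)
--- a/simd/jsimd_mips_dspr2.S
+++ b/simd/jsimd_mips_dspr2.S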
@@ -1613,6 +1613,94 @@ LEAF_MIPS_DSPR2(jsimd_h2v2_smooth_downsample_mips_dspr2)
      nop
 
 END(jsimd_h2v2_smooth_downsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_int_upsample_mips_dspr2)
+/* Integral-factor upsample: store each input byte a0 times, then copy the finished row downwards.
+ * a0     - upsample->h_expand[compptr->component_index]
+ * a1     - upsample->v_expand[compptr->component_index]
+ * a2     - input_data
+ * a3     - output_data_ptr
+ * 16(sp) - cinfo->output_width
+ * 20(sp) - cinfo->max_v_samp_factor
+ * Extra-indented instructions follow a branch (presumably its delay slot; confirm .set noreorder). */
+    .set at              // let the assembler use $at when expanding macros such as bgt
+
+    SAVE_REGS_ON_STACK 16, s0, s1, s2, s3   // spill s0-s3; presumably lowers sp by 16 (hence 32/36 below)
+
+    lw      s0, 0(a3)    // s0 = output_data
+    lw      s1, 32(sp)   // s1 = cinfo->output_width
+    lw      s2, 36(sp)   // s2 = cinfo->max_v_samp_factor
+    li      t6, 0        // t6 = inrow
+    beqz    s2, 10f      // nothing to do when max_v_samp_factor == 0
+     li     s3, 0        // s3 = outrow
+0:                       // ---- outer loop: one input row per pass ----
+    addu    t0, a2, t6   // t0 = input_data + inrow (inrow is added as a byte offset)
+    addu    t7, s0, s3   // t7 = output_data + outrow (outrow is added as a byte offset)
+    lw      t3, 0(t0)    // t3 = inptr
+    lw      t8, 0(t7)    // t8 = outptr
+    beqz    s1, 4f       // skip horizontal expansion when output_width == 0
+     addu   t5, t8, s1   // t5 = outend
+1:
+    lb      t2, 0(t3)    // t2 = invalue = *inptr++  (sign extension is harmless: only the low byte is stored)
+    addiu   t3, 1
+    beqz    a0, 3f       // h_expand == 0: store nothing for this sample
+     move   t0, a0       // t0 = h_expand
+2:
+    sb      t2, 0(t8)    // *outptr = invalue
+    addiu   t0, -1
+    bgtz    t0, 2b       // repeat until h_expand copies are stored
+     addiu  t8, 1        // outptr++
+3:
+    bgt     t5, t8, 1b   // next input sample while outptr < outend
+     nop
+4:
+    addiu   t9, a1, -1   // t9 = v_expand - 1
+    blez    t9, 9f       // no row replication when v_expand <= 1
+     nop
+5:                       // ---- row copy: duplicate the row at 0(s0) into the row at 4(s0) ----
+    lw      t3, 0(s0)    // t3 = source row pointer
+    lw      t4, 4(s0)    // t4 = destination row pointer (next array entry)
+    subu    t0, s1, 0xF  // t0 = output_width - 15
+    blez    t0, 7f       // width <= 15: use the byte loop only
+     addu   t5, t3, s1   // t5 = end address
+    andi    t7, s1, 0xF  // t7 = residual
+    subu    t8, t5, t7   // t8 = end of the part that is a multiple of 16 bytes
+6:                       // copy 16 bytes per pass using unaligned word loads/stores
+    ulw     t0, 0(t3)
+    ulw     t1, 4(t3)
+    ulw     t2, 8(t3)
+    usw     t0, 0(t4)
+    ulw     t0, 12(t3)
+    usw     t1, 4(t4)
+    usw     t2, 8(t4)
+    usw     t0, 12(t4)
+    addiu   t3, 16
+    bne     t3, t8, 6b   // until the 16-byte part is done
+     addiu  t4, 16
+    beqz    t7, 8f       // no tail bytes left
+     nop
+7:                       // copy the remaining bytes one at a time
+    lbu     t0, 0(t3)
+    sb      t0, 0(t4)
+    addiu   t3, 1
+    bne     t3, t5, 7b   // until the end address is reached
+     addiu  t4, 1
+8:
+    addiu   t9, -1
+    bgtz    t9, 5b       // repeat for the remaining v_expand - 1 rows
+     addiu  s0, 8        // NOTE(review): steps two pointers and changes the base reused at 0: - confirm intended
+9:
+    addu    s3, s3, a1   // outrow += v_expand; NOTE(review): used unscaled as a byte offset at 0: - confirm
+    bne     s3, s2, 0b   // loop until outrow == max_v_samp_factor
+     addiu  t6, 1        // inrow++; NOTE(review): used unscaled as a byte offset at 0: - confirm
+10:
+    RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3   // reload s0-s3
+
+    j       ra           // return to caller
+     nop
+END(jsimd_int_upsample_mips_dspr2)
+
 /*****************************************************************************/
 LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2)
 /*