]> granicus.if.org Git - libjpeg-turbo/commitdiff
SIMD-accelerated NULL convert routine for MIPS DSPr2
author DRC <dcommander@users.sourceforge.net>
Thu, 15 May 2014 18:26:01 +0000 (18:26 +0000)
committer DRC <dcommander@users.sourceforge.net>
Thu, 15 May 2014 18:26:01 +0000 (18:26 +0000)
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1304 632fc199-4ca6-4c93-a231-07263d6284db

jccolor.c
jsimd.h
jsimd_none.c
simd/jsimd.h
simd/jsimd_mips.c
simd/jsimd_mips_dspr2.S

index fe9422a95ececa951327128c175ffcc140d6b4b7..f32dcfa1ace6cb8e134d09bde08efe8f97a8e773 100644 (file)
--- a/jccolor.c
+++ b/jccolor.c
@@ -587,19 +587,24 @@ jinit_color_converter (j_compress_ptr cinfo)
     if (rgb_red[cinfo->in_color_space] == 0 &&
         rgb_green[cinfo->in_color_space] == 1 &&
         rgb_blue[cinfo->in_color_space] == 2 &&
-        rgb_pixelsize[cinfo->in_color_space] == 3)
-      cconvert->pub.color_convert = null_convert;
-    else if (cinfo->in_color_space == JCS_RGB ||
-             cinfo->in_color_space == JCS_EXT_RGB ||
-             cinfo->in_color_space == JCS_EXT_RGBX ||
-             cinfo->in_color_space == JCS_EXT_BGR ||
-             cinfo->in_color_space == JCS_EXT_BGRX ||
-             cinfo->in_color_space == JCS_EXT_XBGR ||
-             cinfo->in_color_space == JCS_EXT_XRGB ||
-             cinfo->in_color_space == JCS_EXT_RGBA ||
-             cinfo->in_color_space == JCS_EXT_BGRA ||
-             cinfo->in_color_space == JCS_EXT_ABGR ||
-             cinfo->in_color_space == JCS_EXT_ARGB)
+        rgb_pixelsize[cinfo->in_color_space] == 3) {
+#if defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub.color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub.color_convert = null_convert;
+    } else if (cinfo->in_color_space == JCS_RGB ||
+               cinfo->in_color_space == JCS_EXT_RGB ||
+               cinfo->in_color_space == JCS_EXT_RGBX ||
+               cinfo->in_color_space == JCS_EXT_BGR ||
+               cinfo->in_color_space == JCS_EXT_BGRX ||
+               cinfo->in_color_space == JCS_EXT_XBGR ||
+               cinfo->in_color_space == JCS_EXT_XRGB ||
+               cinfo->in_color_space == JCS_EXT_RGBA ||
+               cinfo->in_color_space == JCS_EXT_BGRA ||
+               cinfo->in_color_space == JCS_EXT_ABGR ||
+               cinfo->in_color_space == JCS_EXT_ARGB)
       cconvert->pub.color_convert = rgb_rgb_convert;
     else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
@@ -625,18 +630,28 @@ jinit_color_converter (j_compress_ptr cinfo)
         cconvert->pub.start_pass = rgb_ycc_start;
         cconvert->pub.color_convert = rgb_ycc_convert;
       }
-    } else if (cinfo->in_color_space == JCS_YCbCr)
-      cconvert->pub.color_convert = null_convert;
-    else
+    } else if (cinfo->in_color_space == JCS_YCbCr) {
+#if defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub.color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub.color_convert = null_convert;
+    } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
   case JCS_CMYK:
     if (cinfo->num_components != 4)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_CMYK)
-      cconvert->pub.color_convert = null_convert;
-    else
+    if (cinfo->in_color_space == JCS_CMYK) {
+#if defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub.color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub.color_convert = null_convert;
+    } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
@@ -646,9 +661,14 @@ jinit_color_converter (j_compress_ptr cinfo)
     if (cinfo->in_color_space == JCS_CMYK) {
       cconvert->pub.start_pass = rgb_ycc_start;
       cconvert->pub.color_convert = cmyk_ycck_convert;
-    } else if (cinfo->in_color_space == JCS_YCCK)
-      cconvert->pub.color_convert = null_convert;
-    else
+    } else if (cinfo->in_color_space == JCS_YCCK) {
+#if defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub.color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub.color_convert = null_convert;
+    } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
@@ -656,7 +676,12 @@ jinit_color_converter (j_compress_ptr cinfo)
     if (cinfo->jpeg_color_space != cinfo->in_color_space ||
         cinfo->num_components != cinfo->input_components)
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    cconvert->pub.color_convert = null_convert;
+#if defined(__mips__)
+    if (jsimd_c_can_null_convert())
+      cconvert->pub.color_convert = jsimd_c_null_convert;
+    else
+#endif
+      cconvert->pub.color_convert = null_convert;
     break;
   }
 }
diff --git a/jsimd.h b/jsimd.h
index 2817137dafa12af68f5a6a2becfcd8763d015051..2b683e8a6b0ba2c9775f4c0942f193582a018847 100644 (file)
--- a/jsimd.h
+++ b/jsimd.h
@@ -40,6 +40,7 @@
 EXTERN(int) jsimd_can_rgb_ycc JPP((void));
 EXTERN(int) jsimd_can_rgb_gray JPP((void));
 EXTERN(int) jsimd_can_ycc_rgb JPP((void));
+EXTERN(int) jsimd_c_can_null_convert JPP((void));  /* nonzero: use jsimd_c_null_convert */
 
 EXTERN(void) jsimd_rgb_ycc_convert
         JPP((j_compress_ptr cinfo,
@@ -53,6 +54,10 @@ EXTERN(void) jsimd_ycc_rgb_convert
         JPP((j_decompress_ptr cinfo,
              JSAMPIMAGE input_buf, JDIMENSION input_row,
              JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_c_null_convert   /* null conversion for compression */
+        JPP((j_compress_ptr cinfo,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
 
 EXTERN(int) jsimd_can_h2v2_downsample JPP((void));
 EXTERN(int) jsimd_can_h2v1_downsample JPP((void));
index a6e827599f0504b6d2f958d46b313c500ae3da64..ac126e01e260aac126eb0de3f5f63fb70e6e7bf9 100644 (file)
@@ -36,6 +36,12 @@ jsimd_can_ycc_rgb (void)
   return 0;
 }
 
+GLOBAL(int)
+jsimd_c_can_null_convert (void)
+{
+  return 0;   /* stub: never reports an accelerated null-convert routine */
+}
+
 GLOBAL(void)
 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
@@ -57,6 +63,13 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
 {
 }
 
+GLOBAL(void)   /* stub: same signature as the accelerated routine, no work */
+jsimd_c_null_convert (j_compress_ptr cinfo,
+                      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                      JDIMENSION output_row, int num_rows)
+{
+}   /* empty: jinit_color_converter installs it only if the can-check is nonzero */
+
 GLOBAL(int)
 jsimd_can_h2v2_downsample (void)
 {
index 7067a2d7512cb7e32e0ec7921a970bcb141ec31f..504429cbebd7a70fa49bc9c1d55c4e8469c95d80 100644 (file)
@@ -475,6 +475,11 @@ EXTERN(void) jsimd_ycc_extxrgb_convert_mips_dspr2
              JSAMPIMAGE input_buf, JDIMENSION input_row,
              JSAMPARRAY output_buf, int num_rows));
 
+EXTERN(void) jsimd_c_null_convert_mips_dspr2   /* DSPr2 assembly routine */
+        JPP((JDIMENSION img_width, JSAMPARRAY input_buf,
+             JSAMPIMAGE output_buf, JDIMENSION output_row,
+             int num_rows, int num_components));
+
 /* SIMD Downsample */
 EXTERN(void) jsimd_h2v2_downsample_mmx
         JPP((JDIMENSION image_width, int max_v_samp_factor,
index aebd549a328d972d34d3bd2c6e8cf2a85226ba1e..62244ffbfd34f5b4922ac4287e1e59aaadacc076 100644 (file)
@@ -139,6 +139,22 @@ jsimd_can_ycc_rgb (void)
   return 0;
 }
 
+/* Report whether the DSPr2 null-convert routine may be used (1) or not (0). */
+GLOBAL(int)
+jsimd_c_can_null_convert (void)
+{
+  init_simd();
+
+  /*
+   * The DSPr2 code is optimised for 8-bit samples and a 4-byte JDIMENSION
+   * only; any other configuration is reported as unsupported.
+   */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
 GLOBAL(void)
 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
@@ -262,6 +278,16 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
         input_row, output_buf, num_rows);
 }
 
+/*
+ * Null conversion for compression, MIPS DSPr2 version.
+ *
+ * Passes the image width and component count taken from cinfo, together with
+ * the caller's buffers, starting output row and row count, to the DSPr2
+ * assembly routine.  Does nothing when simd_support lacks JSIMD_MIPS_DSPR2.
+ */
+GLOBAL(void)
+jsimd_c_null_convert (j_compress_ptr cinfo,
+                      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                      JDIMENSION output_row, int num_rows)
+{
+  /*
+   * The callee name must match the EXTERN declaration and the
+   * LEAF_MIPS_DSPR2 definition: jsimd_c_null_convert_mips_dspr2.
+   */
+  if (simd_support & JSIMD_MIPS_DSPR2)
+    jsimd_c_null_convert_mips_dspr2 (cinfo->image_width, input_buf,
+        output_buf, output_row, num_rows, cinfo->num_components);
+}
+
 GLOBAL(int)
 jsimd_can_h2v2_downsample (void)
 {
index fa8f0c239d1a7a27da131cd41892ced9f0533252..532966a598fd11fec48cdf120e68f24366a2239c 100644 (file)
 
 #include "jsimd_mips_dspr2_asm.h"
 
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_c_null_convert_mips_dspr2)
+/*
+ * a0     - cinfo->image_width
+ * a1     - input_buf
+ * a2     - output_buf
+ * a3     - output_row
+ * 16(sp) - num_rows
+ * 20(sp) - cinfo->num_components
+ *
+ * Null conversion for compression
+ *
+ * For every row and every component ci, the bytes at offsets ci,
+ * ci + num_components, ci + 2 * num_components, ... of the input row are
+ * copied to consecutive bytes of output_buf[ci][output_row].
+ *
+ * When image_width is not a multiple of 4, the first (image_width & 3)
+ * samples of a row are copied one at a time and the rest four at a time.
+ * If image_width is below 4 the one-at-a-time loop already completes the
+ * row, so the four-at-a-time loop is skipped.
+ *
+ * The stack offsets listed above are the values on entry.  The loads below
+ * use 24(sp) and 28(sp), presumably because SAVE_REGS_ON_STACK 8 lowers sp
+ * by 8 (the macro is defined elsewhere).
+ */
+
+    SAVE_REGS_ON_STACK 8, s0, s1
+
+    lw        t9, 24(sp)   // t9 = num_rows
+    lw        s0, 28(sp)   // s0 = cinfo->num_components
+    andi      t0, a0, 3    // t0 = cinfo->image_width & 3
+    beqz      t0, 4f       // no residual
+     nop
+0:                         // row loop, image_width not a multiple of 4
+    addiu     t9, t9, -1
+    bltz      t9, 7f       // all rows done
+     li       t1, 0        // t1 = ci = 0
+1:                         // component loop
+    sll       t3, t1, 2    // t3 = ci * 4
+    lwx       t5, t3(a2)   // t5 = outptr = output_buf[ci]
+    lw        t2, 0(a1)    // t2 = inptr = *input_buf
+    sll       t4, a3, 2    // t4 = output_row * 4
+    lwx       t5, t4(t5)   // t5 = outptr = output_buf[ci][output_row]
+    addu      t2, t2, t1   // inptr += ci
+    addu      s1, t5, a0   // s1 = outptr + image_width (end of row)
+    addu      t6, t5, t0   // t6 = outptr + residual count
+2:                         // residual: one sample per iteration
+    lbu       t3, 0(t2)
+    addiu     t5, t5, 1
+    sb        t3, -1(t5)
+    bne       t6, t5, 2b
+     addu     t2, t2, s0   // inptr += num_components
+    beq       s1, t5, 8f   // image_width < 4: row already complete
+     nop
+3:                         // four samples per iteration
+    lbu       t3, 0(t2)
+    addu      t4, t2, s0
+    addu      t7, t4, s0
+    addu      t8, t7, s0
+    addu      t2, t8, s0   // inptr += 4 * num_components
+    lbu       t4, 0(t4)
+    lbu       t7, 0(t7)
+    lbu       t8, 0(t8)
+    addiu     t5, t5, 4
+    sb        t3, -4(t5)
+    sb        t4, -3(t5)
+    sb        t7, -2(t5)
+    bne       s1, t5, 3b
+     sb       t8, -1(t5)
+8:
+    addiu     t1, t1, 1    // ci++
+    bne       t1, s0, 1b
+     nop
+    addiu     a1, a1, 4    // input_buf++
+    bgez      t9, 0b       // t9 >= 0 here; the test at 0: ends the loop
+     addiu    a3, a3, 1    // output_row++
+    b         7f
+     nop
+4:                         // row loop, image_width a multiple of 4
+    addiu     t9, t9, -1
+    bltz      t9, 7f       // all rows done
+     li       t1, 0        // t1 = ci = 0
+5:                         // component loop
+    sll       t3, t1, 2    // t3 = ci * 4
+    lwx       t5, t3(a2)   // t5 = outptr = output_buf[ci]
+    lw        t2, 0(a1)    // t2 = inptr = *input_buf
+    sll       t4, a3, 2    // t4 = output_row * 4
+    lwx       t5, t4(t5)   // t5 = outptr = output_buf[ci][output_row]
+    addu      t2, t2, t1   // inptr += ci
+    addu      s1, t5, a0   // s1 = outptr + image_width (end of row)
+    addu      t6, t5, t0   // t0 is 0 on this path; t6 is not read below
+6:                         // four samples per iteration
+    lbu       t3, 0(t2)
+    addu      t4, t2, s0
+    addu      t7, t4, s0
+    addu      t8, t7, s0
+    addu      t2, t8, s0   // inptr += 4 * num_components
+    lbu       t4, 0(t4)
+    lbu       t7, 0(t7)
+    lbu       t8, 0(t8)
+    addiu     t5, t5, 4
+    sb        t3, -4(t5)
+    sb        t4, -3(t5)
+    sb        t7, -2(t5)
+    bne       s1, t5, 6b
+     sb       t8, -1(t5)
+    addiu     t1, t1, 1    // ci++
+    bne       t1, s0, 5b
+     nop
+    addiu     a1, a1, 4    // input_buf++
+    bgez      t9, 4b       // t9 >= 0 here; the test at 4: ends the loop
+     addiu    a3, a3, 1    // output_row++
+7:
+    RESTORE_REGS_FROM_STACK 8, s0, s1
+
+    j         ra
+     nop
+
+END(jsimd_c_null_convert_mips_dspr2)
+
 /*****************************************************************************/
 /*
  * jsimd_extrgb_ycc_convert_mips_dspr2